Import Upstream version 1.0.0~rc10
This commit is contained in:
commit
4912a38791
|
@ -0,0 +1,6 @@
|
|||
vendor/pkg
|
||||
/runc
|
||||
/runc-*
|
||||
contrib/cmd/recvtty/recvtty
|
||||
man/man8
|
||||
release
|
|
@ -0,0 +1,10 @@
|
|||
approve_by_comment: true
|
||||
approve_regex: ^LGTM
|
||||
reject_regex: ^Rejected
|
||||
reset_on_push: true
|
||||
author_approval: ignored
|
||||
reviewers:
|
||||
teams:
|
||||
- runc-maintainers
|
||||
name: default
|
||||
required: 2
|
|
@ -0,0 +1,54 @@
|
|||
dist: bionic
|
||||
language: go
|
||||
go:
|
||||
- 1.11.x
|
||||
- 1.12.x
|
||||
- tip
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- go: 1.12.x
|
||||
env:
|
||||
- RUNC_USE_SYSTEMD=1
|
||||
script:
|
||||
- make BUILDTAGS="${BUILDTAGS}" all
|
||||
- sudo PATH="$PATH" make localintegration RUNC_USE_SYSTEMD=1
|
||||
- go: 1.12.x
|
||||
env:
|
||||
- VIRTUALBOX_VERSION=6.0
|
||||
- VAGRANT_VERSION=2.2.6
|
||||
- FEDORA_VERSION=31
|
||||
before_install:
|
||||
- cat /proc/cpuinfo
|
||||
- wget -q https://www.virtualbox.org/download/oracle_vbox_2016.asc -O- | sudo apt-key add - && sudo sh -c "echo deb https://download.virtualbox.org/virtualbox/debian $(lsb_release -cs) contrib >> /etc/apt/sources.list" && sudo apt-get update && sudo apt-get install -yq build-essential gcc make linux-headers-$(uname -r) virtualbox-${VIRTUALBOX_VERSION} && sudo usermod -aG vboxusers $(whoami)
|
||||
- wget https://releases.hashicorp.com/vagrant/${VAGRANT_VERSION}/vagrant_${VAGRANT_VERSION}_$(uname -m).deb && sudo dpkg -i vagrant_${VAGRANT_VERSION}_$(uname -m).deb
|
||||
- vagrant init bento/fedora-${FEDORA_VERSION} && vagrant up && mkdir -p ~/.ssh && vagrant ssh-config >> ~/.ssh/config
|
||||
- ssh default sudo dnf install -y podman
|
||||
script:
|
||||
- ssh default sudo podman build -t test /vagrant
|
||||
- ssh default sudo podman run --privileged --cgroupns=private test make localunittest
|
||||
allow_failures:
|
||||
- go: tip
|
||||
|
||||
go_import_path: github.com/opencontainers/runc
|
||||
|
||||
# `make ci` uses Docker.
|
||||
sudo: required
|
||||
services:
|
||||
- docker
|
||||
|
||||
env:
|
||||
global:
|
||||
- BUILDTAGS="seccomp apparmor selinux ambient"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get -qq update
|
||||
- sudo apt-get install -y libseccomp-dev
|
||||
- go get -u golang.org/x/lint/golint
|
||||
- go get -u github.com/vbatts/git-validation
|
||||
- env | grep TRAVIS_
|
||||
|
||||
script:
|
||||
- git-validation -run DCO,short-subject -v
|
||||
- make BUILDTAGS="${BUILDTAGS}"
|
||||
- make BUILDTAGS="${BUILDTAGS}" clean ci cross
|
|
@ -0,0 +1,124 @@
|
|||
## Contribution Guidelines
|
||||
|
||||
### Security issues
|
||||
|
||||
If you are reporting a security issue, do not create an issue or file a pull
|
||||
request on GitHub. Instead, disclose the issue responsibly by sending an email
|
||||
to security@opencontainers.org (which is inhabited only by the maintainers of
|
||||
the various OCI projects).
|
||||
|
||||
### Pull requests are always welcome
|
||||
|
||||
We are always thrilled to receive pull requests, and do our best to
|
||||
process them as fast as possible. Not sure if that typo is worth a pull
|
||||
request? Do it! We will appreciate it.
|
||||
|
||||
If your pull request is not accepted on the first try, don't be
|
||||
discouraged! If there's a problem with the implementation, hopefully you
|
||||
received feedback on what to improve.
|
||||
|
||||
We're trying very hard to keep runc lean and focused. We don't want it
|
||||
to do everything for everybody. This means that we might decide against
|
||||
incorporating a new feature. However, there might be a way to implement
|
||||
that feature *on top of* runc.
|
||||
|
||||
|
||||
### Conventions
|
||||
|
||||
Fork the repo and make changes on your fork in a feature branch:
|
||||
|
||||
- If it's a bugfix branch, name it XXX-something where XXX is the number of the
|
||||
issue
|
||||
- If it's a feature branch, create an enhancement issue to announce your
|
||||
intentions, and name it XXX-something where XXX is the number of the issue.
|
||||
|
||||
Submit unit tests for your changes. Go has a great test framework built in; use
|
||||
it! Take a look at existing tests for inspiration. Run the full test suite on
|
||||
your branch before submitting a pull request.
|
||||
|
||||
Update the documentation when creating or modifying features. Test
|
||||
your documentation changes for clarity, concision, and correctness, as
|
||||
well as a clean documentation build. See ``docs/README.md`` for more
|
||||
information on building the docs and how docs get released.
|
||||
|
||||
Write clean code. Universally formatted code promotes ease of writing, reading,
|
||||
and maintenance. Always run `gofmt -s -w file.go` on each changed file before
|
||||
committing your changes. Most editors have plugins that do this automatically.
|
||||
|
||||
Pull requests descriptions should be as clear as possible and include a
|
||||
reference to all the issues that they address.
|
||||
|
||||
Pull requests must not contain commits from other users or branches.
|
||||
|
||||
Commit messages must start with a capitalized and short summary (max. 50
|
||||
chars) written in the imperative, followed by an optional, more detailed
|
||||
explanatory text which is separated from the summary by an empty line.
|
||||
|
||||
Code review comments may be added to your pull request. Discuss, then make the
|
||||
suggested modifications and push additional commits to your feature branch. Be
|
||||
sure to post a comment after pushing. The new commits will show up in the pull
|
||||
request automatically, but the reviewers will not be notified unless you
|
||||
comment.
|
||||
|
||||
Before the pull request is merged, make sure that you squash your commits into
|
||||
logical units of work using `git rebase -i` and `git push -f`. After every
|
||||
commit the test suite should be passing. Include documentation changes in the
|
||||
same commit so that a revert would remove all traces of the feature or fix.
|
||||
|
||||
Commits that fix or close an issue should include a reference like `Closes #XXX`
|
||||
or `Fixes #XXX`, which will automatically close the issue when merged.
|
||||
|
||||
### Sign your work
|
||||
|
||||
The sign-off is a simple line at the end of the explanation for the
|
||||
patch, which certifies that you wrote it or otherwise have the right to
|
||||
pass it on as an open-source patch. The rules are pretty simple: if you
|
||||
can certify the below (from
|
||||
[developercertificate.org](http://developercertificate.org/)):
|
||||
|
||||
```
|
||||
Developer Certificate of Origin
|
||||
Version 1.1
|
||||
|
||||
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
|
||||
660 York Street, Suite 102,
|
||||
San Francisco, CA 94110 USA
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this
|
||||
license document, but changing it is not allowed.
|
||||
|
||||
|
||||
Developer's Certificate of Origin 1.1
|
||||
|
||||
By making a contribution to this project, I certify that:
|
||||
|
||||
(a) The contribution was created in whole or in part by me and I
|
||||
have the right to submit it under the open source license
|
||||
indicated in the file; or
|
||||
|
||||
(b) The contribution is based upon previous work that, to the best
|
||||
of my knowledge, is covered under an appropriate open source
|
||||
license and I have the right under that license to submit that
|
||||
work with modifications, whether created in whole or in part
|
||||
by me, under the same open source license (unless I am
|
||||
permitted to submit under a different license), as indicated
|
||||
in the file; or
|
||||
|
||||
(c) The contribution was provided directly to me by some other
|
||||
person who certified (a), (b) or (c) and I have not modified
|
||||
it.
|
||||
|
||||
(d) I understand and agree that this project and the contribution
|
||||
are public and that a record of the contribution (including all
|
||||
personal information I submit with it, including my sign-off) is
|
||||
maintained indefinitely and may be redistributed consistent with
|
||||
this project or the open source license(s) involved.
|
||||
```
|
||||
|
||||
then you just add a line to every git commit message:
|
||||
|
||||
Signed-off-by: Joe Smith <joe@gmail.com>
|
||||
|
||||
using your real name (sorry, no pseudonyms or anonymous contributions.)
|
||||
|
||||
You can add the sign off when creating the git commit via `git commit -s`.
|
|
@ -0,0 +1,66 @@
|
|||
FROM golang:1.12-stretch
|
||||
|
||||
RUN dpkg --add-architecture armel \
|
||||
&& dpkg --add-architecture armhf \
|
||||
&& dpkg --add-architecture arm64 \
|
||||
&& dpkg --add-architecture ppc64el \
|
||||
&& apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
curl \
|
||||
sudo \
|
||||
gawk \
|
||||
iptables \
|
||||
jq \
|
||||
pkg-config \
|
||||
libaio-dev \
|
||||
libcap-dev \
|
||||
libprotobuf-dev \
|
||||
libprotobuf-c0-dev \
|
||||
libnl-3-dev \
|
||||
libnet-dev \
|
||||
libseccomp2 \
|
||||
libseccomp-dev \
|
||||
protobuf-c-compiler \
|
||||
protobuf-compiler \
|
||||
python-minimal \
|
||||
uidmap \
|
||||
kmod \
|
||||
crossbuild-essential-armel crossbuild-essential-armhf crossbuild-essential-arm64 crossbuild-essential-ppc64el \
|
||||
libseccomp-dev:armel libseccomp-dev:armhf libseccomp-dev:arm64 libseccomp-dev:ppc64el \
|
||||
--no-install-recommends \
|
||||
&& apt-get clean
|
||||
|
||||
# Add a dummy user for the rootless integration tests. While runC does
|
||||
# not require an entry in /etc/passwd to operate, one of the tests uses
|
||||
# `git clone` -- and `git clone` does not allow you to clone a
|
||||
# repository if the current uid does not have an entry in /etc/passwd.
|
||||
RUN useradd -u1000 -m -d/home/rootless -s/bin/bash rootless
|
||||
|
||||
# install bats
|
||||
RUN cd /tmp \
|
||||
&& git clone https://github.com/sstephenson/bats.git \
|
||||
&& cd bats \
|
||||
&& git reset --hard 03608115df2071fff4eaaff1605768c275e5f81f \
|
||||
&& ./install.sh /usr/local \
|
||||
&& rm -rf /tmp/bats
|
||||
|
||||
# install criu
|
||||
ENV CRIU_VERSION v3.12
|
||||
RUN mkdir -p /usr/src/criu \
|
||||
&& curl -sSL https://github.com/checkpoint-restore/criu/archive/${CRIU_VERSION}.tar.gz | tar -v -C /usr/src/criu/ -xz --strip-components=1 \
|
||||
&& cd /usr/src/criu \
|
||||
&& make install-criu \
|
||||
&& rm -rf /usr/src/criu
|
||||
|
||||
# setup a playground for us to spawn containers in
|
||||
ENV ROOTFS /busybox
|
||||
RUN mkdir -p ${ROOTFS}
|
||||
|
||||
COPY script/tmpmount /
|
||||
WORKDIR /go/src/github.com/opencontainers/runc
|
||||
ENTRYPOINT ["/tmpmount"]
|
||||
|
||||
ADD . /go/src/github.com/opencontainers/runc
|
||||
|
||||
RUN . tests/integration/multi-arch.bash \
|
||||
&& curl -o- -sSL `get_busybox` | tar xfJC - ${ROOTFS}
|
|
@ -0,0 +1,191 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2014 Docker, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
|
@ -0,0 +1,5 @@
|
|||
Michael Crosby <michael@docker.com> (@crosbymichael)
|
||||
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
|
||||
Daniel, Dao Quang Minh <dqminh89@gmail.com> (@dqminh)
|
||||
Qiang Huang <h.huangqiang@huawei.com> (@hqhq)
|
||||
Aleksa Sarai <asarai@suse.de> (@cyphar)
|
|
@ -0,0 +1,120 @@
|
|||
## Introduction
|
||||
|
||||
Dear maintainer. Thank you for investing the time and energy to help
|
||||
make runc as useful as possible. Maintaining a project is difficult,
|
||||
sometimes unrewarding work. Sure, you will get to contribute cool
|
||||
features to the project. But most of your time will be spent reviewing,
|
||||
cleaning up, documenting, answering questions, justifying design
|
||||
decisions - while everyone has all the fun! But remember - the quality
|
||||
of the maintainers work is what distinguishes the good projects from the
|
||||
great. So please be proud of your work, even the unglamorous parts,
|
||||
and encourage a culture of appreciation and respect for *every* aspect
|
||||
of improving the project - not just the hot new features.
|
||||
|
||||
This document is a manual for maintainers old and new. It explains what
|
||||
is expected of maintainers, how they should work, and what tools are
|
||||
available to them.
|
||||
|
||||
This is a living document - if you see something out of date or missing,
|
||||
speak up!
|
||||
|
||||
## What are a maintainer's responsibility?
|
||||
|
||||
It is every maintainer's responsibility to:
|
||||
|
||||
* 1) Expose a clear roadmap for improving their component.
|
||||
* 2) Deliver prompt feedback and decisions on pull requests.
|
||||
* 3) Be available to anyone with questions, bug reports, criticism etc.
|
||||
on their component. This includes IRC and GitHub issues and pull requests.
|
||||
* 4) Make sure their component respects the philosophy, design and
|
||||
roadmap of the project.
|
||||
|
||||
## How are decisions made?
|
||||
|
||||
Short answer: with pull requests to the runc repository.
|
||||
|
||||
runc is an open-source project with an open design philosophy. This
|
||||
means that the repository is the source of truth for EVERY aspect of the
|
||||
project, including its philosophy, design, roadmap and APIs. *If it's
|
||||
part of the project, it's in the repo. It's in the repo, it's part of
|
||||
the project.*
|
||||
|
||||
As a result, all decisions can be expressed as changes to the
|
||||
repository. An implementation change is a change to the source code. An
|
||||
API change is a change to the API specification. A philosophy change is
|
||||
a change to the philosophy manifesto. And so on.
|
||||
|
||||
All decisions affecting runc, big and small, follow the same 3 steps:
|
||||
|
||||
* Step 1: Open a pull request. Anyone can do this.
|
||||
|
||||
* Step 2: Discuss the pull request. Anyone can do this.
|
||||
|
||||
* Step 3: Accept (`LGTM`) or refuse a pull request. The relevant maintainers do
|
||||
this (see below "Who decides what?")
|
||||
|
||||
*I'm a maintainer, should I make pull requests too?*
|
||||
|
||||
Yes. Nobody should ever push to master directly. All changes should be
|
||||
made through a pull request.
|
||||
|
||||
## Who decides what?
|
||||
|
||||
All decisions are pull requests, and the relevant maintainers make
|
||||
decisions by accepting or refusing the pull request. Review and acceptance
|
||||
by anyone is denoted by adding a comment in the pull request: `LGTM`.
|
||||
However, only currently listed `MAINTAINERS` are counted towards the required
|
||||
two LGTMs.
|
||||
|
||||
Overall the maintainer system works because of mutual respect across the
|
||||
maintainers of the project. The maintainers trust one another to make decisions
|
||||
in the best interests of the project. Sometimes maintainers can disagree and
|
||||
this is part of a healthy project to represent the point of views of various people.
|
||||
In the case where maintainers cannot find agreement on a specific change the
|
||||
role of a Chief Maintainer comes into play.
|
||||
|
||||
The Chief Maintainer for the project is responsible for overall architecture
|
||||
of the project to maintain conceptual integrity. Large decisions and
|
||||
architecture changes should be reviewed by the chief maintainer.
|
||||
The current chief maintainer for the project is Michael Crosby (@crosbymichael).
|
||||
|
||||
Even though the maintainer system is built on trust, if there is a conflict
|
||||
with the chief maintainer on a decision, their decision can be challenged
|
||||
and brought to the technical oversight board if two-thirds of the
|
||||
maintainers vote for an appeal. It is expected that this would be a
|
||||
very exceptional event.
|
||||
|
||||
|
||||
### How are maintainers added?
|
||||
|
||||
The best maintainers have a vested interest in the project. Maintainers
|
||||
are first and foremost contributors that have shown they are committed to
|
||||
the long term success of the project. Contributors wanting to become
|
||||
maintainers are expected to be deeply involved in contributing code,
|
||||
pull request review, and triage of issues in the project for more than two months.
|
||||
|
||||
Just contributing does not make you a maintainer, it is about building trust
|
||||
with the current maintainers of the project and being a person that they can
|
||||
depend on and trust to make decisions in the best interest of the project. The
|
||||
final vote to add a new maintainer should be approved by over 66% of the current
|
||||
maintainers with the chief maintainer having veto power. In case of a veto,
|
||||
conflict resolution rules expressed above apply. The voting period is
|
||||
five business days on the Pull Request to add the new maintainer.
|
||||
|
||||
|
||||
### What is expected of maintainers?
|
||||
|
||||
Part of a healthy project is to have active maintainers to support the community
|
||||
in contributions and perform tasks to keep the project running. Maintainers are
|
||||
expected to be able to respond in a timely manner if their help is required on specific
|
||||
issues where they are pinged. Being a maintainer is a time consuming commitment and should
|
||||
not be taken lightly.
|
||||
|
||||
When a maintainer is unable to perform the required duties they can be removed with
|
||||
a vote by 66% of the current maintainers with the chief maintainer having veto power.
|
||||
The voting period is ten business days. Issues related to a maintainer's performance should
|
||||
be discussed with them among the other maintainers so that they are not surprised by
|
||||
a pull request removing them.
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
.PHONY: all shell dbuild man release \
|
||||
localtest localunittest localintegration \
|
||||
test unittest integration \
|
||||
cross localcross
|
||||
|
||||
CONTAINER_ENGINE := docker
|
||||
GO := go
|
||||
|
||||
SOURCES := $(shell find . 2>&1 | grep -E '.*\.(c|h|go)$$')
|
||||
PREFIX := $(DESTDIR)/usr/local
|
||||
BINDIR := $(PREFIX)/sbin
|
||||
GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
|
||||
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
|
||||
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
|
||||
PROJECT := github.com/opencontainers/runc
|
||||
BUILDTAGS ?= seccomp
|
||||
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
|
||||
COMMIT ?= $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
|
||||
|
||||
MAN_DIR := $(CURDIR)/man/man8
|
||||
MAN_PAGES = $(shell ls $(MAN_DIR)/*.8)
|
||||
MAN_PAGES_BASE = $(notdir $(MAN_PAGES))
|
||||
MAN_INSTALL_PATH := ${PREFIX}/share/man/man8/
|
||||
|
||||
RELEASE_DIR := $(CURDIR)/release
|
||||
|
||||
VERSION := ${shell cat ./VERSION}
|
||||
|
||||
SHELL := $(shell command -v bash 2>/dev/null)
|
||||
|
||||
.DEFAULT: runc
|
||||
|
||||
runc: $(SOURCES)
|
||||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc .
|
||||
|
||||
all: runc recvtty
|
||||
|
||||
recvtty: contrib/cmd/recvtty/recvtty
|
||||
|
||||
contrib/cmd/recvtty/recvtty: $(SOURCES)
|
||||
$(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
|
||||
static: $(SOURCES)
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o runc .
|
||||
CGO_ENABLED=1 $(GO) build $(EXTRA_FLAGS) -tags "$(BUILDTAGS) netgo osusergo" -installsuffix netgo -ldflags "-w -extldflags -static -X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -o contrib/cmd/recvtty/recvtty ./contrib/cmd/recvtty
|
||||
|
||||
release:
|
||||
script/release.sh -r release/$(VERSION) -v $(VERSION)
|
||||
|
||||
dbuild: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} --rm -v $(CURDIR):/go/src/$(PROJECT) --privileged $(RUNC_IMAGE) make clean all
|
||||
|
||||
lint:
|
||||
$(GO) vet $(allpackages)
|
||||
$(GO) fmt $(allpackages)
|
||||
|
||||
man:
|
||||
man/md2man-all.sh
|
||||
|
||||
runcimage:
|
||||
$(CONTAINER_ENGINE) build ${CONTAINER_ENGINE_BUILD_FLAGS} -t $(RUNC_IMAGE) .
|
||||
|
||||
test:
|
||||
make unittest integration rootlessintegration
|
||||
|
||||
localtest:
|
||||
make localunittest localintegration localrootlessintegration
|
||||
|
||||
unittest: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localunittest TESTFLAGS=${TESTFLAGS}
|
||||
|
||||
localunittest: all
|
||||
$(GO) test -timeout 3m -tags "$(BUILDTAGS)" ${TESTFLAGS} -v $(allpackages)
|
||||
|
||||
integration: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v /lib/modules:/lib/modules:ro -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localintegration TESTPATH=${TESTPATH}
|
||||
|
||||
localintegration: all
|
||||
bats -t tests/integration${TESTPATH}
|
||||
|
||||
rootlessintegration: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -t --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localrootlessintegration
|
||||
|
||||
localrootlessintegration: all
|
||||
tests/rootless.sh
|
||||
|
||||
shell: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -ti --privileged --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) bash
|
||||
|
||||
install:
|
||||
install -D -m0755 runc $(BINDIR)/runc
|
||||
|
||||
install-bash:
|
||||
install -D -m0644 contrib/completions/bash/runc $(PREFIX)/share/bash-completion/completions/runc
|
||||
|
||||
install-man:
|
||||
install -d -m 755 $(MAN_INSTALL_PATH)
|
||||
install -m 644 $(MAN_PAGES) $(MAN_INSTALL_PATH)
|
||||
|
||||
uninstall:
|
||||
rm -f $(BINDIR)/runc
|
||||
|
||||
uninstall-bash:
|
||||
rm -f $(PREFIX)/share/bash-completion/completions/runc
|
||||
|
||||
uninstall-man:
|
||||
rm -f $(addprefix $(MAN_INSTALL_PATH),$(MAN_PAGES_BASE))
|
||||
|
||||
clean:
|
||||
rm -f runc runc-*
|
||||
rm -f contrib/cmd/recvtty/recvtty
|
||||
rm -rf $(RELEASE_DIR)
|
||||
rm -rf $(MAN_DIR)
|
||||
|
||||
validate:
|
||||
script/validate-gofmt
|
||||
script/validate-c
|
||||
$(GO) vet $(allpackages)
|
||||
|
||||
ci: validate test release
|
||||
|
||||
cross: runcimage
|
||||
$(CONTAINER_ENGINE) run ${CONTAINER_ENGINE_RUN_FLAGS} -e BUILDTAGS="$(BUILDTAGS)" --rm -v $(CURDIR):/go/src/$(PROJECT) $(RUNC_IMAGE) make localcross
|
||||
|
||||
localcross:
|
||||
CGO_ENABLED=1 GOARCH=arm GOARM=6 CC=arm-linux-gnueabi-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armel .
|
||||
CGO_ENABLED=1 GOARCH=arm GOARM=7 CC=arm-linux-gnueabihf-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-armhf .
|
||||
CGO_ENABLED=1 GOARCH=arm64 CC=aarch64-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-arm64 .
|
||||
CGO_ENABLED=1 GOARCH=ppc64le CC=powerpc64le-linux-gnu-gcc $(GO) build -buildmode=pie $(EXTRA_FLAGS) -ldflags "-X main.gitCommit=${COMMIT} -X main.version=${VERSION} $(EXTRA_LDFLAGS)" -tags "$(BUILDTAGS)" -o runc-ppc64le .
|
||||
|
||||
# memoize allpackages, so that it's executed only once and only if used
|
||||
_allpackages = $(shell $(GO) list ./... | grep -v vendor)
|
||||
allpackages = $(if $(__allpackages),,$(eval __allpackages := $$(_allpackages)))$(__allpackages)
|
|
@ -0,0 +1,17 @@
|
|||
runc
|
||||
|
||||
Copyright 2012-2015 Docker, Inc.
|
||||
|
||||
This product includes software developed at Docker, Inc. (http://www.docker.com).
|
||||
|
||||
The following is courtesy of our legal counsel:
|
||||
|
||||
|
||||
Use and transfer of Docker may be subject to certain restrictions by the
|
||||
United States and other governments.
|
||||
It is your responsibility to ensure that your use and/or transfer does not
|
||||
violate applicable laws.
|
||||
|
||||
For more information, please see http://www.bis.doc.gov
|
||||
|
||||
See also http://www.apache.org/dev/crypto.html and/or seek legal counsel.
|
|
@ -0,0 +1,19 @@
|
|||
# runc principles
|
||||
|
||||
In the design and development of runc and libcontainer we try to follow these principles:
|
||||
|
||||
(Work in progress)
|
||||
|
||||
* Don't try to replace every tool. Instead, be an ingredient to improve them.
|
||||
* Less code is better.
|
||||
* Fewer components are better. Do you really need to add one more class?
|
||||
* 50 lines of straightforward, readable code is better than 10 lines of magic that nobody can understand.
|
||||
* Don't do later what you can do now. "//TODO: refactor" is not acceptable in new code.
|
||||
* When hesitating between two options, choose the one that is easier to reverse.
|
||||
* "No" is temporary; "Yes" is forever. If you're not sure about a new feature, say no. You can change your mind later.
|
||||
* Containers must be portable to the greatest possible number of machines. Be suspicious of any change which makes machines less interchangeable.
|
||||
* The fewer moving parts in a container, the better.
|
||||
* Don't merge it unless you document it.
|
||||
* Don't document it unless you can keep it up-to-date.
|
||||
* Don't merge it unless you test it!
|
||||
* Everyone's problem is slightly different. Focus on the part that is the same for everyone, and solve that.
|
|
@ -0,0 +1,280 @@
|
|||
# runc
|
||||
|
||||
[![Build Status](https://travis-ci.org/opencontainers/runc.svg?branch=master)](https://travis-ci.org/opencontainers/runc)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/opencontainers/runc)](https://goreportcard.com/report/github.com/opencontainers/runc)
|
||||
[![GoDoc](https://godoc.org/github.com/opencontainers/runc?status.svg)](https://godoc.org/github.com/opencontainers/runc)
|
||||
|
||||
## Introduction
|
||||
|
||||
`runc` is a CLI tool for spawning and running containers according to the OCI specification.
|
||||
|
||||
## Releases
|
||||
|
||||
`runc` depends on and tracks the [runtime-spec](https://github.com/opencontainers/runtime-spec) repository.
|
||||
We will try to make sure that `runc` and the OCI specification major versions stay in lockstep.
|
||||
This means that `runc` 1.0.0 should implement the 1.0 version of the specification.
|
||||
|
||||
You can find official releases of `runc` on the [release](https://github.com/opencontainers/runc/releases) page.
|
||||
|
||||
Currently, the following features are not considered to be production-ready:
|
||||
|
||||
* Support for cgroup v2
|
||||
|
||||
## Security
|
||||
|
||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
||||
|
||||
## Building
|
||||
|
||||
`runc` currently supports the Linux platform with various architecture support.
|
||||
It must be built with Go version 1.6 or higher in order for some features to function properly.
|
||||
|
||||
In order to enable seccomp support you will need to install `libseccomp` on your platform.
|
||||
> e.g. `libseccomp-devel` for CentOS, or `libseccomp-dev` for Ubuntu
|
||||
|
||||
Otherwise, if you do not want to build `runc` with seccomp support you can add `BUILDTAGS=""` when running make.
|
||||
|
||||
```bash
|
||||
# create a 'github.com/opencontainers' in your GOPATH/src
|
||||
cd github.com/opencontainers
|
||||
git clone https://github.com/opencontainers/runc
|
||||
cd runc
|
||||
|
||||
make
|
||||
sudo make install
|
||||
```
|
||||
|
||||
You can also use `go get` to install to your `GOPATH`, assuming that you have a `github.com` parent folder already created under `src`:
|
||||
|
||||
```bash
|
||||
go get github.com/opencontainers/runc
|
||||
cd $GOPATH/src/github.com/opencontainers/runc
|
||||
make
|
||||
sudo make install
|
||||
```
|
||||
|
||||
`runc` will be installed to `/usr/local/sbin/runc` on your system.
|
||||
|
||||
|
||||
#### Build Tags
|
||||
|
||||
`runc` supports optional build tags for compiling support of various features.
|
||||
To add build tags to the make option the `BUILDTAGS` variable must be set.
|
||||
|
||||
```bash
|
||||
make BUILDTAGS='seccomp apparmor'
|
||||
```
|
||||
|
||||
| Build Tag | Feature | Dependency |
|
||||
|-----------|------------------------------------|-------------|
|
||||
| seccomp | Syscall filtering | libseccomp |
|
||||
| selinux | selinux process and mount labeling | <none> |
|
||||
| apparmor | apparmor profile support | <none> |
|
||||
| ambient | ambient capability support | kernel 4.3 |
|
||||
| nokmem | disable kernel memory account | <none> |
|
||||
|
||||
|
||||
### Running the test suite
|
||||
|
||||
`runc` currently supports running its test suite via Docker.
|
||||
To run the suite just type `make test`.
|
||||
|
||||
```bash
|
||||
make test
|
||||
```
|
||||
|
||||
There are additional make targets for running the tests outside of a container but this is not recommended as the tests are written with the expectation that they can write and remove anywhere.
|
||||
|
||||
You can run a specific test case by setting the `TESTFLAGS` variable.
|
||||
|
||||
```bash
|
||||
# make test TESTFLAGS="-run=SomeTestFunction"
|
||||
```
|
||||
|
||||
You can run a specific integration test by setting the `TESTPATH` variable.
|
||||
|
||||
```bash
|
||||
# make test TESTPATH="/checkpoint.bats"
|
||||
```
|
||||
|
||||
You can run a test in your proxy environment by setting `DOCKER_BUILD_PROXY` and `DOCKER_RUN_PROXY` variables.
|
||||
|
||||
```bash
|
||||
# make test DOCKER_BUILD_PROXY="--build-arg HTTP_PROXY=http://yourproxy/" DOCKER_RUN_PROXY="-e HTTP_PROXY=http://yourproxy/"
|
||||
```
|
||||
|
||||
### Dependencies Management
|
||||
|
||||
`runc` uses [vndr](https://github.com/LK4D4/vndr) for dependencies management.
|
||||
Please refer to [vndr](https://github.com/LK4D4/vndr) for how to add or update
|
||||
new dependencies.
|
||||
|
||||
## Using runc
|
||||
|
||||
### Creating an OCI Bundle
|
||||
|
||||
In order to use runc you must have your container in the format of an OCI bundle.
|
||||
If you have Docker installed you can use its `export` method to acquire a root filesystem from an existing Docker container.
|
||||
|
||||
```bash
|
||||
# create the top most bundle directory
|
||||
mkdir /mycontainer
|
||||
cd /mycontainer
|
||||
|
||||
# create the rootfs directory
|
||||
mkdir rootfs
|
||||
|
||||
# export busybox via Docker into the rootfs directory
|
||||
docker export $(docker create busybox) | tar -C rootfs -xvf -
|
||||
```
|
||||
|
||||
After a root filesystem is populated you just generate a spec in the format of a `config.json` file inside your bundle.
|
||||
`runc` provides a `spec` command to generate a base template spec that you are then able to edit.
|
||||
To find features and documentation for fields in the spec please refer to the [specs](https://github.com/opencontainers/runtime-spec) repository.
|
||||
|
||||
```bash
|
||||
runc spec
|
||||
```
|
||||
|
||||
### Running Containers
|
||||
|
||||
Assuming you have an OCI bundle from the previous step you can execute the container in two different ways.
|
||||
|
||||
The first way is to use the convenience command `run` that will handle creating, starting, and deleting the container after it exits.
|
||||
|
||||
```bash
|
||||
# run as root
|
||||
cd /mycontainer
|
||||
runc run mycontainerid
|
||||
```
|
||||
|
||||
If you used the unmodified `runc spec` template this should give you a `sh` session inside the container.
|
||||
|
||||
The second way to start a container is using the specs lifecycle operations.
|
||||
This gives you more power over how the container is created and managed while it is running.
|
||||
This will also launch the container in the background so you will have to edit the `config.json` to remove the `terminal` setting for the simple examples here.
|
||||
Your process field in the `config.json` should look like this below with `"terminal": false` and `"args": ["sleep", "5"]`.
|
||||
|
||||
|
||||
```json
|
||||
"process": {
|
||||
"terminal": false,
|
||||
"user": {
|
||||
"uid": 0,
|
||||
"gid": 0
|
||||
},
|
||||
"args": [
|
||||
"sleep", "5"
|
||||
],
|
||||
"env": [
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"TERM=xterm"
|
||||
],
|
||||
"cwd": "/",
|
||||
"capabilities": {
|
||||
"bounding": [
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE"
|
||||
],
|
||||
"effective": [
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE"
|
||||
],
|
||||
"inheritable": [
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE"
|
||||
],
|
||||
"permitted": [
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE"
|
||||
],
|
||||
"ambient": [
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE"
|
||||
]
|
||||
},
|
||||
"rlimits": [
|
||||
{
|
||||
"type": "RLIMIT_NOFILE",
|
||||
"hard": 1024,
|
||||
"soft": 1024
|
||||
}
|
||||
],
|
||||
"noNewPrivileges": true
|
||||
},
|
||||
```
|
||||
|
||||
Now we can go through the lifecycle operations in your shell.
|
||||
|
||||
|
||||
```bash
|
||||
# run as root
|
||||
cd /mycontainer
|
||||
runc create mycontainerid
|
||||
|
||||
# view the container is created and in the "created" state
|
||||
runc list
|
||||
|
||||
# start the process inside the container
|
||||
runc start mycontainerid
|
||||
|
||||
# after 5 seconds view that the container has exited and is now in the stopped state
|
||||
runc list
|
||||
|
||||
# now delete the container
|
||||
runc delete mycontainerid
|
||||
```
|
||||
|
||||
This allows higher level systems to augment the containers creation logic with setup of various settings after the container is created and/or before it is deleted. For example, the container's network stack is commonly set up after `create` but before `start`.
|
||||
|
||||
#### Rootless containers
|
||||
`runc` has the ability to run containers without root privileges. This is called `rootless`. You need to pass some parameters to `runc` in order to run rootless containers. See below and compare with the previous version.
|
||||
|
||||
**Note:** In order to use this feature, "User Namespaces" must be compiled and enabled in your kernel. There are various ways to do this depending on your distribution:
|
||||
- Confirm `CONFIG_USER_NS=y` is set in your kernel configuration (normally found in `/proc/config.gz`)
|
||||
- Arch/Debian: `echo 1 > /proc/sys/kernel/unprivileged_userns_clone`
|
||||
- RHEL/CentOS 7: `echo 28633 > /proc/sys/user/max_user_namespaces`
|
||||
|
||||
Run the following commands as an ordinary user:
|
||||
```bash
|
||||
# Same as the first example
|
||||
mkdir ~/mycontainer
|
||||
cd ~/mycontainer
|
||||
mkdir rootfs
|
||||
docker export $(docker create busybox) | tar -C rootfs -xvf -
|
||||
|
||||
# The --rootless parameter instructs runc spec to generate a configuration for a rootless container, which will allow you to run the container as a non-root user.
|
||||
runc spec --rootless
|
||||
|
||||
# The --root parameter tells runc where to store the container state. It must be writable by the user.
|
||||
runc --root /tmp/runc run mycontainerid
|
||||
```
|
||||
|
||||
#### Supervisors
|
||||
|
||||
`runc` can be used with process supervisors and init systems to ensure that containers are restarted when they exit.
|
||||
An example systemd unit file looks something like this.
|
||||
|
||||
```systemd
|
||||
[Unit]
|
||||
Description=Start My Container
|
||||
|
||||
[Service]
|
||||
Type=forking
|
||||
ExecStart=/usr/local/sbin/runc run -d --pid-file /run/mycontainerid.pid mycontainerid
|
||||
ExecStopPost=/usr/local/sbin/runc delete mycontainerid
|
||||
WorkingDirectory=/mycontainer
|
||||
PIDFile=/run/mycontainerid.pid
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
The code and docs are released under the [Apache 2.0 license](LICENSE).
|
|
@ -0,0 +1,3 @@
|
|||
# Security
|
||||
|
||||
The reporting process and disclosure communications are outlined in [/org/security](https://github.com/opencontainers/org/blob/master/security/).
|
|
@ -0,0 +1,137 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var checkpointCommand = cli.Command{
|
||||
Name: "checkpoint",
|
||||
Usage: "checkpoint a running container",
|
||||
ArgsUsage: `<container-id>
|
||||
|
||||
Where "<container-id>" is the name for the instance of the container to be
|
||||
checkpointed.`,
|
||||
Description: `The checkpoint command saves the state of the container instance.`,
|
||||
Flags: []cli.Flag{
|
||||
cli.StringFlag{Name: "image-path", Value: "", Usage: "path for saving criu image files"},
|
||||
cli.StringFlag{Name: "work-path", Value: "", Usage: "path for saving work files and logs"},
|
||||
cli.StringFlag{Name: "parent-path", Value: "", Usage: "path for previous criu image files in pre-dump"},
|
||||
cli.BoolFlag{Name: "leave-running", Usage: "leave the process running after checkpointing"},
|
||||
cli.BoolFlag{Name: "tcp-established", Usage: "allow open tcp connections"},
|
||||
cli.BoolFlag{Name: "ext-unix-sk", Usage: "allow external unix sockets"},
|
||||
cli.BoolFlag{Name: "shell-job", Usage: "allow shell jobs"},
|
||||
cli.BoolFlag{Name: "lazy-pages", Usage: "use userfaultfd to lazily restore memory pages"},
|
||||
cli.StringFlag{Name: "status-fd", Value: "", Usage: "criu writes \\0 to this FD once lazy-pages is ready"},
|
||||
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
|
||||
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
|
||||
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
|
||||
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
|
||||
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
|
||||
cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
// XXX: Currently this is untested with rootless containers.
|
||||
if os.Geteuid() != 0 || system.RunningInUserNS() {
|
||||
logrus.Warn("runc checkpoint is untested with rootless containers")
|
||||
}
|
||||
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
status, err := container.Status()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status == libcontainer.Created || status == libcontainer.Stopped {
|
||||
fatalf("Container cannot be checkpointed in %s state", status.String())
|
||||
}
|
||||
defer destroy(container)
|
||||
options := criuOptions(context)
|
||||
// these are the mandatory criu options for a container
|
||||
setPageServer(context, options)
|
||||
setManageCgroupsMode(context, options)
|
||||
if err := setEmptyNsMask(context, options); err != nil {
|
||||
return err
|
||||
}
|
||||
return container.Checkpoint(options)
|
||||
},
|
||||
}
|
||||
|
||||
func getCheckpointImagePath(context *cli.Context) string {
|
||||
imagePath := context.String("image-path")
|
||||
if imagePath == "" {
|
||||
imagePath = getDefaultImagePath(context)
|
||||
}
|
||||
return imagePath
|
||||
}
|
||||
|
||||
func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) {
|
||||
// xxx following criu opts are optional
|
||||
// The dump image can be sent to a criu page server
|
||||
if psOpt := context.String("page-server"); psOpt != "" {
|
||||
addressPort := strings.Split(psOpt, ":")
|
||||
if len(addressPort) != 2 {
|
||||
fatal(fmt.Errorf("Use --page-server ADDRESS:PORT to specify page server"))
|
||||
}
|
||||
portInt, err := strconv.Atoi(addressPort[1])
|
||||
if err != nil {
|
||||
fatal(fmt.Errorf("Invalid port number"))
|
||||
}
|
||||
options.PageServer = libcontainer.CriuPageServerInfo{
|
||||
Address: addressPort[0],
|
||||
Port: int32(portInt),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) {
|
||||
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
|
||||
switch cgOpt {
|
||||
case "soft":
|
||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_SOFT
|
||||
case "full":
|
||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_FULL
|
||||
case "strict":
|
||||
options.ManageCgroupsMode = libcontainer.CRIU_CG_MODE_STRICT
|
||||
default:
|
||||
fatal(fmt.Errorf("Invalid manage cgroups mode"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var namespaceMapping = map[specs.LinuxNamespaceType]int{
|
||||
specs.NetworkNamespace: unix.CLONE_NEWNET,
|
||||
}
|
||||
|
||||
func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
|
||||
/* Runc doesn't manage network devices and their configuration */
|
||||
nsmask := unix.CLONE_NEWNET
|
||||
|
||||
for _, ns := range context.StringSlice("empty-ns") {
|
||||
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
|
||||
if !exists {
|
||||
return fmt.Errorf("namespace %q is not supported", ns)
|
||||
}
|
||||
nsmask |= f
|
||||
}
|
||||
|
||||
options.EmptyNs = uint32(nsmask)
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,238 @@
|
|||
/*
|
||||
* Copyright 2016 SUSE LLC
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
// version will be populated by the Makefile, read from
|
||||
// VERSION file of the source code.
|
||||
var version = ""
|
||||
|
||||
// gitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
var gitCommit = ""
|
||||
|
||||
const (
|
||||
usage = `Open Container Initiative contrib/cmd/recvtty
|
||||
|
||||
recvtty is a reference implementation of a consumer of runC's --console-socket
|
||||
API. It has two main modes of operation:
|
||||
|
||||
* single: Only permit one terminal to be sent to the socket, which is
|
||||
then hooked up to the stdio of the recvtty process. This is useful
|
||||
for rudimentary shell management of a container.
|
||||
|
||||
* null: Permit as many terminals to be sent to the socket, but they
|
||||
are read to /dev/null. This is used for testing, and imitates the
|
||||
old runC API's --console=/dev/pts/ptmx hack which would allow for a
|
||||
similar trick. This is probably not what you want to use, unless
|
||||
you're doing something like our bats integration tests.
|
||||
|
||||
To use recvtty, just specify a socket path at which you want to receive
|
||||
terminals:
|
||||
|
||||
$ recvtty [--mode <single|null>] socket.sock
|
||||
`
|
||||
)
|
||||
|
||||
func bail(err error) {
|
||||
fmt.Fprintf(os.Stderr, "[recvtty] fatal error: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
func handleSingle(path string) error {
|
||||
// Open a socket.
|
||||
ln, err := net.Listen("unix", path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
// We only accept a single connection, since we can only really have
|
||||
// one reader for os.Stdin. Plus this is all a PoC.
|
||||
conn, err := ln.Accept()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
// Close ln, to allow for other instances to take over.
|
||||
ln.Close()
|
||||
|
||||
// Get the fd of the connection.
|
||||
unixconn, ok := conn.(*net.UnixConn)
|
||||
if !ok {
|
||||
return fmt.Errorf("failed to cast to unixconn")
|
||||
}
|
||||
|
||||
socket, err := unixconn.File()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer socket.Close()
|
||||
|
||||
// Get the master file descriptor from runC.
|
||||
master, err := utils.RecvFd(socket)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
c, err := console.ConsoleFromFile(master)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
console.ClearONLCR(c.Fd())
|
||||
|
||||
// Copy from our stdio to the master fd.
|
||||
quitChan := make(chan struct{})
|
||||
go func() {
|
||||
io.Copy(os.Stdout, c)
|
||||
quitChan <- struct{}{}
|
||||
}()
|
||||
go func() {
|
||||
io.Copy(c, os.Stdin)
|
||||
quitChan <- struct{}{}
|
||||
}()
|
||||
|
||||
// Only close the master fd once we've stopped copying.
|
||||
<-quitChan
|
||||
c.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
func handleNull(path string) error {
|
||||
// Open a socket.
|
||||
ln, err := net.Listen("unix", path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer ln.Close()
|
||||
|
||||
// As opposed to handleSingle we accept as many connections as we get, but
|
||||
// we don't interact with Stdin at all (and we copy stdout to /dev/null).
|
||||
for {
|
||||
conn, err := ln.Accept()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
go func(conn net.Conn) {
|
||||
// Don't leave references lying around.
|
||||
defer conn.Close()
|
||||
|
||||
// Get the fd of the connection.
|
||||
unixconn, ok := conn.(*net.UnixConn)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
socket, err := unixconn.File()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
defer socket.Close()
|
||||
|
||||
// Get the master file descriptor from runC.
|
||||
master, err := utils.RecvFd(socket)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Just do a dumb copy to /dev/null.
|
||||
devnull, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
// TODO: Handle this nicely.
|
||||
return
|
||||
}
|
||||
|
||||
io.Copy(devnull, master)
|
||||
devnull.Close()
|
||||
}(conn)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
app := cli.NewApp()
|
||||
app.Name = "recvtty"
|
||||
app.Usage = usage
|
||||
|
||||
// Set version to be the same as runC.
|
||||
var v []string
|
||||
if version != "" {
|
||||
v = append(v, version)
|
||||
}
|
||||
if gitCommit != "" {
|
||||
v = append(v, fmt.Sprintf("commit: %s", gitCommit))
|
||||
}
|
||||
app.Version = strings.Join(v, "\n")
|
||||
|
||||
// Set the flags.
|
||||
app.Flags = []cli.Flag{
|
||||
cli.StringFlag{
|
||||
Name: "mode, m",
|
||||
Value: "single",
|
||||
Usage: "Mode of operation (single or null)",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "pid-file",
|
||||
Value: "",
|
||||
Usage: "Path to write daemon process ID to",
|
||||
},
|
||||
}
|
||||
|
||||
app.Action = func(ctx *cli.Context) error {
|
||||
args := ctx.Args()
|
||||
if len(args) != 1 {
|
||||
return fmt.Errorf("need to specify a single socket path")
|
||||
}
|
||||
path := ctx.Args()[0]
|
||||
|
||||
pidPath := ctx.String("pid-file")
|
||||
if pidPath != "" {
|
||||
pid := fmt.Sprintf("%d\n", os.Getpid())
|
||||
if err := ioutil.WriteFile(pidPath, []byte(pid), 0644); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
switch ctx.String("mode") {
|
||||
case "single":
|
||||
if err := handleSingle(path); err != nil {
|
||||
return err
|
||||
}
|
||||
case "null":
|
||||
if err := handleNull(path); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("need to select a valid mode: %s", ctx.String("mode"))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if err := app.Run(os.Args); err != nil {
|
||||
bail(err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,826 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# bash completion file for runc command
|
||||
#
|
||||
# This script provides completion of:
|
||||
# - commands and their options
|
||||
# - filepaths
|
||||
#
|
||||
# To enable the completions either:
|
||||
# - place this file in /usr/share/bash-completion/completions
|
||||
# or
|
||||
# - copy this file to e.g. ~/.runc-completion.sh and add the line
|
||||
# below to your .bashrc after bash completion features are loaded
|
||||
# . ~/.runc-completion.sh
|
||||
#
|
||||
# Configuration:
|
||||
#
|
||||
|
||||
# Note for developers:
|
||||
# Please arrange options sorted alphabetically by long name with the short
|
||||
# options immediately following their corresponding long form.
|
||||
# This order should be applied to lists, alternatives and code blocks.
|
||||
|
||||
__runc_previous_extglob_setting=$(shopt -p extglob)
|
||||
shopt -s extglob
|
||||
|
||||
__runc_list_all() {
|
||||
COMPREPLY=($(compgen -W "$(runc list -q)" -- $cur))
|
||||
}
|
||||
|
||||
__runc_pos_first_nonflag() {
|
||||
local argument_flags=$1
|
||||
|
||||
local counter=$((${subcommand_pos:-${command_pos}} + 1))
|
||||
while [ $counter -le $cword ]; do
|
||||
if [ -n "$argument_flags" ] && eval "case '${words[$counter]}' in $argument_flags) true ;; *) false ;; esac"; then
|
||||
((counter++))
|
||||
else
|
||||
case "${words[$counter]}" in
|
||||
-*) ;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
((counter++))
|
||||
done
|
||||
|
||||
echo $counter
|
||||
}
|
||||
|
||||
# Transforms a multiline list of strings into a single line string
|
||||
# with the words separated by "|".
|
||||
# This is used to prepare arguments to __runc_pos_first_nonflag().
|
||||
__runc_to_alternatives() {
|
||||
local parts=($1)
|
||||
local IFS='|'
|
||||
echo "${parts[*]}"
|
||||
}
|
||||
|
||||
# Transforms a multiline list of options into an extglob pattern
|
||||
# suitable for use in case statements.
|
||||
__runc_to_extglob() {
|
||||
local extglob=$(__runc_to_alternatives "$1")
|
||||
echo "@($extglob)"
|
||||
}
|
||||
|
||||
# Subcommand processing.
|
||||
# Locates the first occurrence of any of the subcommands contained in the
|
||||
# first argument. In case of a match, calls the corresponding completion
|
||||
# function and returns 0.
|
||||
# If no match is found, 1 is returned. The calling function can then
|
||||
# continue processing its completion.
|
||||
#
|
||||
# TODO if the preceding command has options that accept arguments and an
|
||||
# argument is equal to one of the subcommands, this is falsely detected as
|
||||
# a match.
|
||||
__runc_subcommands() {
|
||||
local subcommands="$1"
|
||||
|
||||
local counter=$(($command_pos + 1))
|
||||
while [ $counter -lt $cword ]; do
|
||||
case "${words[$counter]}" in
|
||||
$(__runc_to_extglob "$subcommands"))
|
||||
subcommand_pos=$counter
|
||||
local subcommand=${words[$counter]}
|
||||
local completions_func=_runc_${command}_${subcommand}
|
||||
declare -F $completions_func >/dev/null && $completions_func
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
((counter++))
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
# List all Signals
|
||||
__runc_list_signals() {
|
||||
COMPREPLY=($(compgen -W "$(for i in $(kill -l | xargs); do echo $i; done | grep SIG)"))
|
||||
}
|
||||
|
||||
# suppress trailing whitespace
|
||||
__runc_nospace() {
|
||||
# compopt is not available in ancient bash versions
|
||||
type compopt &>/dev/null && compopt -o nospace
|
||||
}
|
||||
|
||||
# The list of capabilities is defined in types.go, ALL was added manually.
|
||||
__runc_complete_capabilities() {
|
||||
COMPREPLY=($(compgen -W "
|
||||
ALL
|
||||
AUDIT_CONTROL
|
||||
AUDIT_WRITE
|
||||
AUDIT_READ
|
||||
BLOCK_SUSPEND
|
||||
CHOWN
|
||||
DAC_OVERRIDE
|
||||
DAC_READ_SEARCH
|
||||
FOWNER
|
||||
FSETID
|
||||
IPC_LOCK
|
||||
IPC_OWNER
|
||||
KILL
|
||||
LEASE
|
||||
LINUX_IMMUTABLE
|
||||
MAC_ADMIN
|
||||
MAC_OVERRIDE
|
||||
MKNOD
|
||||
NET_ADMIN
|
||||
NET_BIND_SERVICE
|
||||
NET_BROADCAST
|
||||
NET_RAW
|
||||
SETFCAP
|
||||
SETGID
|
||||
SETPCAP
|
||||
SETUID
|
||||
SYS_ADMIN
|
||||
SYS_BOOT
|
||||
SYS_CHROOT
|
||||
SYSLOG
|
||||
SYS_MODULE
|
||||
SYS_NICE
|
||||
SYS_PACCT
|
||||
SYS_PTRACE
|
||||
SYS_RAWIO
|
||||
SYS_RESOURCE
|
||||
SYS_TIME
|
||||
SYS_TTY_CONFIG
|
||||
WAKE_ALARM
|
||||
" -- "$cur"))
|
||||
}
|
||||
|
||||
_runc_exec() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--no-new-privs
|
||||
--tty, -t
|
||||
--detach, -d
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--console-socket
|
||||
--cwd
|
||||
--env, -e
|
||||
--user, -u
|
||||
--additional-gids, -g
|
||||
--process, -p
|
||||
--pid-file
|
||||
--process-label
|
||||
--apparmor
|
||||
--cap, -c
|
||||
--preserve-fds
|
||||
"
|
||||
|
||||
local all_options="$options_with_args $boolean_options"
|
||||
|
||||
case "$prev" in
|
||||
--cap | -c)
|
||||
__runc_complete_capabilities
|
||||
return
|
||||
;;
|
||||
|
||||
--console-socket | --cwd | --process | --apparmor)
|
||||
case "$cur" in
|
||||
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
/*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
--env | -e)
|
||||
COMPREPLY=($(compgen -e -- "$cur"))
|
||||
__runc_nospace
|
||||
return
|
||||
;;
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$all_options" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
# global options that may appear after the runc command
|
||||
_runc_runc() {
|
||||
local boolean_options="
|
||||
$global_boolean_options
|
||||
--help
|
||||
--version -v
|
||||
--debug
|
||||
"
|
||||
local options_with_args="
|
||||
--log
|
||||
--log-format
|
||||
--root
|
||||
--criu
|
||||
--rootless
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
--log | --root | --criu)
|
||||
case "$cur" in
|
||||
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
|
||||
--log-format)
|
||||
COMPREPLY=($(compgen -W 'text json' -- "$cur"))
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
local counter=$(__runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args"))
|
||||
if [ $cword -eq $counter ]; then
|
||||
COMPREPLY=($(compgen -W "${commands[*]} help" -- "$cur"))
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_pause() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
"
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_ps() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
"
|
||||
local options_with_args="
|
||||
--format, -f
|
||||
"
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_delete() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
--format, -f
|
||||
"
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_kill() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
--all
|
||||
-a
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
"kill")
|
||||
__runc_list_all
|
||||
return
|
||||
;;
|
||||
*)
|
||||
__runc_list_signals
|
||||
return
|
||||
;;
|
||||
esac
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_events() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--stats
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--interval
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_list() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--quiet
|
||||
-q
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--format
|
||||
-f
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
--format | -f)
|
||||
COMPREPLY=($(compgen -W 'text json' -- "$cur"))
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
local counter=$(__runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args"))
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_spec() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--rootless
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--bundle
|
||||
-b
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
--bundle | -b)
|
||||
case "$cur" in
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
/*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
local counter=$(__runc_pos_first_nonflag $(__runc_to_extglob "$options_with_args"))
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_run() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--detatch
|
||||
-d
|
||||
--no-subreaper
|
||||
--no-pivot
|
||||
--no-new-keyring
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--bundle
|
||||
-b
|
||||
--console-socket
|
||||
--pid-file
|
||||
--preserve-fds
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
--bundle | -b | --console-socket | --pid-file)
|
||||
case "$cur" in
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
/*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_checkpoint() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
--leave-running
|
||||
--tcp-established
|
||||
--ext-unix-sk
|
||||
--shell-job
|
||||
--lazy-pages
|
||||
--file-locks
|
||||
--pre-dump
|
||||
--auto-dedup
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--image-path
|
||||
--work-path
|
||||
--parent-path
|
||||
--status-fd
|
||||
--page-server
|
||||
--manage-cgroups-mode
|
||||
--empty-ns
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
--page-server) ;;
|
||||
|
||||
--manage-cgroups-mode)
|
||||
COMPREPLY=($(compgen -W "soft full strict" -- "$cur"))
|
||||
return
|
||||
;;
|
||||
|
||||
--image-path | --work-path | --parent-path)
|
||||
case "$cur" in
|
||||
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
_runc_create() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--no-pivot
|
||||
--no-new-keyring
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--bundle
|
||||
-b
|
||||
--console-socket
|
||||
--pid-file
|
||||
--preserve-fds
|
||||
"
|
||||
case "$prev" in
|
||||
--bundle | -b | --console-socket | --pid-file)
|
||||
case "$cur" in
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
/*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
|
||||
}
|
||||
|
||||
_runc_help() {
|
||||
local counter=$(__runc_pos_first_nonflag)
|
||||
if [ $cword -eq $counter ]; then
|
||||
COMPREPLY=($(compgen -W "${commands[*]}" -- "$cur"))
|
||||
fi
|
||||
}
|
||||
|
||||
_runc_restore() {
|
||||
local boolean_options="
|
||||
--help
|
||||
--tcp-established
|
||||
--ext-unix-sk
|
||||
--shell-job
|
||||
--file-locks
|
||||
--detach
|
||||
-d
|
||||
--no-subreaper
|
||||
--no-pivot
|
||||
--auto-dedup
|
||||
--lazy-pages
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
-b
|
||||
--bundle
|
||||
--image-path
|
||||
--work-path
|
||||
--manage-cgroups-mode
|
||||
--pid-file
|
||||
--empty-ns
|
||||
"
|
||||
|
||||
local all_options="$options_with_args $boolean_options"
|
||||
|
||||
case "$prev" in
|
||||
--manage-cgroups-mode)
|
||||
COMPREPLY=($(compgen -W "soft full strict" -- "$cur"))
|
||||
return
|
||||
;;
|
||||
|
||||
--pid-file | --image-path | --work-path | --bundle | -b)
|
||||
case "$cur" in
|
||||
*:*) ;; # TODO somehow do _filedir for stuff inside the image, if it's already specified (which is also somewhat difficult to determine)
|
||||
'')
|
||||
COMPREPLY=($(compgen -W '/' -- "$cur"))
|
||||
__runc_nospace
|
||||
;;
|
||||
/*)
|
||||
_filedir
|
||||
__runc_nospace
|
||||
;;
|
||||
esac
|
||||
return
|
||||
;;
|
||||
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$all_options" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_resume() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
"
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc_state() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
"
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
_runc_start() {
|
||||
local boolean_options="
|
||||
--help
|
||||
-h
|
||||
"
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
_runc_update() {
|
||||
local boolean_options="
|
||||
--help
|
||||
"
|
||||
|
||||
local options_with_args="
|
||||
--blkio-weight
|
||||
--cpu-period
|
||||
--cpu-quota
|
||||
--cpu-rt-period
|
||||
--cpu-rt-runtime
|
||||
--cpu-share
|
||||
--cpuset-cpus
|
||||
--cpuset-mems
|
||||
--kernel-memory
|
||||
--kernel-memory-tcp
|
||||
--memory
|
||||
--memory-reservation
|
||||
--memory-swap
|
||||
--pids-limit
|
||||
--l3-cache-schema
|
||||
--mem-bw-schema
|
||||
"
|
||||
|
||||
case "$prev" in
|
||||
$(__runc_to_extglob "$options_with_args"))
|
||||
return
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$cur" in
|
||||
-*)
|
||||
COMPREPLY=($(compgen -W "$boolean_options $options_with_args" -- "$cur"))
|
||||
;;
|
||||
*)
|
||||
__runc_list_all
|
||||
;;
|
||||
esac
|
||||
}
|
||||
|
||||
_runc() {
|
||||
local previous_extglob_setting=$(shopt -p extglob)
|
||||
shopt -s extglob
|
||||
|
||||
local commands=(
|
||||
checkpoint
|
||||
create
|
||||
delete
|
||||
events
|
||||
exec
|
||||
init
|
||||
kill
|
||||
list
|
||||
pause
|
||||
ps
|
||||
restore
|
||||
resume
|
||||
run
|
||||
spec
|
||||
start
|
||||
state
|
||||
update
|
||||
help
|
||||
h
|
||||
)
|
||||
|
||||
# These options are valid as global options for all client commands
|
||||
# and valid as command options for `runc daemon`
|
||||
local global_boolean_options="
|
||||
--help -h
|
||||
--version -v
|
||||
"
|
||||
|
||||
COMPREPLY=()
|
||||
local cur prev words cword
|
||||
_get_comp_words_by_ref -n : cur prev words cword
|
||||
|
||||
local command='runc' command_pos=0 subcommand_pos
|
||||
local counter=1
|
||||
while [ $counter -lt $cword ]; do
|
||||
case "${words[$counter]}" in
|
||||
-*) ;;
|
||||
=)
|
||||
((counter++))
|
||||
;;
|
||||
*)
|
||||
command="${words[$counter]}"
|
||||
command_pos=$counter
|
||||
break
|
||||
;;
|
||||
esac
|
||||
((counter++))
|
||||
done
|
||||
|
||||
local completions_func=_runc_${command}
|
||||
declare -F $completions_func >/dev/null && $completions_func
|
||||
|
||||
eval "$previous_extglob_setting"
|
||||
return 0
|
||||
}
|
||||
|
||||
eval "$__runc_previous_extglob_setting"
|
||||
unset __runc_previous_extglob_setting
|
||||
|
||||
complete -F _runc runc
|
|
@ -0,0 +1,74 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
var createCommand = cli.Command{
|
||||
Name: "create",
|
||||
Usage: "create a container",
|
||||
ArgsUsage: `<container-id>
|
||||
|
||||
Where "<container-id>" is your name for the instance of the container that you
|
||||
are starting. The name you provide for the container instance must be unique on
|
||||
your host.`,
|
||||
Description: `The create command creates an instance of a container for a bundle. The bundle
|
||||
is a directory with a specification file named "` + specConfig + `" and a root
|
||||
filesystem.
|
||||
|
||||
The specification file includes an args parameter. The args parameter is used
|
||||
to specify command(s) that get run when the container is started. To change the
|
||||
command(s) that get executed on start, edit the args parameter of the spec. See
|
||||
"runc spec --help" for more explanation.`,
|
||||
Flags: []cli.Flag{
|
||||
cli.StringFlag{
|
||||
Name: "bundle, b",
|
||||
Value: "",
|
||||
Usage: `path to the root of the bundle directory, defaults to the current directory`,
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "console-socket",
|
||||
Value: "",
|
||||
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "pid-file",
|
||||
Value: "",
|
||||
Usage: "specify the file to write the process id to",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-pivot",
|
||||
Usage: "do not use pivot root to jail process inside rootfs. This should be used whenever the rootfs is on top of a ramdisk",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-new-keyring",
|
||||
Usage: "do not create a new session keyring for the container. This will cause the container to inherit the calling processes session key",
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "preserve-fds",
|
||||
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := revisePidFile(context); err != nil {
|
||||
return err
|
||||
}
|
||||
spec, err := setupSpec(context)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
status, err := startContainer(context, spec, CT_ACT_CREATE, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// exit with the container's exit status so any external supervisor is
|
||||
// notified of the exit with the correct exit status.
|
||||
os.Exit(status)
|
||||
return nil
|
||||
},
|
||||
}
|
|
@ -0,0 +1,89 @@
|
|||
// +build !solaris
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/urfave/cli"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func killContainer(container libcontainer.Container) error {
|
||||
_ = container.Signal(unix.SIGKILL, false)
|
||||
for i := 0; i < 100; i++ {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
if err := container.Signal(syscall.Signal(0), false); err != nil {
|
||||
destroy(container)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("container init still running")
|
||||
}
|
||||
|
||||
var deleteCommand = cli.Command{
|
||||
Name: "delete",
|
||||
Usage: "delete any resources held by the container often used with detached container",
|
||||
ArgsUsage: `<container-id>
|
||||
|
||||
Where "<container-id>" is the name for the instance of the container.
|
||||
|
||||
EXAMPLE:
|
||||
For example, if the container id is "ubuntu01" and runc list currently shows the
|
||||
status of "ubuntu01" as "stopped" the following will delete resources held for
|
||||
"ubuntu01" removing "ubuntu01" from the runc list of containers:
|
||||
|
||||
# runc delete ubuntu01`,
|
||||
Flags: []cli.Flag{
|
||||
cli.BoolFlag{
|
||||
Name: "force, f",
|
||||
Usage: "Forcibly deletes the container if it is still running (uses SIGKILL)",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
id := context.Args().First()
|
||||
force := context.Bool("force")
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
if lerr, ok := err.(libcontainer.Error); ok && lerr.Code() == libcontainer.ContainerNotExists {
|
||||
// if there was an aborted start or something of the sort then the container's directory could exist but
|
||||
// libcontainer does not see it because the state.json file inside that directory was never created.
|
||||
path := filepath.Join(context.GlobalString("root"), id)
|
||||
if e := os.RemoveAll(path); e != nil {
|
||||
fmt.Fprintf(os.Stderr, "remove %s: %v\n", path, e)
|
||||
}
|
||||
if force {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return err
|
||||
}
|
||||
s, err := container.Status()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch s {
|
||||
case libcontainer.Stopped:
|
||||
destroy(container)
|
||||
case libcontainer.Created:
|
||||
return killContainer(container)
|
||||
default:
|
||||
if force {
|
||||
return killContainer(container)
|
||||
}
|
||||
return fmt.Errorf("cannot delete container %s that is not stopped: %s\n", id, s)
|
||||
}
|
||||
|
||||
return nil
|
||||
},
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
# Checkpoint and Restore #
|
||||
|
||||
For a basic description about checkpointing and restoring containers with
|
||||
`runc` please see [runc-checkpoint(8)](../man/runc-checkpoint.8.md) and
|
||||
[runc-restore(8)](../man/runc-restore.8.md).
|
||||
|
||||
## Checkpoint/Restore Annotations ##
|
||||
|
||||
In addition to specifying options on the command-line like it is described
|
||||
in the man-pages (see above), it is also possible to influence CRIU's
|
||||
behaviour using CRIU configuration files. For details about CRIU's
|
||||
configuration file support please see [CRIU's wiki](https://criu.org/Configuration_files).
|
||||
|
||||
In addition to CRIU's default configuration files `runc` tells CRIU to
|
||||
also evaluate the file `/etc/criu/runc.conf`. Using the annotation
|
||||
`org.criu.config` it is, however, possible to change this additional
|
||||
CRIU configuration file.
|
||||
|
||||
If the annotation `org.criu.config` is set to an empty string `runc`
|
||||
will not pass any additional configuration file to CRIU. With an empty
|
||||
string it is therefore possible to disable the additional CRIU configuration
|
||||
file. This can be used to make sure that no additional configuration file
|
||||
changes CRIU's behaviour accidentally.
|
||||
|
||||
If the annotation `org.criu.config` is set to a non-empty string `runc` will
|
||||
pass that string to CRIU to be evaluated as an additional configuration file.
|
||||
If CRIU cannot open this additional configuration file, it will ignore this
|
||||
file and continue.
|
||||
|
||||
### Annotation Example to disable additional CRIU configuration file ###
|
||||
|
||||
```
|
||||
{
|
||||
"ociVersion": "1.0.0",
|
||||
"annotations": {
|
||||
"org.criu.config": ""
|
||||
},
|
||||
"process": {
|
||||
```
|
||||
|
||||
### Annotation Example to set a specific CRIU configuration file ###
|
||||
|
||||
```
|
||||
{
|
||||
"ociVersion": "1.0.0",
|
||||
"annotations": {
|
||||
"org.criu.config": "/etc/special-runc-criu-options"
|
||||
},
|
||||
"process": {
|
||||
```
|
|
@ -0,0 +1,314 @@
|
|||
# Terminals and Standard IO #
|
||||
|
||||
*Note that the default configuration of `runc` (foreground, new terminal) is
|
||||
generally the best option for most users. This document exists to help explain
|
||||
what the purpose of the different modes is, and to try to steer users away from
|
||||
common mistakes and misunderstandings.*
|
||||
|
||||
In general, most processes on Unix (and Unix-like) operating systems have 3
|
||||
standard file descriptors provided at the start, collectively referred to as
|
||||
"standard IO" (`stdio`):
|
||||
|
||||
* `0`: standard-in (`stdin`), the input stream into the process
|
||||
* `1`: standard-out (`stdout`), the output stream from the process
|
||||
* `2`: standard-error (`stderr`), the error stream from the process
|
||||
|
||||
When creating and running a container via `runc`, it is important to take care
|
||||
to structure the `stdio` the new container's process receives. In some ways
|
||||
containers are just regular processes, while in other ways they're an isolated
|
||||
sub-partition of your machine (in a similar sense to a VM). This means that the
|
||||
structure of IO is not as simple as with ordinary programs (which generally
|
||||
just use the file descriptors you give them).
|
||||
|
||||
## Other File Descriptors ##
|
||||
|
||||
Before we continue, it is important to note that processes can have more file
|
||||
descriptors than just `stdio`. By default in `runc` no other file descriptors
|
||||
will be passed to the spawned container process. If you wish to explicitly pass
|
||||
file descriptors to the container you have to use the `--preserve-fds` option.
|
||||
These ancillary file descriptors don't have any of the strange semantics
|
||||
discussed further in this document (those only apply to `stdio`) -- they are
|
||||
passed untouched by `runc`.
|
||||
|
||||
It should be noted that `--preserve-fds` does not take individual file
|
||||
descriptors to preserve. Instead, it takes how many file descriptors (not
|
||||
including `stdio` or `LISTEN_FDS`) should be passed to the container. In the
|
||||
following example:
|
||||
|
||||
```
|
||||
% runc run --preserve-fds 5 <container>
|
||||
```
|
||||
|
||||
`runc` will pass the first `5` file descriptors (`3`, `4`, `5`, `6`, and `7` --
|
||||
assuming that `LISTEN_FDS` has not been configured) to the container.
|
||||
|
||||
In addition to `--preserve-fds`, `LISTEN_FDS` file descriptors are passed
|
||||
automatically to allow for `systemd`-style socket activation. To extend the
|
||||
above example:
|
||||
|
||||
```
|
||||
% LISTEN_PID=$pid_of_runc LISTEN_FDS=3 runc run --preserve-fds 5 <container>
|
||||
```
|
||||
|
||||
`runc` will now pass the first `8` file descriptors (and it will also pass
|
||||
`LISTEN_FDS=3` and `LISTEN_PID=1` to the container). The first `3` (`3`, `4`,
|
||||
and `5`) were passed due to `LISTEN_FDS` and the other `5` (`6`, `7`, `8`, `9`,
|
||||
and `10`) were passed due to `--preserve-fds`. You should keep this in mind if
|
||||
you use `runc` directly in something like a `systemd` unit file. To disable
|
||||
this `LISTEN_FDS`-style passing just unset `LISTEN_FDS`.
|
||||
|
||||
**Be very careful when passing file descriptors to a container process.** Due
|
||||
to some Linux kernel (mis)features, a container with access to certain types of
|
||||
file descriptors (such as `O_PATH` descriptors) outside of the container's root
|
||||
file system can use these to break out of the container's pivoted mount
|
||||
namespace. [This has resulted in CVEs in the past.][CVE-2016-9962]
|
||||
|
||||
[CVE-2016-9962]: https://nvd.nist.gov/vuln/detail/CVE-2016-9962
|
||||
|
||||
## <a name="terminal-modes" /> Terminal Modes ##
|
||||
|
||||
`runc` supports two distinct methods for passing `stdio` to the container's
|
||||
primary process:
|
||||
|
||||
* [new terminal](#new-terminal) (`terminal: true`)
|
||||
* [pass-through](#pass-through) (`terminal: false`)
|
||||
|
||||
When first using `runc` these two modes will look incredibly similar, but this
|
||||
can be quite deceptive as these different modes have quite different
|
||||
characteristics.
|
||||
|
||||
By default, `runc spec` will create a configuration that will create a new
|
||||
terminal (`terminal: true`). However, if the `terminal: ...` line is not
|
||||
present in `config.json` then pass-through is the default.
|
||||
|
||||
*In general we recommend using new terminal, because it means that tools like
|
||||
`sudo` will work inside your container. But pass-through can be useful if you
|
||||
know what you're doing, or if you're using `runc` as part of a non-interactive
|
||||
pipeline.*
|
||||
|
||||
### <a name="new-terminal"> New Terminal ###
|
||||
|
||||
In new terminal mode, `runc` will create a brand-new "console" (or more
|
||||
precisely, a new pseudo-terminal using the container's namespaced
|
||||
`/dev/pts/ptmx`) for your contained process to use as its `stdio`.
|
||||
|
||||
When you start a process in new terminal mode, `runc` will do the following:
|
||||
|
||||
1. Create a new pseudo-terminal.
|
||||
2. Pass the slave end to the container's primary process as its `stdio`.
|
||||
3. Send the master end to a process to interact with the `stdio` for the
|
||||
container's primary process ([details below](#runc-modes)).
|
||||
|
||||
It should be noted that since a new pseudo-terminal is being used for
|
||||
communication with the container, some strange properties of pseudo-terminals
|
||||
might surprise you. For instance, by default, all new pseudo-terminals
|
||||
translate the byte `'\n'` to the sequence `'\r\n'` on both `stdout` and
|
||||
`stderr`. In addition there are [a whole range of `ioctls(2)` that can only
|
||||
interact with pseudo-terminal `stdio`][tty_ioctl(4)].
|
||||
|
||||
> **NOTE**: In new terminal mode, all three `stdio` file descriptors are the
|
||||
> same underlying file. The reason for this is to match how a shell's `stdio`
|
||||
> looks to a process (as well as remove race condition issues with having to
|
||||
> deal with multiple master pseudo-terminal file descriptors). However this
|
||||
> means that it is not really possible to uniquely distinguish between `stdout`
|
||||
> and `stderr` from the caller's perspective.
|
||||
|
||||
[tty_ioctl(4)]: https://linux.die.net/man/4/tty_ioctl
|
||||
|
||||
### <a name="pass-through"> Pass-Through ###
|
||||
|
||||
If you have already set up some file handles that you wish your contained
|
||||
process to use as its `stdio`, then you can ask `runc` to pass them through to
|
||||
the contained process (this is not necessarily the same as `--preserve-fds`'s
|
||||
passing of file descriptors -- [details below](#runc-modes)). As an example
|
||||
(assuming that `terminal: false` is set in `config.json`):
|
||||
|
||||
```
|
||||
% echo input | runc run some_container > /tmp/log.out 2>& /tmp/log.err
|
||||
```
|
||||
|
||||
Here the container's various `stdio` file descriptors will be substituted with
|
||||
the following:
|
||||
|
||||
* `stdin` will be sourced from the `echo input` pipeline.
|
||||
* `stdout` will be output into `/tmp/log.out` on the host.
|
||||
* `stderr` will be output into `/tmp/log.err` on the host.
|
||||
|
||||
It should be noted that the actual file handles seen inside the container may
|
||||
be different [based on the mode `runc` is being used in](#runc-modes) (for
|
||||
instance, the file referenced by `1` could be `/tmp/log.out` directly or a pipe
|
||||
which `runc` is using to buffer output, based on the mode). However the net
|
||||
result will be the same in either case. In principle you could use the [new
|
||||
terminal mode](#new-terminal) in a pipeline, but the difference will become
|
||||
more clear when you are introduced to [`runc`'s detached mode](#runc-modes).
|
||||
|
||||
## <a name="runc-modes" /> `runc` Modes ##
|
||||
|
||||
`runc` itself runs in two modes:
|
||||
|
||||
* [foreground](#foreground)
|
||||
* [detached](#detached)
|
||||
|
||||
You can use either [terminal mode](#terminal-modes) with either `runc` mode.
|
||||
However, there are considerations that may indicate preference for one mode
|
||||
over another. It should be noted that while two types of modes (terminal and
|
||||
`runc`) are conceptually independent from each other, you should be aware of
|
||||
the intricacies of which combination you are using.
|
||||
|
||||
*In general we recommend using foreground because it's the most
|
||||
straight-forward to use, with the only downside being that you will have a
|
||||
long-running `runc` process. Detached mode is difficult to get right and
|
||||
generally requires having your own `stdio` management.*
|
||||
|
||||
### Foreground ###
|
||||
|
||||
The default (and most straight-forward) mode of `runc`. In this mode, your
|
||||
`runc` command remains in the foreground with the container process as a child.
|
||||
All `stdio` is buffered through the foreground `runc` process (irrespective of
|
||||
which terminal mode you are using). This is conceptually quite similar to
|
||||
running a normal process interactively in a shell (and if you are using `runc`
|
||||
in a shell interactively, this is what you should use).
|
||||
|
||||
Because the `stdio` will be buffered in this mode, some very important
|
||||
peculiarities of this mode should be kept in mind:
|
||||
|
||||
* With [new terminal mode](#new-terminal), the container will see a
|
||||
pseudo-terminal as its `stdio` (as you might expect). However, the `stdio` of
|
||||
the foreground `runc` process will remain the `stdio` that the process was
|
||||
started with -- and `runc` will copy all `stdio` between its `stdio` and the
|
||||
container's `stdio`. This means that while a new pseudo-terminal has been
|
||||
created, the foreground `runc` process manages it over the lifetime of the
|
||||
container.
|
||||
|
||||
* With [pass-through mode](#pass-through), the foreground `runc`'s `stdio` is
|
||||
**not** passed to the container. Instead, the container's `stdio` is a set of
|
||||
pipes which are used to copy data between `runc`'s `stdio` and the
|
||||
container's `stdio`. This means that the container never has direct access to
|
||||
host file descriptors (aside from the pipes created by the container runtime,
|
||||
but that shouldn't be an issue).
|
||||
|
||||
The main drawback of the foreground mode of operation is that it requires a
|
||||
long-running foreground `runc` process. If you kill the foreground `runc`
|
||||
process then you will no longer have access to the `stdio` of the container
|
||||
(and in most cases this will result in the container dying abnormally due to
|
||||
`SIGPIPE` or some other error). By extension this means that any bug in the
|
||||
long-running foreground `runc` process (such as a memory leak) or a stray
|
||||
OOM-kill sweep could result in your container being killed **through no fault
|
||||
of the user**. In addition, there is no way in foreground mode of passing a
|
||||
file descriptor directly to the container process as its `stdio` (like
|
||||
`--preserve-fds` does).
|
||||
|
||||
These shortcomings are obviously sub-optimal and are the reason that `runc` has
|
||||
an additional mode called "detached mode".
|
||||
|
||||
### Detached ###
|
||||
|
||||
In contrast to foreground mode, in detached mode there is no long-running
|
||||
foreground `runc` process once the container has started. In fact, there is no
|
||||
long-running `runc` process at all. However, this means that it is up to the
|
||||
caller to handle the `stdio` after `runc` has set it up for you. In a shell
|
||||
this means that the `runc` command will exit and control will return to the
|
||||
shell, after the container has been set up.
|
||||
|
||||
You can run `runc` in detached mode in one of the following ways:
|
||||
|
||||
* `runc run -d ...` which operates similar to `runc run` but is detached.
|
||||
* `runc create` followed by `runc start` which is the standard container
|
||||
lifecycle defined by the OCI runtime specification (`runc create` sets up the
|
||||
container completely, waiting for `runc start` to begin execution of user
|
||||
code).
|
||||
|
||||
The main use-case of detached mode is for higher-level tools that want to be
|
||||
wrappers around `runc`. By running `runc` in detached mode, those tools have
|
||||
far more control over the container's `stdio` without `runc` getting in the
|
||||
way (most wrappers around `runc` like `cri-o` or `containerd` use detached mode
|
||||
for this reason).
|
||||
|
||||
Unfortunately using detached mode is a bit more complicated and requires more
|
||||
care than the foreground mode -- mainly because it is now up to the caller to
|
||||
handle the `stdio` of the container.
|
||||
|
||||
#### Detached Pass-Through ####
|
||||
|
||||
In detached mode, pass-through actually does what it says on the tin -- the
|
||||
`stdio` file descriptors of the `runc` process are passed through (untouched)
|
||||
to the container's `stdio`. The purpose of this option is to allow a user to
|
||||
set up `stdio` for a container themselves and then force `runc` to just use
|
||||
their pre-prepared `stdio` (without any pseudo-terminal funny business). *If
|
||||
you don't see why this would be useful, don't use this option.*
|
||||
|
||||
**You must be incredibly careful when using detached pass-through (especially
|
||||
in a shell).** The reason for this is that by using detached pass-through you
|
||||
are passing host file descriptors to the container. In the case of a shell,
|
||||
usually your `stdio` is going to be a pseudo-terminal (on your host). A
|
||||
malicious container could take advantage of TTY-specific `ioctls` like
|
||||
`TIOCSTI` to fake input into the **host** shell (remember that in detached
|
||||
mode, control is returned to your shell and so the terminal you've given the
|
||||
container is being read by a shell prompt).
|
||||
|
||||
There are also several other issues with running non-malicious containers in a
|
||||
shell with detached pass-through (where you pass your shell's `stdio` to the
|
||||
container):
|
||||
|
||||
* Output from the container will be interleaved with output from your shell (in
|
||||
a non-deterministic way), without any real way of distinguishing from where a
|
||||
particular piece of output came from.
|
||||
|
||||
* Any input to `stdin` will be non-deterministically split and given to either
|
||||
the container or the shell (because both are blocked on a `read(2)` of the
|
||||
same FIFO-style file descriptor).
|
||||
|
||||
They are all related to the fact that there is going to be a race when either
|
||||
your host or the container tries to read from (or write to) `stdio`. This
|
||||
problem is especially obvious when in a shell, where usually the terminal has
|
||||
been put into raw mode (where each individual key-press should cause `read(2)`
|
||||
to return).
|
||||
|
||||
> **NOTE**: There is also currently a [known problem][issue-1721] where using
|
||||
> detached pass-through will result in the container hanging if the `stdout` or
|
||||
> `stderr` is a pipe (though this should be a temporary issue).
|
||||
|
||||
[issue-1721]: https://github.com/opencontainers/runc/issues/1721
|
||||
|
||||
#### Detached New Terminal ####
|
||||
|
||||
When creating a new pseudo-terminal in detached mode, and fairly obvious
|
||||
problem appears -- how do we use the new terminal that `runc` created? Unlike
|
||||
in pass-through, `runc` has created a new set of file descriptors that need to
|
||||
be used by *something* in order for container communication to work.
|
||||
|
||||
The way this problem is resolved is through the use of Unix domain sockets.
|
||||
There is a feature of Unix sockets called `SCM_RIGHTS` which allows a file
|
||||
descriptor to be sent through a Unix socket to a completely separate process
|
||||
(which can then use that file descriptor as though they opened it). When using
|
||||
`runc` in detached new terminal mode, this is how a user gets access to the
|
||||
pseudo-terminal's master file descriptor.
|
||||
|
||||
To this end, there is a new option (which is required if you want to use `runc`
|
||||
in detached new terminal mode): `--console-socket`. This option takes the path
|
||||
to a Unix domain socket which `runc` will connect to and send the
|
||||
pseudo-terminal master file descriptor down. The general process for getting
|
||||
the pseudo-terminal master is as follows:
|
||||
|
||||
1. Create a Unix domain socket at some path, `$socket_path`.
|
||||
2. Call `runc run` or `runc create` with the argument `--console-socket
|
||||
$socket_path`.
|
||||
3. Using `recvmsg(2)` retrieve the file descriptor sent using `SCM_RIGHTS` by
|
||||
`runc`.
|
||||
4. Now the manager can interact with the `stdio` of the container, using the
|
||||
retrieved pseudo-terminal master.
|
||||
|
||||
After `runc` exits, the only process with a copy of the pseudo-terminal master
|
||||
file descriptor is whoever read the file descriptor from the socket.
|
||||
|
||||
> **NOTE**: Currently `runc` doesn't support abstract socket addresses (due to
|
||||
> it not being possible to pass an `argv` with a null-byte as the first
|
||||
> character). In the future this may change, but currently you must use a valid
|
||||
> path name.
|
||||
|
||||
In order to help users make use of detached new terminal mode, we have provided
|
||||
a [Go implementation in the `go-runc` bindings][containerd/go-runc.Socket], as
|
||||
well as [a simple client][recvtty].
|
||||
|
||||
[containerd/go-runc.Socket]: https://godoc.org/github.com/containerd/go-runc#Socket
|
||||
[recvtty]: /contrib/cmd/recvtty
|
|
@ -0,0 +1,215 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/types"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
var eventsCommand = cli.Command{
|
||||
Name: "events",
|
||||
Usage: "display container events such as OOM notifications, cpu, memory, and IO usage statistics",
|
||||
ArgsUsage: `<container-id>
|
||||
|
||||
Where "<container-id>" is the name for the instance of the container.`,
|
||||
Description: `The events command displays information about the container. By default the
|
||||
information is displayed once every 5 seconds.`,
|
||||
Flags: []cli.Flag{
|
||||
cli.DurationFlag{Name: "interval", Value: 5 * time.Second, Usage: "set the stats collection interval"},
|
||||
cli.BoolFlag{Name: "stats", Usage: "display the container's stats then exit"},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, exactArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
duration := context.Duration("interval")
|
||||
if duration <= 0 {
|
||||
return fmt.Errorf("duration interval must be greater than 0")
|
||||
}
|
||||
status, err := container.Status()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if status == libcontainer.Stopped {
|
||||
return fmt.Errorf("container with id %s is not running", container.ID())
|
||||
}
|
||||
var (
|
||||
stats = make(chan *libcontainer.Stats, 1)
|
||||
events = make(chan *types.Event, 1024)
|
||||
group = &sync.WaitGroup{}
|
||||
)
|
||||
group.Add(1)
|
||||
go func() {
|
||||
defer group.Done()
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
for e := range events {
|
||||
if err := enc.Encode(e); err != nil {
|
||||
logrus.Error(err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
if context.Bool("stats") {
|
||||
s, err := container.Stats()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
events <- &types.Event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
|
||||
close(events)
|
||||
group.Wait()
|
||||
return nil
|
||||
}
|
||||
go func() {
|
||||
for range time.Tick(context.Duration("interval")) {
|
||||
s, err := container.Stats()
|
||||
if err != nil {
|
||||
logrus.Error(err)
|
||||
continue
|
||||
}
|
||||
stats <- s
|
||||
}
|
||||
}()
|
||||
n, err := container.NotifyOOM()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case _, ok := <-n:
|
||||
if ok {
|
||||
// this means an oom event was received, if it is !ok then
|
||||
// the channel was closed because the container stopped and
|
||||
// the cgroups no longer exist.
|
||||
events <- &types.Event{Type: "oom", ID: container.ID()}
|
||||
} else {
|
||||
n = nil
|
||||
}
|
||||
case s := <-stats:
|
||||
events <- &types.Event{Type: "stats", ID: container.ID(), Data: convertLibcontainerStats(s)}
|
||||
}
|
||||
if n == nil {
|
||||
close(events)
|
||||
break
|
||||
}
|
||||
}
|
||||
group.Wait()
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
|
||||
cg := ls.CgroupStats
|
||||
if cg == nil {
|
||||
return nil
|
||||
}
|
||||
var s types.Stats
|
||||
s.Pids.Current = cg.PidsStats.Current
|
||||
s.Pids.Limit = cg.PidsStats.Limit
|
||||
|
||||
s.CPU.Usage.Kernel = cg.CpuStats.CpuUsage.UsageInKernelmode
|
||||
s.CPU.Usage.User = cg.CpuStats.CpuUsage.UsageInUsermode
|
||||
s.CPU.Usage.Total = cg.CpuStats.CpuUsage.TotalUsage
|
||||
s.CPU.Usage.Percpu = cg.CpuStats.CpuUsage.PercpuUsage
|
||||
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
|
||||
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
|
||||
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
|
||||
|
||||
s.Memory.Cache = cg.MemoryStats.Cache
|
||||
s.Memory.Kernel = convertMemoryEntry(cg.MemoryStats.KernelUsage)
|
||||
s.Memory.KernelTCP = convertMemoryEntry(cg.MemoryStats.KernelTCPUsage)
|
||||
s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage)
|
||||
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
|
||||
s.Memory.Raw = cg.MemoryStats.Stats
|
||||
|
||||
s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
|
||||
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
|
||||
s.Blkio.IoQueuedRecursive = convertBlkioEntry(cg.BlkioStats.IoQueuedRecursive)
|
||||
s.Blkio.IoServiceTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceTimeRecursive)
|
||||
s.Blkio.IoWaitTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoWaitTimeRecursive)
|
||||
s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive)
|
||||
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
|
||||
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
|
||||
|
||||
s.Hugetlb = make(map[string]types.Hugetlb)
|
||||
for k, v := range cg.HugetlbStats {
|
||||
s.Hugetlb[k] = convertHugtlb(v)
|
||||
}
|
||||
|
||||
if is := ls.IntelRdtStats; is != nil {
|
||||
if intelrdt.IsCatEnabled() {
|
||||
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
|
||||
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
|
||||
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
|
||||
}
|
||||
if intelrdt.IsMbaEnabled() {
|
||||
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
|
||||
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
|
||||
s.IntelRdt.MemBwSchema = is.MemBwSchema
|
||||
}
|
||||
}
|
||||
|
||||
s.NetworkInterfaces = ls.Interfaces
|
||||
return &s
|
||||
}
|
||||
|
||||
func convertHugtlb(c cgroups.HugetlbStats) types.Hugetlb {
|
||||
return types.Hugetlb{
|
||||
Usage: c.Usage,
|
||||
Max: c.MaxUsage,
|
||||
Failcnt: c.Failcnt,
|
||||
}
|
||||
}
|
||||
|
||||
func convertMemoryEntry(c cgroups.MemoryData) types.MemoryEntry {
|
||||
return types.MemoryEntry{
|
||||
Limit: c.Limit,
|
||||
Usage: c.Usage,
|
||||
Max: c.MaxUsage,
|
||||
Failcnt: c.Failcnt,
|
||||
}
|
||||
}
|
||||
|
||||
func convertBlkioEntry(c []cgroups.BlkioStatEntry) []types.BlkioEntry {
|
||||
var out []types.BlkioEntry
|
||||
for _, e := range c {
|
||||
out = append(out, types.BlkioEntry{
|
||||
Major: e.Major,
|
||||
Minor: e.Minor,
|
||||
Op: e.Op,
|
||||
Value: e.Value,
|
||||
})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func convertL3CacheInfo(i *intelrdt.L3CacheInfo) *types.L3CacheInfo {
|
||||
return &types.L3CacheInfo{
|
||||
CbmMask: i.CbmMask,
|
||||
MinCbmBits: i.MinCbmBits,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
||||
|
||||
func convertMemBwInfo(i *intelrdt.MemBwInfo) *types.MemBwInfo {
|
||||
return &types.MemBwInfo{
|
||||
BandwidthGran: i.BandwidthGran,
|
||||
DelayLinear: i.DelayLinear,
|
||||
MinBandwidth: i.MinBandwidth,
|
||||
NumClosids: i.NumClosids,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,235 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
var execCommand = cli.Command{
|
||||
Name: "exec",
|
||||
Usage: "execute new process inside the container",
|
||||
ArgsUsage: `<container-id> <command> [command options] || -p process.json <container-id>
|
||||
|
||||
Where "<container-id>" is the name for the instance of the container and
|
||||
"<command>" is the command to be executed in the container.
|
||||
"<command>" can't be empty unless a "-p" flag provided.
|
||||
|
||||
EXAMPLE:
|
||||
For example, if the container is configured to run the linux ps command the
|
||||
following will output a list of processes running in the container:
|
||||
|
||||
# runc exec <container-id> ps`,
|
||||
Flags: []cli.Flag{
|
||||
cli.StringFlag{
|
||||
Name: "console-socket",
|
||||
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "cwd",
|
||||
Usage: "current working directory in the container",
|
||||
},
|
||||
cli.StringSliceFlag{
|
||||
Name: "env, e",
|
||||
Usage: "set environment variables",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "tty, t",
|
||||
Usage: "allocate a pseudo-TTY",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "user, u",
|
||||
Usage: "UID (format: <uid>[:<gid>])",
|
||||
},
|
||||
cli.Int64SliceFlag{
|
||||
Name: "additional-gids, g",
|
||||
Usage: "additional gids",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "process, p",
|
||||
Usage: "path to the process.json",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "detach,d",
|
||||
Usage: "detach from the container's process",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "pid-file",
|
||||
Value: "",
|
||||
Usage: "specify the file to write the process id to",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "process-label",
|
||||
Usage: "set the asm process label for the process commonly used with selinux",
|
||||
},
|
||||
cli.StringFlag{
|
||||
Name: "apparmor",
|
||||
Usage: "set the apparmor profile for the process",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-new-privs",
|
||||
Usage: "set the no new privileges value for the process",
|
||||
},
|
||||
cli.StringSliceFlag{
|
||||
Name: "cap, c",
|
||||
Value: &cli.StringSlice{},
|
||||
Usage: "add a capability to the bounding set for the process",
|
||||
},
|
||||
cli.BoolFlag{
|
||||
Name: "no-subreaper",
|
||||
Usage: "disable the use of the subreaper used to reap reparented processes",
|
||||
Hidden: true,
|
||||
},
|
||||
cli.IntFlag{
|
||||
Name: "preserve-fds",
|
||||
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, minArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := revisePidFile(context); err != nil {
|
||||
return err
|
||||
}
|
||||
status, err := execProcess(context)
|
||||
if err == nil {
|
||||
os.Exit(status)
|
||||
}
|
||||
return fmt.Errorf("exec failed: %v", err)
|
||||
},
|
||||
SkipArgReorder: true,
|
||||
}
|
||||
|
||||
func execProcess(context *cli.Context) (int, error) {
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
status, err := container.Status()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
if status == libcontainer.Stopped {
|
||||
return -1, fmt.Errorf("cannot exec a container that has stopped")
|
||||
}
|
||||
path := context.String("process")
|
||||
if path == "" && len(context.Args()) == 1 {
|
||||
return -1, fmt.Errorf("process args cannot be empty")
|
||||
}
|
||||
detach := context.Bool("detach")
|
||||
state, err := container.State()
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
bundle := utils.SearchLabels(state.Config.Labels, "bundle")
|
||||
p, err := getProcess(context, bundle)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
logLevel := "info"
|
||||
if context.GlobalBool("debug") {
|
||||
logLevel = "debug"
|
||||
}
|
||||
|
||||
r := &runner{
|
||||
enableSubreaper: false,
|
||||
shouldDestroy: false,
|
||||
container: container,
|
||||
consoleSocket: context.String("console-socket"),
|
||||
detach: detach,
|
||||
pidFile: context.String("pid-file"),
|
||||
action: CT_ACT_RUN,
|
||||
init: false,
|
||||
preserveFDs: context.Int("preserve-fds"),
|
||||
logLevel: logLevel,
|
||||
}
|
||||
return r.run(p)
|
||||
}
|
||||
|
||||
func getProcess(context *cli.Context, bundle string) (*specs.Process, error) {
|
||||
if path := context.String("process"); path != "" {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
var p specs.Process
|
||||
if err := json.NewDecoder(f).Decode(&p); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &p, validateProcessSpec(&p)
|
||||
}
|
||||
// process via cli flags
|
||||
if err := os.Chdir(bundle); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
spec, err := loadSpec(specConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p := spec.Process
|
||||
p.Args = context.Args()[1:]
|
||||
// override the cwd, if passed
|
||||
if context.String("cwd") != "" {
|
||||
p.Cwd = context.String("cwd")
|
||||
}
|
||||
if ap := context.String("apparmor"); ap != "" {
|
||||
p.ApparmorProfile = ap
|
||||
}
|
||||
if l := context.String("process-label"); l != "" {
|
||||
p.SelinuxLabel = l
|
||||
}
|
||||
if caps := context.StringSlice("cap"); len(caps) > 0 {
|
||||
for _, c := range caps {
|
||||
p.Capabilities.Bounding = append(p.Capabilities.Bounding, c)
|
||||
p.Capabilities.Inheritable = append(p.Capabilities.Inheritable, c)
|
||||
p.Capabilities.Effective = append(p.Capabilities.Effective, c)
|
||||
p.Capabilities.Permitted = append(p.Capabilities.Permitted, c)
|
||||
p.Capabilities.Ambient = append(p.Capabilities.Ambient, c)
|
||||
}
|
||||
}
|
||||
// append the passed env variables
|
||||
p.Env = append(p.Env, context.StringSlice("env")...)
|
||||
|
||||
// set the tty
|
||||
if context.IsSet("tty") {
|
||||
p.Terminal = context.Bool("tty")
|
||||
}
|
||||
if context.IsSet("no-new-privs") {
|
||||
p.NoNewPrivileges = context.Bool("no-new-privs")
|
||||
}
|
||||
// override the user, if passed
|
||||
if context.String("user") != "" {
|
||||
u := strings.SplitN(context.String("user"), ":", 2)
|
||||
if len(u) > 1 {
|
||||
gid, err := strconv.Atoi(u[1])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing %s as int for gid failed: %v", u[1], err)
|
||||
}
|
||||
p.User.GID = uint32(gid)
|
||||
}
|
||||
uid, err := strconv.Atoi(u[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing %s as int for uid failed: %v", u[0], err)
|
||||
}
|
||||
p.User.UID = uint32(uid)
|
||||
}
|
||||
for _, gid := range context.Int64Slice("additional-gids") {
|
||||
if gid < 0 {
|
||||
return nil, fmt.Errorf("additional-gids must be a positive number %d", gid)
|
||||
}
|
||||
p.User.AdditionalGids = append(p.User.AdditionalGids, uint32(gid))
|
||||
}
|
||||
return p, validateProcessSpec(p)
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"runtime"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer"
|
||||
"github.com/opencontainers/runc/libcontainer/logs"
|
||||
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
func init() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
|
||||
level := os.Getenv("_LIBCONTAINER_LOGLEVEL")
|
||||
logLevel, err := logrus.ParseLevel(level)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("libcontainer: failed to parse log level: %q: %v", level, err))
|
||||
}
|
||||
|
||||
err = logs.ConfigureLogging(logs.Config{
|
||||
LogPipeFd: os.Getenv("_LIBCONTAINER_LOGPIPE"),
|
||||
LogFormat: "json",
|
||||
LogLevel: logLevel,
|
||||
})
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("libcontainer: failed to configure logging: %v", err))
|
||||
}
|
||||
logrus.Debug("child process in init()")
|
||||
}
|
||||
}
|
||||
|
||||
var initCommand = cli.Command{
|
||||
Name: "init",
|
||||
Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
|
||||
Action: func(context *cli.Context) error {
|
||||
factory, _ := libcontainer.New("")
|
||||
if err := factory.StartInitialization(); err != nil {
|
||||
// as the error is sent back to the parent there is no need to log
|
||||
// or write it to stderr because the parent process will handle this
|
||||
os.Exit(1)
|
||||
}
|
||||
panic("libcontainer: container init failed to exec")
|
||||
},
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/urfave/cli"
|
||||
)
|
||||
|
||||
var killCommand = cli.Command{
|
||||
Name: "kill",
|
||||
Usage: "kill sends the specified signal (default: SIGTERM) to the container's init process",
|
||||
ArgsUsage: `<container-id> [signal]
|
||||
|
||||
Where "<container-id>" is the name for the instance of the container and
|
||||
"[signal]" is the signal to be sent to the init process.
|
||||
|
||||
EXAMPLE:
|
||||
For example, if the container id is "ubuntu01" the following will send a "KILL"
|
||||
signal to the init process of the "ubuntu01" container:
|
||||
|
||||
# runc kill ubuntu01 KILL`,
|
||||
Flags: []cli.Flag{
|
||||
cli.BoolFlag{
|
||||
Name: "all, a",
|
||||
Usage: "send the specified signal to all processes inside the container",
|
||||
},
|
||||
},
|
||||
Action: func(context *cli.Context) error {
|
||||
if err := checkArgs(context, 1, minArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkArgs(context, 2, maxArgs); err != nil {
|
||||
return err
|
||||
}
|
||||
container, err := getContainer(context)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
sigstr := context.Args().Get(1)
|
||||
if sigstr == "" {
|
||||
sigstr = "SIGTERM"
|
||||
}
|
||||
|
||||
signal, err := parseSignal(sigstr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return container.Signal(signal, context.Bool("all"))
|
||||
},
|
||||
}
|
||||
|
||||
func parseSignal(rawSignal string) (syscall.Signal, error) {
|
||||
s, err := strconv.Atoi(rawSignal)
|
||||
if err == nil {
|
||||
return syscall.Signal(s), nil
|
||||
}
|
||||
signal, ok := signalMap[strings.TrimPrefix(strings.ToUpper(rawSignal), "SIG")]
|
||||
if !ok {
|
||||
return -1, fmt.Errorf("unknown signal %q", rawSignal)
|
||||
}
|
||||
return signal, nil
|
||||
}
|
|
@ -0,0 +1,331 @@
|
|||
# libcontainer
|
||||
|
||||
[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer)
|
||||
|
||||
Libcontainer provides a native Go implementation for creating containers
|
||||
with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||
It allows you to manage the lifecycle of the container performing additional operations
|
||||
after the container is created.
|
||||
|
||||
|
||||
#### Container
|
||||
A container is a self contained execution environment that shares the kernel of the
|
||||
host system and which is (optionally) isolated from other containers in the system.
|
||||
|
||||
#### Using libcontainer
|
||||
|
||||
Because containers are spawned in a two step process you will need a binary that
|
||||
will be executed as the init process for the container. In libcontainer, we use
|
||||
the current binary (/proc/self/exe) to be executed as the init process, and use
|
||||
arg "init", we call the first step process "bootstrap", so you always need a "init"
|
||||
function as the entry of "bootstrap".
|
||||
|
||||
In addition to the go init function the early stage bootstrap is handled by importing
|
||||
[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md).
|
||||
|
||||
```go
|
||||
import (
|
||||
_ "github.com/opencontainers/runc/libcontainer/nsenter"
|
||||
)
|
||||
|
||||
func init() {
|
||||
if len(os.Args) > 1 && os.Args[1] == "init" {
|
||||
runtime.GOMAXPROCS(1)
|
||||
runtime.LockOSThread()
|
||||
factory, _ := libcontainer.New("")
|
||||
if err := factory.StartInitialization(); err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
panic("--this line should have never been executed, congratulations--")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Then to create a container you first have to initialize an instance of a factory
|
||||
that will handle the creation and initialization for a container.
|
||||
|
||||
```go
|
||||
factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
Once you have an instance of the factory created we can create a configuration
|
||||
struct describing how the container is to be created. A sample would look similar to this:
|
||||
|
||||
```go
|
||||
defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
config := &configs.Config{
|
||||
Rootfs: "/your/path/to/rootfs",
|
||||
Capabilities: &configs.Capabilities{
|
||||
Bounding: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_CHOWN",
|
||||
"CAP_DAC_OVERRIDE",
|
||||
"CAP_FSETID",
|
||||
"CAP_FOWNER",
|
||||
"CAP_MKNOD",
|
||||
"CAP_NET_RAW",
|
||||
"CAP_SETGID",
|
||||
"CAP_SETUID",
|
||||
"CAP_SETFCAP",
|
||||
"CAP_SETPCAP",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
"CAP_SYS_CHROOT",
|
||||
"CAP_KILL",
|
||||
"CAP_AUDIT_WRITE",
|
||||
},
|
||||
},
|
||||
Namespaces: configs.Namespaces([]configs.Namespace{
|
||||
{Type: configs.NEWNS},
|
||||
{Type: configs.NEWUTS},
|
||||
{Type: configs.NEWIPC},
|
||||
{Type: configs.NEWPID},
|
||||
{Type: configs.NEWUSER},
|
||||
{Type: configs.NEWNET},
|
||||
{Type: configs.NEWCGROUP},
|
||||
}),
|
||||
Cgroups: &configs.Cgroup{
|
||||
Name: "test-container",
|
||||
Parent: "system",
|
||||
Resources: &configs.Resources{
|
||||
MemorySwappiness: nil,
|
||||
AllowAllDevices: nil,
|
||||
AllowedDevices: configs.DefaultAllowedDevices,
|
||||
},
|
||||
},
|
||||
MaskPaths: []string{
|
||||
"/proc/kcore",
|
||||
"/sys/firmware",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
|
||||
},
|
||||
Devices: configs.DefaultAutoCreatedDevices,
|
||||
Hostname: "testing",
|
||||
Mounts: []*configs.Mount{
|
||||
{
|
||||
Source: "proc",
|
||||
Destination: "/proc",
|
||||
Device: "proc",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "tmpfs",
|
||||
Destination: "/dev",
|
||||
Device: "tmpfs",
|
||||
Flags: unix.MS_NOSUID | unix.MS_STRICTATIME,
|
||||
Data: "mode=755",
|
||||
},
|
||||
{
|
||||
Source: "devpts",
|
||||
Destination: "/dev/pts",
|
||||
Device: "devpts",
|
||||
Flags: unix.MS_NOSUID | unix.MS_NOEXEC,
|
||||
Data: "newinstance,ptmxmode=0666,mode=0620,gid=5",
|
||||
},
|
||||
{
|
||||
Device: "tmpfs",
|
||||
Source: "shm",
|
||||
Destination: "/dev/shm",
|
||||
Data: "mode=1777,size=65536k",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "mqueue",
|
||||
Destination: "/dev/mqueue",
|
||||
Device: "mqueue",
|
||||
Flags: defaultMountFlags,
|
||||
},
|
||||
{
|
||||
Source: "sysfs",
|
||||
Destination: "/sys",
|
||||
Device: "sysfs",
|
||||
Flags: defaultMountFlags | unix.MS_RDONLY,
|
||||
},
|
||||
},
|
||||
UidMappings: []configs.IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 65536,
|
||||
},
|
||||
},
|
||||
GidMappings: []configs.IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 65536,
|
||||
},
|
||||
},
|
||||
Networks: []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
Address: "127.0.0.1/0",
|
||||
Gateway: "localhost",
|
||||
},
|
||||
},
|
||||
Rlimits: []configs.Rlimit{
|
||||
{
|
||||
Type: unix.RLIMIT_NOFILE,
|
||||
Hard: uint64(1025),
|
||||
Soft: uint64(1025),
|
||||
},
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
Once you have the configuration populated you can create a container:
|
||||
|
||||
```go
|
||||
container, err := factory.Create("container-id", config)
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
```
|
||||
|
||||
To spawn bash as the initial process inside the container and have the
|
||||
processes pid returned in order to wait, signal, or kill the process:
|
||||
|
||||
```go
|
||||
process := &libcontainer.Process{
|
||||
Args: []string{"/bin/bash"},
|
||||
Env: []string{"PATH=/bin"},
|
||||
User: "daemon",
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
Init: true,
|
||||
}
|
||||
|
||||
err := container.Run(process)
|
||||
if err != nil {
|
||||
container.Destroy()
|
||||
logrus.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
// wait for the process to finish.
|
||||
_, err := process.Wait()
|
||||
if err != nil {
|
||||
logrus.Fatal(err)
|
||||
}
|
||||
|
||||
// destroy the container.
|
||||
container.Destroy()
|
||||
```
|
||||
|
||||
Additional ways to interact with a running container are:
|
||||
|
||||
```go
|
||||
// return all the pids for all processes running inside the container.
|
||||
processes, err := container.Processes()
|
||||
|
||||
// get detailed cpu, memory, io, and network statistics for the container and
|
||||
// it's processes.
|
||||
stats, err := container.Stats()
|
||||
|
||||
// pause all processes inside the container.
|
||||
container.Pause()
|
||||
|
||||
// resume all paused processes.
|
||||
container.Resume()
|
||||
|
||||
// send signal to container's init process.
|
||||
container.Signal(signal)
|
||||
|
||||
// update container resource constraints.
|
||||
container.Set(config)
|
||||
|
||||
// get current status of the container.
|
||||
status, err := container.Status()
|
||||
|
||||
// get current container's state information.
|
||||
state, err := container.State()
|
||||
```
|
||||
|
||||
|
||||
#### Checkpoint & Restore
|
||||
|
||||
libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers.
|
||||
This let's you save the state of a process running inside a container to disk, and then restore
|
||||
that state into a new process, on the same machine or on another machine.
|
||||
|
||||
`criu` version 1.5.2 or higher is required to use checkpoint and restore.
|
||||
If you don't already have `criu` installed, you can build it from source, following the
|
||||
[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image
|
||||
generated when building libcontainer with docker.
|
||||
|
||||
|
||||
## Copyright and license
|
||||
|
||||
Code and documentation copyright 2014 Docker, inc.
|
||||
The code and documentation are released under the [Apache 2.0 license](../LICENSE).
|
||||
The documentation is also released under Creative Commons Attribution 4.0 International License.
|
||||
You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/.
|
|
@ -0,0 +1,465 @@
|
|||
## Container Specification - v1
|
||||
|
||||
This is the standard configuration for version 1 containers. It includes
|
||||
namespaces, standard filesystem setup, a default Linux capability set, and
|
||||
information about resource reservations. It also has information about any
|
||||
populated environment settings for the processes running inside a container.
|
||||
|
||||
Along with the configuration of how a container is created the standard also
|
||||
discusses actions that can be performed on a container to manage and inspect
|
||||
information about the processes running inside.
|
||||
|
||||
The v1 profile is meant to be able to accommodate the majority of applications
|
||||
with a strong security configuration.
|
||||
|
||||
### System Requirements and Compatibility
|
||||
|
||||
Minimum requirements:
|
||||
* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches)
|
||||
* Mounted cgroups with each subsystem in its own hierarchy
|
||||
|
||||
|
||||
### Namespaces
|
||||
|
||||
| Flag | Enabled |
|
||||
| --------------- | ------- |
|
||||
| CLONE_NEWPID | 1 |
|
||||
| CLONE_NEWUTS | 1 |
|
||||
| CLONE_NEWIPC | 1 |
|
||||
| CLONE_NEWNET | 1 |
|
||||
| CLONE_NEWNS | 1 |
|
||||
| CLONE_NEWUSER | 1 |
|
||||
| CLONE_NEWCGROUP | 1 |
|
||||
|
||||
Namespaces are created for the container via the `unshare` syscall.
|
||||
|
||||
|
||||
### Filesystem
|
||||
|
||||
A root filesystem must be provided to a container for execution. The container
|
||||
will use this root filesystem (rootfs) to jail and spawn processes inside where
|
||||
the binaries and system libraries are local to that directory. Any binaries
|
||||
to be executed must be contained within this rootfs.
|
||||
|
||||
Mounts that happen inside the container are automatically cleaned up when the
|
||||
container exits as the mount namespace is destroyed and the kernel will
|
||||
unmount all the mounts that were setup within that namespace.
|
||||
|
||||
For a container to execute properly there are certain filesystems that
|
||||
are required to be mounted within the rootfs that the runtime will setup.
|
||||
|
||||
| Path | Type | Flags | Data |
|
||||
| ----------- | ------ | -------------------------------------- | ---------------------------------------- |
|
||||
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
|
||||
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
|
||||
| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
|
||||
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
|
||||
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 |
|
||||
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
|
||||
|
||||
|
||||
After a container's filesystems are mounted within the newly created
|
||||
mount namespace `/dev` will need to be populated with a set of device nodes.
|
||||
It is expected that a rootfs does not need to have any device nodes specified
|
||||
for `/dev` within the rootfs as the container will setup the correct devices
|
||||
that are required for executing a container's process.
|
||||
|
||||
| Path | Mode | Access |
|
||||
| ------------ | ---- | ---------- |
|
||||
| /dev/null | 0666 | rwm |
|
||||
| /dev/zero | 0666 | rwm |
|
||||
| /dev/full | 0666 | rwm |
|
||||
| /dev/tty | 0666 | rwm |
|
||||
| /dev/random | 0666 | rwm |
|
||||
| /dev/urandom | 0666 | rwm |
|
||||
|
||||
|
||||
**ptmx**
|
||||
`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
|
||||
the container.
|
||||
|
||||
The use of a pseudo TTY is optional within a container and it should support both.
|
||||
If a pseudo is provided to the container `/dev/console` will need to be
|
||||
setup by binding the console in `/dev/` after it has been populated and mounted
|
||||
in tmpfs.
|
||||
|
||||
| Source | Destination | UID GID | Mode | Type |
|
||||
| --------------- | ------------ | ------- | ---- | ---- |
|
||||
| *pty host path* | /dev/console | 0 0 | 0600 | bind |
|
||||
|
||||
|
||||
After `/dev/null` has been setup we check for any external links between
|
||||
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
|
||||
to `/dev/null` outside the container we close and `dup2` the `/dev/null`
|
||||
that is local to the container's rootfs.
|
||||
|
||||
|
||||
After the container has `/proc` mounted a few standard symlinks are setup
|
||||
within `/dev/` for the io.
|
||||
|
||||
| Source | Destination |
|
||||
| --------------- | ----------- |
|
||||
| /proc/self/fd | /dev/fd |
|
||||
| /proc/self/fd/0 | /dev/stdin |
|
||||
| /proc/self/fd/1 | /dev/stdout |
|
||||
| /proc/self/fd/2 | /dev/stderr |
|
||||
|
||||
A `pivot_root` is used to change the root for the process, effectively
|
||||
jailing the process inside the rootfs.
|
||||
|
||||
```c
|
||||
put_old = mkdir(...);
|
||||
pivot_root(rootfs, put_old);
|
||||
chdir("/");
|
||||
unmount(put_old, MS_DETACH);
|
||||
rmdir(put_old);
|
||||
```
|
||||
|
||||
For container's running with a rootfs inside `ramfs` a `MS_MOVE` combined
|
||||
with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
|
||||
|
||||
```c
|
||||
mount(rootfs, "/", NULL, MS_MOVE, NULL);
|
||||
chroot(".");
|
||||
chdir("/");
|
||||
```
|
||||
|
||||
The `umask` is set back to `0022` after the filesystem setup has been completed.
|
||||
|
||||
### Resources
|
||||
|
||||
Cgroups are used to handle resource allocation for containers. This includes
|
||||
system resources like cpu, memory, and device access.
|
||||
|
||||
| Subsystem | Enabled |
|
||||
| ---------- | ------- |
|
||||
| devices | 1 |
|
||||
| memory | 1 |
|
||||
| cpu | 1 |
|
||||
| cpuacct | 1 |
|
||||
| cpuset | 1 |
|
||||
| blkio | 1 |
|
||||
| perf_event | 1 |
|
||||
| freezer | 1 |
|
||||
| hugetlb | 1 |
|
||||
| pids | 1 |
|
||||
|
||||
|
||||
All cgroup subsystem are joined so that statistics can be collected from
|
||||
each of the subsystems. Freezer does not expose any stats but is joined
|
||||
so that containers can be paused and resumed.
|
||||
|
||||
The parent process of the container's init must place the init pid inside
|
||||
the correct cgroups before the initialization begins. This is done so
|
||||
that no processes or threads escape the cgroups. This sync is
|
||||
done via a pipe ( specified in the runtime section below ) that the container's
|
||||
init process will block waiting for the parent to finish setup.
|
||||
|
||||
### IntelRdt
|
||||
|
||||
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
|
||||
two sub-features of RDT.
|
||||
|
||||
Cache Allocation Technology (CAT) provides a way for the software to restrict
|
||||
cache allocation to a defined 'subset' of L3 cache which may be overlapping
|
||||
with other 'subsets'. The different subsets are identified by class of
|
||||
service (CLOS) and each CLOS has a capacity bitmask (CBM).
|
||||
|
||||
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
|
||||
over memory bandwidth for the software. A user controls the resource by
|
||||
indicating the percentage of maximum memory bandwidth or memory bandwidth limit
|
||||
in MBps unit if MBA Software Controller is enabled.
|
||||
|
||||
It can be used to handle L3 cache and memory bandwidth resources allocation
|
||||
for containers if hardware and kernel support Intel RDT CAT and MBA features.
|
||||
|
||||
In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
"resource control" filesystem, which is a "cgroup-like" interface.
|
||||
|
||||
Comparing with cgroups, it has similar process management lifecycle and
|
||||
interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
filesystem layout.
|
||||
|
||||
CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via
|
||||
"resource control" filesystem.
|
||||
|
||||
Intel RDT "resource control" filesystem hierarchy:
|
||||
```
|
||||
mount -t resctrl resctrl /sys/fs/resctrl
|
||||
tree /sys/fs/resctrl
|
||||
/sys/fs/resctrl/
|
||||
|-- info
|
||||
| |-- L3
|
||||
| | |-- cbm_mask
|
||||
| | |-- min_cbm_bits
|
||||
| | |-- num_closids
|
||||
| |-- MB
|
||||
| |-- bandwidth_gran
|
||||
| |-- delay_linear
|
||||
| |-- min_bandwidth
|
||||
| |-- num_closids
|
||||
|-- ...
|
||||
|-- schemata
|
||||
|-- tasks
|
||||
|-- <container_id>
|
||||
|-- ...
|
||||
|-- schemata
|
||||
|-- tasks
|
||||
```
|
||||
|
||||
For runc, we can make use of `tasks` and `schemata` configuration for L3
|
||||
cache and memory bandwidth resources constraints.
|
||||
|
||||
The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
<container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
to the "tasks" file (which will automatically remove them from the previous
|
||||
group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
added to the same group as their parent.
|
||||
|
||||
The file `schemata` has a list of all the resources available to this group.
|
||||
Each resource (L3 cache, memory bandwidth) has its own line and format.
|
||||
|
||||
L3 cache schema:
|
||||
It has allocation bitmasks/values for L3 cache on each socket, which
|
||||
contains L3 cache id and capacity bitmask (CBM).
|
||||
```
|
||||
Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
```
|
||||
For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0"
|
||||
which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
|
||||
The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
be set is less than the max bit. The max bits in the CBM is varied among
|
||||
supported Intel CPU models. Kernel will check if it is valid when writing.
|
||||
e.g., default value 0xfffff in root indicates the max bits of CBM is 20
|
||||
bits, which mapping to entire L3 cache capacity. Some valid CBM values to
|
||||
set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
|
||||
Memory bandwidth schema:
|
||||
It has allocation values for memory bandwidth on each socket, which contains
|
||||
L3 cache id and memory bandwidth.
|
||||
```
|
||||
Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
```
|
||||
For example, on a two-socket machine, the schema line could be "MB:0=20;1=70"
|
||||
|
||||
The minimum bandwidth percentage value for each CPU model is predefined and
|
||||
can be looked up through "info/MB/min_bandwidth". The bandwidth granularity
|
||||
that is allocated is also dependent on the CPU model and can be looked up at
|
||||
"info/MB/bandwidth_gran". The available bandwidth control steps are:
|
||||
min_bw + N * bw_gran. Intermediate values are rounded to the next control
|
||||
step available on the hardware.
|
||||
|
||||
If MBA Software Controller is enabled through mount option "-o mba_MBps"
|
||||
mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
|
||||
We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
|
||||
instead of "percentages". The kernel underneath would use a software feedback
|
||||
mechanism or a "Software Controller" which reads the actual bandwidth using
|
||||
MBM counters and adjust the memory bandwidth percentages to ensure:
|
||||
"actual memory bandwidth < user specified memory bandwidth".
|
||||
|
||||
For example, on a two-socket machine, the schema line could be
|
||||
"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
|
||||
and 7000 MBps memory bandwidth limit on socket 1.
|
||||
|
||||
For more information about Intel RDT kernel interface:
|
||||
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
|
||||
```
|
||||
An example for runc:
|
||||
Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
|
||||
with a memory bandwidth granularity of 10%.
|
||||
|
||||
Tasks inside the container only have access to the "upper" 7/11 of L3 cache
|
||||
on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a
|
||||
maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
|
||||
|
||||
"linux": {
|
||||
"intelRdt": {
|
||||
"closID": "guaranteed_group",
|
||||
"l3CacheSchema": "L3:0=7f0;1=1f",
|
||||
"memBwSchema": "MB:0=20;1=70"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Security
|
||||
|
||||
The standard set of Linux capabilities that are set in a container
|
||||
provide a good default for security and flexibility for the applications.
|
||||
|
||||
|
||||
| Capability | Enabled |
|
||||
| -------------------- | ------- |
|
||||
| CAP_NET_RAW | 1 |
|
||||
| CAP_NET_BIND_SERVICE | 1 |
|
||||
| CAP_AUDIT_READ | 1 |
|
||||
| CAP_AUDIT_WRITE | 1 |
|
||||
| CAP_DAC_OVERRIDE | 1 |
|
||||
| CAP_SETFCAP | 1 |
|
||||
| CAP_SETPCAP | 1 |
|
||||
| CAP_SETGID | 1 |
|
||||
| CAP_SETUID | 1 |
|
||||
| CAP_MKNOD | 1 |
|
||||
| CAP_CHOWN | 1 |
|
||||
| CAP_FOWNER | 1 |
|
||||
| CAP_FSETID | 1 |
|
||||
| CAP_KILL | 1 |
|
||||
| CAP_SYS_CHROOT | 1 |
|
||||
| CAP_NET_BROADCAST | 0 |
|
||||
| CAP_SYS_MODULE | 0 |
|
||||
| CAP_SYS_RAWIO | 0 |
|
||||
| CAP_SYS_PACCT | 0 |
|
||||
| CAP_SYS_ADMIN | 0 |
|
||||
| CAP_SYS_NICE | 0 |
|
||||
| CAP_SYS_RESOURCE | 0 |
|
||||
| CAP_SYS_TIME | 0 |
|
||||
| CAP_SYS_TTY_CONFIG | 0 |
|
||||
| CAP_AUDIT_CONTROL | 0 |
|
||||
| CAP_MAC_OVERRIDE | 0 |
|
||||
| CAP_MAC_ADMIN | 0 |
|
||||
| CAP_NET_ADMIN | 0 |
|
||||
| CAP_SYSLOG | 0 |
|
||||
| CAP_DAC_READ_SEARCH | 0 |
|
||||
| CAP_LINUX_IMMUTABLE | 0 |
|
||||
| CAP_IPC_LOCK | 0 |
|
||||
| CAP_IPC_OWNER | 0 |
|
||||
| CAP_SYS_PTRACE | 0 |
|
||||
| CAP_SYS_BOOT | 0 |
|
||||
| CAP_LEASE | 0 |
|
||||
| CAP_WAKE_ALARM | 0 |
|
||||
| CAP_BLOCK_SUSPEND | 0 |
|
||||
|
||||
|
||||
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
|
||||
and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
|
||||
the containers. A container should support setting an apparmor profile or
|
||||
selinux process and mount labels if provided in the configuration.
|
||||
|
||||
Standard apparmor profile:
|
||||
```c
|
||||
#include <tunables/global>
|
||||
profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
|
||||
#include <abstractions/base>
|
||||
network,
|
||||
capability,
|
||||
file,
|
||||
umount,
|
||||
|
||||
deny @{PROC}/sys/fs/** wklx,
|
||||
deny @{PROC}/sysrq-trigger rwklx,
|
||||
deny @{PROC}/mem rwklx,
|
||||
deny @{PROC}/kmem rwklx,
|
||||
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
|
||||
deny @{PROC}/sys/kernel/*/** wklx,
|
||||
|
||||
deny mount,
|
||||
|
||||
deny /sys/[^f]*/** wklx,
|
||||
deny /sys/f[^s]*/** wklx,
|
||||
deny /sys/fs/[^c]*/** wklx,
|
||||
deny /sys/fs/c[^g]*/** wklx,
|
||||
deny /sys/fs/cg[^r]*/** wklx,
|
||||
deny /sys/firmware/efi/efivars/** rwklx,
|
||||
deny /sys/kernel/security/** rwklx,
|
||||
}
|
||||
```
|
||||
|
||||
*TODO: seccomp work is being done to find a good default config*
|
||||
|
||||
### Runtime and Init Process
|
||||
|
||||
During container creation the parent process needs to talk to the container's init
|
||||
process and have a form of synchronization. This is accomplished by creating
|
||||
a pipe that is passed to the container's init. When the init process first spawns
|
||||
it will block on its side of the pipe until the parent closes its side. This
|
||||
allows the parent to have time to set the new process inside a cgroup hierarchy
|
||||
and/or write any uid/gid mappings required for user namespaces.
|
||||
The pipe is passed to the init process via FD 3.
|
||||
|
||||
The application consuming libcontainer should be compiled statically. libcontainer
|
||||
does not define any init process and the arguments provided are used to `exec` the
|
||||
process inside the application. There should be no long running init within the
|
||||
container spec.
|
||||
|
||||
If a pseudo tty is provided to a container it will open and `dup2` the console
|
||||
as the container's STDIN, STDOUT, STDERR as well as mounting the console
|
||||
as `/dev/console`.
|
||||
|
||||
An extra set of mounts are provided to a container and setup for use. A container's
|
||||
rootfs can contain some non portable files inside that can cause side effects during
|
||||
execution of a process. These files are usually created and populated with the container
|
||||
specific information via the runtime.
|
||||
|
||||
**Extra runtime files:**
|
||||
* /etc/hosts
|
||||
* /etc/resolv.conf
|
||||
* /etc/hostname
|
||||
* /etc/localtime
|
||||
|
||||
|
||||
#### Defaults
|
||||
|
||||
There are a few defaults that can be overridden by users, but in their omission
|
||||
these apply to processes within a container.
|
||||
|
||||
| Type | Value |
|
||||
| ------------------- | ------------------------------ |
|
||||
| Parent Death Signal | SIGKILL |
|
||||
| UID | 0 |
|
||||
| GID | 0 |
|
||||
| GROUPS | 0, NULL |
|
||||
| CWD | "/" |
|
||||
| $HOME | Current user's home dir or "/" |
|
||||
| Readonly rootfs | false |
|
||||
| Pseudo TTY | false |
|
||||
|
||||
|
||||
## Actions
|
||||
|
||||
After a container is created there is a standard set of actions that can
|
||||
be done to the container. These actions are part of the public API for
|
||||
a container.
|
||||
|
||||
| Action | Description |
|
||||
| -------------- | ------------------------------------------------------------------ |
|
||||
| Get processes | Return all the pids for processes running inside a container |
|
||||
| Get Stats | Return resource statistics for the container as a whole |
|
||||
| Wait | Waits on the container's init process ( pid 1 ) |
|
||||
| Wait Process | Wait on any of the container's processes returning the exit status |
|
||||
| Destroy | Kill the container's init process and remove any filesystem state |
|
||||
| Signal | Send a signal to the container's init process |
|
||||
| Signal Process | Send a signal to any of the container's processes |
|
||||
| Pause | Pause all processes inside the container |
|
||||
| Resume | Resume all processes inside the container if paused |
|
||||
| Exec | Execute a new process inside of the container ( requires setns ) |
|
||||
| Set | Setup configs of the container after it's created |
|
||||
|
||||
### Execute a new process inside of a running container
|
||||
|
||||
User can execute a new process inside of a running container. Any binaries to be
|
||||
executed must be accessible within the container's rootfs.
|
||||
|
||||
The started process will run inside the container's rootfs. Any changes
|
||||
made by the process to the container's filesystem will persist after the
|
||||
process finished executing.
|
||||
|
||||
The started process will join all the container's existing namespaces. When the
|
||||
container is paused, the process will also be paused and will resume when
|
||||
the container is unpaused. The started process will only run when the container's
|
||||
primary process (PID 1) is running, and will not be restarted when the container
|
||||
is restarted.
|
||||
|
||||
#### Planned additions
|
||||
|
||||
The started process will have its own cgroups nested inside the container's
|
||||
cgroups. This is used for process tracking and optionally resource allocation
|
||||
handling for the new process. Freezer cgroup is required, the rest of the cgroups
|
||||
are optional. The process executor must place its pid inside the correct
|
||||
cgroups before starting the process. This is done so that no child processes or
|
||||
threads can escape the cgroups.
|
||||
|
||||
When the process is stopped, the process executor will try (in a best-effort way)
|
||||
to stop all its children and remove the sub-cgroups.
|
|
@ -0,0 +1,60 @@
|
|||
// +build apparmor,linux
|
||||
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
// IsEnabled returns true if apparmor is enabled for the host.
|
||||
func IsEnabled() bool {
|
||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
|
||||
if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
|
||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
return err == nil && len(buf) > 1 && buf[0] == 'Y'
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func setProcAttr(attr, value string) error {
|
||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||
// instead of /proc/<tid>/ like libapparmor does
|
||||
path := fmt.Sprintf("/proc/self/attr/%s", attr)
|
||||
|
||||
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if err := utils.EnsureProcHandle(f); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = fmt.Fprintf(f, "%s", value)
|
||||
return err
|
||||
}
|
||||
|
||||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||
func changeOnExec(name string) error {
|
||||
value := "exec " + name
|
||||
if err := setProcAttr("exec", value); err != nil {
|
||||
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplyProfile will apply the profile with the specified name to the process after
|
||||
// the next exec.
|
||||
func ApplyProfile(name string) error {
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return changeOnExec(name)
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
// +build !apparmor !linux
|
||||
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
|
||||
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func ApplyProfile(name string) error {
|
||||
if name != "" {
|
||||
return ErrApparmorNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
|
||||
|
||||
var capabilityMap map[string]capability.Cap
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap)
|
||||
last := capability.CAP_LAST_CAP
|
||||
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
|
||||
if last == capability.Cap(63) {
|
||||
last = capability.CAP_BLOCK_SUSPEND
|
||||
}
|
||||
for _, cap := range capability.List() {
|
||||
if cap > last {
|
||||
continue
|
||||
}
|
||||
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
||||
capabilityMap[capKey] = cap
|
||||
}
|
||||
}
|
||||
|
||||
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
|
||||
bounding := []capability.Cap{}
|
||||
for _, c := range capConfig.Bounding {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
bounding = append(bounding, v)
|
||||
}
|
||||
effective := []capability.Cap{}
|
||||
for _, c := range capConfig.Effective {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
effective = append(effective, v)
|
||||
}
|
||||
inheritable := []capability.Cap{}
|
||||
for _, c := range capConfig.Inheritable {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
inheritable = append(inheritable, v)
|
||||
}
|
||||
permitted := []capability.Cap{}
|
||||
for _, c := range capConfig.Permitted {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
permitted = append(permitted, v)
|
||||
}
|
||||
ambient := []capability.Cap{}
|
||||
for _, c := range capConfig.Ambient {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
ambient = append(ambient, v)
|
||||
}
|
||||
pid, err := capability.NewPid2(0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = pid.Load()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &containerCapabilities{
|
||||
bounding: bounding,
|
||||
effective: effective,
|
||||
inheritable: inheritable,
|
||||
permitted: permitted,
|
||||
ambient: ambient,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type containerCapabilities struct {
|
||||
pid capability.Capabilities
|
||||
bounding []capability.Cap
|
||||
effective []capability.Cap
|
||||
inheritable []capability.Cap
|
||||
permitted []capability.Cap
|
||||
ambient []capability.Cap
|
||||
}
|
||||
|
||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||
func (c *containerCapabilities) ApplyBoundingSet() error {
|
||||
c.pid.Clear(capability.BOUNDS)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
return c.pid.Apply(capability.BOUNDS)
|
||||
}
|
||||
|
||||
// Apply sets all the capabilities for the current process in the config.
|
||||
func (c *containerCapabilities) ApplyCaps() error {
|
||||
c.pid.Clear(allCapabilityTypes)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
c.pid.Set(capability.PERMITTED, c.permitted...)
|
||||
c.pid.Set(capability.INHERITABLE, c.inheritable...)
|
||||
c.pid.Set(capability.EFFECTIVE, c.effective...)
|
||||
c.pid.Set(capability.AMBIENT, c.ambient...)
|
||||
return c.pid.Apply(allCapabilityTypes)
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager interface {
|
||||
// Applies cgroup configuration to the process with the specified pid
|
||||
Apply(pid int) error
|
||||
|
||||
// Returns the PIDs inside the cgroup set
|
||||
GetPids() ([]int, error)
|
||||
|
||||
// Returns the PIDs inside the cgroup set & all sub-cgroups
|
||||
GetAllPids() ([]int, error)
|
||||
|
||||
// Returns statistics for the cgroup set
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Toggles the freezer cgroup according with specified state
|
||||
Freeze(state configs.FreezerState) error
|
||||
|
||||
// Destroys the cgroup set
|
||||
Destroy() error
|
||||
|
||||
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
|
||||
// Paths map[string]string
|
||||
// Cgroups *configs.Cgroup
|
||||
// Paths maps cgroup subsystem to path at which it is mounted.
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems
|
||||
|
||||
// Returns cgroup paths to save in a state file and to be able to
|
||||
// restore the object later.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// GetUnifiedPath returns the unified path when running in unified mode.
|
||||
// The value corresponds to the all values of GetPaths() map.
|
||||
//
|
||||
// GetUnifiedPath returns error when running in hybrid mode as well as
|
||||
// in legacy mode.
|
||||
GetUnifiedPath() (string, error)
|
||||
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
|
||||
// Gets the cgroup as configured.
|
||||
GetCgroups() (*configs.Cgroup, error)
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
Subsystem string
|
||||
}
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.Subsystem)
|
||||
}
|
||||
|
||||
func NewNotFoundError(sub string) error {
|
||||
return &NotFoundError{
|
||||
Subsystem: sub,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseCgroups(t *testing.T) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return
|
||||
}
|
||||
if _, ok := cgroups["cpu"]; !ok {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
// +build !linux
|
||||
|
||||
package cgroups
|
|
@ -0,0 +1,180 @@
|
|||
// Package devicefilter containes eBPF device filter program
|
||||
//
|
||||
// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c
|
||||
//
|
||||
// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano)
|
||||
// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397
|
||||
package devicefilter
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
// license string format is same as kernel MODULE_LICENSE macro
|
||||
license = "Apache"
|
||||
)
|
||||
|
||||
// DeviceFilter returns eBPF device filter program and its license string
|
||||
func DeviceFilter(devices []*configs.Device) (asm.Instructions, string, error) {
|
||||
p := &program{}
|
||||
p.init()
|
||||
for i := len(devices) - 1; i >= 0; i-- {
|
||||
if err := p.appendDevice(devices[i]); err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
}
|
||||
insts, err := p.finalize()
|
||||
return insts, license, err
|
||||
}
|
||||
|
||||
type program struct {
|
||||
insts asm.Instructions
|
||||
hasWildCard bool
|
||||
blockID int
|
||||
}
|
||||
|
||||
func (p *program) init() {
|
||||
// struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423
|
||||
/*
|
||||
u32 access_type
|
||||
u32 major
|
||||
u32 minor
|
||||
*/
|
||||
// R2 <- type (lower 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R2, asm.R1, 0, asm.Half))
|
||||
|
||||
// R3 <- access (upper 16 bit of u32 access_type at R1[0])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R3, asm.R1, 0, asm.Word),
|
||||
// RSh: bitwise shift right
|
||||
asm.RSh.Imm32(asm.R3, 16))
|
||||
|
||||
// R4 <- major (u32 major at R1[4])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R4, asm.R1, 4, asm.Word))
|
||||
|
||||
// R5 <- minor (u32 minor at R1[8])
|
||||
p.insts = append(p.insts,
|
||||
asm.LoadMem(asm.R5, asm.R1, 8, asm.Word))
|
||||
}
|
||||
|
||||
// appendDevice needs to be called from the last element of OCI linux.resources.devices to the head element.
|
||||
func (p *program) appendDevice(dev *configs.Device) error {
|
||||
if p.blockID < 0 {
|
||||
return errors.New("the program is finalized")
|
||||
}
|
||||
if p.hasWildCard {
|
||||
// All entries after wildcard entry are ignored
|
||||
return nil
|
||||
}
|
||||
|
||||
bpfType := int32(-1)
|
||||
hasType := true
|
||||
switch dev.Type {
|
||||
case 'c':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_CHAR)
|
||||
case 'b':
|
||||
bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK)
|
||||
case 'a':
|
||||
hasType = false
|
||||
default:
|
||||
// if not specified in OCI json, typ is set to DeviceTypeAll
|
||||
return errors.Errorf("invalid DeviceType %q", string(dev.Type))
|
||||
}
|
||||
if dev.Major > math.MaxUint32 {
|
||||
return errors.Errorf("invalid major %d", dev.Major)
|
||||
}
|
||||
if dev.Minor > math.MaxUint32 {
|
||||
return errors.Errorf("invalid minor %d", dev.Major)
|
||||
}
|
||||
hasMajor := dev.Major >= 0 // if not specified in OCI json, major is set to -1
|
||||
hasMinor := dev.Minor >= 0
|
||||
bpfAccess := int32(0)
|
||||
for _, r := range dev.Permissions {
|
||||
switch r {
|
||||
case 'r':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_READ
|
||||
case 'w':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_WRITE
|
||||
case 'm':
|
||||
bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD
|
||||
default:
|
||||
return errors.Errorf("unknown device access %v", r)
|
||||
}
|
||||
}
|
||||
// If the access is rwm, skip the check.
|
||||
hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD)
|
||||
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
nextBlockSym := fmt.Sprintf("block-%d", p.blockID+1)
|
||||
prevBlockLastIdx := len(p.insts) - 1
|
||||
if hasType {
|
||||
p.insts = append(p.insts,
|
||||
// if (R2 != bpfType) goto next
|
||||
asm.JNE.Imm(asm.R2, bpfType, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasAccess {
|
||||
p.insts = append(p.insts,
|
||||
// if (R3 & bpfAccess == 0 /* use R1 as a temp var */) goto next
|
||||
asm.Mov.Reg32(asm.R1, asm.R3),
|
||||
asm.And.Imm32(asm.R1, bpfAccess),
|
||||
asm.JEq.Imm(asm.R1, 0, nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMajor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R4 != major) goto next
|
||||
asm.JNE.Imm(asm.R4, int32(dev.Major), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if hasMinor {
|
||||
p.insts = append(p.insts,
|
||||
// if (R5 != minor) goto next
|
||||
asm.JNE.Imm(asm.R5, int32(dev.Minor), nextBlockSym),
|
||||
)
|
||||
}
|
||||
if !hasType && !hasAccess && !hasMajor && !hasMinor {
|
||||
p.hasWildCard = true
|
||||
}
|
||||
p.insts = append(p.insts, acceptBlock(dev.Allow)...)
|
||||
// set blockSym to the first instruction we added in this iteration
|
||||
p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym)
|
||||
p.blockID++
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *program) finalize() (asm.Instructions, error) {
|
||||
if p.hasWildCard {
|
||||
// acceptBlock with asm.Return() is already inserted
|
||||
return p.insts, nil
|
||||
}
|
||||
blockSym := fmt.Sprintf("block-%d", p.blockID)
|
||||
p.insts = append(p.insts,
|
||||
// R0 <- 0
|
||||
asm.Mov.Imm32(asm.R0, 0).Sym(blockSym),
|
||||
asm.Return(),
|
||||
)
|
||||
p.blockID = -1
|
||||
return p.insts, nil
|
||||
}
|
||||
|
||||
func acceptBlock(accept bool) asm.Instructions {
|
||||
v := int32(0)
|
||||
if accept {
|
||||
v = 1
|
||||
}
|
||||
return []asm.Instruction{
|
||||
// R0 <- v
|
||||
asm.Mov.Imm32(asm.R0, v),
|
||||
asm.Return(),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,258 @@
|
|||
package devicefilter
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/specconv"
|
||||
)
|
||||
|
||||
func hash(s, comm string) string {
|
||||
var res []string
|
||||
for _, l := range strings.Split(s, "\n") {
|
||||
trimmed := strings.TrimSpace(l)
|
||||
if trimmed == "" || strings.HasPrefix(trimmed, comm) {
|
||||
continue
|
||||
}
|
||||
res = append(res, trimmed)
|
||||
}
|
||||
return strings.Join(res, "\n")
|
||||
}
|
||||
|
||||
func testDeviceFilter(t testing.TB, devices []*configs.Device, expectedStr string) {
|
||||
insts, _, err := DeviceFilter(devices)
|
||||
if err != nil {
|
||||
t.Fatalf("%s: %v (devices: %+v)", t.Name(), err, devices)
|
||||
}
|
||||
s := insts.String()
|
||||
t.Logf("%s: devices: %+v\n%s", t.Name(), devices, s)
|
||||
if expectedStr != "" {
|
||||
hashed := hash(s, "//")
|
||||
expectedHashed := hash(expectedStr, "//")
|
||||
if expectedHashed != hashed {
|
||||
t.Fatalf("expected:\n%q\ngot\n%q", expectedHashed, hashed)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeviceFilter_Nil(t *testing.T) {
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 0 (reject)
|
||||
5: Mov32Imm dst: r0 imm: 0
|
||||
6: Exit
|
||||
`
|
||||
testDeviceFilter(t, nil, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_BuiltInAllowList(t *testing.T) {
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// tuntap (c, 10, 200, rwm, allow)
|
||||
5: JNEImm dst: r2 off: -1 imm: 2 <block-1>
|
||||
6: JNEImm dst: r4 off: -1 imm: 10 <block-1>
|
||||
7: JNEImm dst: r5 off: -1 imm: 200 <block-1>
|
||||
8: Mov32Imm dst: r0 imm: 1
|
||||
9: Exit
|
||||
block-1:
|
||||
10: JNEImm dst: r2 off: -1 imm: 2 <block-2>
|
||||
11: JNEImm dst: r4 off: -1 imm: 5 <block-2>
|
||||
12: JNEImm dst: r5 off: -1 imm: 2 <block-2>
|
||||
13: Mov32Imm dst: r0 imm: 1
|
||||
14: Exit
|
||||
block-2:
|
||||
// /dev/pts (c, 136, wildcard, rwm, true)
|
||||
15: JNEImm dst: r2 off: -1 imm: 2 <block-3>
|
||||
16: JNEImm dst: r4 off: -1 imm: 136 <block-3>
|
||||
17: Mov32Imm dst: r0 imm: 1
|
||||
18: Exit
|
||||
block-3:
|
||||
19: JNEImm dst: r2 off: -1 imm: 2 <block-4>
|
||||
20: JNEImm dst: r4 off: -1 imm: 5 <block-4>
|
||||
21: JNEImm dst: r5 off: -1 imm: 1 <block-4>
|
||||
22: Mov32Imm dst: r0 imm: 1
|
||||
23: Exit
|
||||
block-4:
|
||||
24: JNEImm dst: r2 off: -1 imm: 2 <block-5>
|
||||
25: JNEImm dst: r4 off: -1 imm: 1 <block-5>
|
||||
26: JNEImm dst: r5 off: -1 imm: 9 <block-5>
|
||||
27: Mov32Imm dst: r0 imm: 1
|
||||
28: Exit
|
||||
block-5:
|
||||
29: JNEImm dst: r2 off: -1 imm: 2 <block-6>
|
||||
30: JNEImm dst: r4 off: -1 imm: 1 <block-6>
|
||||
31: JNEImm dst: r5 off: -1 imm: 5 <block-6>
|
||||
32: Mov32Imm dst: r0 imm: 1
|
||||
33: Exit
|
||||
block-6:
|
||||
34: JNEImm dst: r2 off: -1 imm: 2 <block-7>
|
||||
35: JNEImm dst: r4 off: -1 imm: 5 <block-7>
|
||||
36: JNEImm dst: r5 off: -1 imm: 0 <block-7>
|
||||
37: Mov32Imm dst: r0 imm: 1
|
||||
38: Exit
|
||||
block-7:
|
||||
39: JNEImm dst: r2 off: -1 imm: 2 <block-8>
|
||||
40: JNEImm dst: r4 off: -1 imm: 1 <block-8>
|
||||
41: JNEImm dst: r5 off: -1 imm: 7 <block-8>
|
||||
42: Mov32Imm dst: r0 imm: 1
|
||||
43: Exit
|
||||
block-8:
|
||||
44: JNEImm dst: r2 off: -1 imm: 2 <block-9>
|
||||
45: JNEImm dst: r4 off: -1 imm: 1 <block-9>
|
||||
46: JNEImm dst: r5 off: -1 imm: 8 <block-9>
|
||||
47: Mov32Imm dst: r0 imm: 1
|
||||
48: Exit
|
||||
block-9:
|
||||
49: JNEImm dst: r2 off: -1 imm: 2 <block-10>
|
||||
50: JNEImm dst: r4 off: -1 imm: 1 <block-10>
|
||||
51: JNEImm dst: r5 off: -1 imm: 3 <block-10>
|
||||
52: Mov32Imm dst: r0 imm: 1
|
||||
53: Exit
|
||||
block-10:
|
||||
// (b, wildcard, wildcard, m, true)
|
||||
54: JNEImm dst: r2 off: -1 imm: 1 <block-11>
|
||||
55: Mov32Reg dst: r1 src: r3
|
||||
56: And32Imm dst: r1 imm: 1
|
||||
57: JEqImm dst: r1 off: -1 imm: 0 <block-11>
|
||||
58: Mov32Imm dst: r0 imm: 1
|
||||
59: Exit
|
||||
block-11:
|
||||
// (c, wildcard, wildcard, m, true)
|
||||
60: JNEImm dst: r2 off: -1 imm: 2 <block-12>
|
||||
61: Mov32Reg dst: r1 src: r3
|
||||
62: And32Imm dst: r1 imm: 1
|
||||
63: JEqImm dst: r1 off: -1 imm: 0 <block-12>
|
||||
64: Mov32Imm dst: r0 imm: 1
|
||||
65: Exit
|
||||
block-12:
|
||||
66: Mov32Imm dst: r0 imm: 0
|
||||
67: Exit
|
||||
`
|
||||
testDeviceFilter(t, specconv.AllowedDevices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_Privileged(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
}
|
||||
expected :=
|
||||
`
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 1 (accept)
|
||||
5: Mov32Imm dst: r0 imm: 1
|
||||
6: Exit
|
||||
`
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_PrivilegedExceptSingleDevice(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: 8,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
Allow: false,
|
||||
},
|
||||
}
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 0 (reject) if type==b && major == 8 && minor == 0
|
||||
5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||
7: JNEImm dst: r5 off: -1 imm: 0 <block-1>
|
||||
8: Mov32Imm dst: r0 imm: 0
|
||||
9: Exit
|
||||
block-1:
|
||||
// return 1 (accept)
|
||||
10: Mov32Imm dst: r0 imm: 1
|
||||
11: Exit
|
||||
`
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
||||
|
||||
func TestDeviceFilter_Weird(t *testing.T) {
|
||||
devices := []*configs.Device{
|
||||
{
|
||||
Type: 'b',
|
||||
Major: 8,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
Allow: false,
|
||||
},
|
||||
{
|
||||
Type: 'a',
|
||||
Major: -1,
|
||||
Minor: -1,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: 8,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
Allow: false,
|
||||
},
|
||||
}
|
||||
// 8/1 is allowed, 8/2 is not allowed.
|
||||
// This conforms to runc v1.0.0-rc.9 (cgroup1) behavior.
|
||||
expected := `
|
||||
// load parameters into registers
|
||||
0: LdXMemH dst: r2 src: r1 off: 0 imm: 0
|
||||
1: LdXMemW dst: r3 src: r1 off: 0 imm: 0
|
||||
2: RSh32Imm dst: r3 imm: 16
|
||||
3: LdXMemW dst: r4 src: r1 off: 4 imm: 0
|
||||
4: LdXMemW dst: r5 src: r1 off: 8 imm: 0
|
||||
block-0:
|
||||
// return 0 (reject) if type==b && major == 8 && minor == 2
|
||||
5: JNEImm dst: r2 off: -1 imm: 1 <block-1>
|
||||
6: JNEImm dst: r4 off: -1 imm: 8 <block-1>
|
||||
7: JNEImm dst: r5 off: -1 imm: 2 <block-1>
|
||||
8: Mov32Imm dst: r0 imm: 0
|
||||
9: Exit
|
||||
block-1:
|
||||
// return 1 (accept)
|
||||
10: Mov32Imm dst: r0 imm: 1
|
||||
11: Exit
|
||||
`
|
||||
testDeviceFilter(t, devices, expected)
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package ebpf
|
||||
|
||||
import (
|
||||
"github.com/cilium/ebpf"
|
||||
"github.com/cilium/ebpf/asm"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
|
||||
//
|
||||
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
|
||||
//
|
||||
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
|
||||
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
|
||||
nilCloser := func() error {
|
||||
return nil
|
||||
}
|
||||
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
|
||||
// This limit is not inherited into the container.
|
||||
memlockLimit := &unix.Rlimit{
|
||||
Cur: unix.RLIM_INFINITY,
|
||||
Max: unix.RLIM_INFINITY,
|
||||
}
|
||||
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
|
||||
spec := &ebpf.ProgramSpec{
|
||||
Type: ebpf.CGroupDevice,
|
||||
Instructions: insts,
|
||||
License: license,
|
||||
}
|
||||
prog, err := ebpf.NewProgram(spec)
|
||||
if err != nil {
|
||||
return nilCloser, err
|
||||
}
|
||||
if err := prog.Attach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
closer := func() error {
|
||||
if err := prog.Detach(dirFD, ebpf.AttachCGroupDevice, unix.BPF_F_ALLOW_MULTI); err != nil {
|
||||
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return closer, nil
|
||||
}
|
|
@ -0,0 +1,411 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
subsystemsLegacy = subsystemSet{
|
||||
&CpusetGroup{},
|
||||
&DevicesGroup{},
|
||||
&MemoryGroup{},
|
||||
&CpuGroup{},
|
||||
&CpuacctGroup{},
|
||||
&PidsGroup{},
|
||||
&BlkioGroup{},
|
||||
&HugetlbGroup{},
|
||||
&NetClsGroup{},
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
var errSubsystemDoesNotExist = fmt.Errorf("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
func (s subsystemSet) Get(name string) (subsystem, error) {
|
||||
for _, ss := range s {
|
||||
if ss.Name() == name {
|
||||
return ss, nil
|
||||
}
|
||||
}
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Removes the cgroup represented by 'cgroupData'.
|
||||
Remove(*cgroupData) error
|
||||
// Creates and joins the cgroup represented by 'cgroupData'.
|
||||
Apply(*cgroupData) error
|
||||
// Set the cgroup represented by cgroup.
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
}
|
||||
|
||||
type Manager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Rootless bool // ignore permission-related errors
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
// The absolute path to the root of the cgroup hierarchies.
|
||||
var cgroupRootLock sync.Mutex
|
||||
var cgroupRoot string
|
||||
|
||||
// Gets the cgroupRoot.
|
||||
func getCgroupRoot() (string, error) {
|
||||
cgroupRootLock.Lock()
|
||||
defer cgroupRootLock.Unlock()
|
||||
|
||||
if cgroupRoot != "" {
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
root, err := cgroups.FindCgroupMountpointDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
cgroupRoot = root
|
||||
return cgroupRoot, nil
|
||||
}
|
||||
|
||||
type cgroupData struct {
|
||||
root string
|
||||
innerPath string
|
||||
config *configs.Cgroup
|
||||
pid int
|
||||
}
|
||||
|
||||
// isIgnorableError returns whether err is a permission error (in the loose
|
||||
// sense of the word). This includes EROFS (which for an unprivileged user is
|
||||
// basically a permission error) and EACCES (for similar reasons) as well as
|
||||
// the normal EPERM.
|
||||
func isIgnorableError(rootless bool, err error) bool {
|
||||
// We do not ignore errors if we are root.
|
||||
if !rootless {
|
||||
return false
|
||||
}
|
||||
// Is it an ordinary EPERM?
|
||||
if os.IsPermission(errors.Cause(err)) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Try to handle other errnos.
|
||||
var errno error
|
||||
switch err := errors.Cause(err).(type) {
|
||||
case *os.PathError:
|
||||
errno = err.Err
|
||||
case *os.LinkError:
|
||||
errno = err.Err
|
||||
case *os.SyscallError:
|
||||
errno = err.Err
|
||||
}
|
||||
return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES
|
||||
}
|
||||
|
||||
func (m *Manager) getSubsystems() subsystemSet {
|
||||
return subsystemsLegacy
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
var c = m.Cgroups
|
||||
|
||||
d, err := getCgroupData(m.Cgroups, pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Paths = make(map[string]string)
|
||||
if c.Paths != nil {
|
||||
for name, path := range c.Paths {
|
||||
_, err := d.path(name)
|
||||
if err != nil {
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
m.Paths[name] = path
|
||||
}
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
for _, sys := range m.getSubsystems() {
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
p, err := d.path(sys.Name())
|
||||
if err != nil {
|
||||
// The non-presence of the devices subsystem is
|
||||
// considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
m.Paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(d); err != nil {
|
||||
// In the case of rootless (including euid=0 in userns), where an explicit cgroup path hasn't
|
||||
// been set, we don't bail on error in case of permission problems.
|
||||
// Cases where limits have been set (and we couldn't create our own
|
||||
// cgroup) are handled by Set.
|
||||
if isIgnorableError(m.Rootless, err) && m.Cgroups.Path == "" {
|
||||
delete(m.Paths, sys.Name())
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
if m.Cgroups == nil || m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := m.getSubsystems().Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
if container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups != nil && m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
for _, sys := range m.getSubsystems() {
|
||||
path := paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if m.Rootless && sys.Name() == "devices" {
|
||||
continue
|
||||
}
|
||||
// When m.Rootless is true, errors from the device subsystem are ignored because it is really not expected to work.
|
||||
// However, errors from other subsystems are not ignored.
|
||||
// see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error"
|
||||
if path == "" {
|
||||
// We never created a path for this cgroup, so we cannot set
|
||||
// limits for it (though we have already tried at this point).
|
||||
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
if m.Cgroups == nil {
|
||||
return errors.New("cannot toggle freezer: cgroups not configured for container")
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := m.getSubsystems().Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(dir, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetPids(paths["devices"])
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetAllPids(paths["devices"])
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return &cgroupData{
|
||||
root: root,
|
||||
innerPath: innerPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(raw.innerPath) {
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(parentPath, raw.innerPath), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) join(subsystem string) (string, error) {
|
||||
path, err := raw.path(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func removePath(p string, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if p != "" {
|
||||
return os.RemoveAll(p)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func CheckCpushares(path string, c uint64) error {
|
||||
var cpuShares uint64
|
||||
|
||||
if c == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
fd, err := os.Open(filepath.Join(path, "cpu.shares"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
_, err = fmt.Fscanf(fd, "%d", &cpuShares)
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
|
||||
if c > cpuShares {
|
||||
return fmt.Errorf("The maximum allowed cpu-shares is %d", cpuShares)
|
||||
} else if c < cpuShares {
|
||||
return fmt.Errorf("The minimum allowed cpu-shares is %d", cpuShares)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
|
@ -0,0 +1,297 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestInvalidCgroupPath(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Path: "../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidAbsoluteCgroupPath(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Path: "/../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidCgroupParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "../../../../../../../../../../some/path",
|
||||
Name: "name",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidAbsoluteCgroupParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "/../../../../../../../../../../some/path",
|
||||
Name: "name",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidCgroupName(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "parent",
|
||||
Name: "../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidAbsoluteCgroupName(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "parent",
|
||||
Name: "/../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidCgroupNameAndParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "../../../../../../../../../../some/path",
|
||||
Name: "../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
||||
|
||||
// XXX: Remove me after we get rid of configs.Cgroup.Name and configs.Cgroup.Parent.
|
||||
func TestInvalidAbsoluteCgroupNameAndParent(t *testing.T) {
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("cgroup v1 is not supported")
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup root: %v", err)
|
||||
}
|
||||
|
||||
config := &configs.Cgroup{
|
||||
Parent: "/../../../../../../../../../../some/path",
|
||||
Name: "/../../../../../../../../../../some/path",
|
||||
}
|
||||
|
||||
data, err := getCgroupData(config, 0)
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup data: %v", err)
|
||||
}
|
||||
|
||||
// Make sure the final innerPath doesn't go outside the cgroup mountpoint.
|
||||
if strings.HasPrefix(data.innerPath, "..") {
|
||||
t.Errorf("SECURITY: cgroup innerPath is outside cgroup mountpoint!")
|
||||
}
|
||||
|
||||
// Double-check, using an actual cgroup.
|
||||
deviceRoot := filepath.Join(root, "devices")
|
||||
devicePath, err := data.path("devices")
|
||||
if err != nil {
|
||||
t.Errorf("couldn't get cgroup path: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(devicePath, deviceRoot) {
|
||||
t.Errorf("SECURITY: cgroup path() is outside cgroup mountpoint!")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,238 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type BlkioGroup struct {
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Name() string {
|
||||
return "blkio"
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("blkio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.BlkioLeafWeight != 0 {
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(cgroup.Resources.BlkioLeafWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, wd := range cgroup.Resources.BlkioWeightDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.weight_device", wd.WeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("blkio"))
|
||||
}
|
||||
|
||||
/*
|
||||
examples:
|
||||
|
||||
blkio.sectors
|
||||
8:0 6792
|
||||
|
||||
blkio.io_service_bytes
|
||||
8:0 Read 1282048
|
||||
8:0 Write 2195456
|
||||
8:0 Sync 2195456
|
||||
8:0 Async 1282048
|
||||
8:0 Total 3477504
|
||||
Total 3477504
|
||||
|
||||
blkio.io_serviced
|
||||
8:0 Read 124
|
||||
8:0 Write 104
|
||||
8:0 Sync 104
|
||||
8:0 Async 124
|
||||
8:0 Total 228
|
||||
Total 228
|
||||
|
||||
blkio.io_queued
|
||||
8:0 Read 0
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 0
|
||||
Total 0
|
||||
*/
|
||||
|
||||
func splitBlkioStatLine(r rune) bool {
|
||||
return r == ' ' || r == ':'
|
||||
}
|
||||
|
||||
func getBlkioStat(path string) ([]cgroups.BlkioStatEntry, error) {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return blkioStats, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
// format: dev type amount
|
||||
fields := strings.FieldsFunc(sc.Text(), splitBlkioStatLine)
|
||||
if len(fields) < 3 {
|
||||
if len(fields) == 2 && fields[0] == "Total" {
|
||||
// skip total line
|
||||
continue
|
||||
} else {
|
||||
return nil, fmt.Errorf("Invalid line found while parsing %s: %s", path, sc.Text())
|
||||
}
|
||||
}
|
||||
|
||||
v, err := strconv.ParseUint(fields[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
major := v
|
||||
|
||||
v, err = strconv.ParseUint(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
minor := v
|
||||
|
||||
op := ""
|
||||
valueField := 2
|
||||
if len(fields) == 4 {
|
||||
op = fields[2]
|
||||
valueField = 3
|
||||
}
|
||||
v, err = strconv.ParseUint(fields[valueField], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v})
|
||||
}
|
||||
|
||||
return blkioStats, nil
|
||||
}
|
||||
|
||||
func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Try to read CFQ stats available on all CFQ enabled kernels first
|
||||
if blkioStats, err := getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err == nil && blkioStats != nil {
|
||||
return getCFQStats(path, stats)
|
||||
}
|
||||
return getStats(path, stats) // Use generic stats as fallback
|
||||
}
|
||||
|
||||
func getCFQStats(path string, stats *cgroups.Stats) error {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.sectors_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.SectorsRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_bytes_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_serviced_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_queued_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoQueuedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_service_time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceTimeRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_wait_time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoWaitTimeRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.io_merged_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoMergedRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.time_recursive")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoTimeRecursive = blkioStats
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getStats(path string, stats *cgroups.Stats) error {
|
||||
var blkioStats []cgroups.BlkioStatEntry
|
||||
var err error
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_service_bytes")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServiceBytesRecursive = blkioStats
|
||||
|
||||
if blkioStats, err = getBlkioStat(filepath.Join(path, "blkio.throttle.io_serviced")); err != nil {
|
||||
return err
|
||||
}
|
||||
stats.BlkioStats.IoServicedRecursive = blkioStats
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,637 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
sectorsRecursiveContents = `8:0 1024`
|
||||
serviceBytesRecursiveContents = `8:0 Read 100
|
||||
8:0 Write 200
|
||||
8:0 Sync 300
|
||||
8:0 Async 500
|
||||
8:0 Total 500
|
||||
Total 500`
|
||||
servicedRecursiveContents = `8:0 Read 10
|
||||
8:0 Write 40
|
||||
8:0 Sync 20
|
||||
8:0 Async 30
|
||||
8:0 Total 50
|
||||
Total 50`
|
||||
queuedRecursiveContents = `8:0 Read 1
|
||||
8:0 Write 4
|
||||
8:0 Sync 2
|
||||
8:0 Async 3
|
||||
8:0 Total 5
|
||||
Total 5`
|
||||
serviceTimeRecursiveContents = `8:0 Read 173959
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 173959
|
||||
8:0 Total 17395
|
||||
Total 17395`
|
||||
waitTimeRecursiveContents = `8:0 Read 15571
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 15571
|
||||
8:0 Total 15571`
|
||||
mergedRecursiveContents = `8:0 Read 5
|
||||
8:0 Write 10
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 15
|
||||
Total 15`
|
||||
timeRecursiveContents = `8:0 8`
|
||||
throttleServiceBytes = `8:0 Read 11030528
|
||||
8:0 Write 23
|
||||
8:0 Sync 42
|
||||
8:0 Async 11030528
|
||||
8:0 Total 11030528
|
||||
252:0 Read 11030528
|
||||
252:0 Write 23
|
||||
252:0 Sync 42
|
||||
252:0 Async 11030528
|
||||
252:0 Total 11030528
|
||||
Total 22061056`
|
||||
throttleServiced = `8:0 Read 164
|
||||
8:0 Write 23
|
||||
8:0 Sync 42
|
||||
8:0 Async 164
|
||||
8:0 Total 164
|
||||
252:0 Read 164
|
||||
252:0 Write 23
|
||||
252:0 Sync 42
|
||||
252:0 Async 164
|
||||
252:0 Total 164
|
||||
Total 328`
|
||||
)
|
||||
|
||||
func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
|
||||
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
|
||||
}
|
||||
|
||||
func TestBlkioSetWeight(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
weightBefore = 100
|
||||
weightAfter = 200
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.weight": strconv.Itoa(weightBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioWeight = weightAfter
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "blkio.weight")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.weight - %s", err)
|
||||
}
|
||||
|
||||
if value != weightAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.weight failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioSetWeightDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
weightDeviceBefore = "8:0 400"
|
||||
)
|
||||
|
||||
wd := configs.NewWeightDevice(8, 0, 500, 0)
|
||||
weightDeviceAfter := wd.WeightString()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.weight_device": weightDeviceBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.weight_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
|
||||
}
|
||||
|
||||
if value != weightDeviceAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
|
||||
}
|
||||
}
|
||||
|
||||
// regression #274
|
||||
func TestBlkioSetMultipleWeightDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
weightDeviceBefore = "8:0 400"
|
||||
)
|
||||
|
||||
wd1 := configs.NewWeightDevice(8, 0, 500, 0)
|
||||
wd2 := configs.NewWeightDevice(8, 16, 500, 0)
|
||||
// we cannot actually set and check both because normal ioutil.WriteFile
|
||||
// when writing to cgroup file will overwrite the whole file content instead
|
||||
// of updating it as the kernel is doing. Just check the second device
|
||||
// is present will suffice for the test to ensure multiple writes are done.
|
||||
weightDeviceAfter := wd2.WeightString()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.weight_device": weightDeviceBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.weight_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
|
||||
}
|
||||
|
||||
if value != weightDeviceAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify expected stats.
|
||||
expectedStats := cgroups.BlkioStats{}
|
||||
appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "")
|
||||
|
||||
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoSectorsFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoServicedFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoQueuedFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoServiceTimeFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoWaitTimeFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoMergedFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoTimeFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected to fail, but did not")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": "8:0 Read Write",
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected to fail, but did not")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNonCFQBlkioStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": "",
|
||||
"blkio.io_serviced_recursive": "",
|
||||
"blkio.io_queued_recursive": "",
|
||||
"blkio.sectors_recursive": "",
|
||||
"blkio.io_service_time_recursive": "",
|
||||
"blkio.io_wait_time_recursive": "",
|
||||
"blkio.io_merged_recursive": "",
|
||||
"blkio.time_recursive": "",
|
||||
"blkio.throttle.io_service_bytes": throttleServiceBytes,
|
||||
"blkio.throttle.io_serviced": throttleServiced,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify expected stats.
|
||||
expectedStats := cgroups.BlkioStats{}
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total")
|
||||
|
||||
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
|
||||
}
|
||||
|
||||
func TestBlkioSetThrottleReadBpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.read_bps_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.")
|
||||
}
|
||||
}
|
||||
func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.write_bps_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.")
|
||||
}
|
||||
}
|
||||
func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.read_iops_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.")
|
||||
}
|
||||
}
|
||||
func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.write_iops_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,118 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type CpuGroup struct {
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Name() string {
|
||||
return "cpu"
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
path, err := d.path("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(path, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpu cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
return cgroups.WriteCgroupProc(path, pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuQuota != 0 {
|
||||
if err := fscommon.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(cgroup.Resources.CpuQuota, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return s.SetRtSched(path, cgroup)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpu"))
|
||||
}
|
||||
|
||||
func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(path, "cpu.stat"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch t {
|
||||
case "nr_periods":
|
||||
stats.CpuStats.ThrottlingData.Periods = v
|
||||
|
||||
case "nr_throttled":
|
||||
stats.CpuStats.ThrottlingData.ThrottledPeriods = v
|
||||
|
||||
case "throttled_time":
|
||||
stats.CpuStats.ThrottlingData.ThrottledTime = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,210 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
func TestCpuSetShares(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
sharesBefore = 1024
|
||||
sharesAfter = 512
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.shares": strconv.Itoa(sharesBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuShares = sharesAfter
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.shares")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.shares - %s", err)
|
||||
}
|
||||
|
||||
if value != sharesAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.shares failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuSetBandWidth(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
quotaBefore = 8000
|
||||
quotaAfter = 5000
|
||||
periodBefore = 10000
|
||||
periodAfter = 7000
|
||||
rtRuntimeBefore = 8000
|
||||
rtRuntimeAfter = 5000
|
||||
rtPeriodBefore = 10000
|
||||
rtPeriodAfter = 7000
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
|
||||
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
|
||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuQuota = quotaAfter
|
||||
helper.CgroupData.config.Resources.CpuPeriod = periodAfter
|
||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
quota, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
|
||||
}
|
||||
if quota != quotaAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
|
||||
}
|
||||
|
||||
period, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
|
||||
}
|
||||
if period != periodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
|
||||
}
|
||||
rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
||||
}
|
||||
if rtRuntime != rtRuntimeAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||
}
|
||||
rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
||||
}
|
||||
if rtPeriod != rtPeriodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
nrPeriods = 2000
|
||||
nrThrottled = 200
|
||||
throttledTime = uint64(18446744073709551615)
|
||||
)
|
||||
|
||||
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
|
||||
nrPeriods, nrThrottled, throttledTime)
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.stat": cpuStatContent,
|
||||
})
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
expectedStats := cgroups.ThrottlingData{
|
||||
Periods: nrPeriods,
|
||||
ThrottledPeriods: nrThrottled,
|
||||
ThrottledTime: throttledTime}
|
||||
|
||||
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
|
||||
}
|
||||
|
||||
func TestNoCpuStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal("Expected not to fail, but did")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidCpuStat(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
cpuStatContent := `nr_periods 2000
|
||||
nr_throttled 200
|
||||
throttled_time fortytwo`
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.stat": cpuStatContent,
|
||||
})
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failed stat parsing.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuSetRtSchedAtApply(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
rtRuntimeBefore = 0
|
||||
rtRuntimeAfter = 5000
|
||||
rtPeriodBefore = 0
|
||||
rtPeriodAfter = 7000
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.ApplyDir(helper.CgroupPath, helper.CgroupData.config, 1234); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rtRuntime, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
||||
}
|
||||
if rtRuntime != rtRuntimeAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||
}
|
||||
rtPeriod, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
||||
}
|
||||
if rtPeriod != rtPeriodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||
}
|
||||
pid, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "cgroup.procs")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cgroup.procs - %s", err)
|
||||
}
|
||||
if pid != 1234 {
|
||||
t.Fatal("Got the wrong value, set cgroup.procs failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,122 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupCpuacctStat = "cpuacct.stat"
|
||||
nanosecondsInSecond = 1000000000
|
||||
)
|
||||
|
||||
var clockTicks = uint64(system.GetClockTicks())
|
||||
|
||||
type CpuacctGroup struct {
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Name() string {
|
||||
return "cpuacct"
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Apply(d *cgroupData) error {
|
||||
// we just want to join this group even though we don't set anything
|
||||
if _, err := d.join("cpuacct"); err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuacct"))
|
||||
}
|
||||
|
||||
func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
percpuUsage, err := getPercpuUsage(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
stats.CpuStats.CpuUsage.TotalUsage = totalUsage
|
||||
stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns user and kernel usage breakdown in nanoseconds.
|
||||
func getCpuUsageBreakdown(path string) (uint64, uint64, error) {
|
||||
userModeUsage := uint64(0)
|
||||
kernelModeUsage := uint64(0)
|
||||
const (
|
||||
userField = "user"
|
||||
systemField = "system"
|
||||
)
|
||||
|
||||
// Expected format:
|
||||
// user <usage in ticks>
|
||||
// system <usage in ticks>
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, cgroupCpuacctStat))
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
fields := strings.Fields(string(data))
|
||||
if len(fields) < 4 {
|
||||
return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat))
|
||||
}
|
||||
if fields[0] != userField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField)
|
||||
}
|
||||
if fields[2] != systemField {
|
||||
return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField)
|
||||
}
|
||||
if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil
|
||||
}
|
||||
|
||||
func getPercpuUsage(path string) ([]uint64, error) {
|
||||
percpuUsage := []uint64{}
|
||||
data, err := ioutil.ReadFile(filepath.Join(path, "cpuacct.usage_percpu"))
|
||||
if err != nil {
|
||||
return percpuUsage, err
|
||||
}
|
||||
for _, value := range strings.Fields(string(data)) {
|
||||
value, err := strconv.ParseUint(value, 10, 64)
|
||||
if err != nil {
|
||||
return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err)
|
||||
}
|
||||
percpuUsage = append(percpuUsage, value)
|
||||
}
|
||||
return percpuUsage, nil
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type CpusetGroup struct {
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Name() string {
|
||||
return "cpuset"
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Apply(d *cgroupData) error {
|
||||
dir, err := d.path("cpuset")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(dir, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(path, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("cpuset"))
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpuset cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
// to ensure cpuset configs are set before moving task into the
|
||||
// cgroup.
|
||||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatibility.
|
||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
return cgroups.WriteCgroupProc(dir, pid)
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) getSubsystemSettings(parent string) (cpus []byte, mems []byte, err error) {
|
||||
if cpus, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.cpus")); err != nil {
|
||||
return
|
||||
}
|
||||
if mems, err = ioutil.ReadFile(filepath.Join(parent, "cpuset.mems")); err != nil {
|
||||
return
|
||||
}
|
||||
return cpus, mems, nil
|
||||
}
|
||||
|
||||
// ensureParent makes sure that the parent directory of current is created
|
||||
// and populated with the proper cpus and mems files copied from
|
||||
// it's parent.
|
||||
func (s *CpusetGroup) ensureParent(current, root string) error {
|
||||
parent := filepath.Dir(current)
|
||||
if libcontainerUtils.CleanPath(parent) == root {
|
||||
return nil
|
||||
}
|
||||
// Avoid infinite recursion.
|
||||
if parent == current {
|
||||
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
|
||||
}
|
||||
if err := s.ensureParent(parent, root); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(current, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(current, parent)
|
||||
}
|
||||
|
||||
// copyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent
|
||||
// directory to the current directory if the file's contents are 0
|
||||
func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
||||
var (
|
||||
err error
|
||||
currentCpus, currentMems []byte
|
||||
parentCpus, parentMems []byte
|
||||
)
|
||||
|
||||
if currentCpus, currentMems, err = s.getSubsystemSettings(current); err != nil {
|
||||
return err
|
||||
}
|
||||
if parentCpus, parentMems, err = s.getSubsystemSettings(parent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.isEmpty(currentCpus) {
|
||||
if err := fscommon.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if s.isEmpty(currentMems) {
|
||||
if err := fscommon.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) isEmpty(b []byte) bool {
|
||||
return len(bytes.Trim(b, "\n")) == 0
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
|
||||
if err := s.Set(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
func TestCpusetSetCpus(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpuset", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
cpusBefore = "0"
|
||||
cpusAfter = "1-3"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpuset.cpus": cpusBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
|
||||
cpuset := &CpusetGroup{}
|
||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.cpus")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
|
||||
}
|
||||
|
||||
if value != cpusAfter {
|
||||
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpusetSetMems(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpuset", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memsBefore = "0"
|
||||
memsAfter = "1"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpuset.mems": memsBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpusetMems = memsAfter
|
||||
cpuset := &CpusetGroup{}
|
||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "cpuset.mems")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpuset.mems - %s", err)
|
||||
}
|
||||
|
||||
if value != memsAfter {
|
||||
t.Fatal("Got the wrong value, set cpuset.mems failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Name() string {
|
||||
return "devices"
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("devices")
|
||||
if err != nil {
|
||||
// We will return error even it's `not found` error, devices
|
||||
// cgroup is hard requirement for container's security.
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
file := "devices.deny"
|
||||
if dev.Allow {
|
||||
file = "devices.allow"
|
||||
}
|
||||
if err := fscommon.WriteFile(path, file, dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "devices.allow", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
if err := fscommon.WriteFile(path, "devices.deny", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("devices"))
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
allowedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
allowedList = "c 1:5 rwm"
|
||||
deniedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
deniedList = "c 1:3 rwm"
|
||||
)
|
||||
|
||||
func TestDevicesSetAllow(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.deny": "a",
|
||||
})
|
||||
allowAllDevices := false
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
|
||||
if value != allowedList {
|
||||
t.Fatal("Got the wrong value, set devices.allow failed.")
|
||||
}
|
||||
|
||||
// When AllowAllDevices is nil, devices.allow file should not be modified.
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = nil
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
if value != allowedList {
|
||||
t.Fatal("devices policy shouldn't have changed on AllowedAllDevices=nil.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDevicesSetDeny(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.allow": "a",
|
||||
})
|
||||
|
||||
allowAllDevices := true
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = &allowAllDevices
|
||||
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "devices.deny")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.deny - %s", err)
|
||||
}
|
||||
|
||||
if value != deniedList {
|
||||
t.Fatal("Got the wrong value, set devices.deny failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type FreezerGroup struct {
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Name() string {
|
||||
return "freezer"
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("freezer")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
switch cgroup.Resources.Freezer {
|
||||
case configs.Frozen, configs.Thawed:
|
||||
for {
|
||||
// In case this loop does not exit because it doesn't get the expected
|
||||
// state, let's write again this state, hoping it's going to be properly
|
||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
||||
// a state change that would never happen.
|
||||
if err := fscommon.WriteFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := fscommon.ReadFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
||||
break
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("Invalid argument '%s' to freezer.state", string(cgroup.Resources.Freezer))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("freezer"))
|
||||
}
|
||||
|
||||
func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestFreezerSetState(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("freezer", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"freezer.state": string(configs.Frozen),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Freezer = configs.Thawed
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "freezer.state")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse freezer.state - %s", err)
|
||||
}
|
||||
if value != string(configs.Thawed) {
|
||||
t.Fatal("Got the wrong value, set freezer.state failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFreezerSetInvalidState(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("freezer", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
invalidArg configs.FreezerState = "Invalid"
|
||||
)
|
||||
|
||||
helper.CgroupData.config.Resources.Freezer = invalidArg
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
|
||||
t.Fatal("Failed to return invalid argument error")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
// +build !linux
|
||||
|
||||
package fs
|
|
@ -0,0 +1,72 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type HugetlbGroup struct {
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Name() string {
|
||||
return "hugetlb"
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("hugetlb")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, hugetlb := range cgroup.Resources.HugetlbLimit {
|
||||
if err := fscommon.WriteFile(path, strings.Join([]string{"hugetlb", hugetlb.Pagesize, "limit_in_bytes"}, "."), strconv.FormatUint(hugetlb.Limit, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("hugetlb"))
|
||||
}
|
||||
|
||||
func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
hugetlbStats := cgroups.HugetlbStats{}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
usage := strings.Join([]string{"hugetlb", pageSize, "usage_in_bytes"}, ".")
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
hugetlbStats.Usage = value
|
||||
|
||||
maxUsage := strings.Join([]string{"hugetlb", pageSize, "max_usage_in_bytes"}, ".")
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
hugetlbStats.MaxUsage = value
|
||||
|
||||
failcnt := strings.Join([]string{"hugetlb", pageSize, "failcnt"}, ".")
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
hugetlbStats.Failcnt = value
|
||||
|
||||
stats.HugetlbStats[pageSize] = hugetlbStats
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,155 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
hugetlbUsageContents = "128\n"
|
||||
hugetlbMaxUsageContents = "256\n"
|
||||
hugetlbFailcnt = "100\n"
|
||||
)
|
||||
|
||||
var (
|
||||
usage = "hugetlb.%s.usage_in_bytes"
|
||||
limit = "hugetlb.%s.limit_in_bytes"
|
||||
maxUsage = "hugetlb.%s.max_usage_in_bytes"
|
||||
failcnt = "hugetlb.%s.failcnt"
|
||||
)
|
||||
|
||||
func TestHugetlbSetHugetlb(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
hugetlbBefore = 256
|
||||
hugetlbAfter = 512
|
||||
)
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
|
||||
})
|
||||
}
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
|
||||
{
|
||||
Pagesize: pageSize,
|
||||
Limit: hugetlbAfter,
|
||||
},
|
||||
}
|
||||
hugetlb := &HugetlbGroup{}
|
||||
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
limit := fmt.Sprintf(limit, pageSize)
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, limit)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse %s - %s", limit, err)
|
||||
}
|
||||
if value != hugetlbAfter {
|
||||
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
|
||||
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
maxUsage: hugetlbMaxUsageContents,
|
||||
})
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): "bad",
|
||||
maxUsage: hugetlbMaxUsageContents,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
usage: hugetlbUsageContents,
|
||||
maxUsage: "bad",
|
||||
})
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,62 @@
|
|||
// +build linux,!nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // for Errno type only
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Ensure that kernel memory is available in this kernel build. If it
|
||||
// isn't, we just ignore it because EnableKernelMemoryAccounting is
|
||||
// automatically called for all memory limits.
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
return nil
|
||||
}
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// We have specifically been asked to set a kmem limit. If the kernel
|
||||
// doesn't support it we *must* error out.
|
||||
return errors.New("kernel memory accounting not supported by this kernel")
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
// +build linux,nokmem
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
return errors.New("kernel memory accounting disabled in this runc build")
|
||||
}
|
|
@ -0,0 +1,271 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Name() string {
|
||||
return "memory"
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
||||
path, err := d.path("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
} else if path == "" {
|
||||
return nil
|
||||
}
|
||||
if memoryAssigned(d.config) {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Only enable kernel memory accouting when this cgroup
|
||||
// is created by libcontainer, otherwise we might get
|
||||
// error when people use `cgroupsPath` to join an existed
|
||||
// cgroup whose kernel memory is not initialized.
|
||||
if err := EnableKernelMemoryAccounting(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(path)
|
||||
}
|
||||
}()
|
||||
|
||||
// We need to join memory cgroup after set memory limits, because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
_, err = d.join("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// If the memory update is set to -1 we should also
|
||||
// set swap to -1, it means unlimited memory.
|
||||
if cgroup.Resources.Memory == -1 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
cgroup.Resources.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// When update memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if err := setMemoryAndSwap(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
||||
if err := fscommon.WriteFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.OomKillDisable {
|
||||
if err := fscommon.WriteFile(path, "memory.oom_control", "1"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := fscommon.WriteFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("memory"))
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(path, "memory.stat"))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryData(path, "memsw")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
kernelUsage, err := getMemoryData(path, "kmem")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelUsage = kernelUsage
|
||||
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
|
||||
value, err := fscommon.GetCgroupParamUint(path, useHierarchy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func memoryAssigned(cgroup *configs.Cgroup) bool {
|
||||
return cgroup.Resources.Memory != 0 ||
|
||||
cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.MemorySwap > 0 ||
|
||||
cgroup.Resources.KernelMemory > 0 ||
|
||||
cgroup.Resources.KernelMemoryTCP > 0 ||
|
||||
cgroup.Resources.OomKillDisable ||
|
||||
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "usage_in_bytes"}, ".")
|
||||
maxUsage := strings.Join([]string{moduleName, "max_usage_in_bytes"}, ".")
|
||||
failcnt := strings.Join([]string{moduleName, "failcnt"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "limit_in_bytes"}, ".")
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err)
|
||||
}
|
||||
memoryData.MaxUsage = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, failcnt)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err)
|
||||
}
|
||||
memoryData.Failcnt = value
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
|
@ -0,0 +1,456 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
const (
|
||||
memoryStatContents = `cache 512
|
||||
rss 1024`
|
||||
memoryUsageContents = "2048\n"
|
||||
memoryMaxUsageContents = "4096\n"
|
||||
memoryFailcnt = "100\n"
|
||||
memoryLimitContents = "8192\n"
|
||||
memoryUseHierarchyContents = "1\n"
|
||||
)
|
||||
|
||||
func TestMemorySetMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memoryBefore = 314572800 // 300M
|
||||
memoryAfter = 524288000 // 500M
|
||||
reservationBefore = 209715200 // 200M
|
||||
reservationAfter = 314572800 // 300M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
}
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != reservationAfter {
|
||||
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetMemoryswap(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memoryswapBefore = 314572800 // 300M
|
||||
memoryswapAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetMemoryLargerThanSwap(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memoryBefore = 314572800 // 300M
|
||||
memoryswapBefore = 524288000 // 500M
|
||||
memoryAfter = 629145600 // 600M
|
||||
memoryswapAfter = 838860800 // 800M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
// Set will call getMemoryData when memory and swap memory are
|
||||
// both set, fake these fields so we don't get error.
|
||||
"memory.usage_in_bytes": "0",
|
||||
"memory.max_usage_in_bytes": "0",
|
||||
"memory.failcnt": "0",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memoryBefore = 629145600 // 600M
|
||||
memoryswapBefore = 838860800 // 800M
|
||||
memoryAfter = 314572800 // 300M
|
||||
memoryswapAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
// Set will call getMemoryData when memory and swap memory are
|
||||
// both set, fake these fields so we don't get error.
|
||||
"memory.usage_in_bytes": "0",
|
||||
"memory.max_usage_in_bytes": "0",
|
||||
"memory.failcnt": "0",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
}
|
||||
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetKernelMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
kernelMemoryBefore = 314572800 // 300M
|
||||
kernelMemoryAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != kernelMemoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetKernelMemoryTCP(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
kernelMemoryTCPBefore = 314572800 // 300M
|
||||
kernelMemoryTCPAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.kmem.tcp.limit_in_bytes": strconv.Itoa(kernelMemoryTCPBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.KernelMemoryTCP = kernelMemoryTCPAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.kmem.tcp.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.kmem.tcp.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != kernelMemoryTCPAfter {
|
||||
t.Fatal("Got the wrong value, set memory.kmem.tcp.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
swappinessBefore := 60 //default is 60
|
||||
swappinessAfter := uint64(0)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.swappiness": strconv.Itoa(swappinessBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.MemorySwappiness = &swappinessAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.swappiness")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.swappiness - %s", err)
|
||||
}
|
||||
if value != swappinessAfter {
|
||||
t.Fatalf("Got the wrong value (%d), set memory.swappiness = %d failed.", value, swappinessAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.failcnt": memoryFailcnt,
|
||||
"memory.memsw.usage_in_bytes": memoryUsageContents,
|
||||
"memory.memsw.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.memsw.failcnt": memoryFailcnt,
|
||||
"memory.memsw.limit_in_bytes": memoryLimitContents,
|
||||
"memory.kmem.usage_in_bytes": memoryUsageContents,
|
||||
"memory.kmem.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.kmem.failcnt": memoryFailcnt,
|
||||
"memory.kmem.limit_in_bytes": memoryLimitContents,
|
||||
"memory.use_hierarchy": memoryUseHierarchyContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}, UseHierarchy: true}
|
||||
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": "rss rss",
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": "bad",
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": "bad",
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": "bad",
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetOomControl(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
oomKillDisable = 1 // disable oom killer, default is 0
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.oom_control": strconv.Itoa(oomKillDisable),
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.oom_control")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.oom_control - %s", err)
|
||||
}
|
||||
|
||||
if value != oomKillDisable {
|
||||
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NameGroup struct {
|
||||
GroupName string
|
||||
Join bool
|
||||
}
|
||||
|
||||
func (s *NameGroup) Name() string {
|
||||
return s.GroupName
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(d *cgroupData) error {
|
||||
if s.Join {
|
||||
// ignore errors if the named cgroup does not exist
|
||||
d.join(s.GroupName)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) Remove(d *cgroupData) error {
|
||||
if s.Join {
|
||||
removePath(d.path(s.GroupName))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetClsGroup struct {
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Name() string {
|
||||
return "net_cls"
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("net_cls")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != 0 {
|
||||
if err := fscommon.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("net_cls"))
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
const (
|
||||
classidBefore = 0x100002
|
||||
classidAfter = 0x100001
|
||||
)
|
||||
|
||||
func TestNetClsSetClassid(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("net_cls", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"net_cls.classid": strconv.FormatUint(classidBefore, 10),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.NetClsClassid = classidAfter
|
||||
netcls := &NetClsGroup{}
|
||||
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// As we are in mock environment, we can't get correct value of classid from
|
||||
// net_cls.classid.
|
||||
// So. we just judge if we successfully write classid into file
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "net_cls.classid")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse net_cls.classid - %s", err)
|
||||
}
|
||||
if value != classidAfter {
|
||||
t.Fatal("Got the wrong value, set net_cls.classid failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type NetPrioGroup struct {
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Name() string {
|
||||
return "net_prio"
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("net_prio")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
for _, prioMap := range cgroup.Resources.NetPrioIfpriomap {
|
||||
if err := fscommon.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("net_prio"))
|
||||
}
|
||||
|
||||
func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
prioMap = []*configs.IfPrioMap{
|
||||
{
|
||||
Interface: "test",
|
||||
Priority: 5,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func TestNetPrioSetIfPrio(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("net_prio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
|
||||
netPrio := &NetPrioGroup{}
|
||||
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
|
||||
}
|
||||
if !strings.Contains(value, "test 5") {
|
||||
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PerfEventGroup struct {
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Name() string {
|
||||
return "perf_event"
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Apply(d *cgroupData) error {
|
||||
// we just want to join this group even though we don't set anything
|
||||
if _, err := d.join("perf_event"); err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("perf_event"))
|
||||
}
|
||||
|
||||
func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type PidsGroup struct {
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Name() string {
|
||||
return "pids"
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Apply(d *cgroupData) error {
|
||||
_, err := d.join("pids")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(path, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *PidsGroup) Remove(d *cgroupData) error {
|
||||
return removePath(d.path("pids"))
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
)
|
||||
|
||||
const (
|
||||
maxUnlimited = -1
|
||||
maxLimited = 1024
|
||||
)
|
||||
|
||||
func TestPidsSetMax(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.max": "max",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxLimited
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(helper.CgroupPath, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
if value != maxLimited {
|
||||
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsSetUnlimited(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := fscommon.GetCgroupParamString(helper.CgroupPath, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
if value != "max" {
|
||||
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.current": strconv.Itoa(1337),
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
pids := &PidsGroup{}
|
||||
stats := *cgroups.NewStats()
|
||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if stats.PidsStats.Current != 1337 {
|
||||
t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current)
|
||||
}
|
||||
|
||||
if stats.PidsStats.Limit != maxLimited {
|
||||
t.Fatalf("Expected %d, got %d for pids.max", maxLimited, stats.PidsStats.Limit)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsStatsUnlimited(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.current": strconv.Itoa(4096),
|
||||
"pids.max": "max",
|
||||
})
|
||||
|
||||
pids := &PidsGroup{}
|
||||
stats := *cgroups.NewStats()
|
||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if stats.PidsStats.Current != 4096 {
|
||||
t.Fatalf("Expected %d, got %d for pids.current", 4096, stats.PidsStats.Current)
|
||||
}
|
||||
|
||||
if stats.PidsStats.Limit != 0 {
|
||||
t.Fatalf("Expected %d, got %d for pids.max", 0, stats.PidsStats.Limit)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
|
||||
if len(expected) != len(actual) {
|
||||
return fmt.Errorf("blkioStatEntries length do not match")
|
||||
}
|
||||
for i, expValue := range expected {
|
||||
actValue := actual[i]
|
||||
if expValue != actValue {
|
||||
return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
|
||||
if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
|
||||
logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
|
||||
logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
|
||||
logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
|
||||
logrus.Printf("blkio SectorsRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil {
|
||||
logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil {
|
||||
logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
|
||||
logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil {
|
||||
logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
|
||||
if expected != actual {
|
||||
logrus.Printf("Expected throttling data %v but found %v\n", expected, actual)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) {
|
||||
if expected != actual {
|
||||
logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
|
||||
expectMemoryDataEquals(t, expected.Usage, actual.Usage)
|
||||
expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage)
|
||||
expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage)
|
||||
|
||||
if expected.UseHierarchy != actual.UseHierarchy {
|
||||
logrus.Printf("Expected memory use hierarchy %v, but found %v\n", expected.UseHierarchy, actual.UseHierarchy)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
for key, expValue := range expected.Stats {
|
||||
actValue, ok := actual.Stats[key]
|
||||
if !ok {
|
||||
logrus.Printf("Expected memory stat key %s not found\n", key)
|
||||
t.Fail()
|
||||
}
|
||||
if expValue != actValue {
|
||||
logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) {
|
||||
if expected.Usage != actual.Usage {
|
||||
logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
|
||||
t.Fail()
|
||||
}
|
||||
if expected.MaxUsage != actual.MaxUsage {
|
||||
logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
|
||||
t.Fail()
|
||||
}
|
||||
if expected.Failcnt != actual.Failcnt {
|
||||
logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
|
||||
t.Fail()
|
||||
}
|
||||
if expected.Limit != actual.Limit {
|
||||
logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
|
@ -0,0 +1,68 @@
|
|||
// +build linux
|
||||
|
||||
/*
|
||||
Utility for testing cgroup operations.
|
||||
|
||||
Creates a mock of the cgroup filesystem for the duration of the test.
|
||||
*/
|
||||
package fs
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type cgroupTestUtil struct {
|
||||
// cgroup data to use in tests.
|
||||
CgroupData *cgroupData
|
||||
|
||||
// Path to the mock cgroup directory.
|
||||
CgroupPath string
|
||||
|
||||
// Temporary directory to store mock cgroup filesystem.
|
||||
tempDir string
|
||||
t *testing.T
|
||||
}
|
||||
|
||||
// Creates a new test util for the specified subsystem
|
||||
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
|
||||
d := &cgroupData{
|
||||
config: &configs.Cgroup{},
|
||||
}
|
||||
d.config.Resources = &configs.Resources{}
|
||||
tempDir, err := ioutil.TempDir("", "cgroup_test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
d.root = tempDir
|
||||
testCgroupPath := filepath.Join(d.root, subsystem)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Ensure the full mock cgroup path exists.
|
||||
err = os.MkdirAll(testCgroupPath, 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
|
||||
}
|
||||
|
||||
func (c *cgroupTestUtil) cleanup() {
|
||||
os.RemoveAll(c.tempDir)
|
||||
}
|
||||
|
||||
// Write the specified contents on the mock of the specified cgroup files.
|
||||
func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
|
||||
for file, contents := range fileContents {
|
||||
err := fscommon.WriteFile(c.CgroupPath, file, contents)
|
||||
if err != nil {
|
||||
c.t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setCpu(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuWeight != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(cgroup.Resources.CpuWeight, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.CpuMax != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpu.max", cgroup.Resources.CpuMax); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
func statCpu(dirPath string, stats *cgroups.Stats) error {
|
||||
f, err := os.Open(filepath.Join(dirPath, "cpu.stat"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
sc := bufio.NewScanner(f)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch t {
|
||||
case "usage_usec":
|
||||
stats.CpuStats.CpuUsage.TotalUsage = v * 1000
|
||||
|
||||
case "user_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000
|
||||
|
||||
case "system_usec":
|
||||
stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setCpuset(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpusetCpus != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.cpus", cgroup.Resources.CpusetCpus); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpusetMems != "" {
|
||||
if err := fscommon.WriteFile(dirPath, "cpuset.mems", cgroup.Resources.CpusetMems); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
const UnifiedMountpoint = "/sys/fs/cgroup"
|
||||
|
||||
func defaultDirPath(c *configs.Cgroup) (string, error) {
|
||||
if (c.Name != "" || c.Parent != "") && c.Path != "" {
|
||||
return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c)
|
||||
}
|
||||
if len(c.Paths) != 0 {
|
||||
// never set by specconv
|
||||
return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c)
|
||||
}
|
||||
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
ownCgroup, err := parseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName, ownCgroup)
|
||||
}
|
||||
|
||||
func _defaultDirPath(root, cgPath, cgParent, cgName, ownCgroup string) (string, error) {
|
||||
if (cgName != "" || cgParent != "") && cgPath != "" {
|
||||
return "", errors.New("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
if filepath.IsAbs(innerPath) {
|
||||
return filepath.Join(root, innerPath), nil
|
||||
}
|
||||
return filepath.Join(root, ownCgroup, innerPath), nil
|
||||
}
|
||||
|
||||
// parseCgroupFile parses /proc/PID/cgroup file and return string
|
||||
func parseCgroupFile(path string) (string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
func parseCgroupFromReader(r io.Reader) (string, error) {
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
var (
|
||||
text = s.Text()
|
||||
parts = strings.SplitN(text, ":", 3)
|
||||
)
|
||||
if len(parts) < 3 {
|
||||
return "", errors.Errorf("invalid cgroup entry: %q", text)
|
||||
}
|
||||
// text is like "0::/user.slice/user-1001.slice/session-1.scope"
|
||||
if parts[0] == "0" && parts[1] == "" {
|
||||
return parts[2], nil
|
||||
}
|
||||
}
|
||||
return "", errors.New("cgroup path not found")
|
||||
}
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
Copyright The containerd Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseCgroupFromReader(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"0::/user.slice/user-1001.slice/session-1.scope\n": "/user.slice/user-1001.slice/session-1.scope",
|
||||
"2:cpuset:/foo\n1:name=systemd:/\n": "",
|
||||
"2:cpuset:/foo\n1:name=systemd:/\n0::/user.slice/user-1001.slice/session-1.scope\n": "/user.slice/user-1001.slice/session-1.scope",
|
||||
}
|
||||
for s, expected := range cases {
|
||||
g, err := parseCgroupFromReader(strings.NewReader(s))
|
||||
if expected != "" {
|
||||
if string(g) != expected {
|
||||
t.Errorf("expected %q, got %q", expected, string(g))
|
||||
}
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
} else {
|
||||
if err == nil {
|
||||
t.Error("error is expected")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDefaultDirPath(t *testing.T) {
|
||||
root := "/sys/fs/cgroup"
|
||||
cases := []struct {
|
||||
cgPath string
|
||||
cgParent string
|
||||
cgName string
|
||||
ownCgroup string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
cgPath: "/foo/bar",
|
||||
ownCgroup: "/apple/banana",
|
||||
expected: "/sys/fs/cgroup/foo/bar",
|
||||
},
|
||||
{
|
||||
cgPath: "foo/bar",
|
||||
ownCgroup: "/apple/banana",
|
||||
expected: "/sys/fs/cgroup/apple/banana/foo/bar",
|
||||
},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got, err := _defaultDirPath(root, c.cgPath, c.cgParent, c.cgName, c.ownCgroup)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got != c.expected {
|
||||
t.Fatalf("expected %q, got %q", c.expected, got)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,73 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func isRWM(cgroupPermissions string) bool {
|
||||
r := false
|
||||
w := false
|
||||
m := false
|
||||
for _, rn := range cgroupPermissions {
|
||||
switch rn {
|
||||
case 'r':
|
||||
r = true
|
||||
case 'w':
|
||||
w = true
|
||||
case 'm':
|
||||
m = true
|
||||
}
|
||||
}
|
||||
return r && w && m
|
||||
}
|
||||
|
||||
// the logic is from crun
|
||||
// https://github.com/containers/crun/blob/0.10.2/src/libcrun/cgroup.c#L1644-L1652
|
||||
func canSkipEBPFError(cgroup *configs.Cgroup) bool {
|
||||
for _, dev := range cgroup.Resources.Devices {
|
||||
if dev.Allow || !isRWM(dev.Permissions) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func setDevices(dirPath string, cgroup *configs.Cgroup) error {
|
||||
devices := cgroup.Devices
|
||||
if allowAllDevices := cgroup.Resources.AllowAllDevices; allowAllDevices != nil {
|
||||
// never set by OCI specconv, but *allowAllDevices=false is still used by the integration test
|
||||
if *allowAllDevices == true {
|
||||
return errors.New("libcontainer AllowAllDevices is not supported, use Devices")
|
||||
}
|
||||
for _, ad := range cgroup.Resources.AllowedDevices {
|
||||
d := *ad
|
||||
d.Allow = true
|
||||
devices = append(devices, &d)
|
||||
}
|
||||
}
|
||||
if len(cgroup.Resources.DeniedDevices) != 0 {
|
||||
// never set by OCI specconv
|
||||
return errors.New("libcontainer DeniedDevices is not supported, use Devices")
|
||||
}
|
||||
insts, license, err := devicefilter.DeviceFilter(devices)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0600)
|
||||
if err != nil {
|
||||
return errors.Errorf("cannot get dir FD for %s", dirPath)
|
||||
}
|
||||
defer unix.Close(dirFD)
|
||||
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
|
||||
if !canSkipEBPFError(cgroup) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func setFreezer(dirPath string, state configs.FreezerState) error {
|
||||
var desired int
|
||||
switch state {
|
||||
case configs.Undefined:
|
||||
return nil
|
||||
case configs.Frozen:
|
||||
desired = 1
|
||||
case configs.Thawed:
|
||||
desired = 0
|
||||
default:
|
||||
return errors.Errorf("unknown freezer state %+v", state)
|
||||
}
|
||||
supportedErr := supportsFreezer(dirPath)
|
||||
if supportedErr != nil && desired != 0 {
|
||||
// can ignore error if desired == 1
|
||||
return errors.Wrap(supportedErr, "freezer not supported")
|
||||
}
|
||||
return freezeWithInt(dirPath, desired)
|
||||
}
|
||||
|
||||
func supportsFreezer(dirPath string) error {
|
||||
_, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
return err
|
||||
}
|
||||
|
||||
// freeze writes desired int to "cgroup.freeze".
|
||||
func freezeWithInt(dirPath string, desired int) error {
|
||||
desiredS := strconv.Itoa(desired)
|
||||
if err := fscommon.WriteFile(dirPath, "cgroup.freeze", desiredS); err != nil {
|
||||
return err
|
||||
}
|
||||
got, err := fscommon.ReadFile(dirPath, "cgroup.freeze")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if gotS := strings.TrimSpace(string(got)); gotS != desiredS {
|
||||
return errors.Errorf("expected \"cgroup.freeze\" in %q to be %q, got %q", dirPath, desiredS, gotS)
|
||||
}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,214 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
// NewManager creates a manager for cgroup v2 unified hierarchy.
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope".
|
||||
// If dirPath is empty, it is automatically set using config.
|
||||
func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) {
|
||||
if config == nil {
|
||||
config = &configs.Cgroup{}
|
||||
}
|
||||
if dirPath != "" {
|
||||
if filepath.Clean(dirPath) != dirPath || !filepath.IsAbs(dirPath) {
|
||||
return nil, errors.Errorf("invalid dir path %q", dirPath)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
dirPath, err = defaultDirPath(config)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
controllers, err := detectControllers(dirPath)
|
||||
if err != nil && !rootless {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
m := &manager{
|
||||
config: config,
|
||||
dirPath: dirPath,
|
||||
controllers: controllers,
|
||||
rootless: rootless,
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func detectControllers(dirPath string) (map[string]struct{}, error) {
|
||||
if err := os.MkdirAll(dirPath, 0755); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersPath, err := securejoin.SecureJoin(dirPath, "cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersData, err := ioutil.ReadFile(controllersPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
controllersFields := strings.Fields(string(controllersData))
|
||||
controllers := make(map[string]struct{}, len(controllersFields))
|
||||
for _, c := range controllersFields {
|
||||
controllers[c] = struct{}{}
|
||||
}
|
||||
return controllers, nil
|
||||
}
|
||||
|
||||
type manager struct {
|
||||
config *configs.Cgroup
|
||||
// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope"
|
||||
dirPath string
|
||||
// controllers is content of "cgroup.controllers" file.
|
||||
// excludes pseudo-controllers ("devices" and "freezer").
|
||||
controllers map[string]struct{}
|
||||
rootless bool
|
||||
}
|
||||
|
||||
func (m *manager) Apply(pid int) error {
|
||||
if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil && !m.rootless {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetPids() ([]int, error) {
|
||||
return cgroups.GetPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetAllPids() ([]int, error) {
|
||||
return cgroups.GetAllPids(m.dirPath)
|
||||
}
|
||||
|
||||
func (m *manager) GetStats() (*cgroups.Stats, error) {
|
||||
var (
|
||||
st cgroups.Stats
|
||||
errs []error
|
||||
)
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := statPids(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
} else {
|
||||
if err := statPidsWithoutController(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kenrel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := statMemory(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := statIo(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := statCpu(m.dirPath, &st); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return &st, errors.Errorf("error while statting cgroup v2: %+v", errs)
|
||||
}
|
||||
return &st, nil
|
||||
}
|
||||
|
||||
func (m *manager) Freeze(state configs.FreezerState) error {
|
||||
if err := setFreezer(m.dirPath, state); err != nil {
|
||||
return err
|
||||
}
|
||||
m.config.Resources.Freezer = state
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) Destroy() error {
|
||||
return os.RemoveAll(m.dirPath)
|
||||
}
|
||||
|
||||
// GetPaths is for compatibility purpose and should be removed in future
|
||||
func (m *manager) GetPaths() map[string]string {
|
||||
paths := map[string]string{
|
||||
// pseudo-controller for compatibility
|
||||
"devices": m.dirPath,
|
||||
"freezer": m.dirPath,
|
||||
}
|
||||
for c := range m.controllers {
|
||||
paths[c] = m.dirPath
|
||||
}
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *manager) GetUnifiedPath() (string, error) {
|
||||
return m.dirPath, nil
|
||||
}
|
||||
|
||||
func (m *manager) Set(container *configs.Config) error {
|
||||
if container == nil || container.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
var errs []error
|
||||
// pids (since kernel 4.5)
|
||||
if _, ok := m.controllers["pids"]; ok {
|
||||
if err := setPids(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// memory (since kernel 4.5)
|
||||
if _, ok := m.controllers["memory"]; ok {
|
||||
if err := setMemory(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// io (since kernel 4.5)
|
||||
if _, ok := m.controllers["io"]; ok {
|
||||
if err := setIo(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// cpu (since kernel 4.15)
|
||||
if _, ok := m.controllers["cpu"]; ok {
|
||||
if err := setCpu(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// devices (since kernel 4.15, pseudo-controller)
|
||||
if err := setDevices(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// cpuset (since kernel 5.0)
|
||||
if _, ok := m.controllers["cpuset"]; ok {
|
||||
if err := setCpuset(m.dirPath, container.Cgroups); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
}
|
||||
// freezer (since kernel 5.2, pseudo-controller)
|
||||
if err := setFreezer(m.dirPath, container.Cgroups.Freezer); err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
if len(errs) > 0 && !m.rootless {
|
||||
return errors.Errorf("error while setting cgroup v2: %+v", errs)
|
||||
}
|
||||
m.config = container.Cgroups
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.config, nil
|
||||
}
|
|
@ -0,0 +1,124 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func setIo(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.BlkioWeight != 0 {
|
||||
filename := "io.bfq.weight"
|
||||
if err := fscommon.WriteFile(dirPath, filename, strconv.FormatUint(uint64(cgroup.Resources.BlkioWeight), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteBpsDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleReadIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, td := range cgroup.Resources.BlkioThrottleWriteIOPSDevice {
|
||||
if err := fscommon.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) {
|
||||
ret := map[string][]string{}
|
||||
p := filepath.Join(dirPath, name)
|
||||
f, err := os.Open(p)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
parts := strings.Fields(line)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
ret[parts[0]] = parts[1:]
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
||||
func statIo(dirPath string, stats *cgroups.Stats) error {
|
||||
// more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt
|
||||
var ioServiceBytesRecursive []cgroups.BlkioStatEntry
|
||||
values, err := readCgroup2MapFile(dirPath, "io.stat")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for k, v := range values {
|
||||
d := strings.Split(k, ":")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
minor, err := strconv.ParseUint(d[0], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
major, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, item := range v {
|
||||
d := strings.Split(item, "=")
|
||||
if len(d) != 2 {
|
||||
continue
|
||||
}
|
||||
op := d[0]
|
||||
|
||||
// Accommodate the cgroup v1 naming
|
||||
switch op {
|
||||
case "rbytes":
|
||||
op = "read"
|
||||
case "wbytes":
|
||||
op = "write"
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(d[1], 10, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entry := cgroups.BlkioStatEntry{
|
||||
Op: op,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Value: value,
|
||||
}
|
||||
ioServiceBytesRecursive = append(ioServiceBytesRecursive, entry)
|
||||
}
|
||||
}
|
||||
stats.BlkioStats = cgroups.BlkioStats{IoServiceBytesRecursive: ioServiceBytesRecursive}
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func setMemory(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.swap.max", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.max", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// cgroup.Resources.KernelMemory is ignored
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := fscommon.WriteFile(dirPath, "memory.low", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func statMemory(dirPath string, stats *cgroups.Stats) error {
|
||||
// Set stats from memory.stat.
|
||||
statsFile, err := os.Open(filepath.Join(dirPath, "memory.stat"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer statsFile.Close()
|
||||
|
||||
sc := bufio.NewScanner(statsFile)
|
||||
for sc.Scan() {
|
||||
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
|
||||
}
|
||||
stats.MemoryStats.Stats[t] = v
|
||||
}
|
||||
stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"]
|
||||
|
||||
memoryUsage, err := getMemoryDataV2(dirPath, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.Usage = memoryUsage
|
||||
swapUsage, err := getMemoryDataV2(dirPath, "swap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.SwapUsage = swapUsage
|
||||
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
|
||||
memoryData := cgroups.MemoryData{}
|
||||
|
||||
moduleName := "memory"
|
||||
if name != "" {
|
||||
moduleName = strings.Join([]string{"memory", name}, ".")
|
||||
}
|
||||
usage := strings.Join([]string{moduleName, "current"}, ".")
|
||||
limit := strings.Join([]string{moduleName, "max"}, ".")
|
||||
|
||||
value, err := fscommon.GetCgroupParamUint(path, usage)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage)
|
||||
}
|
||||
memoryData.Usage = value
|
||||
|
||||
value, err = fscommon.GetCgroupParamUint(path, limit)
|
||||
if err != nil {
|
||||
if moduleName != "memory" && os.IsNotExist(err) {
|
||||
return cgroups.MemoryData{}, nil
|
||||
}
|
||||
return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit)
|
||||
}
|
||||
memoryData.Limit = value
|
||||
|
||||
return memoryData, nil
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
// +build linux
|
||||
|
||||
package fs2
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func setPids(dirPath string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.PidsLimit != 0 {
|
||||
// "max" is the fallback value.
|
||||
limit := "max"
|
||||
|
||||
if cgroup.Resources.PidsLimit > 0 {
|
||||
limit = strconv.FormatInt(cgroup.Resources.PidsLimit, 10)
|
||||
}
|
||||
|
||||
if err := fscommon.WriteFile(dirPath, "pids.max", limit); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isNOTSUP(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.ENOTSUP
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func statPidsWithoutController(dirPath string, stats *cgroups.Stats) error {
|
||||
// if the controller is not enabled, let's read PIDS from cgroups.procs
|
||||
// (or threads if cgroup.threads is enabled)
|
||||
contents, err := ioutil.ReadFile(filepath.Join(dirPath, "cgroup.procs"))
|
||||
if err != nil && isNOTSUP(err) {
|
||||
contents, err = ioutil.ReadFile(filepath.Join(dirPath, "cgroup.threads"))
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids := make(map[string]string)
|
||||
for _, i := range strings.Split(string(contents), "\n") {
|
||||
if i != "" {
|
||||
pids[i] = i
|
||||
}
|
||||
}
|
||||
stats.PidsStats.Current = uint64(len(pids))
|
||||
stats.PidsStats.Limit = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func statPids(dirPath string, stats *cgroups.Stats) error {
|
||||
current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.current")
|
||||
}
|
||||
|
||||
maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max")
|
||||
if err != nil {
|
||||
return errors.Wrap(err, "failed to parse pids.max")
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = fscommon.ParseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q",
|
||||
maxString, filepath.Join(dirPath, "pids.max"))
|
||||
}
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
// +build linux
|
||||
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
|
||||
securejoin "github.com/cyphar/filepath-securejoin"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
func WriteFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return errors.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := ioutil.WriteFile(path, []byte(data), 0700); err != nil {
|
||||
return errors.Wrapf(err, "failed to write %q to %q", data, path)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func ReadFile(dir, file string) (string, error) {
|
||||
if dir == "" {
|
||||
return "", errors.Errorf("no directory specified for %s", file)
|
||||
}
|
||||
path, err := securejoin.SecureJoin(dir, file)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
data, err := ioutil.ReadFile(path)
|
||||
return string(data), err
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
// +build linux
|
||||
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotValidFormat = errors.New("line is not a valid key value format")
|
||||
)
|
||||
|
||||
// Saturates negative values at zero and returns a uint64.
|
||||
// Due to kernel bugs, some of the memory cgroup stats can be negative.
|
||||
func ParseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// Parses a cgroup param and returns as name, value
|
||||
// i.e. "io_service_bytes 1234" will return as io_service_bytes, 1234
|
||||
func GetCgroupParamKeyValue(t string) (string, uint64, error) {
|
||||
parts := strings.Fields(t)
|
||||
switch len(parts) {
|
||||
case 2:
|
||||
value, err := ParseUint(parts[1], 10, 64)
|
||||
if err != nil {
|
||||
return "", 0, fmt.Errorf("unable to convert param value (%q) to uint64: %v", parts[1], err)
|
||||
}
|
||||
|
||||
return parts[0], value, nil
|
||||
default:
|
||||
return "", 0, ErrNotValidFormat
|
||||
}
|
||||
}
|
||||
|
||||
// Gets a single uint64 value from the specified cgroup file.
|
||||
func GetCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) {
|
||||
fileName := filepath.Join(cgroupPath, cgroupFile)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
trimmed := strings.TrimSpace(string(contents))
|
||||
if trimmed == "max" {
|
||||
return math.MaxUint64, nil
|
||||
}
|
||||
|
||||
res, err := ParseUint(trimmed, 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from Cgroup file %q", string(contents), fileName)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Gets a string value from the specified cgroup file
|
||||
func GetCgroupParamString(cgroupPath, cgroupFile string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(cgroupPath, cgroupFile))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
// +build linux
|
||||
|
||||
package fscommon
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupFile = "cgroup.file"
|
||||
floatValue = 2048.0
|
||||
floatString = "2048"
|
||||
)
|
||||
|
||||
func TestGetCgroupParamsInt(t *testing.T) {
|
||||
// Setup tempdir.
|
||||
tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(tempDir)
|
||||
tempFile := filepath.Join(tempDir, cgroupFile)
|
||||
|
||||
// Success.
|
||||
err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err := GetCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != floatValue {
|
||||
t.Fatalf("Expected %d to equal %f", value, floatValue)
|
||||
}
|
||||
|
||||
// Success with new line.
|
||||
err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = GetCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != floatValue {
|
||||
t.Fatalf("Expected %d to equal %f", value, floatValue)
|
||||
}
|
||||
|
||||
// Success with negative values
|
||||
err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = GetCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != 0 {
|
||||
t.Fatalf("Expected %d to equal %d", value, 0)
|
||||
}
|
||||
|
||||
// Success with negative values lesser than min int64
|
||||
s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
|
||||
err = ioutil.WriteFile(tempFile, []byte(s), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = GetCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != 0 {
|
||||
t.Fatalf("Expected %d to equal %d", value, 0)
|
||||
}
|
||||
|
||||
// Not a float.
|
||||
err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_, err = GetCgroupParamUint(tempDir, cgroupFile)
|
||||
if err == nil {
|
||||
t.Fatal("Expecting error, got none")
|
||||
}
|
||||
|
||||
// Unknown file.
|
||||
err = os.Remove(tempFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_, err = GetCgroupParamUint(tempDir, cgroupFile)
|
||||
if err == nil {
|
||||
t.Fatal("Expecting error, got none")
|
||||
}
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
type ThrottlingData struct {
|
||||
// Number of periods with throttling active
|
||||
Periods uint64 `json:"periods,omitempty"`
|
||||
// Number of periods when the container hit its throttling limit.
|
||||
ThrottledPeriods uint64 `json:"throttled_periods,omitempty"`
|
||||
// Aggregate time the container was throttled for in nanoseconds.
|
||||
ThrottledTime uint64 `json:"throttled_time,omitempty"`
|
||||
}
|
||||
|
||||
// CpuUsage denotes the usage of a CPU.
|
||||
// All CPU stats are aggregate since container inception.
|
||||
type CpuUsage struct {
|
||||
// Total CPU time consumed.
|
||||
// Units: nanoseconds.
|
||||
TotalUsage uint64 `json:"total_usage,omitempty"`
|
||||
// Total CPU time consumed per core.
|
||||
// Units: nanoseconds.
|
||||
PercpuUsage []uint64 `json:"percpu_usage,omitempty"`
|
||||
// Time spent by tasks of the cgroup in kernel mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInKernelmode uint64 `json:"usage_in_kernelmode"`
|
||||
// Time spent by tasks of the cgroup in user mode.
|
||||
// Units: nanoseconds.
|
||||
UsageInUsermode uint64 `json:"usage_in_usermode"`
|
||||
}
|
||||
|
||||
type CpuStats struct {
|
||||
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
|
||||
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
|
||||
}
|
||||
|
||||
type MemoryData struct {
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
||||
|
||||
type MemoryStats struct {
|
||||
// memory used for cache
|
||||
Cache uint64 `json:"cache,omitempty"`
|
||||
// usage of memory
|
||||
Usage MemoryData `json:"usage,omitempty"`
|
||||
// usage of memory + swap
|
||||
SwapUsage MemoryData `json:"swap_usage,omitempty"`
|
||||
// usage of kernel memory
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
// usage of kernel TCP memory
|
||||
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||
UseHierarchy bool `json:"use_hierarchy"`
|
||||
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
// active pids hard limit
|
||||
Limit uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStatEntry struct {
|
||||
Major uint64 `json:"major,omitempty"`
|
||||
Minor uint64 `json:"minor,omitempty"`
|
||||
Op string `json:"op,omitempty"`
|
||||
Value uint64 `json:"value,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStats struct {
|
||||
// number of bytes tranferred to and from the block device
|
||||
IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"`
|
||||
IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"`
|
||||
IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"`
|
||||
IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"`
|
||||
IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"`
|
||||
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
|
||||
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
|
||||
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
|
||||
}
|
||||
|
||||
type HugetlbStats struct {
|
||||
// current res_counter usage for hugetlb
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
// maximum usage ever recorded.
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
// number of times hugetlb usage allocation failure.
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
CpuStats CpuStats `json:"cpu_stats,omitempty"`
|
||||
MemoryStats MemoryStats `json:"memory_stats,omitempty"`
|
||||
PidsStats PidsStats `json:"pids_stats,omitempty"`
|
||||
BlkioStats BlkioStats `json:"blkio_stats,omitempty"`
|
||||
// the map is in the format "size of hugepage: stats of the hugepage"
|
||||
HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
memoryStats := MemoryStats{Stats: make(map[string]uint64)}
|
||||
hugetlbStats := make(map[string]HugetlbStats)
|
||||
return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
|
||||
}
|
|
@ -0,0 +1,67 @@
|
|||
// +build !linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Destroy() error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetPaths() map[string]string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) GetUnifiedPath() (string, error) {
|
||||
return "", fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func Freeze(c *configs.Cgroup, state configs.FreezerState) error {
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
}
|
|
@ -0,0 +1,534 @@
|
|||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/godbus/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type LegacyManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
type subsystem interface {
|
||||
// Name returns the name of the subsystem.
|
||||
Name() string
|
||||
// Returns the stats, as 'stats', corresponding to the cgroup under 'path'.
|
||||
GetStats(path string, stats *cgroups.Stats) error
|
||||
// Set the cgroup represented by cgroup.
|
||||
Set(path string, cgroup *configs.Cgroup) error
|
||||
}
|
||||
|
||||
var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist")
|
||||
|
||||
type subsystemSet []subsystem
|
||||
|
||||
func (s subsystemSet) Get(name string) (subsystem, error) {
|
||||
for _, ss := range s {
|
||||
if ss.Name() == name {
|
||||
return ss, nil
|
||||
}
|
||||
}
|
||||
return nil, errSubsystemDoesNotExist
|
||||
}
|
||||
|
||||
var legacySubsystems = subsystemSet{
|
||||
&fs.CpusetGroup{},
|
||||
&fs.DevicesGroup{},
|
||||
&fs.MemoryGroup{},
|
||||
&fs.CpuGroup{},
|
||||
&fs.CpuacctGroup{},
|
||||
&fs.PidsGroup{},
|
||||
&fs.BlkioGroup{},
|
||||
&fs.HugetlbGroup{},
|
||||
&fs.PerfEventGroup{},
|
||||
&fs.FreezerGroup{},
|
||||
&fs.NetPrioGroup{},
|
||||
&fs.NetClsGroup{},
|
||||
&fs.NameGroup{GroupName: "name=systemd"},
|
||||
}
|
||||
|
||||
const (
|
||||
testScopeWait = 4
|
||||
testSliceWait = 4
|
||||
)
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
)
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
return systemdDbus.Property{
|
||||
Name: name,
|
||||
Value: dbus.MakeVariant(units),
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: This function comes from package github.com/coreos/go-systemd/util
|
||||
// It was borrowed here to avoid a dependency on cgo.
|
||||
//
|
||||
// IsRunningSystemd checks whether the host was booted with systemd as its init
|
||||
// system. This functions similarly to systemd's `sd_booted(3)`: internally, it
|
||||
// checks whether /run/systemd/system/ exists and is a directory.
|
||||
// http://www.freedesktop.org/software/systemd/man/sd_booted.html
|
||||
func isRunningSystemd() bool {
|
||||
fi, err := os.Lstat("/run/systemd/system")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return fi.IsDir()
|
||||
}
|
||||
|
||||
func UseSystemd() bool {
|
||||
if !isRunningSystemd() {
|
||||
return false
|
||||
}
|
||||
|
||||
connLock.Lock()
|
||||
defer connLock.Unlock()
|
||||
|
||||
if theConn == nil {
|
||||
var err error
|
||||
theConn, err = systemdDbus.New()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) {
|
||||
if !isRunningSystemd() {
|
||||
return nil, fmt.Errorf("systemd not running on this host, can't use systemd as a cgroups.Manager")
|
||||
}
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &UnifiedManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
return func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &LegacyManager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
}
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCgroups(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
for _, s := range legacySubsystems {
|
||||
subsystemPath, err := getSubsystemPath(m.Cgroups, s.Name())
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[s.Name()] = subsystemPath
|
||||
}
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetUnifiedPath() (string, error) {
|
||||
return "", errors.New("unified path is only supported when running in unified mode")
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
path, err := getSubsystemPath(c, subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func joinCgroups(c *configs.Cgroup, pid int) error {
|
||||
for _, sys := range legacySubsystems {
|
||||
name := sys.Name()
|
||||
switch name {
|
||||
case "name=systemd":
|
||||
// let systemd handle this
|
||||
case "cpuset":
|
||||
path, err := getSubsystemPath(c, name)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
s := &fs.CpusetGroup{}
|
||||
if err := s.ApplyDir(path, c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
_, err := join(c, name, pid)
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if name == "devices" {
|
||||
return err
|
||||
}
|
||||
// For other subsystems, omit the `not found` error
|
||||
// because they are optional.
|
||||
if !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// /test.slice/test-a.slice/test-a-b.slice.
|
||||
func ExpandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Path-separators are not allowed.
|
||||
if strings.Contains(slice, "/") {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
// if input was -.slice, we should just return root now
|
||||
if sliceName == "-" {
|
||||
return "/", nil
|
||||
}
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
return "", fmt.Errorf("invalid slice name: %s", slice)
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += "/" + prefix + component + suffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
mountpoint, err := cgroups.FindCgroupMountpoint(c.Path, subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
initPath, err := cgroups.GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
|
||||
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
|
||||
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
slice, err = ExpandSlice(slice)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Freeze(state configs.FreezerState) error {
|
||||
path, err := getSubsystemPath(m.Cgroups, "freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := legacySubsystems.Get("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = freezer.Set(path, m.Cgroups)
|
||||
if err != nil {
|
||||
m.Cgroups.Resources.Freezer = prevState
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetAllPids() ([]int, error) {
|
||||
path, err := getSubsystemPath(m.Cgroups, "devices")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetStats() (*cgroups.Stats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := cgroups.NewStats()
|
||||
for name, path := range m.Paths {
|
||||
sys, err := legacySubsystems.Get(name)
|
||||
if err == errSubsystemDoesNotExist || !cgroups.PathExists(path) {
|
||||
continue
|
||||
}
|
||||
if err := sys.GetStats(path, stats); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (m *LegacyManager) Set(container *configs.Config) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
for _, sys := range legacySubsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Paths["cpu"] != "" {
|
||||
if err := fs.CheckCpushares(m.Paths["cpu"], container.Cgroups.Resources.CpuShares); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
return c.Name
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// do not try to enable the kernel memory if we already have
|
||||
// tasks in the cgroup.
|
||||
content, err := ioutil.ReadFile(filepath.Join(path, "tasks"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(content) > 0 {
|
||||
return nil
|
||||
}
|
||||
return fs.EnableKernelMemoryAccounting(path)
|
||||
}
|
||||
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *LegacyManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
|
@ -0,0 +1,312 @@
|
|||
// +build linux
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
systemdDbus "github.com/coreos/go-systemd/dbus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type UnifiedManager struct {
|
||||
mu sync.Mutex
|
||||
Cgroups *configs.Cgroup
|
||||
Paths map[string]string
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
unitName = getUnitName(c)
|
||||
slice = "system.slice"
|
||||
properties []systemdDbus.Property
|
||||
)
|
||||
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := getSubsystemPath(m.Cgroups, name)
|
||||
if err != nil {
|
||||
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
|
||||
if cgroups.IsNotFound(err) {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
// Check if we can delegate. This is only supported on systemd versions 218 and above.
|
||||
if !strings.HasSuffix(unitName, ".slice") {
|
||||
// Assume scopes always support delegation.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
properties = append(properties,
|
||||
newProp("MemoryAccounting", true),
|
||||
newProp("CPUAccounting", true),
|
||||
newProp("BlockIOAccounting", true))
|
||||
|
||||
// Assume DefaultDependencies= will always work (the check for it was previously broken.)
|
||||
properties = append(properties,
|
||||
newProp("DefaultDependencies", false))
|
||||
|
||||
if c.Resources.Memory != 0 {
|
||||
properties = append(properties,
|
||||
newProp("MemoryLimit", uint64(c.Resources.Memory)))
|
||||
}
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
// corresponds to USEC_INFINITY in systemd
|
||||
// if USEC_INFINITY is provided, CPUQuota is left unbound by systemd
|
||||
// always setting a property value ensures we can apply a quota and remove it later
|
||||
cpuQuotaPerSecUSec := uint64(math.MaxUint64)
|
||||
if c.Resources.CpuQuota > 0 {
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
cpuQuotaPerSecUSec = uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
properties = append(properties,
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
if c.Resources.PidsLimit > 0 {
|
||||
properties = append(properties,
|
||||
newProp("TasksAccounting", true),
|
||||
newProp("TasksMax", uint64(c.Resources.PidsLimit)))
|
||||
}
|
||||
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
statusChan := make(chan string, 1)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
|
||||
select {
|
||||
case <-statusChan:
|
||||
case <-time.After(time.Second):
|
||||
logrus.Warnf("Timed out while waiting for StartTransientUnit(%s) completion signal from dbus. Continuing...", unitName)
|
||||
}
|
||||
} else if !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCgroupsV2(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
path, err := getSubsystemPath(m.Cgroups, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = map[string]string{
|
||||
"pids": path,
|
||||
"memory": path,
|
||||
"io": path,
|
||||
"cpu": path,
|
||||
"devices": path,
|
||||
"cpuset": path,
|
||||
"freezer": path,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Destroy() error {
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
theConn.StopUnit(getUnitName(m.Cgroups), "replace", nil)
|
||||
if err := cgroups.RemovePaths(m.Paths); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Paths = make(map[string]string)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPaths() map[string]string {
|
||||
m.mu.Lock()
|
||||
paths := m.Paths
|
||||
m.mu.Unlock()
|
||||
return paths
|
||||
}
|
||||
func (m *UnifiedManager) GetUnifiedPath() (string, error) {
|
||||
unifiedPath := ""
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
for k, v := range m.Paths {
|
||||
if unifiedPath == "" {
|
||||
unifiedPath = v
|
||||
} else if v != unifiedPath {
|
||||
return unifiedPath,
|
||||
errors.Errorf("expected %q path to be unified path %q, got %q", k, unifiedPath, v)
|
||||
}
|
||||
}
|
||||
if unifiedPath == "" {
|
||||
// FIXME: unified path could be detected even when no controller is available
|
||||
return unifiedPath, errors.New("cannot detect unified path")
|
||||
}
|
||||
return unifiedPath, nil
|
||||
}
|
||||
func createCgroupsv2Path(path string) (Err error) {
|
||||
content, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !filepath.HasPrefix(path, "/sys/fs/cgroup") {
|
||||
return fmt.Errorf("invalid cgroup path %s", path)
|
||||
}
|
||||
|
||||
res := ""
|
||||
for i, c := range strings.Split(strings.TrimSpace(string(content)), " ") {
|
||||
if i == 0 {
|
||||
res = fmt.Sprintf("+%s", c)
|
||||
} else {
|
||||
res = res + fmt.Sprintf(" +%s", c)
|
||||
}
|
||||
}
|
||||
resByte := []byte(res)
|
||||
|
||||
current := "/sys/fs"
|
||||
elements := strings.Split(path, "/")
|
||||
for i, e := range elements[3:] {
|
||||
current = filepath.Join(current, e)
|
||||
if i > 0 {
|
||||
if err := os.Mkdir(current, 0755); err != nil {
|
||||
if !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// If the directory was created, be sure it is not left around on errors.
|
||||
defer func() {
|
||||
if Err != nil {
|
||||
os.Remove(current)
|
||||
}
|
||||
}()
|
||||
}
|
||||
}
|
||||
if i < len(elements[3:])-1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), resByte, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinCgroupsV2(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "memory")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return createCgroupsv2Path(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) fsManager() (cgroups.Manager, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs2.NewManager(m.Cgroups, path, false)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Freeze(state configs.FreezerState) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Freeze(state)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetAllPids() ([]int, error) {
|
||||
path, err := m.GetUnifiedPath()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(path)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fsMgr.GetStats()
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) Set(container *configs.Config) error {
|
||||
fsMgr, err := m.fsManager()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return fsMgr.Set(container)
|
||||
}
|
||||
|
||||
func (m *UnifiedManager) GetCgroups() (*configs.Cgroup, error) {
|
||||
return m.Cgroups, nil
|
||||
}
|
|
@ -0,0 +1,588 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
units "github.com/docker/go-units"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
CgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
unifiedMountpoint = "/sys/fs/cgroup"
|
||||
)
|
||||
|
||||
var (
|
||||
isUnifiedOnce sync.Once
|
||||
isUnified bool
|
||||
)
|
||||
|
||||
// HugePageSizeUnitList is a list of the units used by the linux kernel when
|
||||
// naming the HugePage control files.
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/hugetlb.txt
|
||||
// TODO Since the kernel only use KB, MB and GB; TB and PB should be removed,
|
||||
// depends on https://github.com/docker/go-units/commit/a09cd47f892041a4fac473133d181f5aea6fa393
|
||||
var HugePageSizeUnitList = []string{"B", "KB", "MB", "GB", "TB", "PB"}
|
||||
|
||||
// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
|
||||
func IsCgroup2UnifiedMode() bool {
|
||||
isUnifiedOnce.Do(func() {
|
||||
var st syscall.Statfs_t
|
||||
if err := syscall.Statfs(unifiedMountpoint, &st); err != nil {
|
||||
panic("cannot statfs cgroup root")
|
||||
}
|
||||
isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
|
||||
})
|
||||
return isUnified
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return unifiedMountpoint, nil
|
||||
}
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
|
||||
// We are not using mount.GetMounts() because it's super-inefficient,
|
||||
// parsing it directly sped up x10 times because of not using Sscanf.
|
||||
// It was one of two major performance drawbacks in container start.
|
||||
if !isSubsystemAvailable(subsystem) {
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
if IsCgroup2UnifiedMode() {
|
||||
subsystem = ""
|
||||
}
|
||||
|
||||
return findCgroupMountpointAndRootFromReader(f, cgroupPath, subsystem)
|
||||
}
|
||||
|
||||
func findCgroupMountpointAndRootFromReader(reader io.Reader, cgroupPath, subsystem string) (string, string, error) {
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
txt := scanner.Text()
|
||||
fields := strings.Fields(txt)
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(fields[4], cgroupPath) {
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if (subsystem == "" && fields[9] == "cgroup2") || opt == subsystem {
|
||||
return fields[4], fields[3], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func isSubsystemAvailable(subsystem string) bool {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
controllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
for _, c := range controllers {
|
||||
if c == subsystem {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, avail := cgroups[subsystem]
|
||||
return avail
|
||||
}
|
||||
|
||||
func GetClosestMountpointAncestor(dir, mountinfo string) string {
|
||||
deepestMountPoint := ""
|
||||
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
|
||||
mountInfoParts := strings.Fields(mountInfoEntry)
|
||||
if len(mountInfoParts) < 5 {
|
||||
continue
|
||||
}
|
||||
mountPoint := mountInfoParts[4]
|
||||
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
|
||||
deepestMountPoint = mountPoint
|
||||
}
|
||||
}
|
||||
return deepestMountPoint
|
||||
}
|
||||
|
||||
func FindCgroupMountpointDir() (string, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
text := scanner.Text()
|
||||
fields := strings.Split(text, " ")
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
numPostFields := len(postSeparatorFields)
|
||||
|
||||
// This is an error as we can't detect if the mount is for "cgroup"
|
||||
if numPostFields == 0 {
|
||||
return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "cgroup" || postSeparatorFields[0] == "cgroup2" {
|
||||
// Check that the mount is properly formatted.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
return filepath.Dir(fields[4]), nil
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return "", NewNotFoundError("cgroup")
|
||||
}
|
||||
|
||||
type Mount struct {
|
||||
Mountpoint string
|
||||
Root string
|
||||
Subsystems []string
|
||||
}
|
||||
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
}
|
||||
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
scanner := bufio.NewScanner(mi)
|
||||
numFound := 0
|
||||
for scanner.Scan() && numFound < len(ss) {
|
||||
txt := scanner.Text()
|
||||
sepIdx := strings.Index(txt, " - ")
|
||||
if sepIdx == -1 {
|
||||
return nil, fmt.Errorf("invalid mountinfo format")
|
||||
}
|
||||
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(txt, " ")
|
||||
m := Mount{
|
||||
Mountpoint: fields[4],
|
||||
Root: fields[3],
|
||||
}
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
seen, known := ss[opt]
|
||||
if !known || (!all && seen) {
|
||||
continue
|
||||
}
|
||||
ss[opt] = true
|
||||
if strings.HasPrefix(opt, CgroupNamePrefix) {
|
||||
opt = opt[len(CgroupNamePrefix):]
|
||||
}
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
numFound++
|
||||
}
|
||||
if len(m.Subsystems) > 0 || all {
|
||||
res = append(res, m)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
availableControllers, err := GetAllSubsystems()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
m := Mount{
|
||||
Mountpoint: unifiedMountpoint,
|
||||
Root: unifiedMountpoint,
|
||||
Subsystems: availableControllers,
|
||||
}
|
||||
return []Mount{m}, nil
|
||||
}
|
||||
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = false
|
||||
}
|
||||
return getCgroupMountsHelper(allMap, f, all)
|
||||
}
|
||||
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
// /proc/cgroups is meaningless for v2
|
||||
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
|
||||
if IsCgroup2UnifiedMode() {
|
||||
// "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
|
||||
// - devices: implemented in kernel 4.15
|
||||
// - freezer: implemented in kernel 5.2
|
||||
// We assume these are always available, as it is hard to detect availability.
|
||||
pseudo := []string{"devices", "freezer"}
|
||||
data, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
subsystems := append(pseudo, strings.Fields(string(data))...)
|
||||
return subsystems, nil
|
||||
}
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
subsystems := []string{}
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
if text[0] != '#' {
|
||||
parts := strings.Fields(text)
|
||||
if len(parts) >= 4 && parts[3] != "0" {
|
||||
subsystems = append(subsystems, parts[0])
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot("", subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see paths from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ParseCgroupFile parses the given cgroup file, typically from
|
||||
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// helper function for ParseCgroupFile to make testing easier
|
||||
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
cgroups := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
// from cgroups(7):
|
||||
// /proc/[pid]/cgroup
|
||||
// ...
|
||||
// For each cgroup hierarchy ... there is one entry
|
||||
// containing three colon-separated fields of the form:
|
||||
// hierarchy-ID:subsystem-list:cgroup-path
|
||||
parts := strings.SplitN(text, ":", 3)
|
||||
if len(parts) < 3 {
|
||||
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
|
||||
}
|
||||
|
||||
for _, subs := range strings.Split(parts[1], ",") {
|
||||
cgroups[subs] = parts[2]
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cgroups, nil
|
||||
}
|
||||
|
||||
func getControllerPath(subsystem string, cgroups map[string]string) (string, error) {
|
||||
if IsCgroup2UnifiedMode() {
|
||||
return "/", nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok {
|
||||
return p, nil
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func PathExists(path string) bool {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func EnterPid(cgroupPaths map[string]string, pid int) error {
|
||||
for _, path := range cgroupPaths {
|
||||
if PathExists(path) {
|
||||
if err := WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemovePaths iterates over the provided paths removing them.
|
||||
// We trying to remove all paths five times with increasing delay between tries.
|
||||
// If after all there are not removed cgroups - appropriate error will be
|
||||
// returned.
|
||||
func RemovePaths(paths map[string]string) (err error) {
|
||||
delay := 10 * time.Millisecond
|
||||
for i := 0; i < 5; i++ {
|
||||
if i != 0 {
|
||||
time.Sleep(delay)
|
||||
delay *= 2
|
||||
}
|
||||
for s, p := range paths {
|
||||
os.RemoveAll(p)
|
||||
// TODO: here probably should be logging
|
||||
_, err := os.Stat(p)
|
||||
// We need this strange way of checking cgroups existence because
|
||||
// RemoveAll almost always returns error, even on already removed
|
||||
// cgroups
|
||||
if os.IsNotExist(err) {
|
||||
delete(paths, s)
|
||||
}
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||
}
|
||||
|
||||
func GetHugePageSize() ([]string, error) {
|
||||
files, err := ioutil.ReadDir("/sys/kernel/mm/hugepages")
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
var fileNames []string
|
||||
for _, st := range files {
|
||||
fileNames = append(fileNames, st.Name())
|
||||
}
|
||||
return getHugePageSizeFromFilenames(fileNames)
|
||||
}
|
||||
|
||||
func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) {
|
||||
var pageSizes []string
|
||||
for _, fileName := range fileNames {
|
||||
nameArray := strings.Split(fileName, "-")
|
||||
pageSize, err := units.RAMInBytes(nameArray[1])
|
||||
if err != nil {
|
||||
return []string{}, err
|
||||
}
|
||||
sizeString := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList)
|
||||
pageSizes = append(pageSizes, sizeString)
|
||||
}
|
||||
|
||||
return pageSizes, nil
|
||||
}
|
||||
|
||||
// GetPids returns all pids, that were added to cgroup at path.
|
||||
func GetPids(path string) ([]int, error) {
|
||||
return readProcsFile(path)
|
||||
}
|
||||
|
||||
// GetAllPids returns all pids, that were added to cgroup at path and to all its
|
||||
// subcgroups.
|
||||
func GetAllPids(path string) ([]int, error) {
|
||||
var pids []int
|
||||
// collect pids from all sub-cgroups
|
||||
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
|
||||
dir, file := filepath.Split(p)
|
||||
if file != CgroupProcesses {
|
||||
return nil
|
||||
}
|
||||
if iErr != nil {
|
||||
return iErr
|
||||
}
|
||||
cPids, err := readProcsFile(dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pids = append(pids, cPids...)
|
||||
return nil
|
||||
})
|
||||
return pids, err
|
||||
}
|
||||
|
||||
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||
func WriteCgroupProc(dir string, pid int) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||
}
|
||||
|
||||
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid == -1 {
|
||||
return nil
|
||||
}
|
||||
|
||||
cgroupProcessesFile, err := os.OpenFile(filepath.Join(dir, CgroupProcesses), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0700)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
}
|
||||
defer cgroupProcessesFile.Close()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
_, err = cgroupProcessesFile.WriteString(strconv.Itoa(pid))
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// EINVAL might mean that the task being added to cgroup.procs is in state
|
||||
// TASK_NEW. We should attempt to do so again.
|
||||
if isEINVAL(err) {
|
||||
time.Sleep(30 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func isEINVAL(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case *os.PathError:
|
||||
return err.Err == unix.EINVAL
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
|
@ -0,0 +1,459 @@
|
|||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
|
||||
16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
|
||||
17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755
|
||||
18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
|
||||
19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw
|
||||
20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
|
||||
21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
|
||||
22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755
|
||||
23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755
|
||||
24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw
|
||||
26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children
|
||||
27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children
|
||||
28 23 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,memory,clone_children
|
||||
29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children
|
||||
30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children
|
||||
31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children
|
||||
32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children
|
||||
33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children
|
||||
34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children
|
||||
35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered
|
||||
36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
|
||||
37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel
|
||||
38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel
|
||||
39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel
|
||||
40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
|
||||
41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw
|
||||
42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw
|
||||
43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw
|
||||
45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered
|
||||
46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered
|
||||
47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered
|
||||
48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot rw,seclabel,data=ordered
|
||||
121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000
|
||||
124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw
|
||||
165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
|
||||
167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered
|
||||
171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered
|
||||
175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered
|
||||
179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered
|
||||
183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered
|
||||
187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 /dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered
|
||||
191 35 253:21 / /var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered
|
||||
195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered
|
||||
199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered
|
||||
203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered
|
||||
207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered
|
||||
211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered
|
||||
215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 /dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered
|
||||
219 35 253:28 / /var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered
|
||||
223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered
|
||||
227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered
|
||||
231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered
|
||||
235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered
|
||||
239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered
|
||||
243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered
|
||||
247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered
|
||||
31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1`
|
||||
|
||||
const systemdMountinfo = `115 83 0:32 / / rw,relatime - aufs none rw,si=c0bd3d3,dio,dirperm1
|
||||
116 115 0:35 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
|
||||
117 115 0:36 / /dev rw,nosuid - tmpfs tmpfs rw,mode=755
|
||||
118 117 0:37 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=666
|
||||
119 115 0:38 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
|
||||
120 119 0:39 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs tmpfs rw,mode=755
|
||||
121 120 0:19 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
122 120 0:20 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,devices
|
||||
123 120 0:21 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,freezer
|
||||
124 120 0:22 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
|
||||
125 120 0:23 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,net_cls,net_prio
|
||||
126 120 0:24 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,blkio
|
||||
127 120 0:25 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpuset,clone_children
|
||||
128 120 0:26 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,cpu,cpuacct
|
||||
129 120 0:27 /system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup cgroup rw,perf_event,release_agent=/run/cgmanager/agents/cgm-release-agent.perf_event
|
||||
130 115 43:0 /var/lib/docker/volumes/a44a712176377f57c094397330ee04387284c478364eb25f4c3d25f775f25c26/_data /var/lib/docker rw,relatime - ext4 /dev/nbd0 rw,data=ordered
|
||||
131 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/resolv.conf /etc/resolv.conf rw,relatime - ext4 /dev/nbd0 rw,data=ordered
|
||||
132 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hostname /etc/hostname rw,relatime - ext4 /dev/nbd0 rw,data=ordered
|
||||
133 115 43:0 /var/lib/docker/containers/dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e/hosts /etc/hosts rw,relatime - ext4 /dev/nbd0 rw,data=ordered
|
||||
134 117 0:33 / /dev/shm rw,nosuid,nodev,noexec,relatime - tmpfs shm rw,size=65536k
|
||||
135 117 0:13 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
|
||||
136 117 0:12 /1 /dev/console rw,nosuid,noexec,relatime - devpts none rw,gid=5,mode=620,ptmxmode=000
|
||||
84 115 0:40 / /tmp rw,relatime - tmpfs none rw`
|
||||
|
||||
const bedrockMountinfo = `120 17 0:28 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755
|
||||
124 28 0:28 / /bedrock/strata/arch/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755
|
||||
123 53 0:28 / /bedrock/strata/fallback/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755
|
||||
122 71 0:28 / /bedrock/strata/gentoo/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755
|
||||
121 89 0:28 / /bedrock/strata/kde/sys/fs/cgroup rw,nosuid,nodev,noexec shared:16 - tmpfs tmpfs ro,mode=755
|
||||
125 120 0:29 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
129 124 0:29 / /bedrock/strata/arch/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
128 123 0:29 / /bedrock/strata/fallback/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
127 122 0:29 / /bedrock/strata/gentoo/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
126 121 0:29 / /bedrock/strata/kde/sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,xattr,release_agent=/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
140 120 0:32 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio
|
||||
144 124 0:32 / /bedrock/strata/arch/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio
|
||||
143 123 0:32 / /bedrock/strata/fallback/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio
|
||||
142 122 0:32 / /bedrock/strata/gentoo/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio
|
||||
141 121 0:32 / /bedrock/strata/kde/sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:48 - cgroup cgroup rw,net_cls,net_prio
|
||||
145 120 0:33 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio
|
||||
149 124 0:33 / /bedrock/strata/arch/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio
|
||||
148 123 0:33 / /bedrock/strata/fallback/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio
|
||||
147 122 0:33 / /bedrock/strata/gentoo/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio
|
||||
146 121 0:33 / /bedrock/strata/kde/sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:49 - cgroup cgroup rw,blkio
|
||||
150 120 0:34 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct
|
||||
154 124 0:34 / /bedrock/strata/arch/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct
|
||||
153 123 0:34 / /bedrock/strata/fallback/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct
|
||||
152 122 0:34 / /bedrock/strata/gentoo/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct
|
||||
151 121 0:34 / /bedrock/strata/kde/sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:50 - cgroup cgroup rw,cpu,cpuacct
|
||||
155 120 0:35 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset
|
||||
159 124 0:35 / /bedrock/strata/arch/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset
|
||||
158 123 0:35 / /bedrock/strata/fallback/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset
|
||||
157 122 0:35 / /bedrock/strata/gentoo/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset
|
||||
156 121 0:35 / /bedrock/strata/kde/sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:51 - cgroup cgroup rw,cpuset
|
||||
160 120 0:36 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices
|
||||
164 124 0:36 / /bedrock/strata/arch/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices
|
||||
163 123 0:36 / /bedrock/strata/fallback/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices
|
||||
162 122 0:36 / /bedrock/strata/gentoo/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices
|
||||
161 121 0:36 / /bedrock/strata/kde/sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:52 - cgroup cgroup rw,devices
|
||||
165 120 0:37 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory
|
||||
169 124 0:37 / /bedrock/strata/arch/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory
|
||||
168 123 0:37 / /bedrock/strata/fallback/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory
|
||||
167 122 0:37 / /bedrock/strata/gentoo/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory
|
||||
166 121 0:37 / /bedrock/strata/kde/sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:53 - cgroup cgroup rw,memory
|
||||
170 120 0:38 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer
|
||||
174 124 0:38 / /bedrock/strata/arch/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer
|
||||
173 123 0:38 / /bedrock/strata/fallback/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer
|
||||
172 122 0:38 / /bedrock/strata/gentoo/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer
|
||||
171 121 0:38 / /bedrock/strata/kde/sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:54 - cgroup cgroup rw,freezer
|
||||
175 120 0:39 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids
|
||||
179 124 0:39 / /bedrock/strata/arch/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids
|
||||
178 123 0:39 / /bedrock/strata/fallback/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids
|
||||
177 122 0:39 / /bedrock/strata/gentoo/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids
|
||||
176 121 0:39 / /bedrock/strata/kde/sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:55 - cgroup cgroup rw,pids
|
||||
180 120 0:40 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event
|
||||
184 124 0:40 / /bedrock/strata/arch/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event
|
||||
183 123 0:40 / /bedrock/strata/fallback/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event
|
||||
182 122 0:40 / /bedrock/strata/gentoo/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event
|
||||
181 121 0:40 / /bedrock/strata/kde/sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:56 - cgroup cgroup rw,perf_event`
|
||||
|
||||
const cgroup2Mountinfo = `18 64 0:18 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
|
||||
19 64 0:4 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
|
||||
20 64 0:6 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8171204k,nr_inodes=2042801,mode=755
|
||||
21 18 0:19 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
|
||||
22 20 0:20 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
|
||||
23 20 0:21 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
|
||||
24 64 0:22 / /run rw,nosuid,nodev shared:24 - tmpfs tmpfs rw,seclabel,mode=755
|
||||
25 18 0:23 / /sys/fs/cgroup ro,nosuid,nodev,noexec shared:8 - tmpfs tmpfs ro,seclabel,mode=755
|
||||
26 25 0:24 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:9 - cgroup2 cgroup rw
|
||||
27 18 0:25 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw,seclabel
|
||||
28 18 0:26 / /sys/firmware/efi/efivars rw,nosuid,nodev,noexec,relatime shared:21 - efivarfs efivarfs rw
|
||||
29 25 0:27 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,cpu,cpuacct
|
||||
30 25 0:28 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,memory
|
||||
31 25 0:29 / /sys/fs/cgroup/net_cls,net_prio rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,net_cls,net_prio
|
||||
32 25 0:30 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,blkio
|
||||
33 25 0:31 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,perf_event
|
||||
34 25 0:32 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,hugetlb
|
||||
35 25 0:33 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,freezer
|
||||
36 25 0:34 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,cpuset
|
||||
37 25 0:35 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices
|
||||
38 25 0:36 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,pids
|
||||
61 18 0:37 / /sys/kernel/config rw,relatime shared:22 - configfs configfs rw
|
||||
64 0 253:0 / / rw,relatime shared:1 - ext4 /dev/mapper/fedora_dhcp--16--129-root rw,seclabel,data=ordered
|
||||
39 18 0:17 / /sys/fs/selinux rw,relatime shared:23 - selinuxfs selinuxfs rw
|
||||
40 20 0:16 / /dev/mqueue rw,relatime shared:25 - mqueue mqueue rw,seclabel
|
||||
41 20 0:39 / /dev/hugepages rw,relatime shared:26 - hugetlbfs hugetlbfs rw,seclabel
|
||||
`
|
||||
|
||||
func TestGetCgroupMounts(t *testing.T) {
|
||||
type testData struct {
|
||||
mountInfo string
|
||||
root string
|
||||
subsystems map[string]bool
|
||||
}
|
||||
testTable := []testData{
|
||||
{
|
||||
mountInfo: fedoraMountinfo,
|
||||
root: "/",
|
||||
subsystems: map[string]bool{
|
||||
"cpuset": false,
|
||||
"cpu": false,
|
||||
"cpuacct": false,
|
||||
"memory": false,
|
||||
"devices": false,
|
||||
"freezer": false,
|
||||
"net_cls": false,
|
||||
"blkio": false,
|
||||
"perf_event": false,
|
||||
"hugetlb": false,
|
||||
},
|
||||
},
|
||||
{
|
||||
mountInfo: systemdMountinfo,
|
||||
root: "/system.slice/docker-dc4eaa1a34ec4d593bc0125d31eea823a1d76ae483aeb1409cca80304e34da2e.scope",
|
||||
subsystems: map[string]bool{
|
||||
"cpuset": false,
|
||||
"cpu": false,
|
||||
"cpuacct": false,
|
||||
"memory": false,
|
||||
"devices": false,
|
||||
"freezer": false,
|
||||
"net_cls": false,
|
||||
"blkio": false,
|
||||
"perf_event": false,
|
||||
},
|
||||
},
|
||||
{
|
||||
mountInfo: bedrockMountinfo,
|
||||
root: "/",
|
||||
subsystems: map[string]bool{
|
||||
"cpuset": false,
|
||||
"cpu": false,
|
||||
"cpuacct": false,
|
||||
"memory": false,
|
||||
"devices": false,
|
||||
"freezer": false,
|
||||
"net_cls": false,
|
||||
"blkio": false,
|
||||
"perf_event": false,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, td := range testTable {
|
||||
mi := bytes.NewBufferString(td.mountInfo)
|
||||
cgMounts, err := getCgroupMountsHelper(td.subsystems, mi, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
cgMap := make(map[string]Mount)
|
||||
for _, m := range cgMounts {
|
||||
for _, ss := range m.Subsystems {
|
||||
cgMap[ss] = m
|
||||
}
|
||||
}
|
||||
for ss := range td.subsystems {
|
||||
m, ok := cgMap[ss]
|
||||
if !ok {
|
||||
t.Fatalf("%s not found", ss)
|
||||
}
|
||||
if m.Root != td.root {
|
||||
t.Fatalf("unexpected root for %s: %s", ss, m.Root)
|
||||
}
|
||||
if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) {
|
||||
t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint)
|
||||
}
|
||||
var ssFound bool
|
||||
for _, mss := range m.Subsystems {
|
||||
if mss == ss {
|
||||
ssFound = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !ssFound {
|
||||
t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGetCgroupMounts(b *testing.B) {
|
||||
subsystems := map[string]bool{
|
||||
"cpuset": false,
|
||||
"cpu": false,
|
||||
"cpuacct": false,
|
||||
"memory": false,
|
||||
"devices": false,
|
||||
"freezer": false,
|
||||
"net_cls": false,
|
||||
"blkio": false,
|
||||
"perf_event": false,
|
||||
"hugetlb": false,
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
b.StopTimer()
|
||||
mi := bytes.NewBufferString(fedoraMountinfo)
|
||||
b.StartTimer()
|
||||
if _, err := getCgroupMountsHelper(subsystems, mi, false); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCgroupString(t *testing.T) {
|
||||
testCases := []struct {
|
||||
input string
|
||||
expectedError error
|
||||
expectedOutput map[string]string
|
||||
}{
|
||||
{
|
||||
// Taken from a CoreOS instance running systemd 225 with CPU/Mem
|
||||
// accounting enabled in systemd
|
||||
input: `9:blkio:/
|
||||
8:freezer:/
|
||||
7:perf_event:/
|
||||
6:devices:/system.slice/system-sshd.slice
|
||||
5:cpuset:/
|
||||
4:cpu,cpuacct:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service
|
||||
3:net_cls,net_prio:/
|
||||
2:memory:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service
|
||||
1:name=systemd:/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service`,
|
||||
expectedOutput: map[string]string{
|
||||
"name=systemd": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
|
||||
"blkio": "/",
|
||||
"freezer": "/",
|
||||
"perf_event": "/",
|
||||
"devices": "/system.slice/system-sshd.slice",
|
||||
"cpuset": "/",
|
||||
"cpu": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
|
||||
"cpuacct": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
|
||||
"net_cls": "/",
|
||||
"net_prio": "/",
|
||||
"memory": "/system.slice/system-sshd.slice/sshd@126-10.240.0.15:22-xxx.yyy.zzz.aaa:33678.service",
|
||||
},
|
||||
},
|
||||
{
|
||||
input: `malformed input`,
|
||||
expectedError: fmt.Errorf(`invalid cgroup entry: must contain at least two colons: malformed input`),
|
||||
},
|
||||
}
|
||||
|
||||
for ndx, testCase := range testCases {
|
||||
out, err := parseCgroupFromReader(strings.NewReader(testCase.input))
|
||||
if err != nil {
|
||||
if testCase.expectedError == nil || testCase.expectedError.Error() != err.Error() {
|
||||
t.Errorf("%v: expected error %v, got error %v", ndx, testCase.expectedError, err)
|
||||
}
|
||||
} else {
|
||||
if !reflect.DeepEqual(testCase.expectedOutput, out) {
|
||||
t.Errorf("%v: expected output %v, got error %v", ndx, testCase.expectedOutput, out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIgnoreCgroup2Mount(t *testing.T) {
|
||||
subsystems := map[string]bool{
|
||||
"cpuset": false,
|
||||
"cpu": false,
|
||||
"cpuacct": false,
|
||||
"memory": false,
|
||||
"devices": false,
|
||||
"freezer": false,
|
||||
"net_cls": false,
|
||||
"blkio": false,
|
||||
"perf_event": false,
|
||||
"pids": false,
|
||||
"name=systemd": false,
|
||||
}
|
||||
|
||||
mi := bytes.NewBufferString(cgroup2Mountinfo)
|
||||
cgMounts, err := getCgroupMountsHelper(subsystems, mi, false)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, m := range cgMounts {
|
||||
if m.Mountpoint == "/sys/fs/cgroup/systemd" {
|
||||
t.Errorf("parsed a cgroup2 mount at /sys/fs/cgroup/systemd instead of ignoring it")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetClosestMountpointAncestor(t *testing.T) {
|
||||
fakeMountInfo := ` 18 24 0:17 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
|
||||
100 99 1:31 / /foo/bar rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / /foo/bar/baz2 rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / /foo/bar/baz rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / /foo/bar/bazza rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / /foo/bar/baz3 rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / /foo rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / /unrelated rw,relatime - fake fake rw,fake
|
||||
100 99 1:31 / / rw,relatime - fake fake rw,fake
|
||||
`
|
||||
testCases := []struct {
|
||||
input string
|
||||
output string
|
||||
}{
|
||||
{input: "/foo/bar/baz/a/b/c", output: "/foo/bar/baz"},
|
||||
{input: "/foo/bar/baz", output: "/foo/bar/baz"},
|
||||
{input: "/foo/bar/bazza", output: "/foo/bar/bazza"},
|
||||
{input: "/a/b/c/d", output: "/"},
|
||||
}
|
||||
|
||||
for _, c := range testCases {
|
||||
mountpoint := GetClosestMountpointAncestor(c.input, fakeMountInfo)
|
||||
if mountpoint != c.output {
|
||||
t.Errorf("expected %s, got %s", c.output, mountpoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindCgroupMountpointAndRoot(t *testing.T) {
|
||||
fakeMountInfo := `
|
||||
35 27 0:29 / /foo rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices
|
||||
35 27 0:29 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,devices
|
||||
`
|
||||
testCases := []struct {
|
||||
cgroupPath string
|
||||
output string
|
||||
}{
|
||||
{cgroupPath: "/sys/fs", output: "/sys/fs/cgroup/devices"},
|
||||
{cgroupPath: "", output: "/foo"},
|
||||
}
|
||||
|
||||
for _, c := range testCases {
|
||||
mountpoint, _, _ := findCgroupMountpointAndRootFromReader(strings.NewReader(fakeMountInfo), c.cgroupPath, "devices")
|
||||
if mountpoint != c.output {
|
||||
t.Errorf("expected %s, got %s", c.output, mountpoint)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetHugePageSizeImpl(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
inputFiles []string
|
||||
outputPageSizes []string
|
||||
err error
|
||||
}{
|
||||
{
|
||||
inputFiles: []string{"hugepages-1048576kB", "hugepages-2048kB", "hugepages-32768kB", "hugepages-64kB"},
|
||||
outputPageSizes: []string{"1GB", "2MB", "32MB", "64KB"},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
inputFiles: []string{},
|
||||
outputPageSizes: []string{},
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
inputFiles: []string{"hugepages-a"},
|
||||
outputPageSizes: []string{},
|
||||
err: errors.New("invalid size: 'a'"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range testCases {
|
||||
pageSizes, err := getHugePageSizeFromFilenames(c.inputFiles)
|
||||
if len(pageSizes) != 0 && len(c.outputPageSizes) != 0 && !reflect.DeepEqual(pageSizes, c.outputPageSizes) {
|
||||
t.Errorf("expected %s, got %s", c.outputPageSizes, pageSizes)
|
||||
}
|
||||
if err != nil && err.Error() != c.err.Error() {
|
||||
t.Errorf("expected error %s, got %s", c.err, err)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package configs
|
||||
|
||||
import "fmt"
|
||||
|
||||
// blockIODevice holds major:minor format supported in blkio cgroup
|
||||
type blockIODevice struct {
|
||||
// Major is the device's major number
|
||||
Major int64 `json:"major"`
|
||||
// Minor is the device's minor number
|
||||
Minor int64 `json:"minor"`
|
||||
}
|
||||
|
||||
// WeightDevice struct holds a `major:minor weight`|`major:minor leaf_weight` pair
|
||||
type WeightDevice struct {
|
||||
blockIODevice
|
||||
// Weight is the bandwidth rate for the device, range is from 10 to 1000
|
||||
Weight uint16 `json:"weight"`
|
||||
// LeafWeight is the bandwidth rate for the device while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||
LeafWeight uint16 `json:"leafWeight"`
|
||||
}
|
||||
|
||||
// NewWeightDevice returns a configured WeightDevice pointer
|
||||
func NewWeightDevice(major, minor int64, weight, leafWeight uint16) *WeightDevice {
|
||||
wd := &WeightDevice{}
|
||||
wd.Major = major
|
||||
wd.Minor = minor
|
||||
wd.Weight = weight
|
||||
wd.LeafWeight = leafWeight
|
||||
return wd
|
||||
}
|
||||
|
||||
// WeightString formats the struct to be writable to the cgroup specific file
|
||||
func (wd *WeightDevice) WeightString() string {
|
||||
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.Weight)
|
||||
}
|
||||
|
||||
// LeafWeightString formats the struct to be writable to the cgroup specific file
|
||||
func (wd *WeightDevice) LeafWeightString() string {
|
||||
return fmt.Sprintf("%d:%d %d", wd.Major, wd.Minor, wd.LeafWeight)
|
||||
}
|
||||
|
||||
// ThrottleDevice struct holds a `major:minor rate_per_second` pair
|
||||
type ThrottleDevice struct {
|
||||
blockIODevice
|
||||
// Rate is the IO rate limit per cgroup per device
|
||||
Rate uint64 `json:"rate"`
|
||||
}
|
||||
|
||||
// NewThrottleDevice returns a configured ThrottleDevice pointer
|
||||
func NewThrottleDevice(major, minor int64, rate uint64) *ThrottleDevice {
|
||||
td := &ThrottleDevice{}
|
||||
td.Major = major
|
||||
td.Minor = minor
|
||||
td.Rate = rate
|
||||
return td
|
||||
}
|
||||
|
||||
// String formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) String() string {
|
||||
return fmt.Sprintf("%d:%d %d", td.Major, td.Minor, td.Rate)
|
||||
}
|
||||
|
||||
// StringName formats the struct to be writable to the cgroup specific file
|
||||
func (td *ThrottleDevice) StringName(name string) string {
|
||||
return fmt.Sprintf("%d:%d %s=%d", td.Major, td.Minor, name, td.Rate)
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
package configs
|
||||
|
||||
type FreezerState string
|
||||
|
||||
const (
|
||||
Undefined FreezerState = ""
|
||||
Frozen FreezerState = "FROZEN"
|
||||
Thawed FreezerState = "THAWED"
|
||||
)
|
||||
|
||||
type Cgroup struct {
|
||||
// Deprecated, use Path instead
|
||||
Name string `json:"name,omitempty"`
|
||||
|
||||
// name of parent of cgroup or slice
|
||||
// Deprecated, use Path instead
|
||||
Parent string `json:"parent,omitempty"`
|
||||
|
||||
// Path specifies the path to cgroups that are created and/or joined by the container.
|
||||
// The path is assumed to be relative to the host system cgroup mountpoint.
|
||||
Path string `json:"path"`
|
||||
|
||||
// ScopePrefix describes prefix for the scope name
|
||||
ScopePrefix string `json:"scope_prefix"`
|
||||
|
||||
// Paths represent the absolute cgroups paths to join.
|
||||
// This takes precedence over Path.
|
||||
Paths map[string]string
|
||||
|
||||
// Resources contains various cgroups settings to apply
|
||||
*Resources
|
||||
}
|
||||
|
||||
type Resources struct {
|
||||
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
|
||||
// Deprecated
|
||||
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
|
||||
// Deprecated
|
||||
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
|
||||
// Deprecated
|
||||
DeniedDevices []*Device `json:"denied_devices,omitempty"`
|
||||
|
||||
Devices []*Device `json:"devices"`
|
||||
|
||||
// Memory limit (in bytes)
|
||||
Memory int64 `json:"memory"`
|
||||
|
||||
// Memory reservation or soft_limit (in bytes)
|
||||
MemoryReservation int64 `json:"memory_reservation"`
|
||||
|
||||
// Total memory usage (memory + swap); set `-1` to enable unlimited swap
|
||||
MemorySwap int64 `json:"memory_swap"`
|
||||
|
||||
// Kernel memory limit (in bytes)
|
||||
KernelMemory int64 `json:"kernel_memory"`
|
||||
|
||||
// Kernel memory limit for TCP use (in bytes)
|
||||
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
|
||||
|
||||
// CPU shares (relative weight vs. other containers)
|
||||
CpuShares uint64 `json:"cpu_shares"`
|
||||
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
CpuQuota int64 `json:"cpu_quota"`
|
||||
|
||||
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
|
||||
CpuPeriod uint64 `json:"cpu_period"`
|
||||
|
||||
// How many time CPU will use in realtime scheduling (in usecs).
|
||||
CpuRtRuntime int64 `json:"cpu_rt_quota"`
|
||||
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
CpuRtPeriod uint64 `json:"cpu_rt_period"`
|
||||
|
||||
// CPU to use
|
||||
CpusetCpus string `json:"cpuset_cpus"`
|
||||
|
||||
// MEM to use
|
||||
CpusetMems string `json:"cpuset_mems"`
|
||||
|
||||
// Process limit; set <= `0' to disable limit.
|
||||
PidsLimit int64 `json:"pids_limit"`
|
||||
|
||||
// Specifies per cgroup weight, range is from 10 to 1000.
|
||||
BlkioWeight uint16 `json:"blkio_weight"`
|
||||
|
||||
// Specifies tasks' weight in the given cgroup while competing with the cgroup's child cgroups, range is from 10 to 1000, cfq scheduler only
|
||||
BlkioLeafWeight uint16 `json:"blkio_leaf_weight"`
|
||||
|
||||
// Weight per cgroup per device, can override BlkioWeight.
|
||||
BlkioWeightDevice []*WeightDevice `json:"blkio_weight_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
|
||||
|
||||
// IO write rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, IO per second.
|
||||
BlkioThrottleReadIOPSDevice []*ThrottleDevice `json:"blkio_throttle_read_iops_device"`
|
||||
|
||||
// IO write rate limit per cgroup per device, IO per second.
|
||||
BlkioThrottleWriteIOPSDevice []*ThrottleDevice `json:"blkio_throttle_write_iops_device"`
|
||||
|
||||
// set the freeze value for the process
|
||||
Freezer FreezerState `json:"freezer"`
|
||||
|
||||
// Hugetlb limit (in bytes)
|
||||
HugetlbLimit []*HugepageLimit `json:"hugetlb_limit"`
|
||||
|
||||
// Whether to disable OOM Killer
|
||||
OomKillDisable bool `json:"oom_kill_disable"`
|
||||
|
||||
// Tuning swappiness behaviour per cgroup
|
||||
MemorySwappiness *uint64 `json:"memory_swappiness"`
|
||||
|
||||
// Set priority of network traffic for container
|
||||
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
|
||||
|
||||
// Set class identifier for container's network packets
|
||||
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||
|
||||
// Used on cgroups v2:
|
||||
|
||||
// CpuWeight sets a proportional bandwidth limit.
|
||||
CpuWeight uint64 `json:"cpu_weight"`
|
||||
|
||||
// CpuMax sets she maximum bandwidth limit (format: max period).
|
||||
CpuMax string `json:"cpu_max"`
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
||||
// TODO Windows: This can ultimately be entirely factored out on Windows as
|
||||
// cgroups are a Unix-specific construct.
|
||||
type Cgroup struct {
|
||||
}
|
|
@ -0,0 +1,354 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type Rlimit struct {
|
||||
Type int `json:"type"`
|
||||
Hard uint64 `json:"hard"`
|
||||
Soft uint64 `json:"soft"`
|
||||
}
|
||||
|
||||
// IDMap represents UID/GID Mappings for User Namespaces.
|
||||
type IDMap struct {
|
||||
ContainerID int `json:"container_id"`
|
||||
HostID int `json:"host_id"`
|
||||
Size int `json:"size"`
|
||||
}
|
||||
|
||||
// Seccomp represents syscall restrictions
|
||||
// By default, only the native architecture of the kernel is allowed to be used
|
||||
// for syscalls. Additional architectures can be added by specifying them in
|
||||
// Architectures.
|
||||
type Seccomp struct {
|
||||
DefaultAction Action `json:"default_action"`
|
||||
Architectures []string `json:"architectures"`
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
}
|
||||
|
||||
// Action is taken upon rule match in Seccomp
|
||||
type Action int
|
||||
|
||||
const (
|
||||
Kill Action = iota + 1
|
||||
Errno
|
||||
Trap
|
||||
Allow
|
||||
Trace
|
||||
Log
|
||||
)
|
||||
|
||||
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
|
||||
type Operator int
|
||||
|
||||
const (
|
||||
EqualTo Operator = iota + 1
|
||||
NotEqualTo
|
||||
GreaterThan
|
||||
GreaterThanOrEqualTo
|
||||
LessThan
|
||||
LessThanOrEqualTo
|
||||
MaskEqualTo
|
||||
)
|
||||
|
||||
// Arg is a rule to match a specific syscall argument in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
ValueTwo uint64 `json:"value_two"`
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// Syscall is a rule to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
Args []*Arg `json:"args"`
|
||||
}
|
||||
|
||||
// TODO Windows. Many of these fields should be factored out into those parts
|
||||
// which are common across platforms, and those which are platform specific.
|
||||
|
||||
// Config defines configuration options for executing a process inside a contained environment.
|
||||
type Config struct {
|
||||
// NoPivotRoot will use MS_MOVE and a chroot to jail the process into the container's rootfs
|
||||
// This is a common option when the container is running in ramdisk
|
||||
NoPivotRoot bool `json:"no_pivot_root"`
|
||||
|
||||
// ParentDeathSignal specifies the signal that is sent to the container's process in the case
|
||||
// that the parent process dies.
|
||||
ParentDeathSignal int `json:"parent_death_signal"`
|
||||
|
||||
// Path to a directory containing the container's root filesystem.
|
||||
Rootfs string `json:"rootfs"`
|
||||
|
||||
// Readonlyfs will remount the container's rootfs as readonly where only externally mounted
|
||||
// bind mounts are writtable.
|
||||
Readonlyfs bool `json:"readonlyfs"`
|
||||
|
||||
// Specifies the mount propagation flags to be applied to /.
|
||||
RootPropagation int `json:"rootPropagation"`
|
||||
|
||||
// Mounts specify additional source and destination paths that will be mounted inside the container's
|
||||
// rootfs and mount namespace if specified
|
||||
Mounts []*Mount `json:"mounts"`
|
||||
|
||||
// The device nodes that should be automatically created within the container upon container start. Note, make sure that the node is marked as allowed in the cgroup as well!
|
||||
Devices []*Device `json:"devices"`
|
||||
|
||||
MountLabel string `json:"mount_label"`
|
||||
|
||||
// Hostname optionally sets the container's hostname if provided
|
||||
Hostname string `json:"hostname"`
|
||||
|
||||
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
|
||||
// If a namespace is not provided that namespace is shared from the container's parent process
|
||||
Namespaces Namespaces `json:"namespaces"`
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities *Capabilities `json:"capabilities"`
|
||||
|
||||
// Networks specifies the container's network setup to be created
|
||||
Networks []*Network `json:"networks"`
|
||||
|
||||
// Routes can be specified to create entries in the route table as the container is started
|
||||
Routes []*Route `json:"routes"`
|
||||
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems that the container is
|
||||
// placed into to limit the resources the container has available
|
||||
Cgroups *Cgroup `json:"cgroups"`
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process running in the container and is
|
||||
// change at the time the process is execed
|
||||
AppArmorProfile string `json:"apparmor_profile,omitempty"`
|
||||
|
||||
// ProcessLabel specifies the label to apply to the process running in the container. It is
|
||||
// commonly used by selinux
|
||||
ProcessLabel string `json:"process_label,omitempty"`
|
||||
|
||||
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||
|
||||
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
||||
// higher scores are preferred for being killed. If it is unset then we don't touch the current
|
||||
// value.
|
||||
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||
OomScoreAdj *int `json:"oom_score_adj,omitempty"`
|
||||
|
||||
// UidMappings is an array of User ID mappings for User Namespaces
|
||||
UidMappings []IDMap `json:"uid_mappings"`
|
||||
|
||||
// GidMappings is an array of Group ID mappings for User Namespaces
|
||||
GidMappings []IDMap `json:"gid_mappings"`
|
||||
|
||||
// MaskPaths specifies paths within the container's rootfs to mask over with a bind
|
||||
// mount pointing to /dev/null as to prevent reads of the file.
|
||||
MaskPaths []string `json:"mask_paths"`
|
||||
|
||||
// ReadonlyPaths specifies paths within the container's rootfs to remount as read-only
|
||||
// so that these files prevent any writes.
|
||||
ReadonlyPaths []string `json:"readonly_paths"`
|
||||
|
||||
// Sysctl is a map of properties and their values. It is the equivalent of using
|
||||
// sysctl -w my.property.name value in Linux.
|
||||
Sysctl map[string]string `json:"sysctl"`
|
||||
|
||||
// Seccomp allows actions to be taken whenever a syscall is made within the container.
|
||||
// A number of rules are given, each having an action to be taken if a syscall matches it.
|
||||
// A default action to be taken if no rules match is also given.
|
||||
Seccomp *Seccomp `json:"seccomp"`
|
||||
|
||||
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
|
||||
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
|
||||
|
||||
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||
// CommandHooks are serialized to JSON, but other hooks are not.
|
||||
Hooks *Hooks
|
||||
|
||||
// Version is the version of opencontainer specification that is supported.
|
||||
Version string `json:"version"`
|
||||
|
||||
// Labels are user defined metadata that is stored in the config and populated on the state
|
||||
Labels []string `json:"labels"`
|
||||
|
||||
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
|
||||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
|
||||
// RootlessEUID is set when the runc was launched with non-zero EUID.
|
||||
// Note that RootlessEUID is set to false when launched with EUID=0 in userns.
|
||||
// When RootlessEUID is set, runc creates a new userns for the container.
|
||||
// (config.json needs to contain userns settings)
|
||||
RootlessEUID bool `json:"rootless_euid,omitempty"`
|
||||
|
||||
// RootlessCgroups is set when unlikely to have the full access to cgroups.
|
||||
// When RootlessCgroups is set, cgroups errors are ignored.
|
||||
RootlessCgroups bool `json:"rootless_cgroups,omitempty"`
|
||||
}
|
||||
|
||||
type Hooks struct {
|
||||
// Prestart commands are executed after the container namespaces are created,
|
||||
// but before the user supplied command is executed from init.
|
||||
Prestart []Hook
|
||||
|
||||
// Poststart commands are executed after the container init process starts.
|
||||
Poststart []Hook
|
||||
|
||||
// Poststop commands are executed after the container init process exits.
|
||||
Poststop []Hook
|
||||
}
|
||||
|
||||
type Capabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
Bounding []string
|
||||
// Effective is the set of capabilities checked by the kernel.
|
||||
Effective []string
|
||||
// Inheritable is the capabilities preserved across execve.
|
||||
Inheritable []string
|
||||
// Permitted is the limiting superset for effective capabilities.
|
||||
Permitted []string
|
||||
// Ambient is the ambient set of capabilities that are kept.
|
||||
Ambient []string
|
||||
}
|
||||
|
||||
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||
var state struct {
|
||||
Prestart []CommandHook
|
||||
Poststart []CommandHook
|
||||
Poststop []CommandHook
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(b, &state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
deserialize := func(shooks []CommandHook) (hooks []Hook) {
|
||||
for _, shook := range shooks {
|
||||
hooks = append(hooks, shook)
|
||||
}
|
||||
|
||||
return hooks
|
||||
}
|
||||
|
||||
hooks.Prestart = deserialize(state.Prestart)
|
||||
hooks.Poststart = deserialize(state.Poststart)
|
||||
hooks.Poststop = deserialize(state.Poststop)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks Hooks) MarshalJSON() ([]byte, error) {
|
||||
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
||||
for _, hook := range hooks {
|
||||
switch chook := hook.(type) {
|
||||
case CommandHook:
|
||||
serializableHooks = append(serializableHooks, chook)
|
||||
default:
|
||||
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
|
||||
}
|
||||
}
|
||||
|
||||
return serializableHooks
|
||||
}
|
||||
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"prestart": serialize(hooks.Prestart),
|
||||
"poststart": serialize(hooks.Poststart),
|
||||
"poststop": serialize(hooks.Poststop),
|
||||
})
|
||||
}
|
||||
|
||||
type Hook interface {
|
||||
// Run executes the hook with the provided state.
|
||||
Run(*specs.State) error
|
||||
}
|
||||
|
||||
// NewFunctionHook will call the provided function when the hook is run.
|
||||
func NewFunctionHook(f func(*specs.State) error) FuncHook {
|
||||
return FuncHook{
|
||||
run: f,
|
||||
}
|
||||
}
|
||||
|
||||
type FuncHook struct {
|
||||
run func(*specs.State) error
|
||||
}
|
||||
|
||||
func (f FuncHook) Run(s *specs.State) error {
|
||||
return f.run(s)
|
||||
}
|
||||
|
||||
type Command struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Dir string `json:"dir"`
|
||||
Timeout *time.Duration `json:"timeout"`
|
||||
}
|
||||
|
||||
// NewCommandHook will execute the provided command when the hook is run.
|
||||
func NewCommandHook(cmd Command) CommandHook {
|
||||
return CommandHook{
|
||||
Command: cmd,
|
||||
}
|
||||
}
|
||||
|
||||
type CommandHook struct {
|
||||
Command
|
||||
}
|
||||
|
||||
func (c Command) Run(s *specs.State) error {
|
||||
b, err := json.Marshal(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd := exec.Cmd{
|
||||
Path: c.Path,
|
||||
Args: c.Args,
|
||||
Env: c.Env,
|
||||
Stdin: bytes.NewReader(b),
|
||||
Stdout: &stdout,
|
||||
Stderr: &stderr,
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
errC := make(chan error, 1)
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
errC <- err
|
||||
}()
|
||||
var timerCh <-chan time.Time
|
||||
if c.Timeout != nil {
|
||||
timer := time.NewTimer(*c.Timeout)
|
||||
defer timer.Stop()
|
||||
timerCh = timer.C
|
||||
}
|
||||
select {
|
||||
case err := <-errC:
|
||||
return err
|
||||
case <-timerCh:
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
package configs
|
||||
|
||||
import "fmt"
|
||||
|
||||
// HostUID gets the translated uid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostUID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.UidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootUID gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootUID() (int, error) {
|
||||
return c.HostUID(0)
|
||||
}
|
||||
|
||||
// HostGID gets the translated gid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostGID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.GidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootGID gets the root gid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootGID() (int, error) {
|
||||
return c.HostGID(0)
|
||||
}
|
||||
|
||||
// Utility function that gets a host ID for a container ID from user namespace map
|
||||
// if that ID is present in the map.
|
||||
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
|
||||
for _, m := range uMap {
|
||||
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||
hostID := m.HostID + (containerID - m.ContainerID)
|
||||
return hostID, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func loadConfig(name string) (*Config, error) {
|
||||
f, err := os.Open(filepath.Join("../sample_configs", name))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var container *Config
|
||||
if err := json.NewDecoder(f).Decode(&container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check that a config doesn't contain extra fields
|
||||
var configMap, abstractMap map[string]interface{}
|
||||
|
||||
if _, err := f.Seek(0, 0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
configData, err := json.Marshal(&container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(configData, &configMap); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for k := range configMap {
|
||||
delete(abstractMap, k)
|
||||
}
|
||||
|
||||
if len(abstractMap) != 0 {
|
||||
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
|
||||
}
|
||||
|
||||
return container, nil
|
||||
}
|
||||
|
||||
func TestRemoveNamespace(t *testing.T) {
|
||||
ns := Namespaces{
|
||||
{Type: NEWNET},
|
||||
}
|
||||
if !ns.Remove(NEWNET) {
|
||||
t.Fatal("NEWNET was not removed")
|
||||
}
|
||||
if len(ns) != 0 {
|
||||
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostRootUIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 0 {
|
||||
t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostRootUIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
UidMappings: []IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 1000 {
|
||||
t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostRootGIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 0 {
|
||||
t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostRootGIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
GidMappings: []IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 1000 {
|
||||
t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,195 @@
|
|||
package configs_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
func TestUnmarshalHooks(t *testing.T) {
|
||||
timeout := time.Second
|
||||
|
||||
prestartCmd := configs.NewCommandHook(configs.Command{
|
||||
Path: "/var/vcap/hooks/prestart",
|
||||
Args: []string{"--pid=123"},
|
||||
Env: []string{"FOO=BAR"},
|
||||
Dir: "/var/vcap",
|
||||
Timeout: &timeout,
|
||||
})
|
||||
prestart, err := json.Marshal(prestartCmd.Command)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
hook := configs.Hooks{}
|
||||
err = hook.UnmarshalJSON([]byte(fmt.Sprintf(`{"Prestart" :[%s]}`, prestart)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(hook.Prestart[0], prestartCmd) {
|
||||
t.Errorf("Expected prestart to equal %+v but it was %+v",
|
||||
prestartCmd, hook.Prestart[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalHooksWithInvalidData(t *testing.T) {
|
||||
hook := configs.Hooks{}
|
||||
err := hook.UnmarshalJSON([]byte(`{invalid-json}`))
|
||||
if err == nil {
|
||||
t.Error("Expected error to occur but it was nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshalHooks(t *testing.T) {
|
||||
timeout := time.Second
|
||||
|
||||
prestartCmd := configs.NewCommandHook(configs.Command{
|
||||
Path: "/var/vcap/hooks/prestart",
|
||||
Args: []string{"--pid=123"},
|
||||
Env: []string{"FOO=BAR"},
|
||||
Dir: "/var/vcap",
|
||||
Timeout: &timeout,
|
||||
})
|
||||
|
||||
hook := configs.Hooks{
|
||||
Prestart: []configs.Hook{prestartCmd},
|
||||
}
|
||||
hooks, err := hook.MarshalJSON()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
h := `{"poststart":null,"poststop":null,"prestart":[{"path":"/var/vcap/hooks/prestart","args":["--pid=123"],"env":["FOO=BAR"],"dir":"/var/vcap","timeout":1000000000}]}`
|
||||
if string(hooks) != h {
|
||||
t.Errorf("Expected hooks %s to equal %s", string(hooks), h)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshalUnmarshalHooks(t *testing.T) {
|
||||
timeout := time.Second
|
||||
|
||||
prestart := configs.NewCommandHook(configs.Command{
|
||||
Path: "/var/vcap/hooks/prestart",
|
||||
Args: []string{"--pid=123"},
|
||||
Env: []string{"FOO=BAR"},
|
||||
Dir: "/var/vcap",
|
||||
Timeout: &timeout,
|
||||
})
|
||||
|
||||
hook := configs.Hooks{
|
||||
Prestart: []configs.Hook{prestart},
|
||||
}
|
||||
hooks, err := hook.MarshalJSON()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
umMhook := configs.Hooks{}
|
||||
err = umMhook.UnmarshalJSON(hooks)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !reflect.DeepEqual(umMhook.Prestart[0], prestart) {
|
||||
t.Errorf("Expected hooks to be equal after mashaling -> unmarshaling them: %+v, %+v", umMhook.Prestart[0], prestart)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshalHooksWithUnexpectedType(t *testing.T) {
|
||||
fHook := configs.NewFunctionHook(func(*specs.State) error {
|
||||
return nil
|
||||
})
|
||||
hook := configs.Hooks{
|
||||
Prestart: []configs.Hook{fHook},
|
||||
}
|
||||
hooks, err := hook.MarshalJSON()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
h := `{"poststart":null,"poststop":null,"prestart":null}`
|
||||
if string(hooks) != h {
|
||||
t.Errorf("Expected hooks %s to equal %s", string(hooks), h)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFuncHookRun(t *testing.T) {
|
||||
state := &specs.State{
|
||||
Version: "1",
|
||||
ID: "1",
|
||||
Status: "created",
|
||||
Pid: 1,
|
||||
Bundle: "/bundle",
|
||||
}
|
||||
|
||||
fHook := configs.NewFunctionHook(func(s *specs.State) error {
|
||||
if !reflect.DeepEqual(state, s) {
|
||||
t.Errorf("Expected state %+v to equal %+v", state, s)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
fHook.Run(state)
|
||||
}
|
||||
|
||||
func TestCommandHookRun(t *testing.T) {
|
||||
state := &specs.State{
|
||||
Version: "1",
|
||||
ID: "1",
|
||||
Status: "created",
|
||||
Pid: 1,
|
||||
Bundle: "/bundle",
|
||||
}
|
||||
timeout := time.Second
|
||||
|
||||
cmdHook := configs.NewCommandHook(configs.Command{
|
||||
Path: os.Args[0],
|
||||
Args: []string{os.Args[0], "-test.run=TestHelperProcess"},
|
||||
Env: []string{"FOO=BAR"},
|
||||
Dir: "/",
|
||||
Timeout: &timeout,
|
||||
})
|
||||
|
||||
err := cmdHook.Run(state)
|
||||
if err != nil {
|
||||
t.Errorf(fmt.Sprintf("Expected error to not occur but it was %+v", err))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCommandHookRunTimeout(t *testing.T) {
|
||||
state := &specs.State{
|
||||
Version: "1",
|
||||
ID: "1",
|
||||
Status: "created",
|
||||
Pid: 1,
|
||||
Bundle: "/bundle",
|
||||
}
|
||||
timeout := (10 * time.Millisecond)
|
||||
|
||||
cmdHook := configs.NewCommandHook(configs.Command{
|
||||
Path: os.Args[0],
|
||||
Args: []string{os.Args[0], "-test.run=TestHelperProcessWithTimeout"},
|
||||
Env: []string{"FOO=BAR"},
|
||||
Dir: "/",
|
||||
Timeout: &timeout,
|
||||
})
|
||||
|
||||
err := cmdHook.Run(state)
|
||||
if err == nil {
|
||||
t.Error("Expected error to occur but it was nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHelperProcess(*testing.T) {
|
||||
fmt.Println("Helper Process")
|
||||
os.Exit(0)
|
||||
}
|
||||
func TestHelperProcessWithTimeout(*testing.T) {
|
||||
time.Sleep(time.Second)
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
package configs
|
||||
|
||||
// All current tests are for Unix-specific functionality
|
|
@ -0,0 +1,57 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
const (
|
||||
Wildcard = -1
|
||||
)
|
||||
|
||||
// TODO Windows: This can be factored out in the future
|
||||
|
||||
type Device struct {
|
||||
// Device type, block, char, etc.
|
||||
Type rune `json:"type"`
|
||||
|
||||
// Path to the device.
|
||||
Path string `json:"path"`
|
||||
|
||||
// Major is the device's major number.
|
||||
Major int64 `json:"major"`
|
||||
|
||||
// Minor is the device's minor number.
|
||||
Minor int64 `json:"minor"`
|
||||
|
||||
// Cgroup permissions format, rwm.
|
||||
Permissions string `json:"permissions"`
|
||||
|
||||
// FileMode permission bits for the device.
|
||||
FileMode os.FileMode `json:"file_mode"`
|
||||
|
||||
// Uid of the device.
|
||||
Uid uint32 `json:"uid"`
|
||||
|
||||
// Gid of the device.
|
||||
Gid uint32 `json:"gid"`
|
||||
|
||||
// Write the file to the allowed list
|
||||
Allow bool `json:"allow"`
|
||||
}
|
||||
|
||||
func (d *Device) CgroupString() string {
|
||||
return fmt.Sprintf("%c %s:%s %s", d.Type, deviceNumberString(d.Major), deviceNumberString(d.Minor), d.Permissions)
|
||||
}
|
||||
|
||||
func (d *Device) Mkdev() int {
|
||||
return int((d.Major << 8) | (d.Minor & 0xff) | ((d.Minor & 0xfff00) << 12))
|
||||
}
|
||||
|
||||
// deviceNumberString converts the device number to a string return result.
|
||||
func deviceNumberString(number int64) string {
|
||||
if number == Wildcard {
|
||||
return "*"
|
||||
}
|
||||
return fmt.Sprint(number)
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
// +build linux
|
||||
|
||||
package configs
|
||||
|
||||
var (
|
||||
// DefaultSimpleDevices are devices that are to be both allowed and created.
|
||||
DefaultSimpleDevices = []*Device{
|
||||
// /dev/null and zero
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/full",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// consoles and ttys
|
||||
{
|
||||
Path: "/dev/tty",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
|
||||
// /dev/urandom,/dev/random
|
||||
{
|
||||
Path: "/dev/urandom",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
{
|
||||
Path: "/dev/random",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
DefaultAllowedDevices = append([]*Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: Wildcard,
|
||||
Minor: Wildcard,
|
||||
Permissions: "m",
|
||||
},
|
||||
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: Wildcard,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}, DefaultSimpleDevices...)
|
||||
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
|
||||
)
|
|
@ -0,0 +1,9 @@
|
|||
package configs
|
||||
|
||||
type HugepageLimit struct {
|
||||
// which type of hugepage to limit.
|
||||
Pagesize string `json:"page_size"`
|
||||
|
||||
// usage limit for hugepage.
|
||||
Limit uint64 `json:"limit"`
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
package configs
|
||||
|
||||
type IntelRdt struct {
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
|
||||
// The schema of memory bandwidth per L3 cache id
|
||||
// Format: "MB:<cache_id0>=bandwidth0;<cache_id1>=bandwidth1;..."
|
||||
// The unit of memory bandwidth is specified in "percentages" by
|
||||
// default, and in "MBps" if MBA Software Controller is enabled.
|
||||
MemBwSchema string `json:"memBwSchema,omitempty"`
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
package configs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type IfPrioMap struct {
|
||||
Interface string `json:"interface"`
|
||||
Priority int64 `json:"priority"`
|
||||
}
|
||||
|
||||
func (i *IfPrioMap) CgroupString() string {
|
||||
return fmt.Sprintf("%s %d", i.Interface, i.Priority)
|
||||
}
|
|
@ -0,0 +1,39 @@
|
|||
package configs
|
||||
|
||||
const (
|
||||
// EXT_COPYUP is a directive to copy up the contents of a directory when
|
||||
// a tmpfs is mounted over it.
|
||||
EXT_COPYUP = 1 << iota
|
||||
)
|
||||
|
||||
type Mount struct {
|
||||
// Source path for the mount.
|
||||
Source string `json:"source"`
|
||||
|
||||
// Destination path for the mount inside the container.
|
||||
Destination string `json:"destination"`
|
||||
|
||||
// Device the mount is for.
|
||||
Device string `json:"device"`
|
||||
|
||||
// Mount flags.
|
||||
Flags int `json:"flags"`
|
||||
|
||||
// Propagation Flags
|
||||
PropagationFlags []int `json:"propagation_flags"`
|
||||
|
||||
// Mount data applied to the mount.
|
||||
Data string `json:"data"`
|
||||
|
||||
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
|
||||
Relabel string `json:"relabel"`
|
||||
|
||||
// Extensions are additional flags that are specific to runc.
|
||||
Extensions int `json:"extensions"`
|
||||
|
||||
// Optional Command to be run before Source is mounted.
|
||||
PremountCmds []Command `json:"premount_cmds"`
|
||||
|
||||
// Optional Command to be run after Source is mounted.
|
||||
PostmountCmds []Command `json:"postmount_cmds"`
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
package configs
|
||||
|
||||
type NamespaceType string
|
||||
|
||||
type Namespaces []Namespace
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue