/src/ganeti-kvmd
/src/ganeti-luxid
/src/ganeti-metad
+/src/ganeti-maintd
/src/ganeti-mond
/src/rpc-test
src/Ganeti/JQueue \
src/Ganeti/Locking \
src/Ganeti/Logging \
+ src/Ganeti/MaintD \
src/Ganeti/Monitoring \
src/Ganeti/Metad \
src/Ganeti/Objects \
$(addsuffix /*.py[co],$(DIRS)) \
$(addsuffix /*.hi,$(HS_DIRS)) \
$(addsuffix /*.o,$(HS_DIRS)) \
+ $(addsuffix /*.dyn_hi,$(HS_DIRS)) \
+ $(addsuffix /*.dyn_o,$(HS_DIRS)) \
$(addsuffix /*.$(HTEST_SUFFIX)_hi,$(HS_DIRS)) \
$(addsuffix /*.$(HTEST_SUFFIX)_o,$(HS_DIRS)) \
$(HASKELL_PACKAGE_VERSIONS_FILE) \
src/ganeti-confd \
src/ganeti-wconfd \
src/ganeti-luxid \
+ src/ganeti-maintd \
src/ganeti-metad \
src/ganeti-mond \
.hpc/*.mix src/*.tix test/hs/*.tix *.tix \
HS_GENERATED_FILES = $(HS_PROGS) src/hluxid src/ganeti-luxid \
src/hconfd src/ganeti-confd
if ENABLE_MOND
-HS_GENERATED_FILES += src/ganeti-mond
+HS_GENERATED_FILES += src/ganeti-mond src/ganeti-maintd
endif
if ENABLE_METADATA
HS_GENERATED_FILES += src/ganeti-metad
doc/examples/systemd/ganeti-kvmd.service \
doc/examples/systemd/ganeti-luxid.service \
doc/examples/systemd/ganeti-metad.service \
+ doc/examples/systemd/ganeti-maintd.service \
doc/examples/systemd/ganeti-mond.service \
doc/examples/systemd/ganeti-noded.service \
doc/examples/systemd/ganeti-rapi.service \
doc/design-2.14.rst \
doc/design-2.15.rst \
doc/design-2.16.rst \
+ doc/design-2.17.rst \
doc/design-allocation-efficiency.rst \
doc/design-autorepair.rst \
doc/design-bulk-create.rst \
doc/design-location.rst \
doc/design-linuxha.rst \
doc/design-lu-generated-jobs.rst \
+ doc/design-macvtap.rst \
+ doc/design-memory-over-commitment.rst \
+ doc/design-migration-speed-hbal.rst \
doc/design-monitoring-agent.rst \
doc/design-move-instance-improvements.rst \
doc/design-multi-reloc.rst \
doc/design-multi-storage-htools.rst \
doc/design-multi-version-tests.rst \
+ doc/design-n-m-redundancy.rst \
doc/design-network.rst \
doc/design-network2.rst \
doc/design-node-add.rst \
src/hs2py \
src/rpc-test
if ENABLE_MOND
-HS_COMPILE_PROGS += src/ganeti-mond
+HS_COMPILE_PROGS += src/ganeti-mond src/ganeti-maintd
endif
if ENABLE_METADATA
HS_COMPILE_PROGS += src/ganeti-metad
$(patsubst src.%,--exclude Test.%,$(subst /,.,$(patsubst %.hs,%, $(HS_LIB_SRCS))))
HS_LIB_SRCS = \
+ src/Ganeti/Prelude.hs \
src/Ganeti/BasicTypes.hs \
src/Ganeti/Codec.hs \
src/Ganeti/Common.hs \
src/Ganeti/DataCollectors.hs \
src/Ganeti/DataCollectors/CLI.hs \
src/Ganeti/DataCollectors/CPUload.hs \
+ src/Ganeti/DataCollectors/Diagnose.hs \
src/Ganeti/DataCollectors/Diskstats.hs \
src/Ganeti/DataCollectors/Drbd.hs \
src/Ganeti/DataCollectors/InstStatus.hs \
src/Ganeti/DataCollectors/InstStatusTypes.hs \
+ src/Ganeti/DataCollectors/KvmRSS.hs \
src/Ganeti/DataCollectors/Lv.hs \
src/Ganeti/DataCollectors/Program.hs \
src/Ganeti/DataCollectors/Types.hs \
src/Ganeti/HTools/Cluster/AllocationSolution.hs \
src/Ganeti/HTools/Cluster/Evacuate.hs \
src/Ganeti/HTools/Cluster/Metrics.hs \
+ src/Ganeti/HTools/Cluster/MetricsComponents.hs \
+ src/Ganeti/HTools/Cluster/MetricsTH.hs \
src/Ganeti/HTools/Cluster/Moves.hs \
src/Ganeti/HTools/Cluster/Utils.hs \
src/Ganeti/HTools/Container.hs \
src/Ganeti/HTools/Program/Hsqueeze.hs \
src/Ganeti/HTools/Program/Hroller.hs \
src/Ganeti/HTools/Program/Main.hs \
+ src/Ganeti/HTools/RedundancyLevel.hs \
+ src/Ganeti/HTools/Repair.hs \
src/Ganeti/HTools/Tags.hs \
src/Ganeti/HTools/Tags/Constants.hs \
src/Ganeti/HTools/Types.hs \
src/Ganeti/Logging/Lifted.hs \
src/Ganeti/Logging/WriterLog.hs \
src/Ganeti/Luxi.hs \
+ src/Ganeti/MaintD/Autorepairs.hs \
+ src/Ganeti/MaintD/Balance.hs \
+ src/Ganeti/MaintD/CleanupIncidents.hs \
+ src/Ganeti/MaintD/CollectIncidents.hs \
+ src/Ganeti/MaintD/FailIncident.hs \
+ src/Ganeti/MaintD/HandleIncidents.hs \
+ src/Ganeti/MaintD/MemoryState.hs \
+ src/Ganeti/MaintD/Server.hs \
+ src/Ganeti/MaintD/Utils.hs \
src/Ganeti/Network.hs \
src/Ganeti/Objects.hs \
src/Ganeti/Objects/BitArray.hs \
src/Ganeti/Objects/Disk.hs \
src/Ganeti/Objects/Instance.hs \
+ src/Ganeti/Objects/HvState.hs \
src/Ganeti/Objects/Lens.hs \
+ src/Ganeti/Objects/Maintenance.hs \
src/Ganeti/Objects/Nic.hs \
src/Ganeti/OpCodes.hs \
src/Ganeti/OpCodes/Lens.hs \
src/Ganeti/Utils.hs \
src/Ganeti/Utils/Atomic.hs \
src/Ganeti/Utils/AsyncWorker.hs \
+ src/Ganeti/Utils/Http.hs \
src/Ganeti/Utils/IORef.hs \
src/Ganeti/Utils/Livelock.hs \
src/Ganeti/Utils/Monad.hs \
cp -f $< $@
if ENABLE_MOND
-nodist_sbin_SCRIPTS += src/ganeti-mond
+nodist_sbin_SCRIPTS += src/ganeti-mond src/ganeti-maintd
endif
if ENABLE_METADATA
daemons/ganeti-cleaner.in \
$(pkglib_python_scripts) \
devel/build_chroot \
+ devel/cert_digest.py \
devel/upload \
devel/webserver \
tools/kvm-ifup.in \
test/autotools/autotools-check-news.test \
test/data/htools/clean-nonzero-score.data \
test/data/htools/common-suffix.data \
+ test/data/htools/dyn1.json \
+ test/data/htools/dyn2.json \
+ test/data/htools/dyn3.json \
test/data/htools/empty-cluster.data \
test/data/htools/hail-alloc-dedicated-1.json \
test/data/htools/hail-alloc-desired-location.json \
test/data/htools/hail-alloc-secondary.json \
test/data/htools/hail-alloc-spindles.json \
test/data/htools/hail-alloc-twodisks.json \
+ test/data/htools/hail-alloc-memory-over-commitment.json \
test/data/htools/hail-change-group.json \
test/data/htools/hail-invalid-reloc.json \
test/data/htools/hail-node-evac.json \
test/data/htools/hail-reloc-drbd.json \
test/data/htools/hail-reloc-drbd-crowded.json \
+ test/data/htools/hbal-avoid-disk-moves.data \
test/data/htools/hbal-cpu-speed.data \
test/data/htools/hbal-desiredlocation-1.data \
test/data/htools/hbal-desiredlocation-2.data \
test/data/htools/hbal-desiredlocation-3.data \
test/data/htools/hbal-desiredlocation-4.data \
test/data/htools/hbal-dyn.data \
+ test/data/htools/hbal-dyn2.data \
test/data/htools/hbal-evac.data \
test/data/htools/hbal-excl-tags.data \
test/data/htools/hbal-forth.data \
test/data/htools/hbal-location-1.data \
test/data/htools/hbal-location-exclusion.data \
test/data/htools/hbal-location-2.data \
+ test/data/htools/hbal-memory-over-commitment.data \
+ test/data/htools/hbal-memory-over-commitment-2.data \
test/data/htools/hbal-migration-1.data \
test/data/htools/hbal-migration-2.data \
test/data/htools/hbal-migration-3.data \
test/data/cluster_config_2.13.json \
test/data/cluster_config_2.14.json \
test/data/cluster_config_2.15.json \
+ test/data/cluster_config_2.16.json \
+ test/data/cluster_config_2.17.json \
test/data/instance-minor-pairing.txt \
test/data/instance-disks.txt \
test/data/ip-addr-show-dummy0.txt \
====
+Version 2.17.0 alpha1
+---------------------
+
+*(unreleased)*
+
+Incompatible/important changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- The IAllocator protocol has been extended by a new ``hv_state`` parameter.
+ This new parameter is used to estimate the amount of memory utilized by
+ the node. It replaces ``reserved_mem`` on hypervisors other than ``xen-pvm``
+ and ``xen-hvm`` because ``reserved_mem`` was reported incorrectly on them.
+  If this ``hv_state`` parameter is not present in an iallocator input, the
+ old ``reserved_mem`` will be used.
+- Tools now log into a separate log file ``tools.log``. Also, each log
+  message is now properly labelled with the name of the tool that
+  submitted it.
+- The options ``--debug`` and ``--verbose`` of ``gnt-cluster
+ renew-crypto`` and ``gnt-node {add,remove,modify}`` now (also) control the
+ log level of the SSH calls to all nodes.
+
+
+New features
+~~~~~~~~~~~~
+
+- There is a new daemon, the :doc:`Ganeti Maintenance Daemon <design-repaird>`,
+ that coordinates all maintenance operations on a cluster, i.e. rebalancing,
+  disk activation, ERROR_down handling, and node repair actions.
+- ``htools`` now support memory over-commitment; see
+  :doc:`Memory Over Commitment <design-memory-over-commitment>` for
+  details.
+- ``hbal`` has a new option ``--avoid-disk-moves *factor*`` that allows disk
+ moves only if the gain in the cluster metrics is ``*factor*`` times higher
+ than with no disk moves.
+- ``hcheck`` reports the level of redundancy for each node group as a new output
+ parameter, see :doc:`N+M Redundancy <design-n-m-redundancy>`.
+
+
Version 2.16.0 beta2
--------------------
-Ganeti 2.16
+Ganeti 2.17
===========
For installation instructions, read the INSTALL and the doc/install.rst
, curl >= 1.3.7 && < 1.4
, hinotify >= 0.3.2 && < 0.4
, hslogger >= 1.1.4 && < 1.3
- , json >= 0.5 && < 0.9
- , lens >= 3.10 && < 4.8
+ , json >= 0.5 && < 1.0
+ , lens >= 3.10 && < 4.13
, lifted-base >= 0.2.0.3 && < 0.3
, monad-control >= 0.3.1.3 && < 1.1
, MonadCatchIO-transformers >= 0.3.0.0 && < 0.4
# Configure script for Ganeti
m4_define([gnt_version_major], [2])
-m4_define([gnt_version_minor], [16])
+m4_define([gnt_version_minor], [17])
m4_define([gnt_version_revision], [0])
-m4_define([gnt_version_suffix], [~beta2])
+m4_define([gnt_version_suffix], [~alpha1])
m4_define([gnt_version_full],
m4_format([%d.%d.%d%s],
gnt_version_major, gnt_version_minor,
}
if _mond_enabled; then
- DAEMONS+=( ganeti-mond )
+  DAEMONS+=( ganeti-mond ganeti-maintd )
fi
# The full list of all daemons we know about
metad)
echo "@GNTMETADUSER@:@GNTMETADGROUP@"
;;
+ maintd)
+ echo "@GNTMONDUSER@:@GNTMONDGROUP@"
+ ;;
*)
echo "root:@GNTDAEMONSGROUP@"
;;
SHA1_LIST='
cabal-install-1.18.0.2.tar.gz 2d1f7a48d17b1e02a1e67584a889b2ff4176a773
cabal-install-1.22.4.0.tar.gz b98eea96d321cdeed83a201c192dac116e786ec2
+cabal-install-1.22.6.0.tar.gz d474b0eef6944af1abef92419cea13cee50993f3
ghc-7.6.3-i386-unknown-linux.tar.bz2 f042b4171a2d4745137f2e425e6949c185f8ea14
ghc-7.6.3-x86_64-unknown-linux.tar.bz2 46ec3f3352ff57fba0dcbc8d9c20f7bcb6924b77
ghc-7.8.4-i386-unknown-linux-deb7.tar.bz2 4f523f854c37a43b738359506a89a37a9fa9fc5f
ghc-7.8.4-x86_64-unknown-linux-deb7.tar.bz2 3f68321b064e5c1ffcb05838b85bcc00aa2315b4
+ghc-7.10.2-i386-unknown-linux-deb7.tar.bz2 c759ab9af566f5c3c9b75b702615f1d0c2f999fd
+ghc-7.10.2-x86_64-unknown-linux-deb7.tar.bz2 f028e4a07995353a47286478fc8644f66defa227
'
# export all variables needed in the schroot
'hlint>=1.9.12'
;;
+ jessie-ghc710)
+
+ GHC_VERSION="7.10.2"
+ GHC_VARIANT="-deb7"
+ CABAL_INSTALL_VERSION="1.22.6.0"
+ # the version of the Cabal library below must match the version used by
+ # CABAL_INSTALL_VERSION, see the dependencies of cabal-install
+ CABAL_LIB_VERSION=">=1.22.2 && <1.23"
+ export GHC_VERSION GHC_VARIANT CABAL_INSTALL_VERSION
+
+ in_chroot -- \
+ $APT_INSTALL \
+ autoconf automake \
+ zlib1g-dev \
+ libgmp3-dev \
+ libcurl4-openssl-dev \
+ libpcre3-dev \
+ happy \
+ hlint hscolour pandoc \
+ shelltestrunner \
+ graphviz qemu-utils \
+ python-docutils \
+ python-simplejson \
+ python-pyparsing \
+ python-pyinotify \
+ python-pycurl \
+ python-ipaddr \
+ python-yaml \
+ python-paramiko \
+ git \
+ git-email \
+ vim
+
+ in_chroot -- \
+ $APT_INSTALL python-setuptools python-dev build-essential
+
+ in_chroot -- \
+ easy_install \
+ logilab-astng==0.24.1 \
+ logilab-common==0.58.3 \
+ mock==1.0.1 \
+ pylint==0.26.0
+
+ in_chroot -- \
+ easy_install \
+ sphinx==1.1.3 \
+ pep8==1.3.3 \
+ coverage==3.4 \
+ bitarray==0.8.0
+
+ install_ghc
+
+ install_cabal
+
+ in_chroot -- \
+ cabal update
+
+ in_chroot -- \
+ cabal install --global \
+ HUnit-1.2.5.2 \
+ PSQueue-1.1 \
+ StateVar-1.1.0.0 \
+ ansi-terminal-0.6.2.1 \
+ ansi-wl-pprint-0.6.7.2 \
+ base-orphans-0.4.1 \
+ base64-bytestring-1.0.0.1 \
+ blaze-builder-0.4.0.1 \
+ bytestring-builder-0.10.6.0.0 \
+ bytestring-mmap-0.2.2 \
+ curl-1.3.8 \
+ enumerator-0.4.20 \
+ extensible-exceptions-0.1.1.4 \
+ hashable-1.2.3.3 \
+ case-insensitive-1.2.0.4 \
+ hinotify-0.3.7 \
+ hostname-1.0 \
+ hslogger-1.2.9 \
+ monads-tf-0.1.0.2 \
+ MonadCatchIO-transformers-0.3.1.3 \
+ nats-1 \
+ parallel-3.2.0.6 \
+ prelude-extras-0.4 \
+ primitive-0.6 \
+ reflection-2 \
+ regex-base-0.93.2 \
+ regex-pcre-0.94.4 \
+ regex-posix-0.95.2 \
+ scientific-0.3.3.8 \
+ attoparsec-0.12.1.6 \
+ attoparsec-enumerator-0.3.4 \
+ streaming-commons-0.1.12.1 \
+ blaze-builder-enumerator-0.2.1.0 \
+ syb-0.5.1 \
+ json-0.9.1 \
+ tagged-0.8.1 \
+ tf-random-0.5 \
+ QuickCheck-2.7.6 \
+ Crypto-4.2.5.1 \
+ transformers-compat-0.4.0.4 \
+ distributive-0.4.4 \
+ exceptions-0.8.0.2 \
+ temporary-1.2.0.3 \
+ transformers-base-0.4.4 \
+ monad-control-1.0.0.4 \
+ lifted-base-0.2.3.6 \
+ unix-compat-0.4.1.4 \
+ unordered-containers-0.2.5.1 \
+ semigroups-0.16.2.2 \
+ bifunctors-5 \
+ utf8-string-0.3.8 \
+ vector-0.11.0.0 \
+ void-0.7 \
+ contravariant-1.3.2 \
+ comonad-4.2.7.2 \
+ profunctors-5.1.1 \
+ semigroupoids-5.0.0.2 \
+ free-4.12.1 \
+ adjunctions-4.2.1 \
+ kan-extensions-4.2.2 \
+ lens-4.12.3 \
+ xml-1.3.14 \
+ test-framework-0.8.1.1 \
+ test-framework-hunit-0.3.0.1 \
+ test-framework-quickcheck2-0.3.0.3 \
+ zlib-bindings-0.1.1.5 \
+ zlib-enum-0.2.3.1 \
+ snap-core-0.9.7.2 \
+ snap-server-0.9.5.1 \
+;;
+
jessie-ghc78)
GHC_VERSION="7.8.4"
test-framework-0.8.0.3 \
test-framework-hunit-0.3.0.1 \
test-framework-quickcheck2-0.3.0.2 \
- 'transformers>=0.3.0.0'
+ 'transformers>=0.3.0.0' \
+ zlib-0.5.4.2
;;
*)
#!/usr/bin/python
-#
-# Copyright (C) 2011 Google Inc.
+# Copyright (C) 2015 Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""Tool to format JSON data.
-
-"""
+# This is a test script to ease debugging of SSL problems. It can be
+# applied to any of Ganeti's SSL certificates (for example client.pem
+# and server.pem) and will output a digest.
import sys
-import simplejson
+import OpenSSL
-def main():
- """Main routine.
+def usage():
+ print "%s filename" % sys.argv[0]
+ print
+ print "'filename' must be a filename of an SSL certificate in PEM format."
- """
- if len(sys.argv) > 1:
- sys.stderr.write("Read JSON data from standard input and write a"
- " formatted version on standard output. There are"
- " no options or arguments.\n")
- sys.exit(1)
- data = simplejson.load(sys.stdin)
- simplejson.dump(data, sys.stdout, indent=2, sort_keys=True)
- sys.stdout.write("\n")
+if __name__ == "__main__":
+  if len(sys.argv) < 2:
+    usage()
+    sys.exit(1)
+
+ cert_fd = open(sys.argv[1], "r")
+ cert_plain = cert_fd.read()
+
+ print "Certificate:"
+ print cert_plain
+
+ cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
+ cert_plain)
+
+ print "Digest:"
+ print cert.digest("sha1")
-if __name__ == "__main__":
- main()
--- /dev/null
+==================
+Ganeti 2.17 design
+==================
+
+The following designs' implementations were completed in Ganeti 2.17.
+
+- :doc:`design-memory-over-commitment`
+- :doc:`design-migration-speed-hbal`
+- :doc:`design-n-m-redundancy`
+- :doc:`design-repaird`
\ No newline at end of file
Design document drafts
======================
-.. Last updated for Ganeti 2.16
+.. Last updated for Ganeti 2.17
.. toctree::
:maxdepth: 2
design-network2.rst
design-configlock.rst
design-multi-storage-htools.rst
- design-repaird.rst
+ design-macvtap.rst
design-scsi-kvm.rst
design-disks.rst
--- /dev/null
+===============
+MacVTap support
+===============
+
+.. contents:: :depth: 3
+
+This is a design document detailing the implementation of `MacVTap`
+support in Ganeti. The initial implementation targets the KVM
+hypervisor, but it is intended to be ported to the XEN hypervisor as
+well.
+
+Current state and shortcomings
+==============================
+
+Currently, Ganeti provides a number of options for networking a virtual
+machine, that are the ``bridged``, ``routed``, and ``openvswitch``
+modes. ``MacVTap``, is another virtual network interface in Linux, that
+is not supported by Ganeti and that could be added to the currently
+supported solutions. It is an interface that acts as a regular TUN/TAP
+device, and thus it is transparently supported by QEMU. Because of its
+design, it can greatly simplify Ganeti setups using bridged instances.
+
+In brief, the MacVTap interface is based on the ``MacVLan`` Linux
+driver, which basically allows a single physical interface to be
+associated with multiple IPs and MAC addresses. It is meant to replace
+the combination of the TUN/TAP and bridge drivers with a more
+lightweight setup that doesn't require any extra configuration on the
+host. The MacVTap driver is supposed to be more efficient than using a
+regular bridge. Unlike bridges, it doesn't need to do STP or to
+discover/learn MAC addresses of other connected devices on a given
+domain, as it knows every MAC address it can receive. In fact, it
+introduces a bridge-like behavior for virtual machines but without the
+need to have a real bridge setup on the host. Instead, each virtual
+interface extends an existing network device by attaching directly to
+it, having its own MAC address, and providing a separate virtual
+interface to be used by the userspace processes. The MacVTap MAC address
+is used on the external network and the guest OS cannot spoof or change
+that address.
+
+Background
+==========
+
+This section provides some extra information about the MacVTap interface
+that we took into account for the rest of this design document.
+
+MacVTap modes of operation
+--------------------------
+
+A MacVTap device can operate in one of four modes, just like the MacVLan
+driver does. These modes determine how the tap endpoints communicate
+with each other, providing various levels of isolation between them.
+Those modes are the following:
+
+* `VEPA (Virtual Ethernet Port Aggregator) mode`: The default mode that
+ is compatible with virtualization-enabled switches. The communication
+  between endpoints on the same lower device happens through the
+ external switch.
+
+* `Bridge mode`: It works almost like a traditional bridge, connecting
+ all endpoints directly to each other.
+
+* `Private mode`: An endpoint in this mode can never communicate with any
+ other endpoint on the same lower device.
+
+* `Passthru mode`: This mode was added later to work around some
+  limitations of MacVLans (more details here_).
+
+MacVTap internals
+-----------------
+
+The creation of a MacVTap device is *not* done by opening the
+`/dev/net/tun` device and issuing a corresponding `ioctl()` to register
+a network device as happens in tap devices. Instead, there are two ways
+to create a MacVTap device. The first one is using the `rtnetlink(7)`
+interface directly, just like the `libvirt` or the `iproute2` utilities
+do, and the second one is to use the high-level `ip-link` command. Since
+creating a MacVTap interface programmatically using the netlink protocol
+is a bit more complicated than creating a normal TUN/TAP device, we
+propose using the ip-link tool for the MacVTap handling, which is
+much simpler and more straightforward to use, and also fulfills all our
+needs. Additionally, since Ganeti already depends on `iproute2` being
+installed in the system, this does not introduce an extra dependency.
+
+The following example creates a MacVTap device using the `ip-link`
+tool, named `macvtap0`, operating in `bridge` mode, and which is using
+`eth0` as its lower device:
+
+::
+
+ ip link add link eth0 name macvtap0 address 1a:36:1b:aa:b3:77 type macvtap mode bridge
+
+Once a MacVTap interface is created, an actual character device appears
+under `/dev`, called ``/dev/tapXX``, where ``XX`` is the interface index
+of the device.
+
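+For illustration (assuming the ``macvtap0`` device created above), the
+interface index, and thus the name of the character device, can be
+looked up via sysfs:
+
+::
+
+   # the interface index of macvtap0 determines the XX in /dev/tapXX
+   idx=$(cat /sys/class/net/macvtap0/ifindex)
+   ls -l /dev/tap$idx
+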
+Proposed changes
+================
+
+In order to be able to create instances using the MacVTap device driver,
+we propose some modifications that affect the ``nicparams`` slot of the
+Ganeti's configuration ``NIC`` object, and also the code part regarding
+to the KVM hypervisor, as detailed in the following sections.
+
+Configuration changes
+---------------------
+
+The nicparams ``mode`` attribute will be extended to support the
+``macvtap`` mode. When using the MacVTap mode, the ``link`` attribute
+will specify the network device to which the MacVTap interfaces will be
+attached, the *lower device*. Note that the lower device should
+exist; otherwise the operation will fail. If no link is specified, the
+cluster-wide default NIC `link` param will be used instead.
+
+We propose making the MacVTap mode configurable, and so the nicparams
+object will be extended with an extra slot named ``mvtap_mode``. This
+parameter will only be used if the network mode is set to MacVTap since
+it does not make sense in other modes, similarly to the `vlan` slot of
+the `openvswitch` mode.
+
+Below are snippets of some of the ``gnt-network`` commands'
+output:
+
+Network connection
+~~~~~~~~~~~~~~~~~~
+
+::
+
+ gnt-network connect -N mode=macvtap,link=eth0,mvtap_mode=bridge vtap-net vtap_group
+
+Network listing
+~~~~~~~~~~~~~~~
+
+::
+
+ gnt-network list
+
+ Network Subnet Gateway MacPrefix GroupList
+ br-net 10.48.1.0/24 10.48.1.254 - default (bridged, br0, , )
+ vtap-net 192.168.100.0/24 192.168.100.1 - vtap_group (macvtap, eth0, , bridge)
+
+Network information
+~~~~~~~~~~~~~~~~~~~
+
+::
+
+ gnt-network info
+
+ Network name: vtap-net
+ UUID: 4f139b48-3f08-46b1-911f-d37de7e12dcf
+ Serial number: 1
+ Subnet: 192.168.100.0/28
+ Gateway: 192.168.100.1
+ IPv6 Subnet: 2001:db8:2ffc::/64
+ IPv6 Gateway: 2001:db8:2ffc::1
+ Mac Prefix: None
+ size: 16
+ free: 10 (62.50%)
+ usage map:
+ 0 XXXXX..........X 63
+ (X) used (.) free
+ externally reserved IPs:
+ 192.168.100.0, 192.168.100.1, 192.168.100.15
+ connected to node groups:
+ vtap_group (mode:macvtap link:eth0 vlan: mvtap_mode:bridge)
+ used by 2 instances:
+ inst1.example.com: 0:192.168.100.2
+ inst2.example.com: 0:192.168.100.3
+
+
+Hypervisor changes
+------------------
+
+A new method will be introduced in the KVM's `netdev.py` module, named
+``OpenVTap``, similar to the ``OpenTap`` method, that will be
+responsible for creating a MacVTap device using the `ip-link` command,
+and returning its file descriptor. The ``OpenVtap`` method will receive
+as arguments the network's `link`, the mode of the MacVTap device
+(``mvtap_mode``), and also the ``interface name`` of the device to be
+created, otherwise we will not be able to retrieve it, and so opening
+the created device.
+
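+A minimal sketch of what such a method could look like follows; the
+helper calls and error handling are illustrative only (assuming the
+usual ``utils``, ``errors`` and ``os`` imports of the hypervisor code),
+not the final implementation:
+
+::
+
+  def OpenVTap(link, mvtap_mode, ifname):
+    """Create a MacVTap device and return its file descriptor (sketch)."""
+    # create the MacVTap device on top of the given lower device
+    result = utils.RunCmd(["ip", "link", "add", "link", link,
+                           "name", ifname, "type", "macvtap",
+                           "mode", mvtap_mode])
+    if result.failed:
+      raise errors.HypervisorError("Failed to create MacVTap device %s: %s"
+                                   % (ifname, result.output))
+    # the matching character device is /dev/tapXX, where XX is the
+    # interface index of the newly created device
+    idx = utils.ReadFile("/sys/class/net/%s/ifindex" % ifname).strip()
+    return os.open("/dev/tap%s" % idx, os.O_RDWR)
+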
+Since we want the names of the MacVTap devices to be unique on the
+same node, we will make use of the existing ``_GenerateKvmTapName``
+method to generate device names but with some modifications, to be
+adapted to our needs. This method is actually a wrapper over the
+``GenerateTapName`` method, which is currently used to generate TAP
+interface names for NICs meant to be used in instance communication
+using the ``gnt.com`` prefix. We propose extending this method to
+generate names for the MacVTap interface too, using the ``vtap`` prefix.
+To do so, we could add an extra boolean argument in that method, named
+`inst_comm`, to differentiate the two cases, so that the method will
+return the appropriate name depending on its usage. This argument will
+be optional, defaulting to `True`, so as not to affect the existing API.
+
+Currently, the `OpenTap` method handles the `vhost-net`, `mq`, and the
+`vnet_hdr` features. The `vhost-net` feature will be normally supported
+for the MacVTap devices too, and so will the `multiqueue` feature, which
+can be enabled using the `numrxqueues` and `numtxqueues` parameters of
+the `ip-link` command. The only drawback seems to be the `vnet_hdr`
+feature modification. For a MacVTap device this flag is enabled by
+default, and it cannot be disabled even if a user requests it.
+
+A last hypervisor change will be the introduction of a new method named
+``_RemoveStaleMacvtapDevs`` that will remove any remaining MacVTap
+devices, and which is detailed in the following section.
+
+Tools changes
+-------------
+
+Some of the Ganeti tools should also be extended to support MacVTap
+devices. Those are the ``kvm-ifup`` and ``net-common`` scripts. These
+modifications will include a new method named ``setup_macvtap`` that
+will simply change the device status to `UP` just before an instance is
+started:
+
+::
+
+ ip link set $INTERFACE up
+
+As mentioned in the `Background` section, MacVTap devices are
+persistent. So, we have to manually delete the MacVTap device after an
+instance shutdown. To do so, we propose creating a ``kvm-ifdown``
+script, that will be invoked after an instance shutdown in order to
+remove the relevant MacVTap devices. The ``kvm-ifdown`` script should
+explicitly call the following commands and currently will be functional
+for MacVTap NICs only:
+
+::
+
+ ip link set $INTERFACE down
+ ip link delete $INTERFACE
+
+To be able to call the `kvm-ifdown` script we should extend the KVM's
+``_ConfigureNIC`` method with an extra argument that is the name of the
+script to be invoked, instead of calling by default the `kvm-ifup`
+script, as currently happens.
+
+The invocation of the `kvm-ifdown` script will be made through a
+separate method that we will create, named ``_RemoveStaleMacvtapDevs``.
+This method will read the NIC runtime files of an instance and will
+remove any devices using the MacVTap interface. This method will be
+included in the ``CleanupInstance`` method in order to cover all the
+cases where an instance using MacVTap NICs needs to be cleaned up.
+
+Besides the instance shutdown, there are a couple of cases where the
+MacVTap NICs will need to be cleaned up too. In case of an internal
+instance shutdown, where the ``kvmd`` is not enabled, the instance will
+be in ``ERROR_DOWN`` state. In that case, when the instance is started
+either by the `ganeti-watcher` or by the admin, the ``CleanupInstance``
+method, and consequently the `kvm-ifdown` script, will not be called and
+so the MacVTap NICs will have to be deleted manually. Otherwise, starting
+the instance will result in more than one MacVTap device using the same
+MAC address. An instance migration is another case that will leave
+stale MacVTap devices on the source node. In order
+to solve those potential issues, we will explicitly call the
+``_RemoveStaleMacvtapDevs`` method after a successful instance migration
+on the source node, and also before creating a new device for a NIC that
+is using the MacVTap interface to remove any stale devices.
+
+.. _here: http://thread.gmane.org/gmane.comp.emulators.kvm.devel/61824/
+
+.. vim: set textwidth=72 :
+.. Local Variables:
+.. mode: rst
+.. fill-column: 72
+.. End:
--- /dev/null
+======================
+Memory Over Commitment
+======================
+
+.. contents:: :depth: 4
+
+This document describes the proposed changes to support memory
+overcommitment in Ganeti.
+
+Background
+==========
+
+Memory is a non-preemptable resource, and thus cannot be shared, e.g.,
+in a round-robin fashion. Therefore, Ganeti is very careful to make
+sure there is always enough physical memory for the memory promised
+to the instances. In fact, even in an N+1 redundant way: should one
+node fail, its instances can be relocated to other nodes while still
+having enough physical memory for the memory promised to all instances.
+
+Overview of the current memory model
+--------------------------------------
+
+To make decisions, ``htools`` query the following parameters from Ganeti.
+
+- The amount of memory used by each instance. This is the state-of-record
+ backend parameter ``maxmem`` for that instance (maybe inherited from
+  group-level or cluster-level backend parameters). It tells the hypervisor
+ the maximal amount of memory that instance may use.
+
+- The state-of-world parameters for the node memory. They are collected
+ live and are hypervisor specific. The following parameters are collected.
+
+ - memory_total: the total memory size on the node
+
+ - memory_free: the available memory on the node for instances
+
+ - memory_dom0: the memory used by the node itself, if available
+
+ For Xen, the amount of total and free memory are obtained by parsing
+  the output of the Xen ``info`` command (e.g., ``xm info``). The dom0
+ memory is obtained by looking in the output of the ``list`` command
+ for ``Domain-0``.
+
+  For the ``kvm`` hypervisor, all these parameters are obtained by
+  reading ``/proc/meminfo``, where the entries ``MemTotal`` and
+  ``Active`` are considered the values for ``memory_total`` and
+  ``memory_dom0``, respectively. The value for ``memory_free`` is
+  taken as the sum of the entries ``MemFree``, ``Buffers``, and
+  ``Cached``, as illustrated below.
+
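+  As an illustration with made-up figures: a KVM node whose
+  ``/proc/meminfo`` shows a ``MemTotal`` of 64 GiB, ``Active`` of 2 GiB,
+  ``MemFree`` of 10 GiB, ``Buffers`` of 1 GiB and ``Cached`` of 5 GiB
+  would be reported with ``memory_total`` = 64 GiB, ``memory_dom0`` =
+  2 GiB and ``memory_free`` = 10 + 1 + 5 = 16 GiB.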
+
+Current state and shortcomings
+==============================
+
+While the current model of never over committing memory serves well
+to provide reliability guarantees to instances, it is not well suited to
+situations where the actual use of memory in the instances is spiky. Consider
+a scenario where instances only touch a small portion of their memory most
+of the time, but occasionally use a large amount of memory. Then, at any moment,
+a large fraction of the memory used for the instances sits around without
+being actively used. By swapping out the not actively used memory, resources
+can be used more efficiently.
+
+Proposed changes
+================
+
+We propose to support over commitment of memory if desired by the
+administrator. Memory will change from being a hard constraint to
+being a question of policy. The default will be not to over commit
+memory.
+
+Extension of the policy by a new parameter
+------------------------------------------
+
+The instance policy is extended by a new real-number field ``memory-ratio``.
+Policies on groups inherit this parameter from the cluster-wide policy in the
+same way as all other parameters of the instance policy.
+
+When a cluster is upgraded from an earlier version not containing
+``memory-ratio``, the value ``1.0`` is inserted for this new field in
+the cluster-level ``ipolicy``; in this way, the status quo of not over
+committing memory is preserved via upgrades. The ``gnt-cluster
+modify`` and ``gnt-group modify`` commands are extended to allow
+setting of the ``memory-ratio``.
+
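+An invocation could look as follows; the option name is an assumption
+of this example and may differ in the final implementation:
+
+::
+
+  # illustrative only -- the final option name may differ
+  gnt-cluster modify --ipolicy-memory-ratio 1.5
+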
+The ``htools`` text format is extended to also contain this new
+ipolicy parameter. It is added as an optional entry at the end of the
+parameter list of an ipolicy line, to remain backwards compatible.
+If the parameter is missing, the value ``1.0`` is assumed.
+
+Changes to the memory reporting on non ``xen-hvm`` and ``xen-pvm``
+------------------------------------------------------------------
+
+For all hypervisors ``memory_dom0`` corresponds to the amount of memory used
+by Ganeti itself and all other non-hypervisor processes running on this node.
+The amount of memory currently reported for ``memory_dom0`` on hypervisors
+other than ``xen-hvm`` and ``xen-pvm``, however, includes the amount of active
+memory of the hypervisor processes. This is in conflict with the underlying
+assumption that ``memory_dom0`` memory is not available for instances.
+
+Therefore, for hypervisors other than ``xen-pvm`` and ``xen-hvm`` we will use
+a new state-of-record hypervisor parameter called ``mem_node`` in htools
+instead of the reported ``memory_dom0``. As a hypervisor state parameter, it is
+run-time tunable and inheritable at group and cluster levels. If this parameter
+is not present, a default value of ``1024M`` will be used, which is a
+conservative estimate of the amount of memory used by Ganeti on a medium-sized
+cluster. The reason for using a state-of-record value is to have a stable
+amount of reserved memory, irrespective of the current activity of Ganeti.
+
+Currently, hypervisor state parameters are partly implemented but not used
+by Ganeti.
+
+Changes to the memory policy
+----------------------------
+
+The memory policy will be changed in that we assume that one byte
+of physical node memory can hold ``memory-ratio`` bytes of instance
+memory, but still only one byte of Ganeti memory. Of course, in practice
+this has to be backed by swap space; it is the administrator's responsibility
+to ensure that each node has swap of at
+least ``(memory-ratio - 1.0) * (memory_total - memory_dom0)``. Ganeti
+will warn if the amount of swap space is not big enough.
+
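+For example, with a ``memory-ratio`` of ``1.5`` on a node with 64 GiB
+of total memory and 2 GiB of dom0 memory, the node needs at least
+``(1.5 - 1.0) * (64 - 2) GiB = 31 GiB`` of swap.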
+
+The new memory policy will be as follows.
+
+- The difference between the total memory of a node and its dom0
+ memory will be considered the amount of *available memory*.
+
+- The amount of *used memory* will be (as is now) the sum of
+  the memory of all instances and the reserved memory.
+
+- The *relative memory usage* is the ratio of used to available
+  memory; see the worked example after this list. Note that the
+  relative usage can be bigger than ``1.0``.
+
+- The memory-related constraint for instance placement is that
+ afterwards the relative memory usage be at most the
+ memory-ratio. Again, if the ratio of the memory of the real
+ instances on the node to available memory is bigger than the
+ memory-ratio this is considered a hard violation, otherwise
+ it is considered a soft violation.
+
+- The definition of N+1 redundancy (including
+ :doc:`design-shared-storage-redundancy`) is kept literally as is.
+ Note, however, that the meaning does change, as the definition depends
+ on the notion of allowed moves, which is changed by this proposal.
+
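+As a worked example with illustrative figures: a node with 62 GiB of
+available memory (64 GiB total minus 2 GiB of dom0 memory) running
+instances with a combined memory of 80 GiB plus 4 GiB of reserved
+memory has a relative memory usage of ``(80 + 4) / 62``, roughly
+``1.35``. A placement resulting in this state is acceptable under a
+``memory-ratio`` of ``1.5``, but not under the default ratio of ``1.0``.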
+
+Changes to cluster verify
+-------------------------
+
+The only place where the Ganeti core handles memory is
+when ``gnt-cluster verify`` verifies N+1 redundancy. This code will be changed
+to follow the new memory model.
+
+Additionally, ``gnt-cluster verify`` will warn if the sum of available memory
+and swap space is not at least as big as the used memory.
+
+Changes to ``htools``
+---------------------
+
+The underlying model of the cluster will be changed in accordance with
+the suggested change of the memory policy. As all higher-level ``htools``
+operations go through only the primitives of adding/moving an instance
+if possible, and inspecting the cluster metrics, changing the base
+model will make all ``htools`` compliant with the new memory model.
+
+Balancing
+---------
+
+The cluster metric components will not be changed. Note that the standard
+deviation of relative memory usage is already one of the components.
+For dynamic (load-based) balancing, the amount of not immediately
+discardable memory will serve as an indication of memory activity;
+as usual, the measure will be the standard deviation of the relative
+value (i.e., the ratio of non-discardable memory to available
+memory). The weighting for this metric component will have to be
+determined by experimentation and will depend on the memory ratio;
+for a memory ratio of ``1.0`` the weight will be ``0.0``, as memory
+need not be taken into account if no over-commitment is in place.
+For memory ratios bigger than ``1.0``, the weight will be positive
+and grow with the ratio.
--- /dev/null
+==================================
+Migration speed accounting in Hbal
+==================================
+
+.. contents:: :depth: 2
+
+Hbal usually performs a complex sequence of moves during cluster balancing in
+order to achieve a locally optimal cluster state. Unfortunately, each move may
+take a significant amount of time. Thus, during the sequence of moves the
+situation on the cluster may change (e.g., because a new instance is added or
+because instance or node parameters change) and the desired moves can become
+unprofitable.
+
+Usually disk moves become a bottleneck and require a significant amount of
+time. :ref:`Instance move improvements <move-performance>` considers
+disk move speed in more detail. Currently, ``hbal`` has a ``--no-disk-moves``
+option preventing disk moves during cluster balancing in order to perform fast
+(but of course non-optimal) balancing. It may be useful, but ideally we need to
+find a balance between an optimal configuration and the time needed to reach
+it.
+
+Avoiding insignificant disk moves
+=================================
+
+Allowing only sufficiently profitable disk moves is a first step towards
+a compromise between move speed and optimal scoring. This can be implemented
+by introducing an ``--avoid-disk-moves *FACTOR*`` option which will admit disk
+moves only if the gain in the cluster metrics is *FACTOR* times
+higher than the gain achievable by non-disk moves.
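+
+For instance, the following invocation (with an illustrative factor of
+``2.0``) would admit a disk move only if it improves the cluster
+metrics at least twice as much as the best move not involving disks:
+
+::
+
+  hbal -L --avoid-disk-moves 2.0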
aspects of the problem, they do not exclude each other and will be presented
independently.
+.. _move-performance:
+
The performance of Ganeti moves
===============================
--- /dev/null
+===========================
+Checking for N+M redundancy
+===========================
+
+.. contents:: :depth: 4
+
+This document describes how the level of redundancy is estimated
+in Ganeti.
+
+
+Current state and shortcomings
+==============================
+
+Ganeti keeps the cluster N+1 redundant, also taking into account
+:doc:`design-shared-storage-redundancy`. In other words, Ganeti
+tries to keep the cluster in a state where, after failure of a single
+node, no matter which one, all instances can be started immediately.
+However, e.g., for planning
+maintenance, it is sometimes desirable to know how many node
+losses the cluster can recover from. This is also useful information
+when operating big clusters and expecting long hardware repair times.
+
+
+Proposed changes
+================
+
+Higher redundancy as a sequential concept
+-----------------------------------------
+
+The intuitive meaning of an N+M redundant cluster is that M nodes can
+fail without instances being lost. However, when DRBD is used, already the
+failure of 2 nodes can cause the complete loss of an instance. Therefore, the
+best we can hope for is to be able to recover from M sequential failures.
+This intuition that a cluster is N+M redundant, if M nodes can fail one-by-one,
+leaving enough time for a rebalance in between, without losing instances, is
+formalized in the next definition.
+
+Definition of N+M redundancy
+----------------------------
+
+We keep the definition of :doc:`design-shared-storage-redundancy`. Moreover,
+for M a non-negative integer, we define a cluster to be N+(M+2) redundant
+if, after draining any node, the standard rebalancing procedure (as, e.g.,
+provided by `hbal`) will fully evacuate that node and result in an N+(M+1)
+redundant cluster.
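+
+Unwinding this recursive definition once: a cluster is N+2 redundant
+if, after draining any single node, rebalancing can fully evacuate
+that node and the remaining cluster is still N+1 redundant; N+3
+redundancy additionally requires that a second drain-and-rebalance
+step again leaves an N+1 redundant cluster, and so on.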
+
+Independence of Groups
+----------------------
+
+Immediately from the definition, we see that the redundancy level, i.e.,
+the maximal M such that the cluster is N+M redundant, can be computed
+in a group-by-group manner: the standard balancing algorithm will never
+move instances between node groups. The redundancy level of the cluster
+is then the minimum of the redundancy levels of the independent groups.
+
+Estimation of the redundancy level
+----------------------------------
+
+The definition of N+M redundancy requires considering M failures in
+arbitrary order, thus considering super-exponentially many cases for
+large M. As, however, balancing moves instances anyway, the redundancy
+level mainly depends on the amount of node resources available to the
+instances in a node group. So we can get a good approximation of the
+redundancy level of a node group by only considering draining the largest
+node in that group. This is how Ganeti will estimate the redundancy level.
+
+Modifications to existing tools
+-------------------------------
+
+As redundancy levels higher than N+1 are mainly about planning capacity,
+the level of redundancy only needs to be computed on demand. Hence, we
+keep the tool changes minimal.
+
+- ``hcheck`` will report the level of redundancy for each node group as
+ a new output parameter
+
+The rest of Ganeti will not be changed.
Returns a list of all non-cleared incidents. Each incident is reported
as a JSON object with at least the following information.
-- ``id`` The unique identifier assigned to the event.
+- ``uuid`` The unique identifier assigned to the event.
- ``node`` The UUID of the node on which the event was observed.
MOND_ARGS=""
WCONFD_ARGS=""
LUXID_ARGS=""
+MAINTD_ARGS=""
MOND_ARGS="-d"
WCONFD_ARGS="-d"
LUXID_ARGS="-d"
+MAINTD_ARGS="-d"
[Unit]
-Description = Ganeti query daemon (luxid)
-Documentation = man:ganeti-luxid(8)
+Description = Ganeti maintenance daemon (maintd)
+Documentation = man:ganeti-maintd(8)
Requires = ganeti-common.service
After = ganeti-common.service
PartOf = ganeti-master.target
[Service]
Type = simple
-User = @GNTLUXIDUSER@
-Group = @GNTLUXIDGROUP@
-ExecStart = @SBINDIR@/ganeti-luxid -f
+User = @GNTMONDUSER@
+Group = @GNTMONDGROUP@
+ExecStart = @SBINDIR@/ganeti-maintd -f
Restart = on-failure
SuccessExitStatus = 0 11
Ganeti customisation using hooks
================================
-Documents Ganeti version 2.16
+Documents Ganeti version 2.17
.. contents::
------------
In order to allow customisation of operations, Ganeti runs scripts in
-sub-directories of ``@SYSCONFDIR@/ganeti/hooks``. These sub-directories
+sub-directories of ``@SYSCONFDIR@/ganeti/hooks`` (that is usually
+``/etc/ganeti/hooks``). These sub-directories
are named ``$hook-$phase.d``, where ``$phase`` is either ``pre`` or
``post`` and ``$hook`` matches the directory name given for a hook (e.g.
``cluster-verify-post.d`` or ``node-add-pre.d``).
This is similar to the ``/etc/network/`` structure present in Debian
for network interface handling.
+Note that Ganeti does not create its ``hooks`` directory by default.
+If you want to use hook scripts, create it on all nodes. This also
+applies to all sub-directories such as ``node-add-pre.d``.
+
Organisation
------------
Note that, even though we call them scripts, we are actually talking
about any executable.
+The filenames of the scripts need to match the regular expression
+``^[a-zA-Z0-9_-]+$``. This means, in particular, that scripts having
+a filename extension (such as ``myhook.sh``) are silently ignored
+by Ganeti.
+
*pre* scripts
~~~~~~~~~~~~~
Ganeti automatic instance allocation
====================================
-Documents Ganeti version 2.16
+Documents Ganeti version 2.17
.. contents::
design-2.14.rst
design-2.15.rst
design-2.16.rst
+ design-2.17.rst
Draft designs
-------------
design-location.rst
design-linuxha.rst
design-lu-generated-jobs.rst
+ design-memory-over-commitment.rst
+ design-migration-speed-hbal.rst
design-monitoring-agent.rst
design-move-instance-improvements.rst
design-multi-reloc.rst
design-multi-version-tests.rst
design-network.rst
+ design-n-m-redundancy.rst
design-node-add.rst
design-node-security.rst
design-oob.rst
design-query2.rst
design-query-splitting.rst
design-reason-trail.rst
+ design-repaird.rst
design-restricted-commands.rst
design-shared-storage.rst
design-shared-storage-redundancy.rst
constants.ISPECS_STD,
constants.IPOLICY_DTS,
constants.IPOLICY_VCPU_RATIO,
- constants.IPOLICY_SPINDLE_RATIO])
+ constants.IPOLICY_SPINDLE_RATIO,
+ constants.IPOLICY_MEMORY_RATIO])
.. pyassert::
Maximum ratio of virtual to physical CPUs (`float`)
:pyeval:`constants.IPOLICY_SPINDLE_RATIO`
Maximum ratio of instances to their node's ``spindle_count`` (`float`)
+:pyeval:`constants.IPOLICY_MEMORY_RATIO`
+ Maximum ratio of memory overcommitment (`float`)
Usage examples
--------------
Security in Ganeti
==================
-Documents Ganeti version 2.16
+Documents Ganeti version 2.17
Ganeti was developed to run on internal, trusted systems. As such, the
security model is all-or-nothing.
Virtual cluster support
=======================
-Documents Ganeti version 2.16
+Documents Ganeti version 2.17
.. contents::
pub_key_file=pathutils.SSH_PUB_KEYS,
ssconf_store=None,
noded_cert_file=pathutils.NODED_CERT_FILE,
- run_cmd_fn=ssh.RunSshCmdWithStdin):
+ run_cmd_fn=ssh.RunSshCmdWithStdin,
+ ssh_update_debug=False,
+ ssh_update_verbose=False):
"""Distributes a node's public SSH key across the cluster.
Note that this function should only be executed on the master node, which
pub_key_file=pub_key_file,
ssconf_store=ssconf_store,
noded_cert_file=noded_cert_file,
- run_cmd_fn=run_cmd_fn)
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
# Node info named tuple specifically for the use with AddNodeSshKeyBulk
pub_key_file=pathutils.SSH_PUB_KEYS,
ssconf_store=None,
noded_cert_file=pathutils.NODED_CERT_FILE,
- run_cmd_fn=ssh.RunSshCmdWithStdin):
+ run_cmd_fn=ssh.RunSshCmdWithStdin,
+ ssh_update_debug=False,
+ ssh_update_verbose=False):
"""Distributes a node's public SSH key across the cluster.
Note that this function should only be executed on the master node, which
(constants.SSHS_OVERRIDE, all_keys)
try:
+ backoff = 5 # seconds
utils.RetryByNumberOfTimes(
- constants.SSHS_MAX_RETRIES,
+ constants.SSHS_MAX_RETRIES, backoff,
errors.SshUpdateError,
run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
ssh_port_map.get(node_info.name), node_data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False, strict_host_check=False)
except errors.SshUpdateError as e:
# Clean up the master's public key file if adding key fails
if node_info.to_public_keys:
if node in potential_master_candidates:
logging.debug("Updating SSH key files of node '%s'.", node)
try:
+ backoff = 5 # seconds
utils.RetryByNumberOfTimes(
- constants.SSHS_MAX_RETRIES,
- errors.SshUpdateError,
+ constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError,
run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
ssh_port_map.get(node), pot_mc_data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False, strict_host_check=False)
except errors.SshUpdateError as last_exception:
error_msg = ("When adding the key of node '%s', updating SSH key"
" files of node '%s' failed after %s retries."
if to_authorized_keys:
run_cmd_fn(cluster_name, node, pathutils.SSH_UPDATE,
ssh_port_map.get(node), base_data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False,
+ strict_host_check=False)
return node_errors
+# TODO: will be fixed with pending patch series.
+# pylint: disable=R0913
def RemoveNodeSshKey(node_uuid, node_name,
master_candidate_uuids,
potential_master_candidates,
ssconf_store=None,
noded_cert_file=pathutils.NODED_CERT_FILE,
readd=False,
- run_cmd_fn=ssh.RunSshCmdWithStdin):
+ run_cmd_fn=ssh.RunSshCmdWithStdin,
+ ssh_update_debug=False,
+ ssh_update_verbose=False):
"""Removes the node's SSH keys from the key files and distributes those.
Note that at least one of the flags C{from_authorized_keys},
ssconf_store=ssconf_store,
noded_cert_file=noded_cert_file,
readd=readd,
- run_cmd_fn=run_cmd_fn)
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
# Node info named tuple specifically for the use with RemoveNodeSshKeyBulk
ssconf_store=None,
noded_cert_file=pathutils.NODED_CERT_FILE,
readd=False,
- run_cmd_fn=ssh.RunSshCmdWithStdin):
+ run_cmd_fn=ssh.RunSshCmdWithStdin,
+ ssh_update_debug=False,
+ ssh_update_verbose=False):
"""Removes the node's SSH keys from the key files and distributes those.
Note that at least one of the flags C{from_authorized_keys},
logging.debug("Updating key setup of potential master candidate node"
" %s.", node)
try:
+ backoff = 5 # seconds
utils.RetryByNumberOfTimes(
- constants.SSHS_MAX_RETRIES,
- errors.SshUpdateError,
+ constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError,
run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
ssh_port, pot_mc_data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False, strict_host_check=False)
except errors.SshUpdateError as last_exception:
error_msg = error_msg_final % (
node_info.name, node, last_exception)
if from_authorized_keys:
logging.debug("Updating key setup of normal node %s.", node)
try:
+ backoff = 5 # seconds
utils.RetryByNumberOfTimes(
- constants.SSHS_MAX_RETRIES,
- errors.SshUpdateError,
+ constants.SSHS_MAX_RETRIES, backoff, errors.SshUpdateError,
run_cmd_fn, cluster_name, node, pathutils.SSH_UPDATE,
ssh_port, base_data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False, strict_host_check=False)
except errors.SshUpdateError as last_exception:
error_msg = error_msg_final % (
node_info.name, node, last_exception)
logging.debug("Updating SSH key setup of target node '%s'.",
node_info.name)
try:
+ backoff = 5 # seconds
utils.RetryByNumberOfTimes(
- constants.SSHS_MAX_RETRIES,
+ constants.SSHS_MAX_RETRIES, backoff,
errors.SshUpdateError,
run_cmd_fn, cluster_name, node_info.name, pathutils.SSH_UPDATE,
ssh_port, data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False, strict_host_check=False)
except errors.SshUpdateError as last_exception:
result_msgs.append(
(node_info.name,
ssh.RemovePublicKey(node_uuid, key_file=pub_key_file)
return result_msgs
+# pylint: enable=R0913
+
+
+def RemoveSshKeyFromPublicKeyFile(node_name,
+ pub_key_file=pathutils.SSH_PUB_KEYS,
+ ssconf_store=None):
+ """Removes a SSH key from the master's public key file.
+
+ This is an operation that is only used to clean up after failed operations
+ (for example failed hooks before adding a node). To avoid abuse of this
+ function (and the matching RPC call), we add a safety check to make sure
+ that only stray keys can be removed that belong to nodes that are not
+ in the cluster (anymore).
+
+ @type node_name: string
+ @param node_name: the name of the node whose key is removed
+
+ """
+ if not ssconf_store:
+ ssconf_store = ssconf.SimpleStore()
+
+ node_list = ssconf_store.GetNodeList()
+
+ if node_name in node_list:
+ raise errors.SshUpdateError("Cannot remove key of node '%s',"
+ " because it still belongs to the cluster."
+ % node_name)
+
+ keys_by_name = ssh.QueryPubKeyFile([node_name], key_file=pub_key_file)
+ if not keys_by_name or node_name not in keys_by_name:
+ logging.info("The node '%s' whose key is supposed to be removed does not"
+ " have an entry in the public key file. Hence, there is"
+ " nothing left to do.", node_name)
+ ssh.RemovePublicKey(node_name, key_file=pub_key_file)
-def _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, ssh_key_type,
- ssh_key_bits, pub_key_file=pathutils.SSH_PUB_KEYS,
+
+def _GenerateNodeSshKey(node_name, ssh_port_map, ssh_key_type, ssh_key_bits,
ssconf_store=None,
noded_cert_file=pathutils.NODED_CERT_FILE,
run_cmd_fn=ssh.RunSshCmdWithStdin,
- suffix=""):
+ suffix="",
+ ssh_update_debug=False,
+ ssh_update_verbose=False):
"""Generates the root SSH key pair on the node.
- @type node_uuid: str
- @param node_uuid: UUID of the node whose key is removed
@type node_name: str
@param node_name: name of the node whose key is generated
@type ssh_port_map: dict of str to int
if not ssconf_store:
ssconf_store = ssconf.SimpleStore()
- keys_by_uuid = ssh.QueryPubKeyFile([node_uuid], key_file=pub_key_file)
- if not keys_by_uuid or node_uuid not in keys_by_uuid:
- raise errors.SshUpdateError("Node %s (UUID: %s) whose key is requested to"
- " be regenerated is not registered in the"
- " public keys file." % (node_name, node_uuid))
-
data = {}
_InitSshUpdateData(data, noded_cert_file, ssconf_store)
cluster_name = data[constants.SSHS_CLUSTER_NAME]
run_cmd_fn(cluster_name, node_name, pathutils.SSH_UPDATE,
ssh_port_map.get(node_name), data,
- debug=False, verbose=False, use_cluster_key=False,
- ask_key=False, strict_host_check=False)
+ debug=ssh_update_debug, verbose=ssh_update_verbose,
+ use_cluster_key=False, ask_key=False, strict_host_check=False)
def _GetMasterNodeUUID(node_uuid_name_map, master_node_name):
return old_master_keys_by_uuid
-def _GetNewMasterKey(root_keyfiles, master_node_uuid):
- new_master_keys = []
- for (_, (_, public_key_file)) in root_keyfiles.items():
- public_key_dir = os.path.dirname(public_key_file)
- public_key_file_tmp_filename = \
- os.path.splitext(os.path.basename(public_key_file))[0] \
- + constants.SSHS_MASTER_SUFFIX + ".pub"
- public_key_path_tmp = os.path.join(public_key_dir,
- public_key_file_tmp_filename)
- if os.path.exists(public_key_path_tmp):
- # for some key types, there might not be any keys
- key = utils.ReadFile(public_key_path_tmp)
- new_master_keys.append(key)
- if not new_master_keys:
- raise errors.SshUpdateError("Cannot find any type of temporary SSH key.")
- return {master_node_uuid: new_master_keys}
-
-
-def _ReplaceMasterKeyOnMaster(root_keyfiles):
- number_of_moves = 0
- for (_, (private_key_file, public_key_file)) in root_keyfiles.items():
- key_dir = os.path.dirname(public_key_file)
- private_key_file_tmp = \
- os.path.basename(private_key_file) + constants.SSHS_MASTER_SUFFIX
- public_key_file_tmp = private_key_file_tmp + ".pub"
- private_key_path_tmp = os.path.join(key_dir,
- private_key_file_tmp)
- public_key_path_tmp = os.path.join(key_dir,
- public_key_file_tmp)
- if os.path.exists(public_key_file):
- utils.CreateBackup(public_key_file)
- utils.RemoveFile(public_key_file)
- if os.path.exists(private_key_file):
- utils.CreateBackup(private_key_file)
- utils.RemoveFile(private_key_file)
- if os.path.exists(public_key_path_tmp) and \
- os.path.exists(private_key_path_tmp):
- # for some key types, there might not be any keys
- shutil.move(public_key_path_tmp, public_key_file)
- shutil.move(private_key_path_tmp, private_key_file)
- number_of_moves += 1
- if not number_of_moves:
- raise errors.SshUpdateError("Could not move at least one master SSH key.")
-
-
def RenewSshKeys(node_uuids, node_names, master_candidate_uuids,
potential_master_candidates, old_key_type, new_key_type,
new_key_bits,
ganeti_pub_keys_file=pathutils.SSH_PUB_KEYS,
ssconf_store=None,
noded_cert_file=pathutils.NODED_CERT_FILE,
- run_cmd_fn=ssh.RunSshCmdWithStdin):
+ run_cmd_fn=ssh.RunSshCmdWithStdin,
+ ssh_update_debug=False,
+ ssh_update_verbose=False):
"""Renews all SSH keys and updates authorized_keys and ganeti_pub_keys.
@type node_uuids: list of str
raise errors.ProgrammerError("List of nodes UUIDs and node names"
" does not match in length.")
- (_, root_keyfiles) = \
- ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
- (_, old_pub_keyfile) = root_keyfiles[old_key_type]
- (_, new_pub_keyfile) = root_keyfiles[new_key_type]
- old_master_key = utils.ReadFile(old_pub_keyfile)
+ old_pub_keyfile = ssh.GetSshPubKeyFilename(old_key_type)
+ new_pub_keyfile = ssh.GetSshPubKeyFilename(new_key_type)
+ old_master_key = ssh.ReadLocalSshPubKeys([old_key_type])
node_uuid_name_map = zip(node_uuids, node_names)
node_list.append((node_uuid, node_name, master_candidate,
potential_master_candidate))
- keys_by_uuid = ssh.QueryPubKeyFile([node_uuid],
- key_file=ganeti_pub_keys_file)
- if not keys_by_uuid:
- raise errors.SshUpdateError("No public key of node %s (UUID %s) found,"
- " not generating a new key."
- % (node_name, node_uuid))
-
if master_candidate:
logging.debug("Fetching old SSH key from node '%s'.", node_name)
- old_pub_key = ssh.ReadRemoteSshPubKeys(old_pub_keyfile,
- node_name, cluster_name,
- ssh_port_map[node_name],
- False, # ask_key
- False) # key_check
+ old_pub_key = ssh.ReadRemoteSshPubKey(old_pub_keyfile,
+ node_name, cluster_name,
+ ssh_port_map[node_name],
+ False, # ask_key
+ False) # key_check
if old_pub_key != old_master_key:
# If we are already in a multi-key setup (that is past Ganeti 2.12),
# we can safely remove the old key of the node. Otherwise, we cannot
node_info_to_remove,
master_candidate_uuids,
potential_master_candidates,
- master_uuid=master_node_uuid)
+ master_uuid=master_node_uuid,
+ pub_key_file=ganeti_pub_keys_file,
+ ssconf_store=ssconf_store,
+ noded_cert_file=noded_cert_file,
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
if node_errors:
all_node_errors = all_node_errors + node_errors
in node_list:
logging.debug("Generating new SSH key for node '%s'.", node_name)
- _GenerateNodeSshKey(node_uuid, node_name, ssh_port_map, new_key_type,
- new_key_bits, pub_key_file=ganeti_pub_keys_file,
+ _GenerateNodeSshKey(node_name, ssh_port_map, new_key_type, new_key_bits,
ssconf_store=ssconf_store,
noded_cert_file=noded_cert_file,
- run_cmd_fn=run_cmd_fn)
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_verbose=ssh_update_verbose,
+ ssh_update_debug=ssh_update_debug)
try:
logging.debug("Fetching newly created SSH key from node '%s'.", node_name)
- pub_key = ssh.ReadRemoteSshPubKeys(new_pub_keyfile,
- node_name, cluster_name,
- ssh_port_map[node_name],
- False, # ask_key
- False) # key_check
+ pub_key = ssh.ReadRemoteSshPubKey(new_pub_keyfile,
+ node_name, cluster_name,
+ ssh_port_map[node_name],
+ False, # ask_key
+ False) # key_check
except:
raise errors.SshUpdateError("Could not fetch key of node %s"
" (UUID %s)" % (node_name, node_uuid))
node_keys_to_add, potential_master_candidates,
pub_key_file=ganeti_pub_keys_file, ssconf_store=ssconf_store,
noded_cert_file=noded_cert_file,
- run_cmd_fn=run_cmd_fn)
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
if node_errors:
all_node_errors = all_node_errors + node_errors
# Generate a new master key with a suffix, don't touch the old one for now
logging.debug("Generate new ssh key of master.")
- _GenerateNodeSshKey(master_node_uuid, master_node_name, ssh_port_map,
+ _GenerateNodeSshKey(master_node_name, ssh_port_map,
new_key_type, new_key_bits,
- pub_key_file=ganeti_pub_keys_file,
ssconf_store=ssconf_store,
noded_cert_file=noded_cert_file,
run_cmd_fn=run_cmd_fn,
- suffix=constants.SSHS_MASTER_SUFFIX)
+ suffix=constants.SSHS_MASTER_SUFFIX,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
# Read newly created master key
- new_master_key_dict = _GetNewMasterKey(root_keyfiles, master_node_uuid)
+ new_master_keys = ssh.ReadLocalSshPubKeys(
+ [new_key_type], suffix=constants.SSHS_MASTER_SUFFIX)
# Replace master key in the master nodes' public key file
ssh.RemovePublicKey(master_node_uuid, key_file=ganeti_pub_keys_file)
- for pub_key in new_master_key_dict[master_node_uuid]:
+ for pub_key in new_master_keys:
ssh.AddPublicKey(master_node_uuid, pub_key, key_file=ganeti_pub_keys_file)
# Add new master key to all node's public and authorized keys
to_authorized_keys=True, to_public_keys=True,
get_public_keys=False, pub_key_file=ganeti_pub_keys_file,
ssconf_store=ssconf_store, noded_cert_file=noded_cert_file,
- run_cmd_fn=run_cmd_fn)
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
if node_errors:
all_node_errors = all_node_errors + node_errors
# Remove the old key file and rename the new key to the non-temporary filename
- _ReplaceMasterKeyOnMaster(root_keyfiles)
+ ssh.ReplaceSshKeys(new_key_type, new_key_type,
+ src_key_suffix=constants.SSHS_MASTER_SUFFIX)
# Remove old key from authorized keys
(auth_key_file, _) = \
potential_master_candidates,
keys_to_remove=old_master_keys_by_uuid, from_authorized_keys=True,
from_public_keys=False, clear_authorized_keys=False,
- clear_public_keys=False)
+ clear_public_keys=False,
+ pub_key_file=ganeti_pub_keys_file,
+ ssconf_store=ssconf_store,
+ noded_cert_file=noded_cert_file,
+ run_cmd_fn=run_cmd_fn,
+ ssh_update_debug=ssh_update_debug,
+ ssh_update_verbose=ssh_update_verbose)
if node_errors:
all_node_errors = all_node_errors + node_errors
return _verify_cmd(path, cmd)
-def RunRestrictedCmd(cmd,
- _lock_timeout=_RCMD_LOCK_TIMEOUT,
- _lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE,
- _path=pathutils.RESTRICTED_COMMANDS_DIR,
- _sleep_fn=time.sleep,
- _prepare_fn=_PrepareRestrictedCmd,
- _runcmd_fn=utils.RunCmd,
- _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
- """Executes a restricted command after performing strict tests.
+def RunConstrainedCmd(cmd,
+ lock_file,
+ path,
+ inp=None,
+ _lock_timeout=_RCMD_LOCK_TIMEOUT,
+ _sleep_fn=time.sleep,
+ _prepare_fn=_PrepareRestrictedCmd,
+ _runcmd_fn=utils.RunCmd,
+ _enabled=constants.ENABLE_RESTRICTED_COMMANDS):
+ """Executes a command after performing strict tests.
@type cmd: string
@param cmd: Command name
+ @type lock_file: string
+ @param lock_file: path to the lock file
+ @type path: string
+ @param path: path to the directory in which the command is present
+ @type inp: string
+ @param inp: Input to be passed to the command
@rtype: string
@return: Command output
@raise RPCFail: In case of an error
try:
cmdresult = None
try:
- lock = utils.FileLock.Open(_lock_file)
+ lock = utils.FileLock.Open(lock_file)
lock.Exclusive(blocking=True, timeout=_lock_timeout)
- (status, value) = _prepare_fn(_path, cmd)
+ (status, value) = _prepare_fn(path, cmd)
if status:
+ if inp:
+ input_fd = tempfile.TemporaryFile()
+ input_fd.write(inp)
+ input_fd.flush()
+ input_fd.seek(0)
+ else:
+ input_fd = None
cmdresult = _runcmd_fn([value], env={}, reset_env=True,
- postfork_fn=lambda _: lock.Unlock())
+ postfork_fn=lambda _: lock.Unlock(),
+ input_fd=input_fd)
+ if input_fd:
+ input_fd.close()
else:
logging.error(value)
except Exception: # pylint: disable=W0703
default_nodegroup.uuid: default_nodegroup,
}
now = time.time()
+ maintenance = objects.Maintenance(serial_no=1, ctime=now, mtime=now)
config_data = objects.ConfigData(version=version,
cluster=cluster_config,
nodegroups=nodegroups,
networks={},
disks={},
filters={},
+ maintenance=maintenance,
serial_no=1,
ctime=now, mtime=now)
utils.WriteFile(cfg_file,
constants.NDS_CLUSTER_NAME: cluster_name,
constants.NDS_NODE_DAEMON_CERTIFICATE:
utils.ReadFile(pathutils.NODED_CERT_FILE),
+ constants.NDS_HMAC:
+ utils.ReadFile(pathutils.CONFD_HMAC_KEY),
constants.NDS_SSCONF: ssconf.SimpleStore().ReadAll(),
constants.NDS_START_NODE_DAEMON: True,
constants.NDS_NODE_NAME: node,
ipolicy_disk_templates=None,
ipolicy_vcpu_ratio=None,
ipolicy_spindle_ratio=None,
+ ipolicy_memory_ratio=None,
group_ipolicy=False,
allowed_values=None,
fill_all=False):
ipolicy_out[constants.IPOLICY_VCPU_RATIO] = ipolicy_vcpu_ratio
if ipolicy_spindle_ratio is not None:
ipolicy_out[constants.IPOLICY_SPINDLE_RATIO] = ipolicy_spindle_ratio
+ if ipolicy_memory_ratio is not None:
+ ipolicy_out[constants.IPOLICY_MEMORY_RATIO] = ipolicy_memory_ratio
assert not (frozenset(ipolicy_out.keys()) - constants.IPOLICY_ALL_KEYS)
"DST_NODE_OPT",
"EARLY_RELEASE_OPT",
"ENABLED_DATA_COLLECTORS_OPT",
+ "DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT",
"ENABLED_DISK_TEMPLATES_OPT",
"ENABLED_HV_OPT",
"ENABLED_USER_SHUTDOWN_OPT",
"IGNORE_SOFT_ERRORS_OPT",
"IGNORE_SIZE_OPT",
"INCLUDEDEFAULTS_OPT",
+ "INPUT_OPT",
"INSTALL_IMAGE_OPT",
"INSTANCE_COMMUNICATION_NETWORK_OPT",
"INSTANCE_COMMUNICATION_OPT",
"IPOLICY_STD_SPECS_OPT",
"IPOLICY_STD_SPECS_STR",
"IPOLICY_VCPU_RATIO",
+ "IPOLICY_MEMORY_RATIO",
"LONG_SLEEP_OPT",
"MAC_PREFIX_OPT",
+ "MAINT_BALANCE_OPT",
+ "MAINT_BALANCE_THRESHOLD_OPT",
+ "MAINT_INTERVAL_OPT",
"MAINTAIN_NODE_HEALTH_OPT",
"MASTER_NETDEV_OPT",
"MASTER_NETMASK_OPT",
help=("The maximum allowed instances to"
" spindle ratio"))
+IPOLICY_MEMORY_RATIO = cli_option("--ipolicy-memory-ratio",
+ dest="ipolicy_memory_ratio",
+ type="maybefloat", default=None,
+ help=("The maximum allowed used memory to"
+ " physicall memory ratio (in terms of"
+ " memory overcommitment)"))
+
HYPERVISOR_OPT = cli_option("-H", "--hypervisor-parameters", dest="hypervisor",
help="Hypervisor and hypervisor options, in the"
" format hypervisor:option=value,option=value,...",
help="Comma-separated list of compression tools which are"
" allowed to be used by Ganeti in various operations")
+MAINT_INTERVAL_OPT = \
+ cli_option("--maintenance-interval", dest="maint_round_delay", type="int",
+ default=None, help="Minimal time in seconds, the maintenance"
+ " daemon waits between rounds")
+
+MAINT_BALANCE_OPT = \
+ cli_option("--auto-balance-cluster", dest="maint_balance", type="bool",
+ default=None, metavar=_YORNO, help="Whether the maintenance"
+ " daemon should balance the cluster")
+
+MAINT_BALANCE_THRESHOLD_OPT = \
+ cli_option("--auto-balance-threshold", dest="maint_balance_threshold",
+ type="float", default=None, metavar="CLUSTERSCORE",
+ help="Minimal gain for an auto-balancing step to be taken")
+
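+# Illustrative sketch only: once wired into the cluster-modify command
+# further below, these options could be used along the lines of
+#   gnt-cluster modify --maintenance-interval=300 \
+#     --auto-balance-cluster=yes --auto-balance-threshold=0.1
+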
VG_NAME_OPT = cli_option("--vg-name", dest="vg_name",
help=("Enables LVM and specifies the volume group"
" name (cluster-wide) for disk allocation"
"in the format collector=bool, where collector is one of %s."
% ", ".join(constants.DATA_COLLECTOR_NAMES))
+DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT = \
+ cli_option("--diagnose-data-collector-filename",
+ dest="diagnose_data_collector_filename",
+ help=("Set's the file name of the script"
+ " diagnose data collector should run"
+ " If this value is empty string, the collector"
+ " will return a success value"
+ " without running anything"),
+ type="string")
+
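+# Illustrative sketch only: with the cluster-modify wiring further below,
+# the collector script could be configured along the lines of
+#   gnt-cluster modify --diagnose-data-collector-filename=diagnose.sh
+# (the script name here is a made-up example).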
+
VERIFY_CLUTTER_OPT = cli_option(
"--verify-ssh-clutter", default=False, dest="verify_clutter",
help="Verify that Ganeti did not clutter"
"--long-sleep", default=False, dest="long_sleep",
help="Allow long shutdowns when backing up instances", action="store_true")
+INPUT_OPT = cli_option("--input", dest="input", default=None,
+ help=("input to be passed as stdin"
+ " to the repair command"),
+ type="string")
+
SSH_KEY_TYPE_OPT = \
cli_option("--ssh-key-type", default=None,
choices=list(constants.SSHK_ALL), dest="ssh_key_type",
IPOLICY_DISK_TEMPLATES,
IPOLICY_VCPU_RATIO,
IPOLICY_SPINDLE_RATIO,
+ IPOLICY_MEMORY_RATIO,
]
# instance policy split specs options
from ganeti import ssh
from ganeti import uidpool
from ganeti import utils
+from ganeti import wconfd
from ganeti.client import base
ipolicy_disk_templates=opts.ipolicy_disk_templates,
ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
+ ipolicy_memory_ratio=opts.ipolicy_memory_ratio,
fill_all=True)
if opts.candidate_pool_size is None:
node_certificates=new_node_cert or new_cluster_cert,
renew_ssh_keys=new_ssh_keys,
ssh_key_type=ssh_key_type,
- ssh_key_bits=ssh_key_bits)
+ ssh_key_bits=ssh_key_bits,
+ verbose=verbose,
+ debug=debug)
SubmitOpCode(renew_op, cl=cl)
ToStdout("All requested certificates and keys have been replaced."
# get the key files of all non-master nodes
for node in nonmaster_nodes:
- pub_key = ssh.ReadRemoteSshPubKeys(pub_key_filename, node, cluster_name,
- ssh_port_map[node],
- options.ssh_key_check,
- options.ssh_key_check)
+ pub_key = ssh.ReadRemoteSshPubKey(pub_key_filename, node, cluster_name,
+ ssh_port_map[node],
+ options.ssh_key_check,
+ options.ssh_key_check)
ssh.AddPublicKey(node_uuid_map[node], pub_key, key_file=pub_key_file)
opts.ipolicy_disk_templates is not None or
opts.ipolicy_vcpu_ratio is not None or
opts.ipolicy_spindle_ratio is not None or
+ opts.ipolicy_memory_ratio is not None or
opts.modify_etc_hosts is not None or
opts.file_storage_dir is not None or
opts.install_image is not None or
opts.compression_tools is not None or
opts.shared_file_storage_dir is not None or
opts.enabled_user_shutdown is not None or
+ opts.maint_round_delay is not None or
+ opts.maint_balance is not None or
+ opts.maint_balance_threshold is not None or
opts.data_collector_interval or
+ opts.diagnose_data_collector_filename is not None or
opts.enabled_data_collectors):
ToStderr("Please give at least one of the parameters.")
return 1
ipolicy_disk_templates=opts.ipolicy_disk_templates,
ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
+ ipolicy_memory_ratio=opts.ipolicy_memory_ratio,
)
mnh = opts.maintain_node_health
shared_file_storage_dir=opts.shared_file_storage_dir,
compression_tools=compression_tools,
enabled_user_shutdown=opts.enabled_user_shutdown,
+ maint_round_delay=opts.maint_round_delay,
+ maint_balance=opts.maint_balance,
+ maint_balance_threshold=opts.maint_balance_threshold,
enabled_data_collectors=enabled_data_collectors,
data_collector_interval=data_collector_interval,
+ diagnose_data_collector_filename=opts.diagnose_data_collector_filename
)
return base.GetResult(None, opts, SubmitOrSend(op, opts))
return _off_fn(opts, node_list, inst_map)
+def RemoveRepair(opts, args):
+ """Uncoditionally remove a repair event
+
+ @param opts: the command line options selected by the user (ignored)
+ @type args: list
+ @param args: one element, the uuid of the event to remove
+ @rtype: int
+ @return: the desired exit code
+
+ """
+ uuid = args[0]
+ wconfd.Client().RmMaintdIncident(uuid)
+ return 0
+
+
def _GetCreateCommand(info):
buf = StringIO()
buf.write("gnt-cluster init")
INSTANCE_POLICY_OPTS +
[GLOBAL_FILEDIR_OPT, GLOBAL_SHARED_FILEDIR_OPT, ZEROING_IMAGE_OPT,
COMPRESSION_TOOLS_OPT] +
- [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT],
+ [ENABLED_DATA_COLLECTORS_OPT, DATA_COLLECTOR_INTERVAL_OPT,
+ DIAGNOSE_DATA_COLLECTOR_FILENAME_OPT,
+ MAINT_INTERVAL_OPT, MAINT_BALANCE_OPT, MAINT_BALANCE_THRESHOLD_OPT],
"[opts...]",
"Alters the parameters of the cluster"),
"renew-crypto": (
"upgrade": (
UpgradeGanetiCommand, ARGS_NONE, [TO_OPT, RESUME_OPT], "",
"Upgrade (or downgrade) to a new Ganeti version"),
+ "remove-repair": (
+ RemoveRepair, [ArgUnknown()], [], "<uuid>",
+ "Remove a repair event from the list of pending events"),
}
minmax_ispecs=opts.ipolicy_bounds_specs,
ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
+ ipolicy_memory_ratio=opts.ipolicy_memory_ratio,
ipolicy_disk_templates=opts.ipolicy_disk_templates,
group_ipolicy=True)
allmods = [opts.ndparams, opts.alloc_policy, opts.diskparams, opts.hv_state,
opts.disk_state, opts.ipolicy_bounds_specs,
opts.ipolicy_vcpu_ratio, opts.ipolicy_spindle_ratio,
- opts.diskparams, opts.ipolicy_disk_templates]
+ opts.ipolicy_memory_ratio, opts.diskparams,
+ opts.ipolicy_disk_templates]
if allmods.count(None) == len(allmods):
ToStderr("Please give at least one of the parameters.")
return 1
ipolicy_disk_templates=opts.ipolicy_disk_templates,
ipolicy_vcpu_ratio=opts.ipolicy_vcpu_ratio,
ipolicy_spindle_ratio=opts.ipolicy_spindle_ratio,
+ ipolicy_memory_ratio=opts.ipolicy_memory_ratio,
group_ipolicy=True,
allowed_values=[constants.VALUE_DEFAULT])
strict_host_check=options.ssh_key_check)
(_, pub_keyfile) = root_keyfiles[ssh_key_type]
- pub_key = ssh.ReadRemoteSshPubKeys(pub_keyfile, node, cluster_name, ssh_port,
- options.ssh_key_check,
- options.ssh_key_check)
+ pub_key = ssh.ReadRemoteSshPubKey(pub_keyfile, node, cluster_name, ssh_port,
+ options.ssh_key_check,
+ options.ssh_key_check)
# Unfortunately, we have to add the key with the node name rather than
# the node's UUID here, because at this point, we do not have a UUID yet.
# The entry will be corrected in noded later.
master_capable=opts.master_capable,
disk_state=disk_state,
hv_state=hv_state,
- node_setup=modify_ssh_setup)
+ node_setup=modify_ssh_setup,
+ verbose=opts.verbose,
+ debug=opts.debug > 0)
SubmitOpCode(op, opts=opts)
@return: the desired exit code
"""
- op = opcodes.OpNodeRemove(node_name=args[0])
+ op = opcodes.OpNodeRemove(node_name=args[0],
+ debug=opts.debug > 0,
+ verbose=opts.verbose)
SubmitOpCode(op, opts=opts)
return 0
auto_promote=opts.auto_promote,
powered=opts.node_powered,
hv_state=hv_state,
- disk_state=disk_state)
+ disk_state=disk_state,
+ verbose=opts.verbose,
+ debug=opts.debug > 0)
# even if here we process the result, we allow submit only
result = SubmitOrSend(op, opts)
return exit_code
+def RepairCommand(opts, args):
+ cl = GetClient()
+ if opts.input:
+ inp = opts.input.decode('string_escape')
+ else:
+ inp = None
+ op = opcodes.OpRepairCommand(command=args[0], node_name=args[1],
+ input=inp)
+ result = SubmitOrSend(op, opts, cl=cl)
+ print result
+ return constants.EXIT_SUCCESS
+
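+# Illustrative sketch only, matching the "repair-command" entry registered
+# in the command table further below:
+#   gnt-node repair-command --input "stdin payload" <command> <node_name>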
+
class ReplyStatus(object):
"""Class holding a reply status for synchronous confd clients.
CAPAB_MASTER_OPT, CAPAB_VM_OPT, NODE_PARAMS_OPT, HV_STATE_OPT,
DISK_STATE_OPT],
"[-s ip] [--readd] [--no-ssh-key-check] [--force-join]"
- " [--no-node-setup] [--verbose] [--network] <node_name>",
+ " [--no-node-setup] [--verbose] [--network] [--debug] <node_name>",
"Add a node to the cluster"),
"evacuate": (
EvacuateNode, ARGS_ONE_NODE,
[MC_OPT, DRAINED_OPT, OFFLINE_OPT,
CAPAB_MASTER_OPT, CAPAB_VM_OPT, SECONDARY_IP_OPT,
AUTO_PROMOTE_OPT, DRY_RUN_OPT, PRIORITY_OPT, NODE_PARAMS_OPT,
- NODE_POWERED_OPT, HV_STATE_OPT, DISK_STATE_OPT],
+ NODE_POWERED_OPT, HV_STATE_OPT, DISK_STATE_OPT, VERBOSE_OPT],
"<node_name>", "Alters the parameters of a node"),
"powercycle": (
PowercycleNode, ARGS_ONE_NODE,
"on|off|cycle|status [nodes...]",
"Change power state of node by calling out-of-band helper."),
"remove": (
- RemoveNode, ARGS_ONE_NODE, [DRY_RUN_OPT, PRIORITY_OPT],
- "<node_name>", "Removes a node from the cluster"),
+ RemoveNode, ARGS_ONE_NODE, [DRY_RUN_OPT, PRIORITY_OPT, VERBOSE_OPT],
+ "[--verbose] [--debug] <node_name>", "Removes a node from the cluster"),
"volumes": (
ListVolumes, [ArgNode()],
[NOHDR_OPT, SEP_OPT, USEUNITS_OPT, FIELDS_OPT, PRIORITY_OPT],
[SYNC_OPT, PRIORITY_OPT] + SUBMIT_OPTS + [SHOW_MACHINE_OPT, NODEGROUP_OPT],
"<command> <node_name> [<node_name>...]",
"Executes a restricted command on node(s)"),
+ "repair-command": (
+ RepairCommand, [ArgUnknown(min=1, max=1), ArgNode(min=1, max=1)],
+ [SUBMIT_OPT, INPUT_OPT], "{--input <input>} <command> <node_name>",
+ "Executes a repair command on a node"),
}
#: dictionary with aliases for commands
from ganeti.cmdlib.misc import \
LUOobCommand, \
LUExtStorageDiagnose, \
- LURestrictedCommand
+ LURestrictedCommand, \
+ LURepairCommand
from ganeti.cmdlib.test import \
LUTestOsParams, \
LUTestDelay, \
# pylint: disable=W0613,R0201
return lu_result
+ def HooksAbortCallBack(self, phase, feedback_fn, exception):
+ """Called when the hooks get aborted by an exception.
+
+ This method is called every time a hooks phase is aborted by an exception.
+ This exception is most likely of type C{errors.HooksAbort}. However, we
+ keep the design of this function broad enough to handle any kind of
+ exception.
+
+ The intended purpose of this call back is to run any action that is
+ necessary to bring the cluster back to a clean state from the point
+ in time before calling the hook.
+
+ @type phase: string
+ @param phase: one of L{constants.HOOKS_PHASE_POST} or
+ L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+ @type feedback_fn: callable
+ @param feedback_fn: function used to send feedback back to the caller
+ @type exception: Exception
+ @param exception: The exception that was raised during the execution of
+ hooks.
+
+ """
+ pass
+
def _ExpandAndLockInstance(self, allow_forthcoming=False):
"""Helper function to expand and lock an instance.
potential_master_candidates,
cluster_info.ssh_key_type, # Old key type
self.ssh_key_type, # New key type
- self.ssh_key_bits) # New key bits
+ self.ssh_key_bits, # New key bits
+ self.op.debug,
+ self.op.verbose)
result[master_uuid].Raise("Could not renew the SSH keys of all nodes")
# After the keys have been successfully swapped, time to commit the change
feedback_fn("Cluster LVM configuration already in desired"
" state, not changing")
+ def _SetDiagnoseDataCollectorFilename(self, feedback_fn):
+ """Determines and sets the filename of the script
+ diagnose data collector should run.
+
+ """
+ if self.op.diagnose_data_collector_filename is not None:
+ fn = self.op.diagnose_data_collector_filename
+ if fn != self.cfg.GetDiagnoseDataCollectorFilename():
+ self.cfg.SetDiagnoseDataCollectorFilename(fn)
+ else:
+ feedback_fn("Diagnose data collector filename"
+ " configuration already in desired"
+ " state, not changing")
+
def _SetFileStorageDir(self, feedback_fn):
"""Set the file storage directory.
self._SetSharedFileStorageDir(feedback_fn)
self.cfg.Update(self.cluster, feedback_fn)
self._SetDrbdHelper(feedback_fn)
+ self._SetDiagnoseDataCollectorFilename(feedback_fn)
# re-read the fresh configuration again
self.cluster = self.cfg.GetClusterInfo()
if self.op.compression_tools is not None:
self.cfg.SetCompressionTools(self.op.compression_tools)
+ if self.op.maint_round_delay is not None:
+ self.cfg.SetMaintdRoundDelay(self.op.maint_round_delay)
+
+ if self.op.maint_balance is not None:
+ self.cfg.SetMaintdBalance(self.op.maint_balance)
+
+ if self.op.maint_balance_threshold is not None:
+ self.cfg.SetMaintdBalanceThreshold(self.op.maint_balance_threshold)
+
network_name = self.op.instance_communication_network
if network_name is not None:
return self._ModifyInstanceCommunicationNetwork(self.cfg,
@ivar sbp: dictionary of {primary-node: list of instances} for all
instances for which this node is secondary (config)
@ivar mfree: free memory, as reported by hypervisor (runtime)
+ @ivar mtotal: total memory, as reported by hypervisor (runtime)
+ @ivar mdom0: domain0 memory, as reported by hypervisor (runtime)
@ivar dfree: free disk, as reported by the node (runtime)
@ivar offline: the offline status (config)
@type rpc_fail: boolean
self.sinst = []
self.sbp = {}
self.mfree = 0
+ self.mtotal = 0
+ self.mdom0 = 0
self.dfree = 0
self.offline = offline
self.vm_capable = vm_capable
"""
cluster_info = self.cfg.GetClusterInfo()
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster_info,
+ self.group_info)
+ memory_ratio = ipolicy[constants.IPOLICY_MEMORY_RATIO]
+
for node_uuid, n_img in node_image.items():
# This code checks that every node which is now listed as
# secondary has enough memory to host all instances it is
# WARNING: we currently take into account down instances as well
# as up ones, considering that even if they're down someone
# might want to start them even in the event of a node failure.
+ node_cfg = self.all_node_info[node_uuid]
if n_img.offline or \
- self.all_node_info[node_uuid].group != self.group_uuid:
+ node_cfg.group != self.group_uuid:
# we're skipping nodes marked offline and nodes in other groups from
# the N+1 warning, since most likely we don't have good memory
# information from them; we already list instances living on such
bep = cluster_info.FillBE(all_insts[inst_uuid])
if bep[constants.BE_AUTO_BALANCE]:
needed_mem += bep[constants.BE_MINMEM]
- test = n_img.mfree < needed_mem
+ mnode = n_img.mdom0
+ (hv, hv_state) = self.cfg.GetFilledHvStateParams(node_cfg).items()[0]
+ if hv != constants.HT_XEN_PVM and hv != constants.HT_XEN_HVM:
+ mnode = hv_state["mem_node"]
+ # minimum allowed free memory (it's negative due to over-commitment)
+ mem_threshold = (n_img.mtotal - mnode) * (memory_ratio - 1)
+ test = n_img.mfree - needed_mem < mem_threshold
self._ErrorIf(test, constants.CV_ENODEN1,
self.cfg.GetNodeName(node_uuid),
"not enough memory to accomodate instance failovers"
"""
# try to read free memory (from the hypervisor)
hv_info = nresult.get(constants.NV_HVINFO, None)
- test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
+ test = not isinstance(hv_info, dict) or "memory_free" not in hv_info \
+ or "memory_total" not in hv_info \
+ or "memory_dom0" not in hv_info
self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
"rpc call to node failed (hvinfo)")
if not test:
try:
nimg.mfree = int(hv_info["memory_free"])
+ nimg.mtotal = int(hv_info["memory_total"])
+ nimg.mdom0 = int(hv_info["memory_dom0"])
except (ValueError, TypeError):
self._ErrorIf(True, constants.CV_ENODERPC, ninfo.name,
"node returned invalid nodeinfo, check hypervisor")
potential_master_candidates,
True, # add node's key to all node's 'authorized_keys'
True, # all nodes are potential master candidates
- False) # do not update the node's public keys
+ False, # do not update the node's public keys
+ lu.op.debug,
+ lu.op.verbose)
ssh_result[master_node].Raise(
"Could not update the SSH setup of node '%s' after promotion"
" (UUID: %s)." % (node.name, node.uuid))
from ganeti import query
from ganeti import utils
from ganeti.cmdlib.base import NoHooksLU, QueryBase
-from ganeti.cmdlib.common import GetWantedNodes, SupportsOob
+from ganeti.cmdlib.common import (
+ GetWantedNodes,
+ SupportsOob,
+ ExpandNodeUuidAndName
+)
class LUOobCommand(NoHooksLU):
result.append((True, nres.payload))
return result
+
+
+class LURepairCommand(NoHooksLU):
+ """Logical unit for executing repair commands.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self.node_uuid, _ = ExpandNodeUuidAndName(self.cfg, None, self.op.node_name)
+
+ self.needed_locks = {
+ locking.LEVEL_NODE: self.node_uuid,
+ }
+ self.share_locks = {
+ locking.LEVEL_NODE: False,
+ }
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+
+ def Exec(self, feedback_fn):
+ """Execute restricted command and return output.
+
+ """
+ owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+ assert self.node_uuid in owned_nodes
+ return self.rpc.call_repair_command(self.op.node_name,
+ self.op.command,
+ self.op.input).data[1]
def PreparePostHookNodes(self, post_hook_node_uuids):
return post_hook_node_uuids + [self.new_node.uuid]
+ def HooksAbortCallBack(self, phase, feedback_fn, exception):
+ """Cleans up if the hooks fail.
+
+ This function runs actions that are necessary to bring the cluster into a
+ clean state again. This is necessary if, for example, the hooks of this
+ operation fail and leave the node in an inconsistent state.
+
+ """
+ if phase == constants.HOOKS_PHASE_PRE:
+ feedback_fn("Pre operation hook failed. Rolling back preparations.")
+
+ master_node = self.cfg.GetMasterNodeInfo().name
+ remove_result = self.rpc.call_node_ssh_key_remove_light(
+ [master_node],
+ self.op.node_name)
+ remove_result[master_node].Raise(
+ "Error removing SSH key of node '%s'." % self.op.node_name)
+
def CheckPrereq(self):
"""Check prerequisites.
True, # from public keys
False, # clear authorized keys
True, # clear public keys
- True) # it's a readd
+ True, # it's a readd
+ self.op.debug,
+ self.op.verbose)
remove_result[master_node].Raise(
"Could not remove SSH keys of node %s before readding,"
" (UUID: %s)." % (new_node_name, new_node_uuid))
[master_node], new_node_uuid, new_node_name,
potential_master_candidates,
is_master_candidate, is_potential_master_candidate,
- is_potential_master_candidate)
+ is_potential_master_candidate, self.op.debug, self.op.verbose)
result[master_node].Raise("Could not update the node's SSH setup.")
WarnAboutFailedSshUpdates(result, master_node, feedback_fn)
False, # currently, all nodes are potential master candidates
False, # do not clear node's 'authorized_keys'
False, # do not clear node's 'ganeti_pub_keys'
- False) # no readd
+ False, # no readd
+ self.op.debug,
+ self.op.verbose)
ssh_result[master_node].Raise(
"Could not adjust the SSH setup after demoting node '%s'"
" (UUID: %s)." % (node.name, node.uuid))
potential_master_candidate, # from_public_keys
True, # clear node's 'authorized_keys'
True, # clear node's 'ganeti_public_keys'
- False) # no readd
+ False, # no readd
+ self.op.debug,
+ self.op.verbose)
result[master_node].Raise(
"Could not remove the SSH key of node '%s' (UUID: %s)." %
(self.op.node_name, self.node.uuid))
"""
return self._UnlockedGetNdParams(node)
+ def _UnlockedGetFilledHvStateParams(self, node):
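+ # A sketch of the fill order implemented below: take the cluster-level
+ # hv_state_static for the default hypervisor (falling back to
+ # constants.HVST_DEFAULTS), then overlay the node group's settings and
+ # finally the node's own, returning the result for the default
+ # hypervisor only.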
+ cfg = self._ConfigData()
+ cluster_hv_state = cfg.cluster.hv_state_static
+ def_hv = self._UnlockedGetHypervisorType()
+ cluster_fv = constants.HVST_DEFAULTS if def_hv not in cluster_hv_state \
+ else cluster_hv_state[def_hv]
+ group_hv_state = self._UnlockedGetNodeGroup(node.group).hv_state_static
+ group_fv = cluster_fv if def_hv not in group_hv_state else \
+ objects.FillDict(cluster_fv, group_hv_state[def_hv])
+ node_fv = group_fv if def_hv not in node.hv_state_static else \
+ objects.FillDict(group_fv, node.hv_state_static[def_hv])
+ return {def_hv: node_fv}
+
+ @ConfigSync(shared=1)
+ def GetFilledHvStateParams(self, node):
+ """Get the node params populated with cluster defaults.
+
+ @type node: L{objects.Node}
+ @param node: The node we want to know the params for
+ @return: A dict with the filled in node hv_state params for the default hv
+
+ """
+ return self._UnlockedGetFilledHvStateParams(node)
+
@ConfigSync(shared=1)
def GetNdGroupParams(self, nodegroup):
"""Get the node groups params populated with cluster defaults.
"""
return self._ConfigData().cluster.gluster_storage_dir
+ def _UnlockedGetHypervisorType(self):
+ """Get the hypervisor type for this cluster.
+
+ """
+ return self._ConfigData().cluster.enabled_hypervisors[0]
+
@ConfigSync(shared=1)
def GetHypervisorType(self):
"""Get the hypervisor type for this cluster.
"""
- return self._ConfigData().cluster.enabled_hypervisors[0]
+ return self._UnlockedGetHypervisorType()
@ConfigSync(shared=1)
def GetRsaHostKey(self):
self._ConfigData().cluster.serial_no += 1
@ConfigSync(shared=1)
+ def GetDiagnoseDataCollectorFilename(self):
+ """Return the diagnose data collector filename
+
+ """
+ return self._ConfigData().cluster.diagnose_data_collector_filename
+
+ @ConfigSync()
+ def SetDiagnoseDataCollectorFilename(self, fn):
+ """Set the volume group name.
+
+ """
+ self._ConfigData().cluster.diagnose_data_collector_filename = fn
+ self._ConfigData().cluster.serial_no += 1
+
+ @ConfigSync(shared=1)
def GetDRBDHelper(self):
"""Return DRBD usermode helper.
if disk_uuid in inst_info.disks:
return inst_uuid
+ def SetMaintdRoundDelay(self, delay):
+ """Set the minimal time the maintenance daemon should wait between rounds"""
+ utils.SimpleRetry(True, self._wconfd.SetMaintdRoundDelay, 0.1, 30,
+ args=[delay])
+
+ def SetMaintdBalance(self, flag):
+ """Enable/disable auto-balancing by the maintenance daemon"""
+ utils.SimpleRetry(True, self._wconfd.SetMaintdBalance, 0.1, 30,
+ args=[flag])
+
+ def SetMaintdBalanceThreshold(self, score):
+ """Set the minimal score improvement per move for balancing steps"""
+ utils.SimpleRetry(True, self._wconfd.SetMaintdBalanceThreshold, 0.1, 30,
+ args=[score])
+
class DetachedConfig(ConfigWriter):
"""Read-only snapshot of the config."""
"master_capable": ninfo.master_capable,
"vm_capable": ninfo.vm_capable,
"ndparams": cfg.GetNdParams(ninfo),
+ "hv_state": cfg.GetFilledHvStateParams(ninfo)
})
for ninfo in node_cfg.values())
lu.CheckPrereq()
hm = self.BuildHooksManager(lu)
- h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
+ try:
+ h_results = hm.RunPhase(constants.HOOKS_PHASE_PRE)
+ except Exception, err: # pylint: disable=W0703
+ # This gives the LU a chance of cleaning up in case of a hooks failure.
+ # The type of exception is deliberately broad to be able to react to
+ # any kind of failure.
+ lu.HooksAbortCallBack(constants.HOOKS_PHASE_PRE, self.Log, err)
+ # We re-raise the exception to not alter the behavior of LU handling
+ # otherwise.
+ raise err
lu.HooksCallBack(constants.HOOKS_PHASE_PRE, h_results,
self.Log, None)
__all__ = ["ConfigObject", "ConfigData", "NIC", "Disk", "Instance",
"OS", "Node", "NodeGroup", "Cluster", "FillDict", "Network",
- "Filter"]
+ "Filter", "Maintenance"]
_TIMESTAMPS = ["ctime", "mtime"]
_UUID = ["uuid"]
"networks",
"disks",
"filters",
+ "maintenance",
"serial_no",
] + _TIMESTAMPS
"""
mydict = super(ConfigData, self).ToDict(_with_private=_with_private)
mydict["cluster"] = mydict["cluster"].ToDict()
+ mydict["maintenance"] = mydict["maintenance"].ToDict()
for key in ("nodes", "instances", "nodegroups", "networks", "disks",
"filters"):
mydict[key] = outils.ContainerToDicts(mydict[key])
obj.networks = outils.ContainerFromDicts(obj.networks, dict, Network)
obj.disks = outils.ContainerFromDicts(obj.disks, dict, Disk)
obj.filters = outils.ContainerFromDicts(obj.filters, dict, Filter)
+ obj.maintenance = Maintenance.FromDict(obj.maintenance)
return obj
def DisksOfType(self, dev_type):
disk.UpgradeConfig()
if self.filters is None:
self.filters = {}
+ if self.maintenance is None:
+ self.maintenance = Maintenance.FromDict({})
+ self.maintenance.UpgradeConfig()
def _UpgradeEnabledDiskTemplates(self):
"""Upgrade the cluster's enabled disk templates by inspecting the currently
"predicates", "action", "reason_trail"] + _UUID
+class Maintenance(ConfigObject):
+ """Config object representing the state of the maintenance daemon"""
+ __slots__ = ["roundDelay", "jobs", "evacuated", "balance", "balanceThreshold",
+ "incidents", "serial_no"] + _TIMESTAMPS
+
+ def UpgradeConfig(self):
+ if self.serial_no is None:
+ self.serial_no = 1
+ if self.mtime is None:
+ self.mtime = time.time()
+ if self.ctime is None:
+ self.ctime = time.time()
+
+
class Disk(ConfigObject):
"""Config object representing a block device."""
__slots__ = [
if self.powered is None:
self.powered = True
+ if self.hv_state_static is None:
+ self.hv_state_static = {}
+ if self.disk_state_static is None:
+ self.disk_state_static = {}
+
def ToDict(self, _with_private=False):
"""Custom function for serializing.
if self.ipolicy is None:
self.ipolicy = MakeEmptyIPolicy()
+ if self.hv_state_static is None:
+ self.hv_state_static = {}
+ if self.disk_state_static is None:
+ self.disk_state_static = {}
+
if self.networks is None:
self.networks = {}
"compression_tools",
"enabled_user_shutdown",
"data_collectors",
+ "diagnose_data_collector_filename",
"ssh_key_type",
"ssh_key_bits",
] + _TIMESTAMPS + _UUID
HOOKS_BASE_DIR = CONF_DIR + "/hooks"
FILE_STORAGE_PATHS_FILE = CONF_DIR + "/file-storage-paths"
RESTRICTED_COMMANDS_DIR = CONF_DIR + "/restricted-commands"
+REPAIR_COMMANDS_DIR = CONF_DIR + "/node-repair-commands"
#: Node daemon certificate path
NODED_CERT_FILE = DATA_DIR + "/server.pem"
#: Locked in exclusive mode while noded verifies a remote command
RESTRICTED_COMMANDS_LOCK_FILE = LOCK_DIR + "/ganeti-restricted-commands.lock"
+#: Locked in exclusive mode while noded verifies a repair command
+REPAIR_COMMANDS_LOCK_FILE = LOCK_DIR + "/ganeti-repair-commands.lock"
+
#: Lock file for watcher, locked in shared mode by watcher; lock in exclusive
# mode to block watcher (see L{cli._RunWhileDaemonsStoppedHelper.Call}
WATCHER_LOCK_FILE = LOCK_DIR + "/ganeti-watcher.lock"
LOG_WATCHER = GetLogFilename("watcher")
LOG_COMMANDS = GetLogFilename("commands")
LOG_BURNIN = GetLogFilename("burnin")
+LOG_TOOLS = GetLogFilename("tools")
return _FS_UNAVAIL
-def _GetNodeHvState(_, node):
- """Converts node's hypervisor state for query result.
-
- """
- hv_state = node.hv_state
-
- if hv_state is None:
- return _FS_UNAVAIL
-
- return dict((name, value.ToDict()) for (name, value) in hv_state.items())
-
-
-def _GetNodeDiskState(_, node):
- """Converts node's disk state for query result.
-
- """
- disk_state = node.disk_state
-
- if disk_state is None:
- return _FS_UNAVAIL
-
- return dict((disk_kind, dict((name, value.ToDict())
- for (name, value) in kind_state.items()))
- for (disk_kind, kind_state) in disk_state.items())
-
-
def _BuildNodeFields():
"""Builds list of fields for node queries.
(_MakeField("custom_ndparams", "CustomNodeParameters", QFT_OTHER,
"Custom node parameters"),
NQ_GROUP, 0, _GetItemAttr("ndparams")),
- (_MakeField("hv_state", "HypervisorState", QFT_OTHER, "Hypervisor state"),
- NQ_CONFIG, 0, _GetNodeHvState),
+ # FIXME: The code below returns the custom hv_state instead of the filled
+ # one. Anyway, this functionality is unlikely to be used.
+ (_MakeField("hv_state", "HypervisorState", QFT_OTHER,
+ "Static hypervisor state for default hypervisor only"),
+ NQ_CONFIG, 0, _GetItemAttr("hv_state_static")),
+ (_MakeField("custom_hv_state", "CustomHypervisorState", QFT_OTHER,
+ "Custom static hypervisor state"),
+ NQ_CONFIG, 0, _GetItemAttr("hv_state_static")),
(_MakeField("disk_state", "DiskState", QFT_OTHER, "Disk state"),
- NQ_CONFIG, 0, _GetNodeDiskState),
+ NQ_CONFIG, 0, _GetItemAttr("disk_state_static")),
]
fields.extend(_BuildNDFields(False))
(_MakeField("custom_diskparams", "CustomDiskParameters", QFT_OTHER,
"Custom disk parameters"),
GQ_CONFIG, 0, _GetItemAttr("diskparams")),
+ (_MakeField("hv_state", "HypervisorState", QFT_OTHER,
+ "Custom static hypervisor state"),
+ GQ_CONFIG, 0, _GetItemAttr("hv_state_static")),
+ (_MakeField("disk_state", "DiskState", QFT_OTHER, "Disk state"),
+ GQ_CONFIG, 0, _GetItemAttr("disk_state_static")),
])
# ND parameters
(_MakeField("master_node", "Master", QFT_TEXT, "Master node name"),
CQ_CONFIG, QFF_HOSTNAME,
lambda ctx, cluster: _GetNodeName(ctx, None, cluster.master_node)),
+ (_MakeField("hv_state", "HypervisorState", QFT_OTHER,
+ "Custom static hypervisor state"),
+ CQ_CONFIG, 0, _GetItemAttr("hv_state_static")),
+ (_MakeField("disk_state", "DiskState", QFT_OTHER, "Disk state"),
+ CQ_CONFIG, 0, _GetItemAttr("disk_state_static")),
]
# Simple fields
N_FIELDS = ["name", "offline", "master_candidate", "drained",
"dtotal", "dfree", "sptotal", "spfree",
- "mtotal", "mnode", "mfree",
+ "mtotal", "mnode", "mfree", "hv_state",
"pinst_cnt", "sinst_cnt",
"ctotal", "cnos", "cnodes", "csockets",
"pip", "sip", "role",
"diskparams",
"custom_diskparams",
"ndparams",
- "custom_ndparams",
+ "custom_ndparams"
] + _COMMON_FIELDS
FILTER_RULE_FIELDS = [
("to_public_keys", None, "Whether the node's key should be added"
" to all nodes' public key file"),
("get_public_keys", None, "Whether the node should get the other nodes'"
- " public keys")],
+ " public keys"),
+ ("debug", None, "Set loglevel of ssh calls to 'debug'."),
+ ("verbose", None, "Set loglevel of ssh calls to 'verbose'.")],
None, None, "Distribute a new node's public SSH key on the cluster."),
("node_ssh_key_remove", MULTI, None, constants.RPC_TMO_FAST, [
("node_uuid", None, "UUID of the node whose key is removed"),
("clear_public_keys", None,
"If the 'ganeti_pub_keys' file of the node should be cleared."),
("readd", None,
- "Whether this is a readd operation.")],
+ "Whether this is a readd operation."),
+ ("debug", None, "Set loglevel of ssh calls to 'debug'."),
+ ("verbose", None, "Set loglevel of ssh calls to 'verbose'.")],
None, None, "Remove a node's SSH key from the other nodes' key files."),
("node_ssh_keys_renew", MULTI, None, constants.RPC_TMO_4HRS, [
("node_uuids", None, "UUIDs of the nodes whose key is renewed"),
("potential_master_candidates", None, "Potential master candidates"),
("old_key_type", None, "The type of key previously used"),
("new_key_type", None, "The type of key to generate"),
- ("new_key_bits", None, "The length of the key to generate")],
+ ("new_key_bits", None, "The length of the key to generate"),
+ ("debug", None, "Set logging of SSH update tool to 'debug'."),
+ ("verbose", None, "Set logging of SSH update tool to 'info'.")],
None, None, "Renew all SSH key pairs of all nodes nodes."),
+ ("node_ssh_key_remove_light", MULTI, None, constants.RPC_TMO_FAST, [
+ ("node_name", None, "Name of the node whose key is removed")],
+ None, None, "Remove a node's SSH key from the master's public key file."),
]
_MISC_CALLS = [
("restricted_command", MULTI, None, constants.RPC_TMO_SLOW, [
("cmd", None, "Command name"),
], None, None, "Runs restricted command"),
+ ("repair_command", SINGLE, None, constants.RPC_TMO_SLOW, [
+ ("cmd", None, "Command name"),
+ ("inp", None, "Input to be passed as stdin"),
+ ], None, None, "Runs repair command"),
("run_oob", SINGLE, None, constants.RPC_TMO_NORMAL, [
("oob_program", None, None),
("command", None, None),
"""
(node_uuid, node_name, potential_master_candidates,
- to_authorized_keys, to_public_keys, get_public_keys) = params
+ to_authorized_keys, to_public_keys, get_public_keys,
+ debug, verbose) = params
return backend.AddNodeSshKey(node_uuid, node_name,
potential_master_candidates,
to_authorized_keys=to_authorized_keys,
to_public_keys=to_public_keys,
- get_public_keys=get_public_keys)
+ get_public_keys=get_public_keys,
+ ssh_update_debug=debug,
+ ssh_update_verbose=verbose)
@staticmethod
def perspective_node_ssh_keys_renew(params):
"""
(node_uuids, node_names, master_candidate_uuids,
potential_master_candidates, old_key_type, new_key_type,
- new_key_bits) = params
+ new_key_bits, debug, verbose) = params
return backend.RenewSshKeys(node_uuids, node_names, master_candidate_uuids,
potential_master_candidates, old_key_type,
- new_key_type, new_key_bits)
+ new_key_type, new_key_bits,
+ ssh_update_debug=debug,
+ ssh_update_verbose=verbose)
@staticmethod
def perspective_node_ssh_key_remove(params):
(node_uuid, node_name,
master_candidate_uuids, potential_master_candidates,
from_authorized_keys, from_public_keys, clear_authorized_keys,
- clear_public_keys, readd) = params
+ clear_public_keys, readd, debug, verbose) = params
return backend.RemoveNodeSshKey(node_uuid, node_name,
master_candidate_uuids,
potential_master_candidates,
from_public_keys=from_public_keys,
clear_authorized_keys=clear_authorized_keys,
clear_public_keys=clear_public_keys,
- readd=readd)
+ readd=readd,
+ ssh_update_debug=debug,
+ ssh_update_verbose=verbose)
+
+ @staticmethod
+ def perspective_node_ssh_key_remove_light(params):
+ """Removes a node's SSH key from the master's public key file.
+
+ """
+ (node_name, ) = params
+ return backend.RemoveSshKeyFromPublicKeyFile(node_name)
# cluster --------------------------
"""
(cmd, ) = params
- return backend.RunRestrictedCmd(cmd)
+ return backend.RunConstrainedCmd(
+ cmd,
+ lock_file=pathutils.RESTRICTED_COMMANDS_LOCK_FILE,
+ path=pathutils.RESTRICTED_COMMANDS_DIR)
+
+ @staticmethod
+ def perspective_repair_command(params):
+ """ Run a repair command.
+
+ """
+ (cmd, inp, ) = params
+
+ return backend.RunConstrainedCmd(
+ cmd,
+ lock_file=pathutils.REPAIR_COMMANDS_LOCK_FILE,
+ path=pathutils.REPAIR_COMMANDS_DIR,
+ inp=inp)
@staticmethod
def perspective_write_ssconf_files(params):
import logging
import os
+import shutil
import tempfile
from collections import namedtuple
(result.cmd, result.fail_reason))
-def ReadRemoteSshPubKeys(pub_key_file, node, cluster_name, port, ask_key,
- strict_host_check):
+def ReadRemoteSshPubKey(pub_key_file, node, cluster_name, port, ask_key,
+ strict_host_check):
"""Fetches a public SSH key from a node via SSH.
@type pub_key_file: string
return result.stdout
+def GetSshKeyFilenames(key_type, suffix=""):
+ """Get filenames of the SSH key pair of the given type.
+
+ @type key_type: string
+ @param key_type: type of SSH key, must be element of C{constants.SSHK_ALL}
+ @type suffix: string
+ @param suffix: optional suffix for the key filenames
+ @rtype: tuple of (string, string)
+ @returns: a tuple containing the name of the private key file and the
+ public key file.
+
+ """
+ if key_type not in constants.SSHK_ALL:
+ raise errors.SshUpdateError("Unsupported key type '%s'. Supported key types"
+ " are: %s." % (key_type, constants.SSHK_ALL))
+ (_, root_keyfiles) = \
+ GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
+ if key_type not in root_keyfiles:
+ raise errors.SshUpdateError("No keyfile for key type '%s' available."
+ % key_type)
+
+ key_filenames = root_keyfiles[key_type]
+ if suffix:
+ key_filenames = [_ComputeKeyFilePathWithSuffix(key_filename, suffix)
+ for key_filename in key_filenames]
+
+ return key_filenames
+
+
+def GetSshPubKeyFilename(key_type, suffix=""):
+ """Get filename of the public SSH key of the given type.
+
+ @type key_type: string
+ @param key_type: type of SSH key, must be element of C{constants.SSHK_ALL}
+ @type suffix: string
+ @param suffix: optional suffix for the key filenames
+ @rtype: string
+ @returns: file name of the public key file
+
+ """
+ return GetSshKeyFilenames(key_type, suffix=suffix)[1]
+
+
+def _ComputeKeyFilePathWithSuffix(key_filepath, suffix):
+ """Converts the given key filename to a key filename with a suffix.
+
+ @type key_filepath: string
+ @param key_filepath: path of the key file
+ @type suffix: string
+ @param suffix: suffix to be appended to the basename of the file
+
+ """
+ path = os.path.dirname(key_filepath)
+ ext = os.path.splitext(os.path.basename(key_filepath))[1]
+ basename = os.path.splitext(os.path.basename(key_filepath))[0]
+ return os.path.join(path, basename + suffix + ext)
+
+
+def ReplaceSshKeys(src_key_type, dest_key_type,
+ src_key_suffix="", dest_key_suffix=""):
+ """Replaces an SSH key pair by another SSH key pair.
+
+ Note that both parts, the private and the public key, are replaced.
+
+ @type src_key_type: string
+ @param src_key_type: key type of key pair that is replacing the other
+ key pair
+ @type dest_key_type: string
+ @param dest_key_type: key type of the key pair that is being replaced
+ by the source key pair
+ @type src_key_suffix: string
+ @param src_key_suffix: optional suffix of the key files of the source
+ key pair
+ @type dest_key_suffix: string
+ @param dest_key_suffix: optional suffix of the key files of the
+ destination key pair
+
+ """
+ (src_priv_filename, src_pub_filename) = GetSshKeyFilenames(
+ src_key_type, suffix=src_key_suffix)
+ (dest_priv_filename, dest_pub_filename) = GetSshKeyFilenames(
+ dest_key_type, suffix=dest_key_suffix)
+
+ if not (os.path.exists(src_priv_filename) and
+ os.path.exists(src_pub_filename)):
+ raise errors.SshUpdateError(
+ "At least one of the source key files is missing: %s" %
+ ", ".join([src_priv_filename, src_pub_filename]))
+
+ for dest_file in [dest_priv_filename, dest_pub_filename]:
+ if os.path.exists(dest_file):
+ utils.CreateBackup(dest_file)
+ utils.RemoveFile(dest_file)
+
+ shutil.move(src_priv_filename, dest_priv_filename)
+ shutil.move(src_pub_filename, dest_pub_filename)
+
+
+def ReadLocalSshPubKeys(key_types, suffix=""):
+ """Reads the local root user SSH key.
+
+ @type key_types: list of string
+ @param key_types: types of SSH keys. Must be subset of constants.SSHK_ALL. If
+ 'None' or [], all available keys are returned.
+ @type suffix: string
+ @param suffix: optional suffix to be attached to key names when reading
+ them. Used for temporary key files.
+ @rtype: list of string
+ @return: list of public keys
+
+ """
+ fetch_key_types = []
+ if key_types:
+ fetch_key_types += key_types
+ else:
+ fetch_key_types = constants.SSHK_ALL
+
+ (_, root_keyfiles) = \
+ GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
+
+ result_keys = []
+ for (public_key_type, (_, public_key_file)) in root_keyfiles.items():
+
+ if public_key_type not in fetch_key_types:
+ continue
+
+ public_key_dir = os.path.dirname(public_key_file)
+ public_key_filename = ""
+ if suffix:
+ public_key_filename = \
+ os.path.splitext(os.path.basename(public_key_file))[0] \
+ + suffix + ".pub"
+ else:
+ public_key_filename = public_key_file
+ public_key_path = os.path.join(public_key_dir,
+ public_key_filename)
+
+ if not os.path.exists(public_key_path):
+ raise errors.SshUpdateError("Cannot find SSH public key of type '%s'."
+ % public_key_type)
+ else:
+ key = utils.ReadFile(public_key_path)
+ result_keys.append(key)
+
+ return result_keys
+
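+# For instance, the master key renewal code above reads the freshly
+# generated master keys with
+#   ReadLocalSshPubKeys([new_key_type], suffix=constants.SSHS_MASTER_SUFFIX)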
+
# Update gnt-cluster.rst when changing which combinations are valid.
KeyBitInfo = namedtuple('KeyBitInfo', ['default', 'validation_fn'])
SSH_KEY_VALID_BITS = {
#: Target major version we will upgrade to
TARGET_MAJOR = 2
#: Target minor version we will upgrade to
-TARGET_MINOR = 16
+TARGET_MINOR = 17
#: Target major version for downgrade
DOWNGRADE_MAJOR = 2
#: Target minor version for downgrade
-DOWNGRADE_MINOR = 15
+DOWNGRADE_MINOR = 16
# map of legacy device types
# (mapping differing old LD_* constants to new DT_* constants)
self._Downgrade(config_major, config_minor, config_version,
config_revision)
- # Upgrade from 2.{0..15} to 2.16
- elif config_major == 2 and config_minor in range(0, 16):
+ # Upgrade from 2.{0..n-1} to 2.n
+ elif config_major == 2 and config_minor in range(0, TARGET_MINOR):
if config_revision != 0:
logging.warning("Config revision is %s, not 0", config_revision)
if not self.UpgradeAll():
cluster["data_collectors"].get(
name, dict(active=True,
interval=constants.MOND_TIME_INTERVAL * 1e6))
+ if "diagnose_data_collector_filename" not in cluster:
+ cluster["diagnose_data_collector_filename"] = ""
# These parameters are set to pre-2.16 default values, which
# differ from post-2.16 default values
else:
disk["nodes"] = []
+ @OrFail("Upgrading maintenance data")
+ def UpgradeMaintenance(self):
+ # pylint can't infer config_data type
+ # pylint: disable=E1103
+ maintenance = self.config_data.get("maintenance", None)
+ if maintenance is None:
+ self.config_data["maintenance"] = {}
+
def UpgradeAll(self):
self.config_data["version"] = version.BuildVersion(TARGET_MAJOR,
TARGET_MINOR, 0)
self.UpgradeInstanceIndices,
self.UpgradeFilters,
self.UpgradeDiskNodes,
- self.UpgradeDiskTemplate]
+ self.UpgradeDiskTemplate,
+ self.UpgradeMaintenance]
for s in steps:
s()
return not self.errors
# DOWNGRADE ------------------------------------------------------------
- @OrFail("Removing SSH parameters")
- def DowngradeSshKeyParams(self):
- """Removes the SSH key type and bits parameters from the config.
-
- Also fails if these have been changed from values appropriate in lower
- Ganeti versions.
-
- """
- # pylint: disable=E1103
- # Because config_data is a dictionary which has the get method.
- cluster = self.config_data.get("cluster", None)
- if cluster is None:
- raise Error("Can't find the cluster entry in the configuration")
-
- def _FetchAndDelete(key):
- val = cluster.get(key, None)
- if key in cluster:
- del cluster[key]
- return val
-
- ssh_key_type = _FetchAndDelete("ssh_key_type")
- _FetchAndDelete("ssh_key_bits")
-
- if ssh_key_type is not None and ssh_key_type != "dsa":
- raise Error("The current Ganeti setup is using non-DSA SSH keys, and"
- " versions below 2.16 do not support these. To downgrade,"
- " please perform a gnt-cluster renew-crypto using the "
- " --new-ssh-keys and --ssh-key-type=dsa options, generating"
- " DSA keys that older versions can also use.")
-
def DowngradeAll(self):
+ if "maintenance" in self.config_data:
+ del self.config_data["maintenance"]
+ if "cluster" in self.config_data:
+ cluster = self.config_data["cluster"]
+ if "diagnose_data_collector_filename" in cluster:
+ del cluster["diagnose_data_collector_filename"]
+ if "data_collectors" in cluster:
+ if constants.DATA_COLLECTOR_DIAGNOSE in cluster["data_collectors"]:
+ del cluster["data_collectors"][constants.DATA_COLLECTOR_DIAGNOSE]
+ if constants.DATA_COLLECTOR_KVM_R_S_S in cluster["data_collectors"]:
+ del cluster["data_collectors"][constants.DATA_COLLECTOR_KVM_R_S_S]
+ if "ipolicy" in cluster:
+ ipolicy = cluster["ipolicy"]
+ if "memory-ratio" in ipolicy:
+ del ipolicy["memory-ratio"]
self.config_data["version"] = version.BuildVersion(DOWNGRADE_MAJOR,
DOWNGRADE_MINOR, 0)
- self.DowngradeSshKeyParams()
return not self.errors
def _ComposePaths(self):
return name
+def VerifyHmac(data, error_fn):
+ """Verifies the presence of the hmac secret.
+
+ @type data: dict
+
+ """
+ hmac = data.get(constants.NDS_HMAC)
+ if not hmac:
+ raise error_fn("Hmac key must be provided")
+
+ return hmac
+
+
def LoadData(raw, data_check):
"""Parses and verifies input data.
"""
(opts, args) = ParseOptions()
- utils.SetupToolLogging(opts.debug, opts.verbose)
+ utils.SetupToolLogging(
+ opts.debug, opts.verbose,
+ toolname=os.path.splitext(os.path.basename(__file__))[0])
if args:
logging.error("No arguments are expected")
"""
opts = ParseOptions()
- utils.SetupToolLogging(opts.debug, opts.verbose)
+ utils.SetupToolLogging(
+ opts.debug, opts.verbose,
+ toolname=os.path.splitext(os.path.basename(__file__))[0])
try:
# List of files to delete. Contains tuples consisting of the absolute path
_DATA_CHECK = ht.TStrictDict(False, True, {
constants.NDS_CLUSTER_NAME: ht.TNonEmptyString,
constants.NDS_NODE_DAEMON_CERTIFICATE: ht.TNonEmptyString,
+ constants.NDS_HMAC: ht.TNonEmptyString,
constants.NDS_SSCONF: ht.TDictOf(ht.TNonEmptyString, ht.TString),
constants.NDS_START_NODE_DAEMON: ht.TBool,
constants.NDS_NODE_NAME: ht.TString,
"""
opts = ParseOptions()
- utils.SetupToolLogging(opts.debug, opts.verbose)
+ utils.SetupToolLogging(
+ opts.debug, opts.verbose,
+ toolname=os.path.splitext(os.path.basename(__file__))[0])
try:
getent = runtime.GetEnts()
cluster_name = common.VerifyClusterName(data, SetupError,
constants.NDS_CLUSTER_NAME)
cert_pem = common.VerifyCertificateStrong(data, SetupError)
+ hmac_key = common.VerifyHmac(data, SetupError)
ssdata = VerifySsconf(data, cluster_name)
logging.info("Writing ssconf files ...")
ssconf.WriteSsconfFiles(ssdata, dry_run=opts.dry_run)
+ logging.info("Writing hmac.key ...")
+ utils.WriteFile(pathutils.CONFD_HMAC_KEY, data=hmac_key,
+ mode=pathutils.NODED_CERT_MODE,
+ uid=getent.masterd_uid, gid=getent.masterd_gid,
+ dry_run=opts.dry_run)
+
logging.info("Writing node daemon certificate ...")
utils.WriteFile(pathutils.NODED_CERT_FILE, data=cert_pem,
mode=pathutils.NODED_CERT_MODE,
"""
opts = ParseOptions()
- utils.SetupToolLogging(opts.debug, opts.verbose)
+ utils.SetupToolLogging(
+ opts.debug, opts.verbose,
+ toolname=os.path.splitext(os.path.basename(__file__))[0])
try:
data = common.LoadData(sys.stdin.read(), _DATA_CHECK)
"""
opts = ParseOptions()
- utils.SetupToolLogging(opts.debug, opts.verbose)
+ utils.SetupToolLogging(
+ opts.debug, opts.verbose,
+ toolname=os.path.splitext(os.path.basename(__file__))[0])
try:
data = common.LoadData(sys.stdin.read(), _DATA_CHECK)
"""
opts = ParseOptions()
- utils.SetupToolLogging(opts.debug, opts.verbose)
+ utils.SetupToolLogging(
+ opts.debug, opts.verbose,
+ toolname=os.path.splitext(os.path.basename(__file__))[0])
try:
data = common.LoadData(sys.stdin.read(), _DATA_CHECK)
import os.path
import logging
import logging.handlers
-from cStringIO import StringIO
from ganeti import constants
from ganeti import compat
+from ganeti import pathutils
class _ReopenableLogHandler(logging.handlers.BaseRotatingHandler):
def SetupLogging(logfile, program, debug=0, stderr_logging=False,
multithreaded=False, syslog=constants.SYSLOG_USAGE,
- console_logging=False, root_logger=None):
+ console_logging=False, root_logger=None,
+ verbose=True):
"""Configures the logging module.
@type logfile: str
the system console if logging fails
@type root_logger: logging.Logger
@param root_logger: Root logger to use (for unittests)
+ @type verbose: boolean
+ @param verbose: whether to log at 'info' level (logfile logging only)
@raise EnvironmentError: if we can't open the log file and
syslog/stderr logging is disabled
@rtype: callable
syslog_handler.setLevel(logging.INFO)
root_logger.addHandler(syslog_handler)
- if syslog != constants.SYSLOG_ONLY:
+ if syslog != constants.SYSLOG_ONLY and logfile:
# this can fail, if the logging directories are not setup or we have
# a permisssion problem; in this case, it's best to log but ignore
# the error if stderr_logging is True, and if false we re-raise the
logfile_handler.setFormatter(formatter)
if debug:
logfile_handler.setLevel(logging.DEBUG)
- else:
+ elif verbose:
logfile_handler.setLevel(logging.INFO)
+ else:
+ logfile_handler.setLevel(logging.WARN)
root_logger.addHandler(logfile_handler)
reopen_handlers.append(logfile_handler)
except EnvironmentError:
def SetupToolLogging(debug, verbose, threadname=False,
- _root_logger=None, _stream=None):
+ toolname=None, logfile=pathutils.LOG_TOOLS):
"""Configures the logging module for tools.
- All log messages are sent to stderr.
+ All log messages are sent to the tools.log logfile.
+ @type toolname: string
+ @param toolname: name of the tool that's logging
@type debug: boolean
@param debug: Disable log message filtering
@type verbose: boolean
@param verbose: Enable verbose log messages
@type threadname: boolean
@param threadname: Whether to include thread name in output
+ @type logfile: string
+ @param logfile: the path of the log file to use, use "None"
+ for tools which don't necessarily run on Ganeti nodes (and
+ thus don't have the Ganeti log directory).
"""
- if _root_logger is None:
- root_logger = logging.getLogger("")
- else:
- root_logger = _root_logger
-
- fmt = StringIO()
- fmt.write("%(asctime)s:")
+ if not toolname:
+ toolname = "unspecified_tool"
- if threadname:
- fmt.write(" %(threadName)s")
-
- if debug or verbose:
- fmt.write(" %(levelname)s")
-
- fmt.write(" %(message)s")
-
- formatter = logging.Formatter(fmt.getvalue())
-
- stderr_handler = logging.StreamHandler(_stream)
- stderr_handler.setFormatter(formatter)
+ # 'SetupLogging' takes a rather unintuitive 'debug' option that
+ # is '0' for 'log above debug level' and '1' for
+ # 'log at NOTSET level'. Hence this conversion.
+ debug_int = 0
if debug:
- stderr_handler.setLevel(logging.NOTSET)
- elif verbose:
- stderr_handler.setLevel(logging.INFO)
- else:
- stderr_handler.setLevel(logging.WARNING)
+ debug_int = 1
- root_logger.setLevel(logging.NOTSET)
- root_logger.addHandler(stderr_handler)
+ SetupLogging(logfile,
+ program=toolname,
+ debug=debug_int,
+ multithreaded=threadname,
+ verbose=verbose)
@type noclose_fds: list
@param noclose_fds: list of additional (fd >=3) file descriptors to leave
open for the child process
- @type input_fd: C{file}-like object or numeric file descriptor
+ @type input_fd: C{file}-like object containing an actual file descriptor
+ or numeric file descriptor
@param input_fd: File descriptor for process' standard input
@type postfork_fn: Callable receiving PID as parameter
@param postfork_fn: Callback run after fork but before timeout
@type noclose_fds: list
@param noclose_fds: list of additional (fd >=3) file descriptors to leave
open for the child process
- @type input_fd: C{file}-like object or numeric file descriptor
+ @type input_fd: C{file}-like object containing an actual file descriptor
+ or numeric file descriptor
@param input_fd: File descriptor for process' standard input
@type postfork_fn: Callable receiving PID as parameter
@param postfork_fn: Function run after fork but before timeout
wait_fn=inc_tries, _time_fn=get_tries)
-def RetryByNumberOfTimes(max_retries, exception_class, fn, *args, **kwargs):
+def RetryByNumberOfTimes(max_retries, backoff, exception_class, fn, *args,
+ **kwargs):
"""Retries calling a function up to the specified number of times.
@type max_retries: integer
@type fn: callable
@param fn: Function to be called (up to the specified maximum number of
retries).
+ @type backoff: int
+ @param backoff: this enables and configures the back off behavior after
+ failed tries. If value is '0', there will be no delay between failed
+ tries. If the value is a positive integer, it is interpreted as the
+ base length of the back off delay (in seconds). That means there will be a
+ delay between failed tries of the length specified in this paramter. With
+ each next retry, the delay is increased by the factor of two. For example,
+ if the value is '2', the first delay is 2 seconds, the second 4 seconds,
+ the third 8 seconds (until the max_retries) are hit or the function call
+ succeeds.
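+ Example (a sketch; C{StartService} stands for any callable that
+   raises C{errors.OpExecError} on failure)::
+
+     # Retry up to 3 times, sleeping 2 and then 4 seconds between
+     # failed attempts; raise OpPrereqError if all attempts fail.
+     RetryByNumberOfTimes(3, 2, errors.OpPrereqError, StartService)
+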
"""
+ if backoff < 0:
+ raise exception_class("Backoff must be a non-negative integer.")
+
last_exception = None
+ delay = backoff
for i in range(max_retries):
try:
fn(*args, **kwargs)
except errors.OpExecError as e:
logging.error("Error after retry no. %s: %s.", i, e)
last_exception = e
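+ # Back off before the next try; the delay doubles after each failure.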
+ time.sleep(delay)
+ delay *= 2
else:
if last_exception:
raise exception_class("Error after %s retries. Last exception: %s."
# we are on master now
utils.EnsureDaemon(constants.RAPI)
utils.EnsureDaemon(constants.WCONFD)
+ utils.EnsureDaemon(constants.MAINTD)
# If RAPI isn't responding to queries, try one restart
logging.debug("Attempting to talk to remote API on %s",
--- /dev/null
+ganeti-maintd(8) Ganeti | Version @GANETI_VERSION@
+==================================================
+
+Name
+----
+
+ganeti-maintd - Ganeti maintenance daemon
+
+Synopsis
+--------
+**ganeti-maintd** [-f] [-d] [-p *PORT*] [-b *ADDRESS*] [--no-voting --yes-do-it]
+
+DESCRIPTION
+-----------
+
+**ganeti-maintd** is the daemon carrying out regular maintenance
+of the cluster.
+
+For testing purposes, you can give the ``-f`` option and the
+program won't detach from the running terminal.
+
+Debug-level messages can be activated by giving the ``-d`` option.
+
+The **ganeti-maintd** daemon listens to port 1816 TCP, on all interfaces,
+by default. The port can be overridden by an entry in the services
+database or by passing the ``-p`` option.
+The ``-b`` option can be used to specify the address to bind to
+(defaults to ``0.0.0.0``).
+
+The daemon will refuse to start if it cannot verify that the majority
+of cluster nodes believes that it is running on the master node. To
+allow failover in a two-node cluster, this can be overridden by the
+``--no-voting`` option. In this case, the ``--yes-do-it`` option has
+to be given as well.
+
+Operation
+~~~~~~~~~
+
+The maintenance daemon will carry out precisely the same jobs that
+**harep**\(1) would do if run continuously. In particular, it can
+be controlled by the same set of opt-in tags.
+
+Communication
+~~~~~~~~~~~~~
+
+The daemon will expose its internal state via HTTP. The answer is
+encoded in JSON format and is specific to the particular request.
+
+``/``
++++++
+The root resource. It will return the list of supported protocol
+versions. At the moment, only version ``1`` is supported.
+
+``1/status``
+++++++++++++
+
+List of all currently ongoing incidents. This is a list of JSON
+objects, each containing at least the following information.
+
+- ``uuid`` The unique identifier assigned to the event.
+
+- ``node`` The UUID of the node on which the event was observed.
+
+- ``original`` The unmodified JSON object reported by the self-diagnose
+  data collector.
+
+- ``repair-status`` A string describing the progress made on this event so
+ far. It is one of the following.
+
+  + ``noted`` The event has been observed, but no action has been taken yet.
+
+ + ``pending`` At least one job has been submitted in reaction to the event
+ and none of the submitted jobs has failed so far.
+
+ + ``canceled`` The event has been canceled, i.e., ordered to be ignored, but
+ is still observed.
+
+ + ``failed`` At least one of the submitted jobs has failed. To avoid further
+ damage, the repair daemon will not take any further action for this event.
+
+ + ``completed`` All Ganeti actions associated with this event have been
+ completed successfully, including tagging the node.
+
+- ``jobs`` The list of the numbers of the Ganeti jobs submitted in response
+  to this event.
+
+- ``tag`` The tag that either has already been added to the node or, if
+  the repair event is not yet finalized, will be added in case of success.
+
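+For illustration, a single status entry might look like the following
+(all values are made up)::
+
+  { "uuid": "6d9c9d6c-0c9c-4e62-8a4a-2a1e9e6a7f4b",
+    "node": "c734b767-a5a0-4bbd-a75b-53dfe07b1c32",
+    "original": { ... },
+    "repair-status": "pending",
+    "jobs": [24461],
+    "tag": "maintd:repairready:..."
+  }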
+
+``/1/jobs``
++++++++++++
+The list of jobs the daemon will wait to finish before starting
+the next round of maintenance.
+
+``/1/evacuated``
+++++++++++++++++
+The list of names of instances for which the daemon does not expect
+load data to be available, because they have recently been evacuated
+from an offline (or drained) node. Currently, this affects only Xen
+instances, as for other hypervisors the overall CPU load on the node
+is taken as the balancing measure.
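+
+As an illustration, the daemon's state can be inspected
+programmatically. The following sketch (Python 3) assumes a locally
+running daemon answering plain HTTP on the default port::
+
+  import json
+  import urllib.request
+
+  # Fetch the list of ongoing incidents from the maintenance daemon.
+  with urllib.request.urlopen("http://localhost:1816/1/status") as resp:
+      incidents = json.load(resp)
+  for incident in incidents:
+      print(incident["uuid"], incident["repair-status"])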
discovered or set manually. Only used for estimating how many VCPUs
are left for instances
-Note that currently this option is unused by Ganeti; values will be
-recorded but will not influence the Ganeti operation.
+Note that currently only ``mem_node`` is used by Ganeti; other values
+will be recorded but will not influence the Ganeti operation.
Disk State Parameters
| [\--ipolicy-disk-templates *template* [,*template*...]]
| [\--ipolicy-spindle-ratio *ratio*]
| [\--ipolicy-vcpu-ratio *ratio*]
+| [\--ipolicy-memory-ratio *ratio*]
| [\--disk-state *diskstate*]
| [\--hypervisor-state *hvstate*]
| [\--drbd-usermode-helper *helper*]
- ``--ipolicy-spindle-ratio`` limits the instances-spindles ratio
- ``--ipolicy-vcpu-ratio`` limits the vcpu-cpu ratio
+- ``--ipolicy-memory-ratio`` limits the memory over-commitment ratio
All the instance policy elements can be overridden at group level. Group
level overrides can be removed by specifying ``default`` as the value of
| [\--user-shutdown {yes \| no}]
| [\--enabled-data-collectors *collectors*]
| [\--data-collector-interval *intervals*]
+| [\--maintenance-interval *seconds*]
+| [\--auto-balance-cluster {yes \| no }]
+| [\--auto-balance-threshold *score* ]
+| [\--diagnose-data-collector-filename *filename*]
Modify the options for the cluster.
and number of seconds specifying the interval at which the collector
shall be collected.
+The ``--diagnose-data-collector-filename`` option specifies the filename
+of the script the diagnose data collector should run. If this value is
+the empty string, the data collector will return success without running
+anything. The default value is the empty string.
+
+The ``--maintenance-interval`` option specifies the minimal waiting
+time of the maintenance daemon between maintenance rounds.
+The ``--auto-balance-cluster`` option tells the maintenance daemon
+whether to also keep the cluster balanced. If so, it will carry out
+moves, provided the gain in the cluster score for a move is at least
+the value specified by ``--auto-balance-threshold`` in absolute terms;
+however, once the cluster score is at least 10 times that value, all
+beneficial steps will be carried out (as long as auto-balancing is
+enabled). For example, with a threshold of 0.1, a move improving the
+cluster score by only 0.05 is skipped, unless the overall cluster
+score is at least 1.0.
+
See **gnt-cluster init** for a description of ``--install-image`` and
``--zeroing-image``.
See **ganeti**\(7) for a description of ``--submit`` and other common
options.
+REMOVE-REPAIR
+~~~~~~~~~~~~~
+
+**remove-repair** *uuid*
+
+Unconditionally remove the specified repair event from the list of repair
+events tracked by the maintenance daemon. Note that if the node still
+reports the same breakage, a new event for this breakage will be created
+the next time the daemon queries the node.
+
RENAME
~~~~~~
| [\--new-ssh-keys] [\--no-ssh-key-check]
| [\--new-cluster-domain-secret] [\--cluster-domain-secret *filename*]
| [\--ssh-key-type *type*] | [\--ssh-key-bits *bits*]
+| [\--verbose] | [\--debug]
This command will stop all Ganeti daemons in the cluster and start
them again once the new certificates and keys are replicated. The
properties of the disk types used. They are described in more detail
in the ``init`` option description.
+The options ``--verbose`` and ``--debug`` increase the log level of the
+underlying SSH calls to all nodes. If running ``renew-crypto`` causes
+any problems, use these options and inspect the ``tools.log`` file for
+any unusual output.
+
REPAIR-DISK-SIZES
~~~~~~~~~~~~~~~~~
| [\--ipolicy-disk-templates *template* [,*template*...]]
| [\--ipolicy-spindle-ratio *ratio*]
| [\--ipolicy-vcpu-ratio *ratio*]
+| [\--ipolicy-memory-ratio *ratio*]
| [\--disk-state *diskstate*]
| [\--hypervisor-state *hvstate*]
| {*group*}
| [\--ipolicy-disk-templates *template* [,*template*...]]
| [\--ipolicy-spindle-ratio *ratio*]
| [\--ipolicy-vcpu-ratio *ratio*]
+| [\--ipolicy-memory-ratio *ratio*]
| {*group*}
Modifies some parameters from the node group.
| [\--disk-state *diskstate*]
| [\--hypervisor-state *hvstate*]
| [\--no-node-setup]
+| [\--verbose] | [\--debug]
| {*nodename*}
Adds the given node to the cluster.
running, the ``node-cleanup`` tool can be run on the machine to be added
to clean remains of the previous cluster from the node.
+The options ``--verbose`` and ``--debug`` control the log level of the
+operation, in particular that of the underlying SSH calls that
+Ganeti makes when adding a node.
+
Example::
# gnt-node add node5.example.com
| [\--node-powered=``yes|no``]
| [\--hypervisor-state *hvstate*]
| [\--disk-state *diskstate*]
+| [\--verbose] [\--debug]
| {*node*}
This command changes the role of the node. Each options takes
``--force`` is needed as well, and the target node for the first change
must be the master.
+The options ``--verbose`` and ``--debug`` control the log level of the
+operation, in particular that of the underlying SSH calls that
+Ganeti makes when modifying some parameters of a node (e.g. promoting
+a node to or demoting it from 'master candidate' status).
+
See **ganeti**\(7) for a description of ``--submit`` and other common
options.
REMOVE
~~~~~~
-**remove** {*nodename*}
+**remove** [\--verbose] [\--debug] {*nodename*}
Removes a node from the cluster. Instances must be removed or
migrated to another cluster before.
+The options ``--verbose`` and ``--debug`` control the log level of the
+operation, in particular that of the underlying SSH calls that
+Ganeti makes when removing a node.
+
+
Example::
# gnt-node remove node5.example.com
output lines. ``--sync`` forces the opcode to acquire the node lock(s)
in exclusive mode.
+REPAIR-COMMAND
+~~~~~~~~~~~~~~
+
+| **repair-command** { --input *input* } *command* *node*
+
+Executes a repair command. Repair commands reside in
+``@SYSCONFDIR@/ganeti/node-repair-commands`` on a node, either as a regular
+file or as a symlink. The directory must be owned by root and not be
+world- or group-writable. If a command fails verification or otherwise
+fails to start, the node daemon log must be consulted for more detailed
+information.
+
+Example for running a command::
+
+ # gnt-node repair-command --input "input string" \
+ mycommand node.example.com
+
Tags
~~~~
**[ -g *delta* ]** **[ \--min-gain-limit *threshold* ]**
**[ -O *name...* ]**
**[ \--no-disk-moves ]**
+**[ \--avoid-disk-moves *factor* ]**
**[ \--no-instance-moves ]**
**[ -U *util-file* ]**
+**[ \--idle-default ]**
**[ \--ignore-dynu ]**
**[ \--ignore-soft-errors ]**
**[ \--mond *yes|no* ]**
**[ \--mond-xen ]**
+**[ \--mond-kvm-rss ]**
**[ \--exit-on-missing-mond-data ]**
**[ \--evac-mode ]**
**[ \--restricted-migration ]**
a much quicker balancing, but of course the improvements are
limited. It is up to the user to decide when to use one or another.
+\--avoid-disk-moves=*factor*
+  This parameter prevents hbal from making disk moves that are not
+  profitable enough. During each balancing step, it will admit a disk
+  move only if the gain in the cluster metrics is *factor* times
+  higher than the gain achievable without disk moves.
+
\--no-instance-moves
This parameter prevents hbal from using instance moves
(i.e. "gnt-instance migrate/failover") operations. This will only use
metrics and thus the influence of the dynamic utilisation will be
practically insignificant.
+\--idle-default
+  If given, all dynamic utilisation information not provided explicitly
+  by the ``-U`` option or by the MonDs (if ``--mond`` is given) will be
+  assumed to be 0. Note that without this option the default assumption
+  for unspecified resources is a utilisation of 1.0, i.e., full load,
+  for every instance.
+
\--ignore-dynu
If given, all dynamic utilisation information will be ignored by
assuming it to be 0. This option will take precedence over any data
If given, also query Xen-specific collectors from MonD, provided
that monitoring daemons are queried at all.
+\--mond-kvm-rss
+  If given, also query the resident set size for kvm instances, provided
+  that monitoring daemons are queried at all.
+
+\--mem-weight=*factor*
+  Scale the weight of the dynamic memory utilisation in the cluster
+  metrics by the given factor.
+
\--exit-on-missing-mond-data
If given, abort if the data obtainable from querying MonDs is incomplete.
The default behavior is to continue with a best guess based on the static
- disk templates
- vcpu ratio
- spindle ratio
+ - memory ratio (optional)
\--mond=*yes|no*
If given the program will query all MonDs to fetch data from the
nodes = qa_config.AcquireManyNodes(n)
live_instances.append(cf(nodes))
- # 2.16 only - prior to performing a downgrade, we have to make sure that the
- # SSH keys used are such that the lower version can still use them,
- # regardless of cluster defaults.
- if constants.VERSION_MINOR != 16:
- raise qa_error.Error("Please remove the key type downgrade code in 2.17")
- AssertCommand(["gnt-cluster", "renew-crypto", "--no-ssh-key-check", "-f",
- "--new-ssh-keys", "--ssh-key-type=dsa"])
-
AssertRedirectedCommand(["gnt-cluster", "upgrade", "--to", other_version])
AssertRedirectedCommand(["gnt-cluster", "verify"])
if node != master:
NodeAdd(node, readd=False)
+ for node in qa_config.get("nodes"):
+ def GetNonStartDaemons():
+ cmd = utils.ShellQuoteArgs(["ps", "-Ao", "comm"])
+ prcs = AssertCommand(cmd, node=node)[1]
+
+ non_start_daemons = []
+
+ def AddIfNotStarted(daemon):
+ if daemon not in prcs:
+ non_start_daemons.append(daemon)
+
+ AddIfNotStarted('ganeti-noded')
+ if constants.ENABLE_MOND:
+ AddIfNotStarted('ganeti-mond')
+ if node == master:
+ AddIfNotStarted('ganeti-wconfd')
+ AddIfNotStarted('ganeti-rapi')
+ AddIfNotStarted('ganeti-luxid')
+ AddIfNotStarted('ganeti-maintd')
+ return non_start_daemons
+
+  nsd = GetNonStartDaemons()
+  if nsd:
+    raise qa_error.Error("Daemons not running on %s: %s" %
+                         (node.primary, ", ".join(nsd)))
+
def MarkNodeAddedAll():
"""Mark all nodes as added.
# Identifying the node - RAPI provides these itself
IDENTIFIERS = ["node_name", "node_uuid"]
# As the name states, these can be set but not retrieved yet
- NOT_EXPOSED_YET = ["hv_state", "disk_state", "auto_promote"]
+ NOT_EXPOSED_YET = ["hv_state", "disk_state", "auto_promote",
+ "debug", "verbose"]
_DoGetPutTests("/2/nodes/%s" % node.primary,
"/2/nodes/%s/modify" % node.primary,
{-
-Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
+Copyright (C) 2009, 2010, 2011, 2012, 2015 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
, Result
, ResultT(..)
, mkResultT
+ , mkResultT'
, withError
, withErrorT
, toError
, tryError
, Error(..) -- re-export from Control.Monad.Error
, MonadIO(..) -- re-export from Control.Monad.IO.Class
+ , FromString(..)
, isOk
, isBad
, justOk
, compareNameComponent
, ListSet(..)
, emptyListSet
+ , Down(..)
) where
+import Prelude ()
+import Ganeti.Prelude
+
import Control.Applicative
import Control.Exception (try)
import Control.Monad
import Control.Monad.Trans
import Control.Monad.Trans.Control
import Data.Function
-import Data.List
+import Data.List (find, isPrefixOf)
import Data.Maybe
-import Data.Monoid
import Data.Set (Set)
import qualified Data.Set as Set (empty)
import Text.JSON (JSON)
import qualified Text.JSON as JSON (readJSON, showJSON)
+#if MIN_VERSION_base(4,6,0)
+import Data.Ord
+#endif
-- Remove after we require >= 1.8.58
-- See: https://github.com/ndmitchell/hlint/issues/24
-- | Type alias for a string Result.
type Result = GenericResult String
+-- | Type class for things that can be built from strings.
+class FromString a where
+ mkFromString :: String -> a
+
+-- | Trivial 'String' instance; requires FlexibleInstances extension
+-- though.
+instance FromString [Char] where
+ mkFromString = id
+
+instance FromString IOError where
+ mkFromString = userError
+
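+-- As an illustration, @mkFromString "read failure" :: IOError@ is
+-- equivalent to @userError "read failure"@, while at type 'String'
+-- 'mkFromString' is just the identity.
+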
-- | 'Monad' instance for 'GenericResult'.
-instance (Error a) => Monad (GenericResult a) where
+instance (FromString a) => Monad (GenericResult a) where
(>>=) (Bad x) _ = Bad x
(>>=) (Ok x) fn = fn x
return = Ok
- fail = Bad . strMsg
+ fail = Bad . mkFromString
instance Functor (GenericResult a) where
fmap _ (Bad msg) = Bad msg
fmap fn (Ok val) = Ok (fn val)
-instance (Error a, Monoid a) => MonadPlus (GenericResult a) where
- mzero = Bad $ strMsg "zero Result when used as MonadPlus"
+instance (FromString a, Monoid a) => Alternative (GenericResult a) where
+ empty = Bad $ mkFromString "zero Result when used as empty"
  -- when combining two Bad values with '<|>', we concatenate their
  -- error descriptions
- (Bad x) `mplus` (Bad y) = Bad (x `mappend` strMsg "; " `mappend` y)
- (Bad _) `mplus` x = x
- x@(Ok _) `mplus` _ = x
+ (Bad x) <|> (Bad y) = Bad (x `mappend` mkFromString "; " `mappend` y)
+ (Bad _) <|> x = x
+ x@(Ok _) <|> _ = x
+
+instance (FromString a, Monoid a) => MonadPlus (GenericResult a) where
+ mzero = empty
+ mplus = (<|>)
-instance (Error a) => MonadError a (GenericResult a) where
+instance (FromString a) => MonadError a (GenericResult a) where
throwError = Bad
{-# INLINE throwError #-}
catchError x h = genericResult h (const x) x
_ <*> (Bad x) = Bad x
(Ok f) <*> (Ok x) = Ok $ f x
-instance (Error a, Monoid a) => Alternative (GenericResult a) where
- empty = mzero
- (<|>) = mplus
-
-- | This is a monad transformation for Result. Its implementation is
-- based on the implementations of MaybeT and ErrorT.
--
-- If 'mplus' combines two failing operations, errors of both of them
-- are combined.
newtype ResultT a m b = ResultT {runResultT :: m (GenericResult a b)}
- deriving (Functor)
-- | Eliminates a 'ResultT' value given appropriate continuations
elimResultT :: (Monad m)
result (Bad e) = l e
{-# INLINE elimResultT #-}
-instance (Applicative m, Monad m, Error a) => Applicative (ResultT a m) where
+instance (Monad m) => Functor (ResultT a m) where
+ fmap f = ResultT . liftM (fmap f) . runResultT
+
+instance (Monad m, FromString a) => Applicative (ResultT a m) where
pure = return
(<*>) = ap
-instance (Monad m, Error a) => Monad (ResultT a m) where
- fail err = ResultT (return . Bad $ strMsg err)
+instance (Monad m, FromString a) => Monad (ResultT a m) where
+ fail err = ResultT (return . Bad $ mkFromString err)
return = lift . return
(>>=) = flip (elimResultT throwError)
-instance (Monad m, Error a) => MonadError a (ResultT a m) where
+instance (Monad m, FromString a) => MonadError a (ResultT a m) where
throwError = ResultT . return . Bad
catchError = catchErrorT
lift = ResultT . liftM Ok
-- | The instance catches any 'IOError' using 'try' and converts it into an
--- error message using 'strMsg'.
+-- error message using 'mkFromString'.
--
-- This way, monadic code within 'ResultT' that uses solely 'liftIO' to
-- include 'IO' actions ensures that all IO exceptions are handled.
--
-- Other exceptions (see instances of 'Exception') are not currently handled.
-- This might be revised in the future.
-instance (MonadIO m, Error a) => MonadIO (ResultT a m) where
+instance (MonadIO m, FromString a) => MonadIO (ResultT a m) where
liftIO = ResultT . liftIO
. liftM (either (failError . show) return)
. (try :: IO a -> IO (Either IOError a))
-instance (MonadBase IO m, Error a) => MonadBase IO (ResultT a m) where
+instance (MonadBase IO m, FromString a) => MonadBase IO (ResultT a m) where
liftBase = ResultT . liftBase
. liftM (either (failError . show) return)
. (try :: IO a -> IO (Either IOError a))
-instance (Error a) => MonadTransControl (ResultT a) where
+instance (FromString a) => MonadTransControl (ResultT a) where
#if MIN_VERSION_monad_control(1,0,0)
-- Needs Undecidable instances
type StT (ResultT a) b = GenericResult a b
{-# INLINE liftWith #-}
{-# INLINE restoreT #-}
-instance (Error a, MonadBaseControl IO m)
+instance (FromString a, MonadBaseControl IO m)
=> MonadBaseControl IO (ResultT a m) where
#if MIN_VERSION_monad_control(1,0,0)
-- Needs Undecidable instances
{-# INLINE liftBaseWith #-}
{-# INLINE restoreM #-}
-instance (Monad m, Error a, Monoid a) => MonadPlus (ResultT a m) where
- mzero = ResultT $ return mzero
+instance (Monad m, FromString a, Monoid a)
+ => Alternative (ResultT a m) where
+ empty = ResultT $ return mzero
-- Ensure that 'y' isn't run if 'x' contains a value. This makes it a bit
-- more complicated than 'mplus' of 'GenericResult'.
- mplus x y = elimResultT combine return x
+ x <|> y = elimResultT combine return x
where combine x' = ResultT $ liftM (mplus (Bad x')) (runResultT y)
-instance (Alternative m, Monad m, Error a, Monoid a)
- => Alternative (ResultT a m) where
- empty = mzero
- (<|>) = mplus
+instance (Monad m, FromString a, Monoid a)
+ => MonadPlus (ResultT a m) where
+ mzero = empty
+ mplus = (<|>)
-- | Changes the error message of a result value, if present.
-- Note that since 'GenericResult' is also a 'MonadError', this function
withError f = genericResult (throwError . f) return
-- | Changes the error message of a @ResultT@ value, if present.
-withErrorT :: (Monad m, Error e)
+withErrorT :: (Monad m, FromString e)
=> (e' -> e) -> ResultT e' m a -> ResultT e m a
withErrorT f = ResultT . liftM (withError f) . runResultT
toErrorBase = (toError =<<) . liftBase . runResultT
{-# INLINE toErrorBase #-}
--- | An alias for @withError strMsg@, which is often used to lift a pure error
--- to a monad stack. See also 'annotateResult'.
-toErrorStr :: (MonadError e m, Error e) => Result a -> m a
-toErrorStr = withError strMsg
+-- | An alias for @withError mkFromString@, which is often
+-- used to lift a pure error to a monad stack. See also 'annotateResult'.
+toErrorStr :: (MonadError e m, FromString e) => Result a -> m a
+toErrorStr = withError mkFromString
-- | Run a given computation and if an error occurs, return it as `Left` of
-- `Either`.
-- should be handled by the given action.
--
-- See also 'toErrorStr'.
-mkResultT :: (Monad m, Error e) => m (Result a) -> ResultT e m a
+mkResultT :: (Monad m, FromString e) => m (Result a) -> ResultT e m a
mkResultT = ResultT . liftM toErrorStr
+-- | Generalisation of mkResultT accepting any showable failures.
+mkResultT' :: (Monad m, FromString e, Show s)
+ => m (GenericResult s a) -> ResultT e m a
+mkResultT' = mkResultT . liftM (genericResult (Bad . show) Ok)
+
-- | Simple checker for whether a 'GenericResult' is OK.
isOk :: GenericResult a b -> Bool
isOk (Ok _) = True
-- 'MonadError'. Since 'Result' is an instance of 'MonadError' itself,
-- it's a generalization of type @String -> Result a -> Result a@.
-- See also 'toErrorStr'.
-annotateResult :: (MonadError e m, Error e) => String -> Result a -> m a
+annotateResult :: (MonadError e m, FromString e) => String -> Result a -> m a
annotateResult owner = toErrorStr . annotateError owner
-- | Annotate an error with an ownership information inside a 'MonadError'.
-- See also 'annotateResult'.
-annotateError :: (MonadError e m, Error e, Monoid e) => String -> m a -> m a
+annotateError :: (MonadError e m, FromString e, Monoid e)
+ => String -> m a -> m a
annotateError owner =
- flip catchError (throwError . mappend (strMsg $ owner ++ ": "))
+ flip catchError (throwError . mappend (mkFromString $ owner ++ ": "))
{-# INLINE annotateError #-}
-- | Throws a 'String' message as an error in a 'MonadError'.
-- This is a generalization of 'Bad'.
-- It's similar to 'fail', but works within a 'MonadError', avoiding the
-- unsafe nature of 'fail'.
-failError :: (MonadError e m, Error e) => String -> m a
-failError = throwError . strMsg
+failError :: (MonadError e m, FromString e) => String -> m a
+failError = throwError . mkFromString
-- | A synonym for @flip@ 'catchErrorT'.
-handleErrorT :: (Monad m, Error e)
+handleErrorT :: (Monad m, FromString e)
=> (e' -> ResultT e m a) -> ResultT e' m a -> ResultT e m a
handleErrorT handler = elimResultT handler return
{-# INLINE handleErrorT #-}
-- | Catches an error in a @ResultT@ value. This is similar to 'catchError',
-- but in addition allows to change the error type.
-catchErrorT :: (Monad m, Error e)
+catchErrorT :: (Monad m, FromString e)
=> ResultT e' m a -> (e' -> ResultT e m a) -> ResultT e m a
catchErrorT = flip handleErrorT
{-# INLINE catchErrorT #-}
emptyListSet :: ListSet a
emptyListSet = ListSet Set.empty
+
+#if MIN_VERSION_base(4,6,0)
+-- Down already defined in Data.Ord
+#else
+-- Copyright : (c) The University of Glasgow 2005
+-- License : BSD-style
+
+newtype Down a = Down a deriving (Eq, Show, Read)
+
+instance Ord a => Ord (Down a) where
+ compare (Down x) (Down y) = y `compare` x
+
+{- License text of the above code fragment:
+
+The Glasgow Haskell Compiler License
+
+Copyright 2004, The University Court of the University of Glasgow.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+- Neither name of the University nor the names of its contributors may be
+used to endorse or promote products derived from this software without
+specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY COURT OF THE UNIVERSITY OF
+GLASGOW AND THE CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+UNIVERSITY COURT OF THE UNIVERSITY OF GLASGOW OR THE CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGE.
+
+-}
+
+#endif
, decompressZlib
) where
+import Prelude ()
+import Ganeti.Prelude
+
import Codec.Compression.Zlib (compress)
import qualified Codec.Compression.Zlib.Internal as I
-import Control.Monad.Error
+import Control.Monad (liftM)
+import Control.Monad.Error.Class (MonadError(..))
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Internal as BL
-import Data.Monoid (mempty)
+
+import Ganeti.BasicTypes
-- | Compresses a lazy bytestring.
compressZlib :: BL.ByteString -> BL.ByteString
-- | Decompresses a lazy bytestring, throwing decoding errors using
-- 'throwError'.
-decompressZlib :: (MonadError e m, Error e) => BL.ByteString -> m BL.ByteString
+decompressZlib :: (MonadError e m, FromString e)
+ => BL.ByteString -> m BL.ByteString
decompressZlib = I.foldDecompressStream
(liftM . BL.chunk)
(return mempty)
- (const $ throwError . strMsg . ("Zlib: " ++))
+ (const $ throwError . mkFromString . ("Zlib: " ++))
. I.decompressWithErrors
I.zlibFormat
I.defaultDecompressParams
hmac = hmacKey client
jobs = map (queryOneServer semaphore answer crType cQuery hmac) dest
watchdog reqAnswers = do
- threadDelay $ 1000000 * C.confdClientExpireTimeout
+ threadDelaySeconds C.confdClientExpireTimeout
_ <- swapMVar reqAnswers 0
putMVar semaphore ()
waitForResult reqAnswers = do
, prepMain
) where
-import Control.Applicative((<$>))
+import Prelude ()
+import Ganeti.Prelude
+
import Control.Concurrent
import Control.Monad (forever, liftM)
import Data.IORef
import qualified Data.Attoparsec.Text as P
-import Control.Applicative ((*>))
+import Prelude ()
+import Ganeti.Prelude
+
import qualified Data.ByteString as B
import Data.Text (pack)
import qualified Text.JSON as J
, getInstDisksFromObj
, getDrbdMinorsForDisk
, getDrbdMinorsForInstance
+ , getFilledHvStateParams
, getFilledInstHvParams
, getFilledInstBeParams
, getFilledInstOsParams
, instNodes
) where
-import Control.Applicative
+import Prelude ()
+import Ganeti.Prelude
+
import Control.Arrow ((&&&))
-import Control.Monad
-import Control.Monad.State
+import Control.Monad (liftM)
import qualified Data.ByteString as BS
import qualified Data.ByteString.UTF8 as UTF8
import qualified Data.Foldable as F
import Ganeti.BasicTypes
import qualified Ganeti.Constants as C
+import qualified Ganeti.ConstantUtils as CU
import Ganeti.Errors
import Ganeti.JSON
import Ganeti.Objects
ginsts = map (getNodeInstances cfg) gnodes in
(concatMap fst ginsts, concatMap snd ginsts)
+-- | Default 'FilledHvStateParams'.
+defaultHvStateParams :: FilledHvStateParams
+defaultHvStateParams = FilledHvStateParams
+ { hvstateCpuNode = CU.hvstDefaultCpuNode
+ , hvstateCpuTotal = CU.hvstDefaultCpuTotal
+ , hvstateMemHv = CU.hvstDefaultMemoryHv
+ , hvstateMemNode = CU.hvstDefaultMemoryNode
+ , hvstateMemTotal = CU.hvstDefaultMemoryTotal
+ }
+
+-- | Retrieves the node's static hypervisor state parameters; missing
+-- values are filled with the group's parameters, and missing group
+-- parameters are filled with the cluster's parameters. Currently,
+-- returns hvstate parameters only for the default hypervisor.
+getFilledHvStateParams :: ConfigData -> Node -> FilledHvState
+getFilledHvStateParams cfg n =
+ let cluster_hv_state =
+ fromContainer . clusterHvStateStatic $ configCluster cfg
+ def_hv = getDefaultHypervisor cfg
+ cluster_fv = fromMaybe defaultHvStateParams $ M.lookup def_hv
+ cluster_hv_state
+ group_fv = case getGroupOfNode cfg n >>=
+ M.lookup def_hv . fromContainer . groupHvStateStatic of
+ Just pv -> fillParams cluster_fv pv
+ Nothing -> cluster_fv
+ node_fv = case M.lookup def_hv . fromContainer $ nodeHvStateStatic n of
+ Just pv -> fillParams group_fv pv
+ Nothing -> group_fv
+ in GenericContainer $ M.fromList [(def_hv, node_fv)]
+
-- | Retrieves the instance hypervisor params, missing values filled with
-- cluster defaults.
getFilledInstHvParams :: [String] -> ConfigData -> Instance -> HvParams
-}
module Ganeti.ConstantUtils where
+import Prelude ()
+import Ganeti.Prelude
+
import Data.Char (ord)
-import Data.Monoid (Monoid(..))
import Data.Set (Set)
import qualified Data.Set as Set (difference, fromList, toList, union)
ipolicySpindleRatio :: String
ipolicySpindleRatio = "spindle-ratio"
+ipolicyMemoryRatio :: String
+ipolicyMemoryRatio = "memory-ratio"
+
ipolicyDefaultsVcpuRatio :: Double
ipolicyDefaultsVcpuRatio = 4.0
ipolicyDefaultsSpindleRatio :: Double
ipolicyDefaultsSpindleRatio = 32.0
+
+ipolicyDefaultsMemoryRatio :: Double
+ipolicyDefaultsMemoryRatio = 1.0
+
+-- * Hypervisor state default parameters
+
+hvstDefaultCpuNode :: Int
+hvstDefaultCpuNode = 1
+
+hvstDefaultCpuTotal :: Int
+hvstDefaultCpuTotal = 1
+
+hvstDefaultMemoryHv :: Int
+hvstDefaultMemoryHv = 1024
+
+hvstDefaultMemoryTotal :: Int
+hvstDefaultMemoryTotal = 1024
+
+hvstDefaultMemoryNode :: Int
+hvstDefaultMemoryNode = 4096
mond :: String
mond = Runtime.daemonName GanetiMond
+maintd :: String
+maintd = Runtime.daemonName GanetiMaintd
+
noded :: String
noded = Runtime.daemonName GanetiNoded
defaultMondPort :: Int
defaultMondPort = 1815
+defaultMaintdPort :: Int
+defaultMaintdPort = 1816
+
defaultMetadPort :: Int
defaultMetadPort = 80
[ (confd, (Udp, defaultConfdPort))
, (metad, (Tcp, defaultMetadPort))
, (mond, (Tcp, defaultMondPort))
+ , (maintd, (Tcp, defaultMaintdPort))
, (noded, (Tcp, defaultNodedPort))
, (rapi, (Tcp, defaultRapiPort))
, (ssh, (Tcp, 22))
hvstDefaults :: Map String Int
hvstDefaults =
Map.fromList
- [(hvstCpuNode, 1),
- (hvstCpuTotal, 1),
- (hvstMemoryHv, 0),
- (hvstMemoryTotal, 0),
- (hvstMemoryNode, 0)]
+ [ (hvstCpuNode , ConstantUtils.hvstDefaultCpuNode )
+ , (hvstCpuTotal , ConstantUtils.hvstDefaultCpuTotal )
+ , (hvstMemoryHv , ConstantUtils.hvstDefaultMemoryHv )
+ , (hvstMemoryTotal, ConstantUtils.hvstDefaultMemoryTotal)
+ , (hvstMemoryNode , ConstantUtils.hvstDefaultMemoryNode )
+ ]
hvstsParameterTypes :: Map String VType
hvstsParameterTypes =
ipolicySpindleRatio :: String
ipolicySpindleRatio = ConstantUtils.ipolicySpindleRatio
+ipolicyMemoryRatio :: String
+ipolicyMemoryRatio = ConstantUtils.ipolicyMemoryRatio
+
ispecsMinmaxKeys :: FrozenSet String
ispecsMinmaxKeys = ConstantUtils.mkSet [ispecsMax, ispecsMin]
ipolicyParameters :: FrozenSet String
ipolicyParameters =
ConstantUtils.mkSet [ConstantUtils.ipolicyVcpuRatio,
- ConstantUtils.ipolicySpindleRatio]
+ ConstantUtils.ipolicySpindleRatio,
+ ConstantUtils.ipolicyMemoryRatio]
ipolicyAllKeys :: FrozenSet String
ipolicyAllKeys =
, (ispecSpindleUse, 1)
] :: Map String Int))
, (ipolicyDts, PyValueEx (ConstantUtils.toList diskTemplates))
- , (ipolicyVcpuRatio, PyValueEx (4.0 :: Double))
- , (ipolicySpindleRatio, PyValueEx (32.0 :: Double))
+ , (ipolicyVcpuRatio, PyValueEx ConstantUtils.ipolicyDefaultsVcpuRatio)
+ , (ipolicySpindleRatio, PyValueEx ConstantUtils.ipolicyDefaultsSpindleRatio)
+ , (ipolicyMemoryRatio, PyValueEx ConstantUtils.ipolicyDefaultsMemoryRatio)
]
masterPoolSizeDefault :: Int
ndsSsconf :: String
ndsSsconf = "ssconf"
+ndsHmac :: String
+ndsHmac = "hmac_key"
+
ndsStartNodeDaemon :: String
ndsStartNodeDaemon = "start_node_daemon"
opcodeReasonSrcNoded :: String
opcodeReasonSrcNoded = _opcodeReasonSrcDaemon ++ ":noded"
+opcodeReasonSrcMaintd :: String
+opcodeReasonSrcMaintd = _opcodeReasonSrcDaemon ++ ":maintd"
+
opcodeReasonSrcOpcode :: String
opcodeReasonSrcOpcode = "gnt:opcode"
mondDefaultCategory :: String
mondDefaultCategory = "default"
+-- * Maintenance daemon
+
+-- | Default wait time, in seconds, between maintenance rounds.
+maintdDefaultRoundDelay :: Int
+maintdDefaultRoundDelay = 300
+
-- * Disk access modes
diskUserspace :: String
dataCollectorLv :: String
dataCollectorLv = "lv"
+-- | Collector for the resident set size of kvm processes, i.e.,
+-- the number of pages the kvm process has in RAM.
+dataCollectorKvmRSS :: String
+dataCollectorKvmRSS = "kvm-inst-rss"
+
dataCollectorInstStatus :: String
dataCollectorInstStatus = "inst-status-xen"
+dataCollectorDiagnose :: String
+dataCollectorDiagnose = "diagnose"
+
dataCollectorParameterInterval :: String
dataCollectorParameterInterval = "interval"
, dataCollectorLv
, dataCollectorInstStatus
, dataCollectorXenCpuLoad
+ , dataCollectorKvmRSS
+ , dataCollectorDiagnose
]
dataCollectorStateActive :: String
dataCollectorsIntervalName :: String
dataCollectorsIntervalName = "data_collector_interval"
+dataCollectorDiagnoseDirectory :: String
+dataCollectorDiagnoseDirectory = sysconfdir ++ "/ganeti/node-diagnose-commands"
+
-- * HTools tag prefixes
exTagsPrefix :: String
exTagsPrefix = Tags.exTagsPrefix
+
+-- * MaintD tag prefixes
+
+maintdPrefix :: String
+maintdPrefix = "maintd:"
+
+maintdSuccessTagPrefix :: String
+maintdSuccessTagPrefix = maintdPrefix ++ "repairready:"
+
+maintdFailureTagPrefix :: String
+maintdFailureTagPrefix = maintdPrefix ++ "repairfailed:"
-}
module Ganeti.Cpu.LoadParser (cpustatParser) where
-import Control.Applicative ((<*>), (<*), (*>), (<$>), (<|>))
+import Prelude ()
+import Ganeti.Prelude
+
+import Control.Applicative ((<|>))
import qualified Data.Attoparsec.Text as A
import qualified Data.Attoparsec.Combinator as AC
import Data.Attoparsec.Text (Parser)
oneCPUstatParser :: Parser CPUstat
oneCPUstatParser =
let nameP = stringP
- userP = numberP
- niceP = numberP
- systemP = numberP
- idleP = numberP
- iowaitP = numberP
- irqP = numberP
- softirqP = numberP
- stealP = numberP
- guestP = numberP
- guest_niceP = numberP
+ userP = integerP
+ niceP = integerP
+ systemP = integerP
+ idleP = integerP
+ iowaitP = integerP
+ irqP = integerP
+ softirqP = integerP
+ stealP = integerP
+ guestP = integerP
+ guest_niceP = integerP
in
CPUstat <$> nameP <*> userP <*> niceP <*> systemP <*> idleP <*> iowaitP
<*> irqP <*> softirqP <*> stealP <*> guestP <*> guest_niceP
module Ganeti.Cpu.Types
( CPUstat(..)
, CPUavgload(..)
+ , emptyCPUavgload
) where
import Ganeti.THH
, simpleField "cpu_total" [t| Double |]
])
+-- | CPU activity of an idle node. This can be used as a default
+-- value for offline nodes.
+emptyCPUavgload :: CPUavgload
+emptyCPUavgload = CPUavgload { cavCpuNumber = 1
+ , cavCpus = [ 0.0 ]
+ , cavCpuTotal = 0.0
+ }
+
-- | This is the format of the data parsed by the input file.
$(buildObject "CPUstat" "cs"
[ simpleField "name" [t| String |]
- , simpleField "user" [t| Int |]
- , simpleField "nice" [t| Int |]
- , simpleField "system" [t| Int |]
- , simpleField "idle" [t| Int |]
- , simpleField "iowait" [t| Int |]
- , simpleField "irq" [t| Int |]
- , simpleField "softirq" [t| Int |]
- , simpleField "steal" [t| Int |]
- , simpleField "guest" [t| Int |]
- , simpleField "guest_nice" [t| Int |]
+ , simpleField "user" [t| Integer |]
+ , simpleField "nice" [t| Integer |]
+ , simpleField "system" [t| Integer |]
+ , simpleField "idle" [t| Integer |]
+ , simpleField "iowait" [t| Integer |]
+ , simpleField "irq" [t| Integer |]
+ , simpleField "softirq" [t| Integer |]
+ , simpleField "steal" [t| Integer |]
+ , simpleField "guest" [t| Integer |]
+ , simpleField "guest_nice" [t| Integer |]
])
module Ganeti.DataCollectors( collectors ) where
+import Prelude ()
+import Ganeti.Prelude
+
import qualified Data.ByteString.UTF8 as UTF8
import Data.Map (findWithDefault)
-import Data.Monoid (mempty)
import qualified Ganeti.DataCollectors.CPUload as CPUload
+import qualified Ganeti.DataCollectors.Diagnose as Diagnose
import qualified Ganeti.DataCollectors.Diskstats as Diskstats
import qualified Ganeti.DataCollectors.Drbd as Drbd
import qualified Ganeti.DataCollectors.InstStatus as InstStatus
+import qualified Ganeti.DataCollectors.KvmRSS as KvmRSS
import qualified Ganeti.DataCollectors.Lv as Lv
import qualified Ganeti.DataCollectors.XenCpuLoad as XenCpuLoad
import Ganeti.DataCollectors.Types (DataCollector(..),ReportBuilder(..))
collectors =
[ cpuLoadCollector
, xenCpuLoadCollector
+ , kvmRSSCollector
, diskStatsCollector
, drdbCollector
, instStatusCollector
, lvCollector
+ , diagnoseCollector
]
where
f .&&. g = \x y -> f x y && g x y
lvCollector =
DataCollector Lv.dcName Lv.dcCategory Lv.dcKind
(StatelessR Lv.dcReport) Nothing activeConfig updateInterval
+ diagnoseCollector =
+ DataCollector Diagnose.dcName Diagnose.dcCategory Diagnose.dcKind
+ (StatelessR Diagnose.dcReport) Nothing activeConfig updateInterval
cpuLoadCollector =
DataCollector CPUload.dcName CPUload.dcCategory CPUload.dcKind
(StatefulR CPUload.dcReport) (Just CPUload.dcUpdate) activeConfig
DataCollector XenCpuLoad.dcName XenCpuLoad.dcCategory XenCpuLoad.dcKind
(StatefulR XenCpuLoad.dcReport) (Just XenCpuLoad.dcUpdate) activeConfig
updateInterval
+ kvmRSSCollector =
+ DataCollector KvmRSS.dcName KvmRSS.dcCategory KvmRSS.dcKind
+ (StatelessR KvmRSS.dcReport) Nothing activeConfig updateInterval
{-
-Copyright (C) 2013 Google Inc.
+Copyright (C) 2013, 2016 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
import Control.Arrow (first)
import qualified Control.Exception as E
+import Control.Monad (liftM)
import Data.Attoparsec.Text.Lazy as A
import Data.Maybe (fromMaybe)
import Data.Text.Lazy (pack, unpack)
bufferSize = C.cpuavgloadBufferSize
-- | The window size of the values that will export the average load.
-windowSize :: Integer
-windowSize = toInteger C.cpuavgloadWindowSize
+windowSizeInUSec :: Integer
+windowSizeInUSec = 1000000 * toInteger C.cpuavgloadWindowSize
-- | The default setting for the maximum amount of not parsed character to
-- print in case of error.
in buildDCReport cpuLoadData
-- | Data stored by the collector in mond's memory.
-type Buffer = Seq.Seq (ClockTime, [Int])
+type Buffer = Seq.Seq (ClockTime, [Integer])
-- | Compute the load from a CPU.
-computeLoad :: CPUstat -> Int
+computeLoad :: CPUstat -> Integer
computeLoad cpuData =
csUser cpuData + csNice cpuData + csSystem cpuData
+ csIowait cpuData + csIrq cpuData + csSoftirq cpuData
+ csSteal cpuData + csGuest cpuData + csGuestNice cpuData
-- | Reads and Computes the load for each CPU.
-dcCollectFromFile :: FilePath -> IO (ClockTime, [Int])
+dcCollectFromFile :: FilePath -> IO (ClockTime, [Integer])
dcCollectFromFile inputFile = do
contents <-
((E.try $ readFile inputFile) :: IO (Either IOError String)) >>=
-- | Update a Map Entry.
updateEntry :: Buffer -> Buffer -> Buffer
updateEntry newBuffer mapEntry =
- (Seq.><) newBuffer
- (if Seq.length mapEntry < bufferSize
- then mapEntry
- else Seq.drop 1 mapEntry)
+ (Seq.><) newBuffer (Seq.take bufferSize mapEntry)
-- | Updates the given Collector data.
dcUpdate :: Maybe CollectorData -> IO CollectorData
(timestampR, listR) = rightmost
workInWindow = zipWith (-) listL listR
timediff = timestampL - timestampR
- overall = fromInteger (timediff * ticks) / 1000000 :: Double
+ overall = fromIntegral (timediff * ticks) / 1000000 :: Double
if overall > 0
then BT.Ok $ map (flip (/) overall . fromIntegral) workInWindow
else BT.Bad $ "Time covered by data is not sufficient."
buildJsonReport :: Buffer -> IO J.JSValue
buildJsonReport v = do
ticks <- getSysVar ClockTick
- let res = computeAverage v windowSize ticks
+ now <- liftM clockTimeToUSec getClockTime
+ let res = computeAverage v (now - windowSizeInUSec) ticks
showError s = J.showJSON $ GJ.containerFromList [("error", s)]
return $ BT.genericResult showError (J.showJSON . formatData) res
--- /dev/null
+{-| Self-diagnose data collector
+
+-}
+
+{-
+
+Copyright (C) 2015 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.DataCollectors.Diagnose
+ ( dcName
+ , dcCategory
+ , dcKind
+ , dcReport
+ ) where
+
+import Control.Monad.Trans.Class (lift)
+import System.Directory (doesFileExist)
+import System.FilePath.Posix (isValid, takeFileName, (</>))
+import System.Posix.Files ( getFileStatus
+ , fileOwner
+ , fileGroup
+ , fileMode
+ , ownerModes
+ , groupReadMode
+ , groupExecuteMode
+ , otherReadMode
+ , otherExecuteMode
+ , intersectFileModes
+ , unionFileModes
+ , ownerExecuteMode
+ , isRegularFile
+ , regularFileMode
+ )
+import System.Process (readProcess)
+import Text.JSON (JSValue(..), toJSObject, toJSString, decode, Result(..))
+
+import Ganeti.BasicTypes (runResultT, ResultT(..), genericResult)
+import Ganeti.Config (loadConfig)
+import Ganeti.Constants (dataCollectorDiagnose, dataCollectorDiagnoseDirectory)
+import Ganeti.DataCollectors.Types ( DCCategory(..)
+ , DCKind(..)
+ , DCVersion(..)
+ , DCReport(..)
+ , buildReport
+ )
+import Ganeti.Objects (configCluster, clusterDiagnoseDataCollectorFilename)
+import Ganeti.Path (clusterConfFile)
+
+-- | The name of this data collector.
+dcName :: String
+dcName = dataCollectorDiagnose
+
+-- | The category of this data collector.
+dcCategory :: Maybe DCCategory
+dcCategory = Just DCNode
+
+-- | The kind of this data collector.
+dcKind :: DCKind
+dcKind = DCKStatus
+
+-- | The version of this data collector.
+dcVersion :: DCVersion
+dcVersion = DCVerBuiltin
+
+-- | The version number for the data format of this data collector.
+dcFormatVersion :: Int
+dcFormatVersion = 1
+
+okWithDetails :: String -> JSValue
+okWithDetails details = JSObject $ toJSObject
+ [ ("status", JSString $ toJSString "Ok")
+ , ("details", JSString $ toJSString details)
+ ]
+
+
+fnToVal :: String -> IO JSValue
+fnToVal fn
+ | null fn = return $ okWithDetails
+ "No file specified for diagnose data collector"
+ | not $ isValid fn = return $ okWithDetails
+ "Invalid filename specified for diagnose data collector"
+ | takeFileName fn /= fn = return $ okWithDetails
+ "Filepaths cannot be specified for diagnose data collector"
+ | otherwise = do
+ let fp = dataCollectorDiagnoseDirectory </> fn
+ exists <- doesFileExist fp
+ if exists
+ then do
+ fs <- getFileStatus fp
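+        -- Most permissive mode accepted below: a regular file with
+        -- permissions 0755 (rwxr-xr-x).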
+ let maxFileMode = foldl1 unionFileModes [ ownerModes
+ , groupReadMode
+ , groupExecuteMode
+ , otherReadMode
+ , otherExecuteMode
+ , regularFileMode
+ ]
+ isSubSetOf m1 m2 = m1 `intersectFileModes` m2 == m1
+ case () of _
+ | fileOwner fs /= 0 -> return . okWithDetails $
+ "File for diagnose data collector " ++
+ "must be owned by root"
+ | fileGroup fs /= 0 -> return . okWithDetails $
+ "File for diagnose data collector " ++
+ "must have group root"
+ | not $ isRegularFile fs -> return . okWithDetails $
+ "File for diagnose data collector " ++
+ "must be a regular file"
+ | not $ isSubSetOf (fileMode fs) maxFileMode ->
+ return . okWithDetails $
+ "File for diagnose data collector " ++
+ "must have permissions 755 or stricter"
+ | not $ isSubSetOf ownerExecuteMode (fileMode fs) ->
+ return . okWithDetails $
+ "File for diagnose data collector " ++
+ "must be executable by owner"
+ | otherwise -> do
+ r <- fmap decode (readProcess fp [] "")
+ case r of
+ Ok val -> return val
+ Error str -> return . okWithDetails $
+ "Could not parse result: " ++ str
+ else return $ okWithDetails
+ "File specified for diagnose data collector does not exist"
+
+buildJsonReport :: IO JSValue
+buildJsonReport = fmap (genericResult okWithDetails id) . runResultT $ do
+ configData <- ResultT (clusterConfFile >>= loadConfig)
+ lift . fnToVal . clusterDiagnoseDataCollectorFilename $
+ configCluster configData
+
+-- | The data exported by the data collector, taken from the default location.
+dcReport :: IO DCReport
+dcReport = buildJsonReport >>=
+ buildReport dcName dcVersion dcFormatVersion dcCategory dcKind
--- /dev/null
+{-| kvm resident set size collector
+
+It collects the resident set size (RSS) for all kvm
+processes managed by Ganeti, i.e., the number of pages
+the process has in RAM. The value is obtained
+by taking the corresponding value from /proc/$pid/statm.
+
+-}
+
+{-
+
+Copyright (C) 2015 Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-}
+
+module Ganeti.DataCollectors.KvmRSS
+ ( dcName
+ , dcVersion
+ , dcFormatVersion
+ , dcCategory
+ , dcKind
+ , dcReport
+ ) where
+
+import Control.Monad (liftM)
+import Data.Char (isSpace)
+import Data.Maybe (mapMaybe)
+import Network.BSD (getHostName)
+import System.FilePath ((</>))
+import qualified Text.JSON as J
+import Text.Printf (printf)
+
+import Ganeti.BasicTypes
+import Ganeti.Confd.ClientFunctions (getInstances)
+import qualified Ganeti.Constants as C
+import Ganeti.DataCollectors.Types
+import Ganeti.Objects
+import Ganeti.Path (kvmPidDir)
+
+-- | The name of this data collector for the resident set size (RSS).
+dcName :: String
+dcName = C.dataCollectorKvmRSS
+
+-- | The version number for the data format of this data collector.
+dcFormatVersion :: Int
+dcFormatVersion = 1
+
+-- | The version of this data collector.
+dcVersion :: DCVersion
+dcVersion = DCVerBuiltin
+
+-- | The category of this data collector.
+dcCategory :: Maybe DCCategory
+dcCategory = Nothing
+
+-- | The kind of this data collector.
+dcKind :: DCKind
+dcKind = DCKPerf
+
+-- | Parse the contents of a pid file.
+parsePid :: Monad m => String -> m Int
+parsePid s = case reads s of
+ [(pid, r)] | all isSpace r -> return pid
+ _ -> fail $ "Couldn't parse pid " ++ s
+
+-- | From the contents of a @statm@ file get the resident set size
+-- (its second field), in pages.
+parseRss :: Monad m => String -> m Int
+parseRss s =
+ let drop1 = dropWhile isSpace . dropWhile (not . isSpace) . dropWhile isSpace
+ in case reads (drop1 s) of
+ [(n, _)] -> return n
+       _ -> fail $ "Failed to parse statm " ++ s
+
+-- | For an instance, collect the resident set size, if available.
+collectInstanceRSS :: String -> IO (Result (String, J.JSValue))
+collectInstanceRSS inst = runResultT $ do
+ piddir <- liftIO kvmPidDir
+ let pidfile = piddir </> inst
+ pidstring <- liftIO $ readFile pidfile
+ pid <- parsePid pidstring
+ let procfspath = printf "/proc/%d/statm" pid
+ memstat <- liftIO $ readFile procfspath
+ rss <- parseRss memstat
+ return (inst, J.showJSON rss)
+
+-- | The data exported by the data collector.
+dcReport :: IO DCReport
+dcReport = do
+ node <- getHostName
+ instances <- liftM (genericResult (const []) (mapMaybe instName . fst))
+ . runResultT $ getInstances node Nothing Nothing
+ reports <- liftM justOk $ mapM collectInstanceRSS instances
+ buildReport dcName dcVersion dcFormatVersion dcCategory dcKind
+ . J.JSObject $ J.toJSObject reports
import Ganeti.Utils (getCurrentTimeUSec)
-- | The possible classes a data collector can belong to.
-data DCCategory = DCInstance | DCStorage | DCDaemon | DCHypervisor
+data DCCategory = DCInstance | DCStorage | DCDaemon | DCHypervisor | DCNode
deriving (Show, Eq, Read, Enum, Bounded)
-- | Get the category name and return it as a string.
-- | Type for the value field of the `CollectorMap` below.
data CollectorData =
- CPULoadData (Seq.Seq (ClockTime, [Int]))
+ CPULoadData (Seq.Seq (ClockTime, [Integer]))
| InstanceCpuLoad (Map.Map String (Seq.Seq (ClockTime, Double)))
instance NFData ClockTime where
, dcUpdate
) where
-import Control.Applicative ((<$>), liftA2)
+import Prelude ()
+import Ganeti.Prelude
+
+import Control.Applicative (liftA2)
import Control.Arrow ((***))
import Control.Monad (liftM, when)
import Control.Monad.IO.Class (liftIO)
combinedValues
withoutOld = Map.filter
(liftA2 (&&) (not . Seq.null)
- $ (>) (fromIntegral $ C.xentopAverageThreshold * 1000000)
+ $ (>) (fromIntegral
+ $ 3 * C.xentopAverageThreshold * 1000000)
. (clockTimeToUSec now -) . clockTimeToUSec
. fst . flip Seq.index 0)
withinRange
, ("FileStoragePathError", [excErrMsg])
])
-instance Error GanetiException where
- strMsg = GenericError
-
instance JSON GanetiException where
showJSON = saveGanetiException
readJSON = loadGanetiException
+instance FromString GanetiException where
+ mkFromString = GenericError
+
-- | Error monad using 'GanetiException' type alias.
type ErrorResult = GenericResult GanetiException
, fromCLIOptions
) where
+import qualified Data.Set as Set
+
import qualified Ganeti.HTools.CLI as CLI
import qualified Ganeti.HTools.Types as T
data AlgorithmOptions = AlgorithmOptions
{ algDiskMoves :: Bool -- ^ Whether disk moves are allowed
+  , algDiskMovesFactor :: Double -- ^ Only admit disk moves whose gain
+                                 -- in the cluster score is at least
+                                 -- algDiskMovesFactor times the gain
+                                 -- achievable without disk moves
, algInstanceMoves :: Bool -- ^ Whether instance moves are allowed
, algRestrictedMigration :: Bool -- ^ Whether migration is restricted
, algIgnoreSoftErrors :: Bool -- ^ Whether to always ignore soft errors
-- like global N+1 redundancy
, algCapacityIgnoreGroups :: [T.Gdx] -- ^ Groups to ignore in capacity checks
, algRestrictToNodes :: Maybe [String] -- ^ nodes to restrict allocation to
+ , algAllowedNodes :: Maybe (Set.Set Int) -- ^ if given, do not perform any
+ -- operations involving other nodes
, algAcceptExisting :: Bool -- ^ accept existing violations in capacity
-- checks
}
fromCLIOptions :: CLI.Options -> AlgorithmOptions
fromCLIOptions opts = AlgorithmOptions
{ algDiskMoves = CLI.optDiskMoves opts
+ , algDiskMovesFactor = CLI.optAvoidDiskMoves opts
, algInstanceMoves = CLI.optInstMoves opts
, algRestrictedMigration = CLI.optRestrictedMigrate opts
, algIgnoreSoftErrors = CLI.optIgnoreSoftErrors opts
, algCapacity = CLI.optCapacity opts
, algCapacityIgnoreGroups = []
, algRestrictToNodes = CLI.optRestrictToNodes opts
+ , algAllowedNodes = Nothing
, algAcceptExisting = CLI.optAcceptExisting opts
}
offline <- extract "offline"
drained <- extract "drained"
guuid <- extract "group"
+ hvstate <- extractDef emptyContainer "hv_state"
vm_capable <- annotateResult desc $ maybeFromObj a "vm_capable"
let vm_capable' = fromMaybe True vm_capable
gidx <- lookupGroup ktg n guuid
dfree <- lvextract 0 "free_disk"
ctotal <- lvextract 0.0 "total_cpus"
cnos <- lvextract 0 "reserved_cpus"
- let node = flip Node.setNodeTags tags $
- Node.create n mtotal mnode mfree dtotal dfree ctotal cnos
+ let node_mem = obtainNodeMemory hvstate mnode
+ node = flip Node.setNodeTags tags $
+ Node.create n mtotal node_mem mfree dtotal dfree ctotal cnos
(not live || drained) sptotal spfree gidx excl_stor
return (n, node)
-- * Utility functions
--- | Get values behind \"data\" part of the result.
-getData :: (Monad m) => JSValue -> m JSValue
-getData (JSObject o) = fromObj (fromJSObject o) "data"
-getData x = fail $ "Invalid input, expected dict entry but got " ++ show x
-
--- | Converts a (status, value) into m value, if possible.
-parseQueryField :: (Monad m) => JSValue -> m (JSValue, JSValue)
-parseQueryField (JSArray [status, result]) = return (status, result)
-parseQueryField o =
- fail $ "Invalid query field, expected (status, value) but got " ++ show o
-
--- | Parse a result row.
-parseQueryRow :: (Monad m) => JSValue -> m [(JSValue, JSValue)]
-parseQueryRow (JSArray arr) = mapM parseQueryField arr
-parseQueryRow o =
- fail $ "Invalid query row result, expected array but got " ++ show o
-
--- | Parse an overall query result and get the [(status, value)] list
--- for each element queried.
-parseQueryResult :: (Monad m) => JSValue -> m [[(JSValue, JSValue)]]
-parseQueryResult (JSArray arr) = mapM parseQueryRow arr
-parseQueryResult o =
- fail $ "Invalid query result, expected array but got " ++ show o
-
--- | Prepare resulting output as parsers expect it.
-extractArray :: (Monad m) => JSValue -> m [[(JSValue, JSValue)]]
-extractArray v =
- getData v >>= parseQueryResult
-
--- | Testing result status for more verbose error message.
-fromJValWithStatus :: (Text.JSON.JSON a, Monad m) => (JSValue, JSValue) -> m a
-fromJValWithStatus (st, v) = do
- st' <- fromJVal st
- Qlang.checkRS st' v >>= fromJVal
-
annotateConvert :: String -> String -> String -> Result a -> Result a
annotateConvert otype oname oattr =
annotateResult $ otype ++ " '" ++ oname ++
-> (JSValue, JSValue) -- ^ The value we're trying to convert
-> Result a -- ^ The annotated result
genericConvert otype oname oattr =
- annotateConvert otype oname oattr . fromJValWithStatus
+ annotateConvert otype oname oattr . L.fromJValWithStatus
convertArrayMaybe :: (Text.JSON.JSON a) =>
String -- ^ The object type
["name", "mtotal", "mnode", "mfree", "dtotal", "dfree",
"ctotal", "cnos", "offline", "drained", "vm_capable",
"ndp/spindle_count", "group.uuid", "tags",
- "ndp/exclusive_storage", "sptotal", "spfree", "ndp/cpu_speed"]
+ "ndp/exclusive_storage", "sptotal", "spfree", "ndp/cpu_speed",
+ "hv_state"]
Qlang.EmptyFilter
-- | The input data for instance query.
getInstances :: NameAssoc
-> JSValue
-> Result [(String, Instance.Instance)]
-getInstances ktn arr = extractArray arr >>= mapM (parseInstance ktn)
+getInstances ktn arr = L.extractArray arr >>= mapM (parseInstance ktn)
-- | Construct an instance from a JSON object.
parseInstance :: NameAssoc
, status, pnode, snodes, tags, oram
, auto_balance, disk_template, su
, dsizes, dspindles, forthcoming ] = do
- xname <- annotateResult "Parsing new instance" (fromJValWithStatus name)
+ xname <- annotateResult "Parsing new instance" (L.fromJValWithStatus name)
let convert a = genericConvert "Instance" xname a
xdisk <- convert "disk_usage" disk
xmem <- case oram of -- FIXME: remove the "guessing"
-- | Parse a node list in JSON format.
getNodes :: NameAssoc -> JSValue -> Result [(String, Node.Node)]
-getNodes ktg arr = extractArray arr >>= mapM (parseNode ktg)
+getNodes ktg arr = L.extractArray arr >>= mapM (parseNode ktg)
-- | Construct a node from a JSON object.
parseNode :: NameAssoc -> [(JSValue, JSValue)] -> Result (String, Node.Node)
parseNode ktg [ name, mtotal, mnode, mfree, dtotal, dfree
, ctotal, cnos, offline, drained, vm_capable, spindles, g_uuid
- , tags, excl_stor, sptotal, spfree, cpu_speed ]
+ , tags, excl_stor, sptotal, spfree, cpu_speed, hv_state ]
= do
- xname <- annotateResult "Parsing new node" (fromJValWithStatus name)
+ xname <- annotateResult "Parsing new node" (L.fromJValWithStatus name)
let convert a = genericConvert "Node" xname a
xoffline <- convert "offline" offline
xdrained <- convert "drained" drained
-- is the only supported disk template
xctotal <- lvconvert 0.0 "ctotal" ctotal
xcnos <- lvconvert 0 "cnos" cnos
- let node = flip Node.setCpuSpeed xcpu_speed .
+ xhv_state <- convert "hv_state" hv_state
+ let node_mem = obtainNodeMemory xhv_state xmnode
+ node = flip Node.setCpuSpeed xcpu_speed .
flip Node.setNodeTags xtags $
- Node.create xname xmtotal xmnode xmfree xdtotal xdfree
+ Node.create xname xmtotal node_mem xmfree xdtotal xdfree
xctotal xcnos (not live || xdrained) xsptotal xspfree
xgdx xexcl_stor
return (xname, node)
-- | Parses the cluster groups.
getGroups :: JSValue -> Result [(String, Group.Group)]
-getGroups jsv = extractArray jsv >>= mapM parseGroup
+getGroups jsv = L.extractArray jsv >>= mapM parseGroup
-- | Parses a given group information.
parseGroup :: [(JSValue, JSValue)] -> Result (String, Group.Group)
parseGroup [uuid, name, apol, ipol, tags] = do
- xname <- annotateResult "Parsing new group" (fromJValWithStatus name)
+ xname <- annotateResult "Parsing new group" (L.fromJValWithStatus name)
let convert a = genericConvert "Group" xname a
xuuid <- convert "uuid" uuid
xapol <- convert "alloc_policy" apol
module Ganeti.HTools.Backend.MonD
( queryAllMonDDCs
, pMonDData
+ , Report(..)
+ , DataCollector
+ , dName
+ , fromCurl
+ , mkReport
+ , totalCPUCollector
+ , xenCPUCollector
+ , kvmRSSCollector
+ , scaleMemoryWeight
+ , useInstanceRSSData
) where
import Control.Monad
import Ganeti.BasicTypes
import qualified Ganeti.Constants as C
import Ganeti.Cpu.Types
-import qualified Ganeti.DataCollectors.XenCpuLoad as XenCpuLoad
import qualified Ganeti.DataCollectors.CPUload as CPUload
+import qualified Ganeti.DataCollectors.KvmRSS as KvmRSS
+import qualified Ganeti.DataCollectors.XenCpuLoad as XenCpuLoad
import Ganeti.DataCollectors.Types ( DCReport, DCCategory
, dcReportData, dcReportName
, getCategoryName )
-- | The actual data types for MonD's Data Collectors.
data Report = CPUavgloadReport CPUavgload
| InstanceCpuReport (Map.Map String Double)
+ | InstanceRSSReport (Map.Map String Double)
-- | Type describing a data collector basic information.
data DataCollector = DataCollector
, dUse = useInstanceCpuData
}
+-- * kvm instance RSS collector
+
+-- | Parse the results of the kvm instance RSS data collector.
+mkKvmRSSReport :: DCReport -> Maybe Report
+mkKvmRSSReport =
+ liftM InstanceRSSReport . maybeParseMap . dcReportData
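+
+-- The parsed report wraps a map from instance name to RSS value, e.g.
+-- (names and figures purely illustrative):
+--
+-- > InstanceRSSReport (Map.fromList [("inst1.example.com", 131072.0)])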
+
+-- | Conversion constant from htools' internal memory unit,
+-- which is MiB, to the RSS unit, which is reported in pages
+-- (of 4 KiB each).
+pagesPerMiB :: Double
+pagesPerMiB = 256.0
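+
+-- A worked example (the 2048 MiB figure is purely illustrative): a node
+-- with 2048 MiB of memory available to instances has a memory pool of
+-- 2048 * 256 = 524288 pages.
+--
+-- >>> 2048 * pagesPerMiB
+-- 524288.0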
+
+-- | Update cluster data based on per-instance RSS data.
+-- Also set the node's memory utilisation pool correctly. Our unit
+-- of memory usage is pages; there are 256 pages per MiB
+-- of node memory not used by the node itself.
+useInstanceRSSData :: [(Node.Node, Report)]
+ -> (Node.List, Instance.List)
+ -> Result (Node.List, Instance.List)
+useInstanceRSSData reports (nl, il) = do
+ let toMap (InstanceRSSReport m) = Just m
+ toMap _ = Nothing
+ let usage = Map.unions $ mapMaybe (toMap . snd) reports
+ missingData = (Set.fromList . map Instance.name $ IntMap.elems il)
+ Set.\\ Map.keysSet usage
+ unless (Set.null missingData)
+ . Bad . (++) "No RSS information available for "
+ . show $ Set.elems missingData
+ let updateInstance inst =
+ let mem = Map.lookup (Instance.name inst) usage
+ dynU = Instance.util inst
+ dynU' = maybe dynU (\m -> dynU { memWeight = m }) mem
+ in inst { Instance.util = dynU' }
+ let il' = IntMap.map updateInstance il
+ let updateNode node =
+ let mem = sum
+ . map (\ idx -> maybe 0 (memWeight . Instance.util)
+ $ IntMap.lookup idx il')
+ $ Node.pList node
+ dynU = Node.utilLoad node
+ dynU' = dynU { memWeight = mem }
+ pool = Node.utilPool node
+ nodePages = (Node.tMem node - fromIntegral (Node.nMem node))
+ * pagesPerMiB
+ pool' = pool { memWeight = nodePages }
+ in node { Node.utilLoad = dynU', Node.utilPool = pool' }
+ let nl' = IntMap.map updateNode nl
+ return (nl', il')
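+
+-- A minimal sketch of how a caller consumes the result; @reports@, @nl@
+-- and @il@ are assumed to be provided by the surrounding collector
+-- plumbing, and both handlers are hypothetical:
+--
+-- > case useInstanceRSSData reports (nl, il) of
+-- >   Ok (nl', il') -> continueWith nl' il'  -- updated utilisation data
+-- >   Bad msg       -> exitErr msg           -- some instance lacked RSS data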
+
+-- | Update cluster data based on the per-instance RSS usage.
+kvmRSSCollector :: DataCollector
+kvmRSSCollector = DataCollector { dName = KvmRSS.dcName
+ , dCategory = KvmRSS.dcCategory
+ , dMkReport = mkKvmRSSReport
+ , dUse = useInstanceRSSData
+ }
+
+-- | Scale the importance of the memory weight in dynamic utilisation
+-- by multiplying the usage by the given factor. Note that the underlying
+-- model assumes dynamic utilisation values are reported in arbitrary units.
+scaleMemoryWeight :: Double
+ -> (Node.List, Instance.List)
+ -> (Node.List, Instance.List)
+scaleMemoryWeight f (nl, il) =
+ let updateInst inst =
+ let dynU = Instance.util inst
+ dynU' = dynU { memWeight = f * memWeight dynU }
+ in inst { Instance.util = dynU' }
+ updateNode node =
+ let dynU = Node.utilLoad node
+ dynU' = dynU { memWeight = f * memWeight dynU }
+ in node { Node.utilLoad = dynU' }
+ in (IntMap.map updateNode nl, IntMap.map updateInst il)
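+
+-- A usage sketch, assuming @opts@ are the parsed CLI options and
+-- @optMemWeight@ is the accessor behind the new @oMemWeight@ option
+-- (both assumptions for illustration only):
+--
+-- > let (nl', il') = scaleMemoryWeight (optMemWeight opts) (nl, il)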
+
-- * Collector choice
-- | The list of Data Collectors used by hail and hbal.
collectors :: Options -> [DataCollector]
collectors opts
| optIgnoreDynu opts = []
- | optMonDXen opts = [ xenCPUCollector ]
- | otherwise = [ totalCPUCollector ]
+ | otherwise =
+ (if optMonDXen opts then [ xenCPUCollector ] else [ totalCPUCollector ])
+ ++ [ kvmRSSCollector | optMonDKvmRSS opts ]
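+
+-- The resulting collector list by option combination (sketch; the kvm
+-- RSS option is the one behind @oMonDKvmRSS@):
+--
+-- >   dynamic utilisation ignored -> []
+-- >   Xen CPU data requested      -> xenCPUCollector   (+ kvm RSS if enabled)
+-- >   otherwise                   -> totalCPUCollector (+ kvm RSS if enabled)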
-- * Querying infrastructure
ctotal <- lvextract 0.0 "ctotal"
cnos <- lvextract 0 "cnos"
tags <- extract "tags"
- let node = flip Node.setNodeTags tags $
- Node.create name mtotal mnode mfree dtotal dfree ctotal cnos
+ hv_state <- extractDef emptyContainer "hv_state"
+ let node_mem = obtainNodeMemory hv_state mnode
+ node = flip Node.setNodeTags tags $
+ Node.create name mtotal node_mem mfree dtotal dfree ctotal cnos
(not live || drained) sptotal spfree guuid' excl_stor
return (name, node)
-- | Generate policy data from a given policy object.
serializeIPolicy :: String -> IPolicy -> String
serializeIPolicy owner ipol =
- let IPolicy minmax stdspec dts vcpu_ratio spindle_ratio = ipol
+ let IPolicy minmax stdspec dts vcpu_ratio spindle_ratio memory_ratio = ipol
strings = [ owner
, serializeISpec stdspec
, serializeMultipleMinMaxISpecs minmax
, serializeDiskTemplates dts
, show vcpu_ratio
, show spindle_ratio
+ , show memory_ratio
]
in intercalate "|" strings
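+
+-- For example (every field value below is illustrative, with the spec
+-- fields elided), a policy for owner \"default\" now serializes with the
+-- memory ratio as the final field:
+--
+-- > default|<stdspec>|<minmax specs>|drbd,plain|4.0|32.0|1.0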
-- | Loads an ipolicy from a field list.
loadIPolicy :: [String] -> Result (String, IPolicy)
loadIPolicy (owner:stdspec:minmaxspecs:dtemplates:
- vcpu_ratio:spindle_ratio:_) = do
+ vcpu_ratio:spindle_ratio:memory_ratio:_) = do
xstdspec <- loadISpec (owner ++ "/stdspec") (commaSplit stdspec)
xminmaxspecs <- loadMultipleMinMaxISpecs owner $
sepSplit iSpecsSeparator minmaxspecs
xdts <- mapM diskTemplateFromRaw $ commaSplit dtemplates
xvcpu_ratio <- tryRead (owner ++ "/vcpu_ratio") vcpu_ratio
xspindle_ratio <- tryRead (owner ++ "/spindle_ratio") spindle_ratio
+ xmemory_ratio <- tryRead (owner ++ "/memory_ratio") memory_ratio
return (owner,
IPolicy xminmaxspecs xstdspec
- xdts xvcpu_ratio xspindle_ratio)
+ xdts xvcpu_ratio xspindle_ratio xmemory_ratio)
+loadIPolicy (owner:stdspec:minmaxspecs:dtemplates:
+ vcpu_ratio:spindle_ratio:_) =
+ loadIPolicy (owner:stdspec:minmaxspecs:dtemplates:
+ vcpu_ratio:spindle_ratio:["1.0"])
loadIPolicy s = fail $ "Invalid ipolicy data: '" ++ show s ++ "'"
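+
+-- Backward compatibility sketch: a six-field line written before the
+-- memory ratio existed is re-dispatched with the default \"1.0\"
+-- appended, so (middle fields abbreviated)
+--
+-- > loadIPolicy ["default", std, minmax, dts, "4.0", "32.0"]
+--
+-- behaves like
+--
+-- > loadIPolicy ["default", std, minmax, dts, "4.0", "32.0", "1.0"]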
loadOnePolicy :: (IPolicy, Group.List) -> String
{-
-Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
+Copyright (C) 2009, 2010, 2011, 2012, 2013, 2015 Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
-- * The options
, oDataFile
, oDiskMoves
+ , oAvoidDiskMoves
, oDiskTemplate
, oDryRun
, oSpindleUse
, oDynuFile
+ , oMemWeight
, oMonD
, oMonDDataFile
+ , oMonDKvmRSS
, oMonDXen
, oEvacMode
, oMonDExitMissing
, oForce
, oFullEvacuation
, oGroup
+ , oIdleDefault
, oIAllocSrc
, oIgnoreDyn
, oIgnoreNonRedundant