#!/usr/bin/python3
#
# Copyright (C) Citrix Systems Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; version 2.1 only.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Script to coalesce and garbage collect VHD-based SRs in the background
#

from sm_typing import Optional, override

import os
import os.path
import sys
import time
import signal
import subprocess
import getopt
import datetime
import traceback
import base64
import zlib
import errno
import stat

import XenAPI # pylint: disable=import-error
import util
import lvutil
import vhdutil
import lvhdutil
import lvmcache
import journaler
import fjournaler
import lock
import blktap2
import xs_errors
from refcounter import RefCounter
from ipc import IPCFlag
from lvmanager import LVActivator
from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG
from functools import reduce
from time import monotonic as _time

try:
    from linstorjournaler import LinstorJournaler
    from linstorvhdutil import LinstorVhdUtil, MultiLinstorVhdUtil
    from linstorvolumemanager import get_controller_uri
    from linstorvolumemanager import LinstorVolumeManager
    from linstorvolumemanager import LinstorVolumeManagerError
    from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX

    LINSTOR_AVAILABLE = True
except ImportError:
    LINSTOR_AVAILABLE = False

# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for that fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running; it stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
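
# Illustration (not executed): with the default rate of 1.0 / 60 messages per
# minute, the throttle in VDI._reportCoalesceError spaces XenCenter messages
# at least (1.0 / coalesce_err_rate) * 60 = 3600 seconds apart, i.e. at most
# one coalesce error message per hour.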

VAR_RUN = "/var/run/"
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
SIGTERM = False


class AbortException(util.SMException):
    pass


def receiveSignal(signalNumber, frame):
    global SIGTERM

    util.SMlog("GC: received SIGTERM")
    SIGTERM = True
    return
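
# Usage sketch (illustrative only): the daemon entry point, which is outside
# this section, is expected to install the handler so the polling loops below
# can notice a pending TERM request, e.g.:
#
#     signal.signal(signal.SIGTERM, receiveSignal)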


################################################################################
#
#  Util
#
class Util:
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log("         ***********************")
        Util.log("         *  E X C E P T I O N  *")
        Util.log("         ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        "Execute a subprocess, then return its return code, stdout, stderr"
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout
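
    # Usage sketch (illustrative only; the command line is hypothetical): run
    # a shell command, accept either of two return codes, and capture stdout:
    #
    #     out = Util.doexec("vhd-util query -n /path/to.vhd -v", [0, 1],
    #                       ret=Util.RET_STDOUT)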

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """execute func in a separate (forked) process and kill it if
        abortTest signals so"""
        abortSignaled = abortTest() # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log("  Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with: (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occurred")
            os._exit(0)
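
    # Usage sketch (illustrative only), mirroring the call sites further down
    # (`doWork` and `sr_uuid` are placeholders): run a long operation in a
    # killable child process, aborting when the SR's abort flag is raised:
    #
    #     abortTest = lambda: IPCFlag(sr_uuid).test(FLAG_TYPE_ABORT)
    #     Util.runAbortable(lambda: doWork(), True, sr_uuid, abortTest,
    #                       VDI.POLL_INTERVAL, 0)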

    @staticmethod
    def num2str(number):
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps"""
        len1 = len(bitmap1)
        len2 = len(bitmap2)
        lenLong = len1
        lenShort = len2
        bitmapLong = bitmap1
        if len2 > len1:
            lenLong = len2
            lenShort = len1
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            val = bitmap1[i] | bitmap2[i]
            count += Util.numBits(val)

        # count the tail of the longer bitmap, starting right after the
        # overlapping prefix (this must not reuse the loop index above, which
        # is undefined when the shorter bitmap is empty)
        for i in range(lenShort, lenLong):
            val = bitmapLong[i]
            count += Util.numBits(val)
        return count
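
    # Worked example (illustrative only): for bitmap1 = b'\x03' and
    # bitmap2 = b'\x05\x10', the OR of the overlapping byte is 0x07 (3 bits)
    # and the tail byte 0x10 contributes 1 more, so countBits returns 4.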

    @staticmethod
    def getThisScript():
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript


################################################################################
#
#  XAPI
#
class XAPI:
    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        pass

    @staticmethod
    def getSession():
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        self.task = None
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        return self._srRef

    def isPluggedHere(self):
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        text = self.session.xenapi.host.call_plugin(
            hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                                                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        sr_refs = self.session.xenapi.SR.get_all_records_where(
            'field "local_cache_enabled" = "true"')
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        self.session.xenapi.task.set_other_config(
            self.task,
            {
                "applies_to": self._srRef
            })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if total > 0:
            self.session.xenapi.task.set_progress(
                self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)
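
    # Illustration (not executed): with task_progress = {"coalescable": 3,
    # "done": 1}, update_task reports 1 / (3 + 1) = 0.25 on the XAPI task, so
    # progress approaches 1.0 as VDIs move from "coalescable" to "done".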


################################################################################
#
#  VDI
#
class VDI(object):
    """Object representing a VDI of a VHD-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202
    DRIVER_NAME_VHD = "vhd"

    # config keys & values
    DB_VHD_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VHD_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce" # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means
                                 # no space to snap-coalesce or unable to keep
                                 # up with VDI. This is not used by the SM, it
                                 # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    CONFIG_TYPE = {
        DB_VHD_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VHD_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating
                                # feasibility of leaf coalesce

    JRN_RELINK = "relink" # journal entry type for relinking children
    JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf" # used in coalesce-leaf

    STR_TREE_INDENT = 4

    def __init__(self, sr, uuid, raw):
        self.sr = sr
        self.scanError = True
        self.uuid = uuid
        self.raw = raw
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self._hidden = False
        self.parent = None
        self.children = []
        self._vdiRef = None
        self._clearRef()

    @staticmethod
    def extractUuid(path):
        raise NotImplementedError("Implement in sub class")

    def load(self, info=None) -> None:
        """Load VDI info"""
        pass

    def getDriverName(self) -> str:
        return self.DRIVER_NAME_VHD

    def getRef(self):
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef

    def getConfig(self, key, default=None):
        config = self.sr.xapi.getConfigVDI(self, key)
        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:
            val = config
        else:
            val = config.get(key)
        if val:
            return val
        return default

    def setConfig(self, key, val):
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

    def delConfig(self, key):
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

    def ensureUnpaused(self):
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

    def pause(self, failfast=False) -> None:
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                                     self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

    def _report_tapdisk_unpause_error(self):
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                "VMs using this tapdisk have lost access " \
                "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            util.SMlog("failed to generate message: %s" % e)

    def unpause(self):
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                                       self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                                               self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

    def isSnapshot(self):
        return self.sr.xapi.isSnapshot(self)

    def isAttachedRW(self):
        return util.is_attached_rw(
            self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

    def getVHDBlocks(self):
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap
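
    # Illustration (not executed): _queryVHDBlocks returns a compressed block
    # bitmap, and updateBlockInfo stores it base64-encoded in the VDI config,
    # so
    #     zlib.decompress(base64.b64decode(val))
    # recovers the raw allocation bitmap consumed by Util.countBits.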

    def isCoalesceable(self):
        """A VDI is coalesceable if it has no siblings and is not a leaf"""
        return not self.scanError and \
            self.parent and \
            len(self.parent.children) == 1 and \
            self.isHidden() and \
            len(self.children) > 0

    def isLeafCoalesceable(self):
        """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
        return not self.scanError and \
            self.parent and \
            len(self.parent.children) == 1 and \
            not self.isHidden() and \
            len(self.children) == 0

    def canLiveCoalesce(self, speed):
        """Can we stop-and-leaf-coalesce this VDI? The VDI must be
        isLeafCoalesceable() already"""
        feasibleSize = False
        allowedDownTime = \
            self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT
        vhd_size = self.getAllocatedSize()
        if speed:
            feasibleSize = \
                vhd_size // speed < allowedDownTime
        else:
            feasibleSize = \
                vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE

        return (feasibleSize or
                self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE)
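
    # Worked example (illustrative only): with TIMEOUT_SAFETY_MARGIN = 0.5 and
    # LIVE_LEAF_COALESCE_TIMEOUT = 10, the allowed downtime is 5 seconds. A
    # VDI with 400 MiB allocated and a measured coalesce speed of 100 MiB/s
    # needs about 4 seconds, so it qualifies; without a speed estimate, only
    # VDIs under LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB) qualify, unless
    # LEAFCLSC_FORCE is set.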

    def getAllPrunable(self):
        if len(self.children) == 0: # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                thisPrunable = False

        # We can destroy the current VDI only if all of its children are
        # prunable AND the current VDI is hidden itself!
        # Example (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436]         b5458d61(1.000G/4.127M)
        # SMGC: [32436]             *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 must not be.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.

        if not self.scanError and self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

    def getSizeVHD(self) -> int:
        return self._sizeVHD

    def getAllocatedSize(self) -> int:
        return self._sizeAllocated

    def getTreeRoot(self):
        "Get the root of the tree that self belongs to"
        root = self
        while root.parent:
            root = root.parent
        return root

    def getTreeHeight(self):
        "Get the height of the subtree rooted at self"
        if len(self.children) == 0:
            return 1

        maxChildHeight = 0
        for child in self.children:
            childHeight = child.getTreeHeight()
            if childHeight > maxChildHeight:
                maxChildHeight = childHeight

        return maxChildHeight + 1

    def getAllLeaves(self):
        "Get all leaf nodes in the subtree rooted at self"
        if len(self.children) == 0:
            return [self]

        leaves = []
        for child in self.children:
            leaves.extend(child.getAllLeaves())
        return leaves

    def updateBlockInfo(self) -> Optional[str]:
        val = base64.b64encode(self._queryVHDBlocks()).decode()
        self.setConfig(VDI.DB_VHD_BLOCKS, val)
        return val

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

    def delete(self) -> None:
        "Physically delete the VDI"
        lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

    def getParent(self) -> str:
        return vhdutil.getParent(self.path, lambda x: x.strip())

    def repair(self, parent) -> None:
        vhdutil.repair(parent)

    @override
    def __str__(self) -> str:
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVirt = "?"
        if self.sizeVirt > 0:
            strSizeVirt = Util.num2str(self.sizeVirt)
        strSizeVHD = "?"
        if self._sizeVHD > 0:
            strSizeVHD = "/%s" % Util.num2str(self._sizeVHD)
        strSizeAllocated = "?"
        if self._sizeAllocated >= 0:
            strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated)
        strType = ""
        if self.raw:
            strType = "[RAW]"
            strSizeVHD = ""

        return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt,
                                   strSizeVHD, strSizeAllocated, strType)

    def validate(self, fast=False) -> None:
        if not vhdutil.check(self.path, fast=fast):
            raise util.SMException("VHD %s corrupted" % self)

    def _clear(self):
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

    def _clearRef(self):
        self._vdiRef = None

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        VHD, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceVHD(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

    def _verifyContents(self, timeOut):
        Util.log("  Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                          self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log("  Coalesce verification succeeded")

    def _runTapdiskDiff(self):
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
            (self.getDriverName(), self.path,
             self.parent.getDriverName(), self.parent.path)
        Util.doexec(cmd, 0)
        return True

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occurred
        ce: the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Ran out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                   % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                    float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                                                  COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                                         str(now.strftime('%s')))
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

    def coalesce(self) -> int:
        # size is returned in sectors
        return vhdutil.coalesce(self.path) * 512

    @staticmethod
    def _doCoalesceVHD(vdi):
        try:
            startTime = time.time()
            vhdSize = vdi.getAllocatedSize()
            coalesced_size = vdi.coalesce()
            endTime = time.time()
            vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size)
        except util.CommandException as ce:
            # We use try/except for the following piece of code because it runs
            # in a separate process context and errors will not be caught and
            # reported by anyone.
            try:
                # Report coalesce errors back to user via XC
                VDI._reportCoalesceError(vdi, ce)
            except Exception as e:
                util.SMlog('failed to create XenCenter message: %s' % e)
            raise ce
        except:
            raise

    def _vdi_is_raw(self, vdi_path):
        """
        Given path to vdi determine if it is raw
        """
        uuid = self.extractUuid(vdi_path)
        return self.sr.vdis[uuid].raw

    def _coalesceVHD(self, timeOut):
        Util.log("  Running VHD coalesce on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        try:
            util.fistpoint.activate_custom_fn(
                "cleanup_coalesceVHD_inject_failure",
                util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
                              self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # An exception at this phase could indicate a failure in the VHD
            # coalesce, or a kill of the VHD coalesce by runAbortable due to
            # timeOut. Try a repair and reraise the exception.
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                               'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                           'after failed coalesce on %s, err: %s' %
                           (parent, self.path, e))
            raise

        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

    def _relinkSkip(self) -> None:
        """Relink children of this VDI to point to the parent of this VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("  Relinking %s from %s to %s" % \
                     (child, self, self.parent))
            util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid)
            child._setParent(self.parent)
        self.children = []

    def _reloadChildren(self, vdiSkip):
        """Pause & unpause all VDIs in the subtree to cause blktap to reload
        the VHD metadata for this file in any online VDI"""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if child == vdiSkip:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("  Reloading VDI %s" % child)
            child._reload()

    def _reload(self):
        """Pause & unpause to cause blktap to reload the VHD metadata"""
        for child in self.children:
            child._reload()

        # only leaves can be attached
        if len(self.children) == 0:
            try:
                self.delConfig(VDI.DB_VDI_RELINKING)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise
            self.refresh()

    def _tagChildrenForRelink(self):
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                 self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                     self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()

    def _loadInfoParent(self):
        ret = vhdutil.getParent(self.path, lvhdutil.extractUuid)
        if ret:
            self.parentUuid = ret

    def _setParent(self, parent) -> None:
        vhdutil.setParent(self.path, parent.path, False)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                     (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update %s with vhd-parent field %s" % \
                     (self.uuid, self.parentUuid))

    def _ensureParentActiveForRelink(self) -> None:
        pass

    def isHidden(self) -> bool:
        if self._hidden is None:
            self._loadInfoHidden()
        return self._hidden

    def _loadInfoHidden(self) -> None:
        hidden = vhdutil.getHidden(self.path)
        self._hidden = (hidden != 0)

    def _setHidden(self, hidden=True) -> None:
        self._hidden = None
        vhdutil.setHidden(self.path, hidden)
        self._hidden = hidden

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a VHD must always be done offline and atomically: the file
        must not be open by anyone and no concurrent operations may take
        place. Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log("  Expanding VHD virt size for VDI %s: %s -> %s" % \
                 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024
        if size <= msize:
            vhdutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        self.sizeVirt = vhdutil.getSizeVirt(self.path)

    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline VHDs"""
        jFile = os.path.join(self.sr.path, self.uuid)
        vhdutil.setSizeVirt(self.path, size, jFile)

    def _queryVHDBlocks(self) -> bytes:
        return vhdutil.getBlockBitmap(self.path)

    def _getCoalescedSizeData(self):
        """Get the data size of the resulting VHD if we coalesce self onto
        parent. We calculate the actual size by using the VHD block allocation
        information (as opposed to just adding up the two VHD sizes to get an
        upper bound)"""
        # make sure we don't use stale BAT info from vdi_rec since the child
        # was writable all this time
        self.delConfig(VDI.DB_VHD_BLOCKS)
        blocksChild = self.getVHDBlocks()
        blocksParent = self.parent.getVHDBlocks()
        numBlocks = Util.countBits(blocksChild, blocksParent)
        Util.log("Num combined blocks = %d" % numBlocks)
        sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE
        assert(sizeData <= self.sizeVirt)
        return sizeData

    def _calcExtraSpaceForCoalescing(self) -> int:
        sizeData = self._getCoalescedSizeData()
        sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \
            vhdutil.calcOverheadEmpty(self.sizeVirt)
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.getSizeVHD()

    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # the space requirements are the same as for inline coalesce
        return self._calcExtraSpaceForCoalescing()

    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        snapshot-coalesce this VDI"""
        return self._calcExtraSpaceForCoalescing() + \
            vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf

    def _getAllSubtree(self):
        """Get self and all VDIs in the subtree of self as a flat list"""
        vdiList = [self]
        for child in self.children:
            vdiList.extend(child._getAllSubtree())
        return vdiList
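
    # Worked example (illustrative only): if the child and parent bitmaps OR
    # to 100 allocated blocks, the coalesced data size is
    # 100 * vhdutil.VHD_BLOCK_SIZE (100 * 2 MiB = 200 MiB for the standard
    # 2 MiB block size); the extra space needed is that figure plus bitmap and
    # header overheads, minus the parent's current physical size.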


class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        path = os.path.basename(path.strip())
        if not (path.endswith(vhdutil.FILE_EXTN_VHD) or
                path.endswith(vhdutil.FILE_EXTN_RAW)):
            return None
        uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace(
            vhdutil.FILE_EXTN_RAW, "")
        # TODO: validate UUID format
        return uuid

    def __init__(self, sr, uuid, raw):
        VDI.__init__(self, sr, uuid, raw)
        if self.raw:
            self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW)
        else:
            self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)

    @override
    def load(self, info=None) -> None:
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = vhdutil.getVHDInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        self.path = os.path.join(self.sr.path, "%s%s" %
                                 (self.uuid, vhdutil.FILE_EXTN_VHD))

    @override
    def rename(self, uuid) -> None:
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" %
                                   self.uuid)
        try:
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            self._sizeAllocated = vhdutil.getAllocatedSize(self.path)
        return self._sizeAllocated


class LVHDVDI(VDI):
    """Object representing a VDI in an LVHD SR"""

    JRN_ZERO = "zero" # journal entry type for zeroing out end of parent
    DRIVER_NAME_RAW = "aio"

    @override
    def load(self, info=None) -> None:
        # `info` is always set; the `None` default is only here to match the
        # parent method's signature.
        assert info, "No info given to LVHDVDI.load"
        self.parent = None
        self.children = []
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)

    @override
    @staticmethod
    def extractUuid(path):
        return lvhdutil.extractUuid(path)

    @override
    def getDriverName(self) -> str:
        if self.raw:
            return self.DRIVER_NAME_RAW
        return self.DRIVER_NAME_VHD

    def inflate(self, size):
        """inflate the LV containing the VHD to 'size'"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the VHD to minimum"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD())
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)
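
    # Illustration (not executed): a coalesce that needs `inc` extra bytes in
    # the parent first grows the parent's LV by exactly that amount
    # (parent.inflate(parent.sizeLV + inc)); once the coalesce finishes, the
    # parent is deflated back to its minimal physical size (see _doCoalesce
    # below).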

    @override
    def updateBlockInfo(self) -> Optional[str]:
        if not self.raw:
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid
        if self.raw:
            self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" %
                                   self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizeVHD(self) -> int:
        if self._sizeVHD == -1:
            self._loadInfoSizeVHD()
        return self._sizeVHD

    def _loadInfoSizeVHD(self):
        """Get the physical utilization of the VHD file. We do it individually
        (and not using the VHD batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDIs involved in
        coalescing."""
        if self.raw:
            return
        self._activate()
        self._sizeVHD = vhdutil.getSizePhys(self.path)
        if self._sizeVHD <= 0:
            raise util.SMException("phys size of %s = %d" %
                                   (self, self._sizeVHD))

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the VHD volume.
        """
        if self.raw:
            return
        self._activate()
        self._sizeAllocated = vhdutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        if self.raw:
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        if self.raw:
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        strType = "VHD"
        if self.raw:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVHD = ""
        if self._sizeVHD > 0:
            strSizeVHD = Util.num2str(self._sizeVHD)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                                             Util.num2str(self.sizeVirt),
                                             strSizeVHD, strSizeAllocated,
                                             Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        if not self.raw:
            VDI.validate(self, fast)

    @override
    def _doCoalesce(self) -> None:
        """LVHD parents must first be activated, inflated, and made writable"""
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            self.parent._loadInfoSizeVHD()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)

    @override
    def _setParent(self, parent) -> None:
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            vhdutil.setParent(self.path, parent.path, parent.raw)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
        self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                     (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update the vhd-parent with %s for child %s" % \
                     (self.parentUuid, self.uuid))

    def _activate(self):
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _ensureParentActiveForRelink(self) -> None:
        self.parent._activate()

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if not self.raw:
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log("  Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        unfinishedZero = False
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log("  ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, str(offset))
        Util.log("  Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                          VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline VHDs"""
        self._activate()
        jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid,
                                            vhdutil.MAX_VHD_JOURNAL_SIZE)
        try:
            lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid,
                                 size, jFile)
        finally:
            lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid)

    @override
    def _queryVHDBlocks(self) -> bytes:
        self._activate()
        return VDI._queryVHDBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if self.parent.raw:
            return 0 # raw parents are never deflated in the first place
        sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        return self._calcExtraSpaceForCoalescing() + \
            lvhdutil.calcSizeLV(self.getSizeVHD())
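
    # Illustration (not executed): if a leaf's LV is 10 GiB but its VHD only
    # occupies 6 GiB, deflating the leaf frees about 4 GiB, so the extra space
    # needed for a leaf coalesce is the inline-coalesce requirement minus that
    # difference; snapshot coalesce instead needs additional room roughly
    # equal to the leaf's current physical size, for the temporary snap leaf.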


class LinstorVDI(VDI):
    """Object representing a VDI in a LINSTOR SR"""

    VOLUME_LOCK_TIMEOUT = 30

    @override
    def load(self, info=None) -> None:
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)

        if not info:
            try:
                info = self.sr._vhdutil.get_vhd_info(self.uuid)
            except util.SMException:
                Util.log(
                    ' [VDI {}: failed to read VHD metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self.drbd_size = -1
        self._hidden = info.hidden
        self.scanError = False
        self.vdi_type = vhdutil.VDI_TYPE_VHD

    @override
    def getSizeVHD(self, fetch=False) -> int:
        if self._sizeVHD < 0 or fetch:
            self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid)
        return self._sizeVHD

    def getDrbdSize(self, fetch=False):
        if self.drbd_size < 0 or fetch:
            self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid)
        return self.drbd_size

    @override
    def getAllocatedSize(self) -> int:
        if self._sizeAllocated == -1:
            if not self.raw:
                self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid)
        return self._sizeAllocated

    def inflate(self, size):
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can change if the volume is resized
            # by the user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizeVHD = self.getSizeVHD(fetch=True)
            self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        if not self.raw:
            self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type))

    @override
    def rename(self, uuid) -> None:
        Util.log('Renaming {} -> {} (path={})'.format(
            self.uuid, uuid, self.path
        ))
        self.sr._linstor.update_volume_uuid(self.uuid, uuid)
        VDI.rename(self, uuid)

    @override
    def delete(self) -> None:
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

    @override
    def validate(self, fast=False) -> None:
        if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast):
            raise util.SMException('VHD {} corrupted'.format(self))

    @override
    def pause(self, failfast=False) -> None:
        self.sr._linstor.ensure_volume_is_not_locked(
            self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorVDI, self).pause(failfast)

    @override
    def coalesce(self) -> int:
        # Note: we raise `SMException` here to skip the current coalesce in
        # case of failure; with any other exception type the subsequent
        # coalesce calls would not be executed.
        return self.sr._vhdutil.force_coalesce(self.path) * 512

    @override
    def getParent(self) -> str:
        return self.sr._vhdutil.get_parent(
            self.sr._linstor.get_volume_uuid_from_device_path(self.path)
        )

    @override
    def repair(self, parent_uuid) -> None:
        self.sr._vhdutil.force_repair(
            self.sr._linstor.get_device_path(parent_uuid)
        )

    @override
    def _relinkSkip(self) -> None:
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                '  Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

    @override
    def _setParent(self, parent) -> None:
        self.sr._linstor.get_device_path(self.uuid)
        self.sr._vhdutil.force_parent(self.path, parent.path)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                     (self.uuid, self.parentUuid))
        except:
            Util.log("Failed to update %s with vhd-parent field %s" % \
                     (self.uuid, self.parentUuid))

    @override
    def _doCoalesce(self) -> None:
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            self.parent.deflate()

    def _activateChain(self):
        vdi = self
        while vdi:
            try:
                # We only need to check that the device path of each VDI in
                # the chain exists and is accessible; the returned path
                # itself is not used.
                self.sr._linstor.get_device_path(vdi.uuid)
            except Exception as e:
                # Use SMException to skip the coalesce.
                # Otherwise the GC is stopped...
                raise util.SMException(str(e))
            vdi = vdi.parent

    @override
    def _setHidden(self, hidden=True) -> None:
        HIDDEN_TAG = 'hidden'

        if self.raw:
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)
1774 @override
1775 def _increaseSizeVirt(self, size, atomic=True):
1776 if self.raw:
1777 offset = self.drbd_size
1778 if self.sizeVirt < size:
1779 oldSize = self.drbd_size
1780 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
1781 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
1782 self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
1783 offset = oldSize
1784 unfinishedZero = False
1785 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
1786 if jval:
1787 unfinishedZero = True
1788 offset = int(jval)
1789 length = self.drbd_size - offset
1790 if not length:
1791 return
1793 if unfinishedZero:
1794 Util.log(" ==> Redoing unfinished zeroing out")
1795 else:
1796 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
1797 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
1798 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
1799 func = lambda: util.zeroOut(self.path, offset, length)
1800 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
1801 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
1802 return
1804 if self.sizeVirt >= size:
1805 return
1806 Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
1807 (self, Util.num2str(self.sizeVirt), Util.num2str(size)))
1809 msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024
1810 if size <= msize:
1811 self.sr._vhdutil.set_size_virt_fast(self.path, size)
1812 else:
1813 if atomic:
1814 vdiList = self._getAllSubtree()
1815 self.sr.lock()
1816 try:
1817 self.sr.pauseVDIs(vdiList)
1818 try:
1819 self._setSizeVirt(size)
1820 finally:
1821 self.sr.unpauseVDIs(vdiList)
1822 finally:
1823 self.sr.unlock()
1824 else:
1825 self._setSizeVirt(size)
1827 self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid)
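# A minimal sketch (editor's addition, not part of the original file) of the
# crash-safe zeroing protocol implemented above for raw volumes: the ZERO
# journal entry records the offset at which zeroing started, so an
# interrupted GC run can resume from that offset instead of leaving the
# extended region partially zeroed:
#
#   jval = journaler.get(LinstorJournaler.ZERO, uuid)
#   offset = int(jval) if jval else old_size        # resume or start point
#   if not jval:
#       journaler.create(LinstorJournaler.ZERO, uuid, str(offset))
#   util.zeroOut(path, offset, new_size - offset)   # run abortable in practice
#   journaler.remove(LinstorJournaler.ZERO, uuid)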
1829 @override
1830 def _setSizeVirt(self, size) -> None:
1831 jfile = self.uuid + '-jvhd'
1832 self.sr._linstor.create_volume(
1833 jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile
1834 )
1835 try:
1836 self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type))
1837 self.sr._vhdutil.set_size_virt(size, jfile)
1838 finally:
1839 try:
1840 self.sr._linstor.destroy_volume(jfile)
1841 except Exception:
1842 # We can ignore it, in any case this volume is not persistent.
1843 pass
1845 @override
1846 def _queryVHDBlocks(self) -> bytes:
1847 return self.sr._vhdutil.get_block_bitmap(self.uuid)
1849 def _inflateParentForCoalesce(self):
1850 if self.parent.raw:
1851 return
1852 inc = self._calcExtraSpaceForCoalescing()
1853 if inc > 0:
1854 self.parent.inflate(self.parent.getDrbdSize() + inc)
1856 @override
1857 def _calcExtraSpaceForCoalescing(self) -> int:
1858 if self.parent.raw:
1859 return 0
1860 size_coalesced = LinstorVhdUtil.compute_volume_size(
1861 self._getCoalescedSizeData(), self.vdi_type
1862 )
1863 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
1864 return size_coalesced - self.parent.getDrbdSize()
1866 @override
1867 def _calcExtraSpaceForLeafCoalescing(self) -> int:
1868 assert self.getDrbdSize() > 0
1869 assert self.getSizeVHD() > 0
1870 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
1871 assert deflate_diff >= 0
1872 return self._calcExtraSpaceForCoalescing() - deflate_diff
1874 @override
1875 def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
1876 assert self.getSizeVHD() > 0
1877 return self._calcExtraSpaceForCoalescing() + \
1878 LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
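# Worked example (illustrative numbers only) of the three space estimates
# above: if coalescing this VDI's data into its parent would produce a
# 10 GiB VHD while the parent's DRBD volume is currently 8 GiB, a plain
# coalesce needs 10 - 8 = 2 GiB. A leaf-coalesce can first deflate this VDI:
# with a 6 GiB DRBD volume backing a VHD that only needs 5 GiB (rounded up),
# 1 GiB is reclaimed, leaving 2 - 1 = 1 GiB needed. A snapshot-coalesce
# additionally keeps the 5 GiB child VHD alive, so it needs 2 + 5 = 7 GiB.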
1880################################################################################
1881#
1882# SR
1883#
1884class SR(object):
1885 class LogFilter:
1886 def __init__(self, sr):
1887 self.sr = sr
1888 self.stateLogged = False
1889 self.prevState = {}
1890 self.currState = {}
1892 def logState(self):
1893 changes = ""
1894 self.currState.clear()
1895 for vdi in self.sr.vdiTrees:
1896 self.currState[vdi.uuid] = self._getTreeStr(vdi)
1897 if not self.prevState.get(vdi.uuid) or \
1898 self.prevState[vdi.uuid] != self.currState[vdi.uuid]:
1899 changes += self.currState[vdi.uuid]
1901 for uuid in self.prevState:
1902 if not self.currState.get(uuid):
1903 changes += "Tree %s gone\n" % uuid
1905 result = "SR %s (%d VDIs in %d VHD trees): " % \
1906 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees))
1908 if len(changes) > 0:
1909 if self.stateLogged:
1910 result += "showing only VHD trees that changed:"
1911 result += "\n%s" % changes
1912 else:
1913 result += "no changes"
1915 for line in result.split("\n"):
1916 Util.log("%s" % line)
1917 self.prevState.clear()
1918 for key, val in self.currState.items():
1919 self.prevState[key] = val
1920 self.stateLogged = True
1922 def logNewVDI(self, uuid):
1923 if self.stateLogged:
1924 Util.log("Found new VDI when scanning: %s" % uuid)
1926 def _getTreeStr(self, vdi, indent=8):
1927 treeStr = "%s%s\n" % (" " * indent, vdi)
1928 for child in vdi.children:
1929 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT)
1930 return treeStr
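# Illustrative output of _getTreeStr (the exact per-node text depends on
# VDI.__str__ and is assumed here): a parent with two children renders as one
# line per node, indented by VDI.STR_TREE_INDENT per level, e.g.:
#
#         ab12[VHD](10GiB)
#             cd34[VHD](10GiB)
#             ef56[VHD](10GiB)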
1932 TYPE_FILE = "file"
1933 TYPE_LVHD = "lvhd"
1934 TYPE_LINSTOR = "linstor"
1935 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]
1937 LOCK_RETRY_INTERVAL = 3
1938 LOCK_RETRY_ATTEMPTS = 20
1939 LOCK_RETRY_ATTEMPTS_LOCK = 100
1941 SCAN_RETRY_ATTEMPTS = 3
1943 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM)
1944 TMP_RENAME_PREFIX = "OLD_"
1946 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
1947 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"
1949 @staticmethod
1950 def getInstance(uuid, xapiSession, createLock=True, force=False):
1951 xapi = XAPI(xapiSession, uuid)
1952 type = normalizeType(xapi.srRecord["type"])
1953 if type == SR.TYPE_FILE:
1954 return FileSR(uuid, xapi, createLock, force)
1955 elif type == SR.TYPE_LVHD:
1956 return LVHDSR(uuid, xapi, createLock, force)
1957 elif type == SR.TYPE_LINSTOR:
1958 return LinstorSR(uuid, xapi, createLock, force)
1959 raise util.SMException("SR type %s not recognized" % type)
1961 def __init__(self, uuid, xapi, createLock, force):
1962 self.logFilter = self.LogFilter(self)
1963 self.uuid = uuid
1964 self.path = ""
1965 self.name = ""
1966 self.vdis = {}
1967 self.vdiTrees = []
1968 self.journaler = None
1969 self.xapi = xapi
1970 self._locked = 0
1971 self._srLock = None
1972 if createLock:
1973 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
1974 else:
1975 Util.log("Requested no SR locking")
1976 self.name = self.xapi.srRecord["name_label"]
1977 self._failedCoalesceTargets = []
1979 if not self.xapi.isPluggedHere():
1980 if force:
1981 Util.log("SR %s not attached on this host, ignoring" % uuid)
1982 else:
1983 if not self.wait_for_plug():
1984 raise util.SMException("SR %s not attached on this host" % uuid)
1986 if force:
1987 Util.log("Not checking if we are Master (SR %s)" % uuid)
1988 elif not self.xapi.isMaster():
1989 raise util.SMException("This host is NOT master, will not run")
1991 self.no_space_candidates = {}
1993 def msg_cleared(self, xapi_session, msg_ref):
1994 try:
1995 msg = xapi_session.xenapi.message.get_record(msg_ref)
1996 except XenAPI.Failure:
1997 return True
1999 return msg is None
2001 def check_no_space_candidates(self):
2002 xapi_session = self.xapi.getSession()
2004 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
2005 if self.no_space_candidates:
2006 if msg_id is None or self.msg_cleared(xapi_session, msg_id):
2007 util.SMlog("Could not coalesce due to a lack of space "
2008 f"in SR {self.uuid}")
2009 msg_body = ("Unable to perform data coalesce due to a lack "
2010 f"of space in SR {self.uuid}")
2011 msg_id = xapi_session.xenapi.message.create(
2012 'SM_GC_NO_SPACE',
2013 3,
2014 "SR",
2015 self.uuid,
2016 msg_body)
2017 xapi_session.xenapi.SR.remove_from_sm_config(
2018 self.xapi.srRef, VDI.DB_GC_NO_SPACE)
2019 xapi_session.xenapi.SR.add_to_sm_config(
2020 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)
2022 for candidate in self.no_space_candidates.values():
2023 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
2024 elif msg_id is not None:
2025 # Everything was coalescable, remove the message
2026 xapi_session.xenapi.SR.remove_from_sm_config(self.xapi.srRef, VDI.DB_GC_NO_SPACE)
2027 xapi_session.xenapi.message.destroy(msg_id)
2029 def clear_no_space_msg(self, vdi):
2030 msg_id = None
2031 try:
2032 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE)
2033 except XenAPI.Failure:
2034 pass
2036 self.no_space_candidates.pop(vdi.uuid, None)
2037 if msg_id is not None:
2038 vdi.delConfig(VDI.DB_GC_NO_SPACE)
2041 def wait_for_plug(self):
2042 for _ in range(1, 10):
2043 time.sleep(2)
2044 if self.xapi.isPluggedHere():
2045 return True
2046 return False
2048 def gcEnabled(self, refresh=True):
2049 if refresh:
2050 self.xapi.srRecord = \
2051 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef)
2052 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false":
2053 Util.log("GC is disabled for this SR, abort")
2054 return False
2055 return True
2057 def scan(self, force=False) -> None:
2058 """Scan the SR and load VDI info for each VDI. If called repeatedly,
2059 update VDI objects if they already exist"""
2060 pass
2062 def scanLocked(self, force=False):
2063 self.lock()
2064 try:
2065 self.scan(force)
2066 finally:
2067 self.unlock()
2069 def getVDI(self, uuid):
2070 return self.vdis.get(uuid)
2072 def hasWork(self):
2073 if len(self.findGarbage()) > 0:
2074 return True
2075 if self.findCoalesceable():
2076 return True
2077 if self.findLeafCoalesceable():
2078 return True
2079 if self.needUpdateBlockInfo():
2080 return True
2081 return False
2083 def findCoalesceable(self):
2084 """Find a coalesceable VDI. Return a vdi that should be coalesced
2085 (choosing one among all coalesceable candidates according to some
2086 criteria) or None if there is no VDI that could be coalesced"""
2088 candidates = []
2090 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
2091 if srSwitch == "false":
2092 Util.log("Coalesce disabled for this SR")
2093 return candidates
2095 # finish any VDI for which a relink journal entry exists first
2096 journals = self.journaler.getAll(VDI.JRN_RELINK)
2097 for uuid in journals:
2098 vdi = self.getVDI(uuid)
2099 if vdi and vdi not in self._failedCoalesceTargets:
2100 return vdi
2102 for vdi in self.vdis.values():
2103 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
2104 candidates.append(vdi)
2105 Util.log("%s is coalescable" % vdi.uuid)
2107 self.xapi.update_task_progress("coalescable", len(candidates))
2109 # pick one in the tallest tree
2110 treeHeight = dict()
2111 for c in candidates:
2112 height = c.getTreeRoot().getTreeHeight()
2113 if treeHeight.get(height):
2114 treeHeight[height].append(c)
2115 else:
2116 treeHeight[height] = [c]
2118 freeSpace = self.getFreeSpace()
2119 heights = list(treeHeight.keys())
2120 heights.sort(reverse=True)
2121 for h in heights:
2122 for c in treeHeight[h]:
2123 spaceNeeded = c._calcExtraSpaceForCoalescing()
2124 if spaceNeeded <= freeSpace:
2125 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
2126 self.clear_no_space_msg(c)
2127 return c
2128 else:
2129 self.no_space_candidates[c.uuid] = c
2130 Util.log("No space to coalesce %s (free space: %d)" % \
2131 (c, freeSpace))
2132 return None
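# Sketch of the selection policy above (hypothetical candidates): VDIs are
# bucketed by the height of the tree they belong to and the tallest trees are
# tried first, so the deepest chains shrink soonest. With candidates a
# (height 4), b (height 2) and c (height 4), a and c are considered before b;
# the first one whose _calcExtraSpaceForCoalescing() fits in free space is
# returned, and the others are remembered in no_space_candidates for
# SM_GC_NO_SPACE reporting.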
2134 def getSwitch(self, key):
2135 return self.xapi.srRecord["other_config"].get(key)
2137 def forbiddenBySwitch(self, switch, condition, fail_msg):
2138 srSwitch = self.getSwitch(switch)
2139 ret = False
2140 if srSwitch:
2141 ret = srSwitch == condition
2143 if ret:
2144 Util.log(fail_msg)
2146 return ret
2148 def leafCoalesceForbidden(self):
2149 return (self.forbiddenBySwitch(VDI.DB_COALESCE,
2150 "false",
2151 "Coalesce disabled for this SR") or
2152 self.forbiddenBySwitch(VDI.DB_LEAFCLSC,
2153 VDI.LEAFCLSC_DISABLED,
2154 "Leaf-coalesce disabled for this SR"))
2156 def findLeafCoalesceable(self):
2157 """Find leaf-coalesceable VDIs in each VHD tree"""
2159 candidates = []
2160 if self.leafCoalesceForbidden():
2161 return candidates
2163 self.gatherLeafCoalesceable(candidates)
2165 self.xapi.update_task_progress("coalescable", len(candidates))
2167 freeSpace = self.getFreeSpace()
2168 for candidate in candidates:
2169 # check the space constraints to see if leaf-coalesce is actually
2170 # feasible for this candidate
2171 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
2172 spaceNeededLive = spaceNeeded
2173 if spaceNeeded > freeSpace:
2174 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
2175 if candidate.canLiveCoalesce(self.getStorageSpeed()):
2176 spaceNeeded = spaceNeededLive
2178 if spaceNeeded <= freeSpace:
2179 Util.log("Leaf-coalesce candidate: %s" % candidate)
2180 self.clear_no_space_msg(candidate)
2181 return candidate
2182 else:
2183 Util.log("No space to leaf-coalesce %s (free space: %d)" % \
2184 (candidate, freeSpace))
2185 if spaceNeededLive <= freeSpace:
2186 Util.log("...but enough space if skip snap-coalesce")
2187 candidate.setConfig(VDI.DB_LEAFCLSC,
2188 VDI.LEAFCLSC_OFFLINE)
2189 self.no_space_candidates[candidate.uuid] = candidate
2191 return None
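# Worked example of the space check above (illustrative numbers): suppose a
# candidate needs 7 GiB for snapshot-coalescing but only 3 GiB is free. The
# cheaper live leaf-coalesce estimate, say 1 GiB, is then computed; if the
# candidate can live-coalesce at the measured storage speed, 1 GiB becomes
# the requirement and the candidate is returned. Otherwise it is marked
# LEAFCLSC_OFFLINE (enough space once the snapshot step is skipped) and
# recorded in no_space_candidates.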
2193 def gatherLeafCoalesceable(self, candidates):
2194 for vdi in self.vdis.values():
2195 if not vdi.isLeafCoalesceable():
2196 continue
2197 if vdi in self._failedCoalesceTargets:
2198 continue
2199 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
2200 Util.log("Skipping reset-on-boot %s" % vdi)
2201 continue
2202 if vdi.getConfig(vdi.DB_ALLOW_CACHING):
2203 Util.log("Skipping allow_caching=true %s" % vdi)
2204 continue
2205 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
2206 Util.log("Leaf-coalesce disabled for %s" % vdi)
2207 continue
2208 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
2209 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
2210 continue
2211 candidates.append(vdi)
2213 def coalesce(self, vdi, dryRun=False):
2214 """Coalesce vdi onto parent"""
2215 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
2216 if dryRun:
2217 return
2219 try:
2220 self._coalesce(vdi)
2221 except util.SMException as e:
2222 if isinstance(e, AbortException):
2223 self.cleanup()
2224 raise
2225 else:
2226 self._failedCoalesceTargets.append(vdi)
2227 Util.logException("coalesce")
2228 Util.log("Coalesce failed, skipping")
2229 self.cleanup()
2231 def coalesceLeaf(self, vdi, dryRun=False):
2232 """Leaf-coalesce vdi onto parent"""
2233 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
2234 if dryRun:
2235 return
2237 try:
2238 uuid = vdi.uuid
2239 try:
2240 # "vdi" object will no longer be valid after this call
2241 self._coalesceLeaf(vdi)
2242 finally:
2243 vdi = self.getVDI(uuid)
2244 if vdi:
2245 vdi.delConfig(vdi.DB_LEAFCLSC)
2246 except AbortException:
2247 self.cleanup()
2248 raise
2249 except (util.SMException, XenAPI.Failure) as e:
2250 self._failedCoalesceTargets.append(vdi)
2251 Util.logException("leaf-coalesce")
2252 Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
2253 self.cleanup()
2255 def garbageCollect(self, dryRun=False):
2256 vdiList = self.findGarbage()
2257 Util.log("Found %d VDIs for deletion:" % len(vdiList))
2258 for vdi in vdiList:
2259 Util.log(" %s" % vdi)
2260 if not dryRun:
2261 self.deleteVDIs(vdiList)
2262 self.cleanupJournals(dryRun)
2264 def findGarbage(self):
2265 vdiList = []
2266 for vdi in self.vdiTrees:
2267 vdiList.extend(vdi.getAllPrunable())
2268 return vdiList
2270 def deleteVDIs(self, vdiList) -> None:
2271 for vdi in vdiList:
2272 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
2273 raise AbortException("Aborting due to signal")
2274 Util.log("Deleting unlinked VDI %s" % vdi)
2275 self.deleteVDI(vdi)
2277 def deleteVDI(self, vdi) -> None:
2278 assert(len(vdi.children) == 0)
2279 del self.vdis[vdi.uuid]
2280 if vdi.parent:
2281 vdi.parent.children.remove(vdi)
2282 if vdi in self.vdiTrees:
2283 self.vdiTrees.remove(vdi)
2284 vdi.delete()
2286 def forgetVDI(self, vdiUuid) -> None:
2287 self.xapi.forgetVDI(self.uuid, vdiUuid)
2289 def pauseVDIs(self, vdiList) -> None:
2290 paused = []
2291 failed = False
2292 for vdi in vdiList:
2293 try:
2294 vdi.pause()
2295 paused.append(vdi)
2296 except:
2297 Util.logException("pauseVDIs")
2298 failed = True
2299 break
2301 if failed:
2302 self.unpauseVDIs(paused)
2303 raise util.SMException("Failed to pause VDIs")
2305 def unpauseVDIs(self, vdiList):
2306 failed = False
2307 for vdi in vdiList:
2308 try:
2309 vdi.unpause()
2310 except:
2311 Util.log("ERROR: Failed to unpause VDI %s" % vdi)
2312 failed = True
2313 if failed:
2314 raise util.SMException("Failed to unpause VDIs")
2316 def getFreeSpace(self) -> int:
2317 return 0
2319 def cleanup(self):
2320 Util.log("In cleanup")
2321 return
2323 @override
2324 def __str__(self) -> str:
2325 if self.name:
2326 ret = "%s ('%s')" % (self.uuid[0:4], self.name)
2327 else:
2328 ret = "%s" % self.uuid
2329 return ret
2331 def lock(self):
2332 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
2333 signal to avoid deadlocking (trying to acquire the SR lock while the
2334 lock is held by a process that is trying to abort us)"""
2335 if not self._srLock:
2336 return
2338 if self._locked == 0:
2339 abortFlag = IPCFlag(self.uuid)
2340 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
2341 if self._srLock.acquireNoblock():
2342 self._locked += 1
2343 return
2344 if abortFlag.test(FLAG_TYPE_ABORT):
2345 raise AbortException("Abort requested")
2346 time.sleep(SR.LOCK_RETRY_INTERVAL)
2347 raise util.SMException("Unable to acquire the SR lock")
2349 self._locked += 1
2351 def unlock(self):
2352 if not self._srLock:
2353 return
2354 assert(self._locked > 0)
2355 self._locked -= 1
2356 if self._locked == 0:
2357 self._srLock.release()
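# Minimal usage sketch (editor's addition) of the re-entrant SR lock above:
# nested lock()/unlock() pairs only move a counter, the outermost pair
# touches the underlying lock, and each blocked retry re-checks the abort
# flag so we cannot deadlock against a process that is trying to abort us:
#
#   sr.lock()
#   try:
#       sr.lock()            # nested: increments the counter only
#       try:
#           pass             # ... work on the SR ...
#       finally:
#           sr.unlock()      # counter back to 1, lock still held
#   finally:
#       sr.unlock()          # counter 0: underlying lock released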
2359 def needUpdateBlockInfo(self) -> bool:
2360 for vdi in self.vdis.values():
2361 if vdi.scanError or len(vdi.children) == 0:
2362 continue
2363 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2364 return True
2365 return False
2367 def updateBlockInfo(self) -> None:
2368 for vdi in self.vdis.values():
2369 if vdi.scanError or len(vdi.children) == 0:
2370 continue
2371 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
2372 vdi.updateBlockInfo()
2374 def cleanupCoalesceJournals(self):
2375 """Remove stale coalesce VDI indicators"""
2376 entries = self.journaler.getAll(VDI.JRN_COALESCE)
2377 for uuid, jval in entries.items():
2378 self.journaler.remove(VDI.JRN_COALESCE, uuid)
2380 def cleanupJournals(self, dryRun=False):
2381 """delete journal entries for non-existing VDIs"""
2382 for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
2383 entries = self.journaler.getAll(t)
2384 for uuid, jval in entries.items():
2385 if self.getVDI(uuid):
2386 continue
2387 if t == SR.JRN_CLONE:
2388 baseUuid, clonUuid = jval.split("_")
2389 if self.getVDI(baseUuid):
2390 continue
2391 Util.log(" Deleting stale '%s' journal entry for %s "
2392 "(%s)" % (t, uuid, jval))
2393 if not dryRun:
2394 self.journaler.remove(t, uuid)
2396 def cleanupCache(self, maxAge=-1) -> int:
2397 return 0
2399 def _coalesce(self, vdi):
2400 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
2401 # this means we had done the actual coalescing already and just
2402 # need to finish relinking and/or refreshing the children
2403 Util.log("==> Coalesce apparently already done: skipping")
2405 # The parent volume must be active for the parent change to occur.
2406 # The parent volume may become inactive if the host is rebooted.
2407 vdi._ensureParentActiveForRelink()
2408 else:
2409 # JRN_COALESCE is used to check which VDI is being coalesced in
2410 # order to decide whether to abort the coalesce. We remove the
2411 # journal as soon as the VHD coalesce step is done, because we
2412 # don't expect the rest of the process to take long
2413 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
2414 vdi._doCoalesce()
2415 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)
2417 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)
2419 # we now need to relink the children: lock the SR to prevent ops
2420 # like SM.clone from manipulating the VDIs we'll be relinking and
2421 # rescan the SR first in case the children changed since the last
2422 # scan
2423 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")
2425 self.lock()
2426 try:
2427 vdi.parent._tagChildrenForRelink()
2428 self.scan()
2429 vdi._relinkSkip()
2430 finally:
2431 self.unlock()
2432 # Reload the children to leave things consistent
2433 vdi.parent._reloadChildren(vdi)
2435 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
2436 self.deleteVDI(vdi)
2438 class CoalesceTracker:
2439 GRACE_ITERATIONS = 2
2440 MAX_ITERATIONS_NO_PROGRESS = 3
2441 MAX_ITERATIONS = 20
2442 MAX_INCREASE_FROM_MINIMUM = 1.2
2443 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \
2444 " --> Final size {finSize}"
2446 def __init__(self, sr):
2447 self.itsNoProgress = 0
2448 self.its = 0
2449 self.minSize = float("inf")
2450 self._history = []
2451 self.reason = ""
2452 self.startSize = None
2453 self.finishSize = None
2454 self.sr = sr
2455 self.grace_remaining = self.GRACE_ITERATIONS
2457 @property
2458 def history(self):
2459 return [x['msg'] for x in self._history]
2461 def moving_average(self):
2462 """
2463 Calculate a three point moving average
2464 """
2465 assert len(self._history) >= 3
2467 mv_average = sum(x['finalsize'] for x in self._history[-3:]) / 3
2468 util.SMlog(f'Calculated moving average as {mv_average}')
2469 return mv_average
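# Doctest-style sketch (editor's addition, hypothetical values) of the
# three-point moving average, given a tracker t:
#
#   >>> t._history = [{'finalsize': s, 'msg': ''} for s in (100, 80, 60, 40)]
#   >>> t.moving_average()  # mean of the last three recorded final sizes
#   60.0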
2471 def abortCoalesce(self, prevSize, curSize):
2472 self.its += 1
2473 self._history.append(
2474 {
2475 'finalsize': curSize,
2476 'msg': self.HISTORY_STRING.format(its=self.its,
2477 initSize=prevSize,
2478 finSize=curSize)
2479 }
2480 )
2482 self.finishSize = curSize
2484 if self.startSize is None:
2485 self.startSize = prevSize
2487 if curSize < self.minSize:
2488 self.minSize = curSize
2490 if prevSize < self.minSize:
2491 self.minSize = prevSize
2493 if self.its < 4:
2494 # Perform at least three iterations
2495 return False
2497 if prevSize >= curSize or curSize < self.moving_average():
2498 # We made progress
2499 return False
2500 else:
2501 self.itsNoProgress += 1
2502 Util.log("No progress, attempt:"
2503 " {attempt}".format(attempt=self.itsNoProgress))
2504 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid)
2506 if self.its > self.MAX_ITERATIONS:
2507 max_its = self.MAX_ITERATIONS
2508 self.reason = \
2509 "Max iterations ({max}) exceeded".format(max=max_its)
2510 return True
2512 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS:
2513 max_no_progress = self.MAX_ITERATIONS_NO_PROGRESS
2514 self.reason = \
2515 "No progress made for {max} iterations".format(max=max_no_progress)
2516 return True
2518 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize
2519 if curSize > maxSizeFromMin:
2520 self.grace_remaining -= 1
2521 if self.grace_remaining == 0:
2522 self.reason = "Unexpected bump in size," \
2523 " compared to minimum achieved"
2525 return True
2527 return False
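# Summary of the abort policy above: the first three iterations always pass;
# from then on an iteration counts as progress only if the VHD shrank or its
# new size is below the moving average. The coalesce is abandoned after
# MAX_ITERATIONS total iterations, after too many non-progressing iterations,
# or once the size bumps above MAX_INCREASE_FROM_MINIMUM times the minimum
# achieved and the GRACE_ITERATIONS budget is used up.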
2529 def printSizes(self):
2530 Util.log("Starting size was {size}"
2531 .format(size=self.startSize))
2532 Util.log("Final size was {size}"
2533 .format(size=self.finishSize))
2534 Util.log("Minimum size achieved was {size}"
2535 .format(size=self.minSize))
2537 def printReasoning(self):
2538 Util.log("Aborted coalesce")
2539 for hist in self.history:
2540 Util.log(hist)
2541 Util.log(self.reason)
2542 self.printSizes()
2544 def printSummary(self):
2545 if self.its == 0:
2546 return
2548 if self.reason:
2549 Util.log("Aborted coalesce")
2550 Util.log(self.reason)
2551 else:
2552 Util.log("Coalesce summary")
2554 Util.log(f"Performed {self.its} iterations")
2555 self.printSizes()
2558 def _coalesceLeaf(self, vdi):
2559 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
2560 complete due to external changes, namely vdi_delete and vdi_snapshot
2561 that alter leaf-coalescibility of vdi"""
2562 tracker = self.CoalesceTracker(self)
2563 while not vdi.canLiveCoalesce(self.getStorageSpeed()):
2564 prevSizeVHD = vdi.getSizeVHD()
2565 if not self._snapshotCoalesce(vdi):
2566 return False
2567 if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()):
2568 tracker.printReasoning()
2569 raise util.SMException("VDI {uuid} could not be coalesced"
2570 .format(uuid=vdi.uuid))
2571 tracker.printSummary()
2572 return self._liveLeafCoalesce(vdi)
2574 def calcStorageSpeed(self, startTime, endTime, vhdSize):
2575 speed = None
2576 total_time = endTime - startTime
2577 if total_time > 0:
2578 speed = float(vhdSize) / float(total_time)
2579 return speed
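# Worked example (hypothetical figures): coalescing a 2 GiB VHD in 100
# seconds gives calcStorageSpeed(t0, t0 + 100, 2 * 1024 ** 3)
# == 2147483648 / 100, about 21.5 MB/s, returned in bytes per second.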
2581 def writeSpeedToFile(self, speed):
2582 content = []
2583 speedFile = None
2584 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2585 self.lock()
2586 try:
2587 Util.log("Writing to file: {myfile}".format(myfile=path))
2588 lines = ""
2589 if not os.path.isfile(path):
2590 lines = str(speed) + "\n"
2591 else:
2592 speedFile = open(path, "r+")
2593 content = speedFile.readlines()
2594 content.append(str(speed) + "\n")
2595 if len(content) > N_RUNNING_AVERAGE:
2596 del content[0]
2597 lines = "".join(content)
2599 util.atomicFileWrite(path, VAR_RUN, lines)
2600 finally:
2601 if speedFile is not None:
2602 speedFile.close()
2603 Util.log("Closing file: {myfile}".format(myfile=path))
2604 self.unlock()
2606 def recordStorageSpeed(self, startTime, endTime, vhdSize):
2607 speed = self.calcStorageSpeed(startTime, endTime, vhdSize)
2608 if speed is None:
2609 return
2611 self.writeSpeedToFile(speed)
2613 def getStorageSpeed(self):
2614 speedFile = None
2615 path = SPEED_LOG_ROOT.format(uuid=self.uuid)
2616 self.lock()
2617 try:
2618 speed = None
2619 if os.path.isfile(path):
2620 speedFile = open(path)
2621 content = speedFile.readlines()
2622 try:
2623 content = [float(i) for i in content]
2624 except ValueError:
2625 Util.log("Something bad in the speed log:{log}".
2626 format(log=speedFile.readlines()))
2627 return speed
2629 if len(content):
2630 speed = sum(content) / float(len(content))
2631 if speed <= 0:
2632 # Defensive, should be impossible.
2633 Util.log("Bad speed: {speed} calculated for SR: {uuid}".
2634 format(speed=speed, uuid=self.uuid))
2635 speed = None
2636 else:
2637 Util.log("Speed file empty for SR: {uuid}".
2638 format(uuid=self.uuid))
2639 else:
2640 Util.log("Speed log missing for SR: {uuid}".
2641 format(uuid=self.uuid))
2642 return speed
2643 finally:
2644 if speedFile is not None:
2645 speedFile.close()
2646 self.unlock()
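# Sketch of the speed log consumed above: a plain-text file at
# /var/run/<sr-uuid>.speed_log holding at most N_RUNNING_AVERAGE (10) lines,
# one float (bytes/second) per line. getStorageSpeed() returns their mean;
# e.g. a file containing "1000.0\n3000.0\n" yields 2000.0, while a missing
# or empty file yields None.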
2648 def _snapshotCoalesce(self, vdi):
2649 # Note that because we are not holding any locks here, concurrent SM
2650 # operations may change this tree under our feet. In particular, vdi
2651 # can be deleted, or it can be snapshotted.
2652 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED)
2653 Util.log("Single-snapshotting %s" % vdi)
2654 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid)
2655 try:
2656 ret = self.xapi.singleSnapshotVDI(vdi)
2657 Util.log("Single-snapshot returned: %s" % ret)
2658 except XenAPI.Failure as e:
2659 if util.isInvalidVDI(e):
2660 Util.log("The VDI appears to have been concurrently deleted")
2661 return False
2662 raise
2663 self.scanLocked()
2664 tempSnap = vdi.parent
2665 if not tempSnap.isCoalesceable():
2666 Util.log("The VDI appears to have been concurrently snapshotted")
2667 return False
2668 Util.log("Coalescing parent %s" % tempSnap)
2669 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid)
2670 vhdSize = vdi.getSizeVHD()
2671 self._coalesce(tempSnap)
2672 if not vdi.isLeafCoalesceable():
2673 Util.log("The VDI tree appears to have been altered since")
2674 return False
2675 return True
2677 def _liveLeafCoalesce(self, vdi) -> bool:
2678 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
2679 self.lock()
2680 try:
2681 self.scan()
2682 if not self.getVDI(vdi.uuid):
2683 Util.log("The VDI appears to have been deleted meanwhile")
2684 return False
2685 if not vdi.isLeafCoalesceable():
2686 Util.log("The VDI is no longer leaf-coalesceable")
2687 return False
2689 uuid = vdi.uuid
2690 vdi.pause(failfast=True)
2691 try:
2692 try:
2693 # "vdi" object will no longer be valid after this call
2694 self._doCoalesceLeaf(vdi)
2695 except:
2696 Util.logException("_doCoalesceLeaf")
2697 self._handleInterruptedCoalesceLeaf()
2698 raise
2699 finally:
2700 vdi = self.getVDI(uuid)
2701 if vdi:
2702 vdi.ensureUnpaused()
2703 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
2704 if vdiOld:
2705 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2706 self.deleteVDI(vdiOld)
2707 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2708 finally:
2709 self.cleanup()
2710 self.unlock()
2711 self.logFilter.logState()
2712 return True
2714 def _doCoalesceLeaf(self, vdi):
2715 """Actual coalescing of a leaf VDI onto parent. Must be called in an
2716 offline/atomic context"""
2717 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
2718 self._prepareCoalesceLeaf(vdi)
2719 vdi.parent._setHidden(False)
2720 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
2721 vdi.validate(True)
2722 vdi.parent.validate(True)
2723 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
2724 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
2725 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
2726 Util.log("Leaf-coalesce forced, will not use timeout")
2727 timeout = 0
2728 vdi._coalesceVHD(timeout)
2729 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
2730 vdi.parent.validate(True)
2731 #vdi._verifyContents(timeout / 2)
2733 # rename
2734 vdiUuid = vdi.uuid
2735 oldName = vdi.fileName
2736 origParentUuid = vdi.parent.uuid
2737 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
2738 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
2739 vdi.parent.rename(vdiUuid)
2740 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
2741 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)
2743 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
2744 # garbage
2746 # update the VDI record
2747 if vdi.parent.raw:
2748 vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW)
2749 vdi.parent.delConfig(VDI.DB_VHD_BLOCKS)
2750 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)
2752 self._updateNode(vdi)
2754 # delete the obsolete leaf & inflate the parent (in that order, to
2755 # minimize free space requirements)
2756 parent = vdi.parent
2757 vdi._setHidden(True)
2758 vdi.parent.children = []
2759 vdi.parent = None
2761 if parent.parent is None:
2762 parent.delConfig(VDI.DB_VHD_PARENT)
2764 extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
2765 freeSpace = self.getFreeSpace()
2766 if freeSpace < extraSpace:
2767 # don't delete unless we need the space: deletion is time-consuming
2768 # because it requires contacting the slaves, and we're paused here
2769 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
2770 self.deleteVDI(vdi)
2771 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
2773 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
2774 self.journaler.remove(VDI.JRN_LEAF, vdiUuid)
2776 self.forgetVDI(origParentUuid)
2777 self._finishCoalesceLeaf(parent)
2778 self._updateSlavesOnResize(parent)
2780 def _calcExtraSpaceNeeded(self, child, parent) -> int:
2781 assert(not parent.raw) # raw parents not supported
2782 extra = child.getSizeVHD() - parent.getSizeVHD()
2783 if extra < 0:
2784 extra = 0
2785 return extra
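# Illustrative example for the estimate above (hypothetical sizes): with a
# 6 GiB child VHD and a 4 GiB parent VHD, the extra space needed is
# 6 - 4 = 2 GiB; the obsolete leaf is only deleted inline (an expensive,
# slave-contacting step while we are paused) when free space is below that.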
2787 def _prepareCoalesceLeaf(self, vdi) -> None:
2788 pass
2790 def _updateNode(self, vdi) -> None:
2791 pass
2793 def _finishCoalesceLeaf(self, parent) -> None:
2794 pass
2796 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
2797 pass
2799 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None:
2800 pass
2802 def _updateSlavesOnResize(self, vdi) -> None:
2803 pass
2805 def _removeStaleVDIs(self, uuidsPresent) -> None:
2806 for uuid in list(self.vdis.keys()):
2807 if uuid not in uuidsPresent:
2808 Util.log("VDI %s disappeared since last scan" % \
2809 self.vdis[uuid])
2810 del self.vdis[uuid]
2812 def _handleInterruptedCoalesceLeaf(self) -> None:
2813 """An interrupted leaf-coalesce operation may leave the VHD tree in an
2814 inconsistent state. If the old-leaf VDI is still present, we revert the
2815 operation (in case the original error is persistent); otherwise we must
2816 finish the operation"""
2817 pass
2819 def _buildTree(self, force):
2820 self.vdiTrees = []
2821 for vdi in self.vdis.values():
2822 if vdi.parentUuid:
2823 parent = self.getVDI(vdi.parentUuid)
2824 if not parent:
2825 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX):
2826 self.vdiTrees.append(vdi)
2827 continue
2828 if force:
2829 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \
2830 (vdi.parentUuid, vdi.uuid))
2831 self.vdiTrees.append(vdi)
2832 continue
2833 else:
2834 raise util.SMException("Parent VDI %s of %s not " \
2835 "found" % (vdi.parentUuid, vdi.uuid))
2836 vdi.parent = parent
2837 parent.children.append(vdi)
2838 else:
2839 self.vdiTrees.append(vdi)
2842class FileSR(SR):
2843 TYPE = SR.TYPE_FILE
2844 CACHE_FILE_EXT = ".vhdcache"
2845 # cache cleanup actions
2846 CACHE_ACTION_KEEP = 0
2847 CACHE_ACTION_REMOVE = 1
2848 CACHE_ACTION_REMOVE_IF_INACTIVE = 2
2850 def __init__(self, uuid, xapi, createLock, force):
2851 SR.__init__(self, uuid, xapi, createLock, force)
2852 self.path = "/var/run/sr-mount/%s" % self.uuid
2853 self.journaler = fjournaler.Journaler(self.path)
2855 @override
2856 def scan(self, force=False) -> None:
2857 if not util.pathexists(self.path):
2858 raise util.SMException("directory %s not found!" % self.uuid)
2859 vhds = self._scan(force)
2860 for uuid, vhdInfo in vhds.items():
2861 vdi = self.getVDI(uuid)
2862 if not vdi:
2863 self.logFilter.logNewVDI(uuid)
2864 vdi = FileVDI(self, uuid, False)
2865 self.vdis[uuid] = vdi
2866 vdi.load(vhdInfo)
2867 uuidsPresent = list(vhds.keys())
2868 rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)]
2869 for rawName in rawList:
2870 uuid = FileVDI.extractUuid(rawName)
2871 uuidsPresent.append(uuid)
2872 vdi = self.getVDI(uuid)
2873 if not vdi:
2874 self.logFilter.logNewVDI(uuid)
2875 vdi = FileVDI(self, uuid, True)
2876 self.vdis[uuid] = vdi
2877 self._removeStaleVDIs(uuidsPresent)
2878 self._buildTree(force)
2879 self.logFilter.logState()
2880 self._handleInterruptedCoalesceLeaf()
2882 @override
2883 def getFreeSpace(self) -> int:
2884 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)
2886 @override
2887 def deleteVDIs(self, vdiList) -> None:
2888 rootDeleted = False
2889 for vdi in vdiList:
2890 if not vdi.parent:
2891 rootDeleted = True
2892 break
2893 SR.deleteVDIs(self, vdiList)
2894 if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
2895 self.xapi.markCacheSRsDirty()
2897 @override
2898 def cleanupCache(self, maxAge=-1) -> int:
2899 """Clean up IntelliCache cache files. Caches for leaf nodes are
2900 removed when the leaf node no longer exists or its allow-caching
2901 attribute is not set. Caches for parent nodes are removed when the
2902 parent node no longer exists or it hasn't been used in more than
2903 <maxAge> hours.
2904 Return number of caches removed.
2905 """
2906 numRemoved = 0
2907 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
2908 Util.log("Found %d cache files" % len(cacheFiles))
2909 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
2910 for cacheFile in cacheFiles:
2911 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
2912 action = self.CACHE_ACTION_KEEP
2913 rec = self.xapi.getRecordVDI(uuid)
2914 if not rec:
2915 Util.log("Cache %s: VDI doesn't exist" % uuid)
2916 action = self.CACHE_ACTION_REMOVE
2917 elif rec["managed"] and not rec["allow_caching"]:
2918 Util.log("Cache %s: caching disabled" % uuid)
2919 action = self.CACHE_ACTION_REMOVE
2920 elif not rec["managed"] and maxAge >= 0:
2921 lastAccess = datetime.datetime.fromtimestamp( \
2922 os.path.getatime(os.path.join(self.path, cacheFile)))
2923 if lastAccess < cutoff:
2924 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
2925 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE
2927 if action == self.CACHE_ACTION_KEEP:
2928 Util.log("Keeping cache %s" % uuid)
2929 continue
2931 lockId = uuid
2932 parentUuid = None
2933 if rec and rec["managed"]:
2934 parentUuid = rec["sm_config"].get("vhd-parent")
2935 if parentUuid:
2936 lockId = parentUuid
2938 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
2939 cacheLock.acquire()
2940 try:
2941 if self._cleanupCache(uuid, action):
2942 numRemoved += 1
2943 finally:
2944 cacheLock.release()
2945 return numRemoved
2947 def _cleanupCache(self, uuid, action):
2948 assert(action != self.CACHE_ACTION_KEEP)
2949 rec = self.xapi.getRecordVDI(uuid)
2950 if rec and rec["allow_caching"]:
2951 Util.log("Cache %s appears to have become valid" % uuid)
2952 return False
2954 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
2955 tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
2956 if tapdisk:
2957 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
2958 Util.log("Cache %s still in use" % uuid)
2959 return False
2960 Util.log("Shutting down tapdisk for %s" % fullPath)
2961 tapdisk.shutdown()
2963 Util.log("Deleting file %s" % fullPath)
2964 os.unlink(fullPath)
2965 return True
2967 def _isCacheFileName(self, name):
2968 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
2969 name.endswith(self.CACHE_FILE_EXT)
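# Example of the naming check above (hypothetical UUID): with
# Util.UUID_LEN == 36, "c0ffee00-0000-4000-8000-000000000001.vhdcache" is
# treated as a cache file, while "c0ffee00.vhdcache" (wrong length) and
# "c0ffee00-0000-4000-8000-000000000001.vhd" (wrong suffix) are not.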
2971 def _scan(self, force):
2972 for i in range(SR.SCAN_RETRY_ATTEMPTS):
2973 error = False
2974 pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD)
2975 vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid)
2976 for uuid, vhdInfo in vhds.items():
2977 if vhdInfo.error:
2978 error = True
2979 break
2980 if not error:
2981 return vhds
2982 Util.log("Scan error on attempt %d" % i)
2983 if force:
2984 return vhds
2985 raise util.SMException("Scan error")
2987 @override
2988 def deleteVDI(self, vdi) -> None:
2989 self._checkSlaves(vdi)
2990 SR.deleteVDI(self, vdi)
2992 def _checkSlaves(self, vdi):
2993 onlineHosts = self.xapi.getOnlineHosts()
2994 abortFlag = IPCFlag(self.uuid)
2995 for pbdRecord in self.xapi.getAttachedPBDs():
2996 hostRef = pbdRecord["host"]
2997 if hostRef == self.xapi._hostRef:
2998 continue
2999 if abortFlag.test(FLAG_TYPE_ABORT):
3000 raise AbortException("Aborting due to signal")
3001 try:
3002 self._checkSlave(hostRef, vdi)
3003 except XenAPI.Failure:
3004 if hostRef in onlineHosts:
3005 raise
3007 def _checkSlave(self, hostRef, vdi):
3008 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
3009 Util.log("Checking with slave: %s" % repr(call))
3010 _host = self.xapi.session.xenapi.host
3011 text = _host.call_plugin(*call)
3013 @override
3014 def _handleInterruptedCoalesceLeaf(self) -> None:
3015 entries = self.journaler.getAll(VDI.JRN_LEAF)
3016 for uuid, parentUuid in entries.items():
3017 fileList = os.listdir(self.path)
3018 childName = uuid + vhdutil.FILE_EXTN_VHD
3019 tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD
3020 parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD
3021 parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW
3022 parentPresent = (parentName1 in fileList or parentName2 in fileList)
3023 if parentPresent or tmpChildName in fileList:
3024 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3025 else:
3026 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3027 self.journaler.remove(VDI.JRN_LEAF, uuid)
3028 vdi = self.getVDI(uuid)
3029 if vdi:
3030 vdi.ensureUnpaused()
3032 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3033 Util.log("*** UNDO LEAF-COALESCE")
3034 parent = self.getVDI(parentUuid)
3035 if not parent:
3036 parent = self.getVDI(childUuid)
3037 if not parent:
3038 raise util.SMException("Neither %s nor %s found" % \
3039 (parentUuid, childUuid))
3040 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3041 parent.rename(parentUuid)
3042 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3044 child = self.getVDI(childUuid)
3045 if not child:
3046 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3047 if not child:
3048 raise util.SMException("Neither %s nor %s found" % \
3049 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3050 Util.log("Renaming child back to %s" % childUuid)
3051 child.rename(childUuid)
3052 Util.log("Updating the VDI record")
3053 child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
3054 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
3055 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3057 if child.isHidden():
3058 child._setHidden(False)
3059 if not parent.isHidden():
3060 parent._setHidden(True)
3061 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3062 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3063 Util.log("*** leaf-coalesce undo successful")
3064 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3065 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3067 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3068 Util.log("*** FINISH LEAF-COALESCE")
3069 vdi = self.getVDI(childUuid)
3070 if not vdi:
3071 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
3072 raise util.SMException("VDI %s not found" % childUuid)
3073 try:
3074 self.forgetVDI(parentUuid)
3075 except XenAPI.Failure:
3076 Util.logException('_finishInterruptedCoalesceLeaf')
3078 self._updateSlavesOnResize(vdi)
3079 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3080 Util.log("*** finished leaf-coalesce successfully")
3083class LVHDSR(SR):
3084 TYPE = SR.TYPE_LVHD
3085 SUBTYPES = ["lvhdoiscsi", "lvhdohba"]
3087 def __init__(self, uuid, xapi, createLock, force):
3088 SR.__init__(self, uuid, xapi, createLock, force)
3089 self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid)
3090 self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName)
3092 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
3093 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
3094 lvm_conf = other_conf.get('lvm-conf') if other_conf else None
3095 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)
3097 self.lvActivator = LVActivator(self.uuid, self.lvmCache)
3098 self.journaler = journaler.Journaler(self.lvmCache)
3100 @override
3101 def deleteVDI(self, vdi) -> None:
3102 if self.lvActivator.get(vdi.uuid, False):
3103 self.lvActivator.deactivate(vdi.uuid, False)
3104 self._checkSlaves(vdi)
3105 SR.deleteVDI(self, vdi)
3107 @override
3108 def forgetVDI(self, vdiUuid) -> None:
3109 SR.forgetVDI(self, vdiUuid)
3110 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
3111 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)
3113 @override
3114 def getFreeSpace(self) -> int:
3115 stats = lvutil._getVGstats(self.vgName)
3116 return stats['physical_size'] - stats['physical_utilisation']
3118 @override
3119 def cleanup(self):
3120 if not self.lvActivator.deactivateAll():
3121 Util.log("ERROR deactivating LVs while cleaning up")
3123 @override
3124 def needUpdateBlockInfo(self) -> bool:
3125 for vdi in self.vdis.values():
3126 if vdi.scanError or vdi.raw or len(vdi.children) == 0:
3127 continue
3128 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
3129 return True
3130 return False
3132 @override
3133 def updateBlockInfo(self) -> None:
3134 numUpdated = 0
3135 for vdi in self.vdis.values():
3136 if vdi.scanError or vdi.raw or len(vdi.children) == 0:
3137 continue
3138 if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
3139 vdi.updateBlockInfo()
3140 numUpdated += 1
3141 if numUpdated:
3142 # deactivate the LVs sooner rather than later: if we do not do it
3143 # now, by the time this thread gets to the deactivations another one
3144 # might have leaf-coalesced a node and deleted it, making the child
3145 # inherit the refcount value and preventing the correct decrement
3146 self.cleanup()
3148 @override
3149 def scan(self, force=False) -> None:
3150 vdis = self._scan(force)
3151 for uuid, vdiInfo in vdis.items():
3152 vdi = self.getVDI(uuid)
3153 if not vdi:
3154 self.logFilter.logNewVDI(uuid)
3155 vdi = LVHDVDI(self, uuid,
3156 vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW)
3157 self.vdis[uuid] = vdi
3158 vdi.load(vdiInfo)
3159 self._removeStaleVDIs(vdis.keys())
3160 self._buildTree(force)
3161 self.logFilter.logState()
3162 self._handleInterruptedCoalesceLeaf()
3164 def _scan(self, force):
3165 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3166 error = False
3167 self.lvmCache.refresh()
3168 vdis = lvhdutil.getVDIInfo(self.lvmCache)
3169 for uuid, vdiInfo in vdis.items():
3170 if vdiInfo.scanError:
3171 error = True
3172 break
3173 if not error:
3174 return vdis
3175 Util.log("Scan error, retrying (%d)" % i)
3176 if force:
3177 return vdis
3178 raise util.SMException("Scan error")
3180 @override
3181 def _removeStaleVDIs(self, uuidsPresent) -> None:
3182 for uuid in list(self.vdis.keys()):
3183 if uuid not in uuidsPresent:
3184 Util.log("VDI %s disappeared since last scan" % \
3185 self.vdis[uuid])
3186 del self.vdis[uuid]
3187 if self.lvActivator.get(uuid, False):
3188 self.lvActivator.remove(uuid, False)
3190 @override
3191 def _liveLeafCoalesce(self, vdi) -> bool:
3192 """If the parent is raw and the child was resized (virt. size), then
3193 we'll need to resize the parent, which can take a while due to zeroing
3194 out of the extended portion of the LV. Do it before pausing the child
3195 to avoid a protracted downtime"""
3196 if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt:
3197 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3198 vdi.parent._increaseSizeVirt(vdi.sizeVirt)
3200 return SR._liveLeafCoalesce(self, vdi)
3202 @override
3203 def _prepareCoalesceLeaf(self, vdi) -> None:
3204 vdi._activateChain()
3205 self.lvmCache.setReadonly(vdi.parent.fileName, False)
3206 vdi.deflate()
3207 vdi.inflateParentForCoalesce()
3209 @override
3210 def _updateNode(self, vdi) -> None:
3211 # fix the refcounts: the remaining node should inherit the binary
3212 # refcount from the leaf (because if it was online, it should remain
3213 # refcounted as such), but the normal refcount from the parent (because
3214 # this node is really the parent node) - minus 1 if it is online (since
3215 # non-leaf nodes increment their normal counts when they are online and
3216 # we are now a leaf, storing that 1 in the binary refcount).
3217 ns = lvhdutil.NS_PREFIX_LVM + self.uuid
3218 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
3219 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
3220 pCnt = pCnt - cBcnt
3221 assert(pCnt >= 0)
3222 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)
3224 @override
3225 def _finishCoalesceLeaf(self, parent) -> None:
3226 if not parent.isSnapshot() or parent.isAttachedRW():
3227 parent.inflateFully()
3228 else:
3229 parent.deflate()
3231 @override
3232 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3233 return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV
3235 @override
3236 def _handleInterruptedCoalesceLeaf(self) -> None:
3237 entries = self.journaler.getAll(VDI.JRN_LEAF)
3238 for uuid, parentUuid in entries.items():
3239 childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid
3240 tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
3241 self.TMP_RENAME_PREFIX + uuid
3242 parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid
3243 parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid
3244 parentPresent = (self.lvmCache.checkLV(parentLV1) or \
3245 self.lvmCache.checkLV(parentLV2))
3246 if parentPresent or self.lvmCache.checkLV(tmpChildLV):
3247 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3248 else:
3249 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3250 self.journaler.remove(VDI.JRN_LEAF, uuid)
3251 vdi = self.getVDI(uuid)
3252 if vdi:
3253 vdi.ensureUnpaused()
3255 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3256 Util.log("*** UNDO LEAF-COALESCE")
3257 parent = self.getVDI(parentUuid)
3258 if not parent:
3259 parent = self.getVDI(childUuid)
3260 if not parent:
3261 raise util.SMException("Neither %s nor %s found" % \
3262 (parentUuid, childUuid))
3263 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
3264 parent.rename(parentUuid)
3265 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)
3267 child = self.getVDI(childUuid)
3268 if not child:
3269 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3270 if not child:
3271 raise util.SMException("Neither %s nor %s found" % \
3272 (childUuid, self.TMP_RENAME_PREFIX + childUuid))
3273 Util.log("Renaming child back to %s" % childUuid)
3274 child.rename(childUuid)
3275 Util.log("Updating the VDI record")
3276 child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
3277 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
3278 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)
3280 # refcount (best effort - assume that it had succeeded if the
3281 # second rename succeeded; if not, this adjustment will be wrong,
3282 # leading to a non-deactivation of the LV)
3283 ns = lvhdutil.NS_PREFIX_LVM + self.uuid
3284 cCnt, cBcnt = RefCounter.check(child.uuid, ns)
3285 pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
3286 pCnt = pCnt + cBcnt
3287 RefCounter.set(parent.uuid, pCnt, 0, ns)
3288 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)
3290 parent.deflate()
3291 child.inflateFully()
3292 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
3293 if child.isHidden():
3294 child._setHidden(False)
3295 if not parent.isHidden():
3296 parent._setHidden(True)
3297 if not parent.lvReadonly:
3298 self.lvmCache.setReadonly(parent.fileName, True)
3299 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3300 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
3301 Util.log("*** leaf-coalesce undo successful")
3302 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
3303 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)
3305 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3306 Util.log("*** FINISH LEAF-COALESCE")
3307 vdi = self.getVDI(childUuid)
3308 if not vdi:
3309 raise util.SMException("VDI %s not found" % childUuid)
3310 vdi.inflateFully()
3311 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
3312 try:
3313 self.forgetVDI(parentUuid)
3314 except XenAPI.Failure:
3315 pass
3316 self._updateSlavesOnResize(vdi)
3317 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
3318 Util.log("*** finished leaf-coalesce successfully")
3320 def _checkSlaves(self, vdi):
3321 """Confirm with all slaves in the pool that 'vdi' is not in use. We
3322 try to check all slaves, including those that the Agent believes are
3323 offline, but ignore failures for offline hosts. This is to avoid cases
3324 where the Agent thinks a host is offline but the host is up."""
3325 args = {"vgName": self.vgName,
3326 "action1": "deactivateNoRefcount",
3327 "lvName1": vdi.fileName,
3328 "action2": "cleanupLockAndRefcount",
3329 "uuid2": vdi.uuid,
3330 "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid}
3331 onlineHosts = self.xapi.getOnlineHosts()
3332 abortFlag = IPCFlag(self.uuid)
3333 for pbdRecord in self.xapi.getAttachedPBDs():
3334 hostRef = pbdRecord["host"]
3335 if hostRef == self.xapi._hostRef:
3336 continue
3337 if abortFlag.test(FLAG_TYPE_ABORT):
3338 raise AbortException("Aborting due to signal")
3339 Util.log("Checking with slave %s (path %s)" % (
3340 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
3341 try:
3342 self.xapi.ensureInactive(hostRef, args)
3343 except XenAPI.Failure:
3344 if hostRef in onlineHosts:
3345 raise
3347 @override
3348 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
3349 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
3350 if not slaves:
3351 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
3352 child)
3353 return
3355 tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
3356 self.TMP_RENAME_PREFIX + child.uuid
3357 args = {"vgName": self.vgName,
3358 "action1": "deactivateNoRefcount",
3359 "lvName1": tmpName,
3360 "action2": "deactivateNoRefcount",
3361 "lvName2": child.fileName,
3362 "action3": "refresh",
3363 "lvName3": child.fileName,
3364 "action4": "refresh",
3365 "lvName4": parent.fileName}
3366 for slave in slaves:
3367 Util.log("Updating %s, %s, %s on slave %s" % \
3368 (tmpName, child.fileName, parent.fileName,
3369 self.xapi.getRecordHost(slave)['hostname']))
3370 text = self.xapi.session.xenapi.host.call_plugin( \
3371 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3372 Util.log("call-plugin returned: '%s'" % text)
3374 @override
3375 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
3376 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
3377 if not slaves:
3378 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
3379 return
3381 args = {"vgName": self.vgName,
3382 "action1": "deactivateNoRefcount",
3383 "lvName1": oldNameLV,
3384 "action2": "refresh",
3385 "lvName2": vdi.fileName,
3386 "action3": "cleanupLockAndRefcount",
3387 "uuid3": origParentUuid,
3388 "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid}
3389 for slave in slaves:
3390 Util.log("Updating %s to %s on slave %s" % \
3391 (oldNameLV, vdi.fileName,
3392 self.xapi.getRecordHost(slave)['hostname']))
3393 text = self.xapi.session.xenapi.host.call_plugin( \
3394 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
3395 Util.log("call-plugin returned: '%s'" % text)
3397 @override
3398 def _updateSlavesOnResize(self, vdi) -> None:
3399 uuids = [x.uuid for x in vdi.getAllLeaves()]
3400 slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
3401 if not slaves:
3402 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
3403 return
3404 lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName,
3405 vdi.fileName, vdi.uuid, slaves)
3408class LinstorSR(SR):
3409 TYPE = SR.TYPE_LINSTOR
3411 def __init__(self, uuid, xapi, createLock, force):
3412 if not LINSTOR_AVAILABLE:
3413 raise util.SMException(
3414 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
3415 )
3417 SR.__init__(self, uuid, xapi, createLock, force)
3418 self.path = LinstorVolumeManager.DEV_ROOT_PATH
3419 self._reloadLinstor(journaler_only=True)
3421 @override
3422 def deleteVDI(self, vdi) -> None:
3423 self._checkSlaves(vdi)
3424 SR.deleteVDI(self, vdi)
3426 @override
3427 def getFreeSpace(self) -> int:
3428 return self._linstor.max_volume_size_allowed
3430 @override
3431 def scan(self, force=False) -> None:
3432 all_vdi_info = self._scan(force)
3433 for uuid, vdiInfo in all_vdi_info.items():
3434 # When vdiInfo is None, the VDI is RAW.
3435 vdi = self.getVDI(uuid)
3436 if not vdi:
3437 self.logFilter.logNewVDI(uuid)
3438 vdi = LinstorVDI(self, uuid, not vdiInfo)
3439 self.vdis[uuid] = vdi
3440 if vdiInfo:
3441 vdi.load(vdiInfo)
3442 self._removeStaleVDIs(all_vdi_info.keys())
3443 self._buildTree(force)
3444 self.logFilter.logState()
3445 self._handleInterruptedCoalesceLeaf()
3447 @override
3448 def pauseVDIs(self, vdiList) -> None:
3449 self._linstor.ensure_volume_list_is_not_locked(
3450 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3451 )
3452 return super(LinstorSR, self).pauseVDIs(vdiList)
3454 def _reloadLinstor(self, journaler_only=False):
3455 session = self.xapi.session
3456 host_ref = util.get_this_host_ref(session)
3457 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid)
3459 pbd = util.find_my_pbd(session, host_ref, sr_ref)
3460 if pbd is None:
3461 raise util.SMException('Failed to find PBD')
3463 dconf = session.xenapi.PBD.get_device_config(pbd)
3464 group_name = dconf['group-name']
3466 controller_uri = get_controller_uri()
3467 self.journaler = LinstorJournaler(
3468 controller_uri, group_name, logger=util.SMlog
3469 )
3471 if journaler_only:
3472 return
3474 self._linstor = LinstorVolumeManager(
3475 controller_uri,
3476 group_name,
3477 repair=True,
3478 logger=util.SMlog
3479 )
3480 self._vhdutil = LinstorVhdUtil(session, self._linstor)
3482 def _scan(self, force):
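# A descriptive note (added): retry the scan up to SR.SCAN_RETRY_ATTEMPTS times;
# on persistent VDI-info errors either return the last (possibly incomplete)
# result when 'force' is set, or raise an SMException.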
3483 for i in range(SR.SCAN_RETRY_ATTEMPTS):
3484 self._reloadLinstor()
3485 error = False
3486 try:
3487 all_vdi_info = self._load_vdi_info()
3488 for uuid, vdiInfo in all_vdi_info.items():
3489 if vdiInfo and vdiInfo.error:
3490 error = True
3491 break
3492 if not error:
3493 return all_vdi_info
3494 Util.log('Scan error, retrying ({})'.format(i))
3495 except Exception as e:
3496 Util.log('Scan exception, retrying ({}): {}'.format(i, e))
3497 Util.log(traceback.format_exc())
3499 if force:
3500 return all_vdi_info
3501 raise util.SMException('Scan error')
3503 def _load_vdi_info(self):
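# A descriptive note (added): gather volume info and metadata from LINSTOR, drop
# volumes already marked DELETED_, and read VHD info (via MultiLinstorVhdUtil)
# only for VHD-type volumes; RAW volumes are recorded with a None info entry.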
3504 all_volume_info = self._linstor.get_volumes_with_info()
3505 volumes_metadata = self._linstor.get_volumes_with_metadata()
3507 all_vdi_info = {}
3508 pending_vdi_uuids = []
3510 def handle_fail(vdi_uuid, e):
3511 Util.log(f" [VDI {vdi_uuid}: failed to load VDI info]: {e}")
3512 info = vhdutil.VHDInfo(vdi_uuid)
3513 info.error = 1
3514 return info
3516 for vdi_uuid, volume_info in all_volume_info.items():
3517 try:
3518 volume_metadata = volumes_metadata[vdi_uuid]
3519 if not volume_info.name and not list(volume_metadata.items()):
3520 continue # Ignore it, probably deleted.
3522 if vdi_uuid.startswith('DELETED_'):
3523 # Assume it's really a RAW volume of a failed snap without VHD header/footer.
3524 # We must remove this VDI now without adding it to the VDI list;
3525 # otherwise `Relinking` calls and other actions could be launched on it,
3526 # which we want to avoid.
3527 Util.log('Deleting bad VDI {}'.format(vdi_uuid))
3529 self.lock()
3530 try:
3531 self._linstor.destroy_volume(vdi_uuid)
3532 try:
3533 self.forgetVDI(vdi_uuid)
3534 except:
3535 pass
3536 except Exception as e:
3537 Util.log('Cannot delete bad VDI: {}'.format(e))
3538 finally:
3539 self.unlock()
3540 continue
3542 vdi_type = volume_metadata.get(VDI_TYPE_TAG)
3543 if vdi_type == vhdutil.VDI_TYPE_VHD:
3544 pending_vdi_uuids.append(vdi_uuid)
3545 else:
3546 all_vdi_info[vdi_uuid] = None
3547 except Exception as e:
3548 all_vdi_info[vdi_uuid] = handle_fail(vdi_uuid, e)
3550 multi_vhdutil = MultiLinstorVhdUtil(self._linstor.uri, self._linstor.group_name)
3552 def load_info(vdi_uuid, vhdutil_instance):
3553 try:
3554 return vhdutil_instance.get_vhd_info(vdi_uuid)
3555 except Exception as e:
3556 return handle_fail(vdi_uuid, e)
3558 try:
3559 for vdiInfo in multi_vhdutil.run(load_info, pending_vdi_uuids):
3560 all_vdi_info[vdiInfo.uuid] = vdiInfo
3561 finally:
3562 del multi_vhdutil
3564 return all_vdi_info
3566 @override
3567 def _prepareCoalesceLeaf(self, vdi) -> None:
3568 vdi._activateChain()
3569 vdi.deflate()
3570 vdi._inflateParentForCoalesce()
3572 @override
3573 def _finishCoalesceLeaf(self, parent) -> None:
3574 if not parent.isSnapshot() or parent.isAttachedRW():
3575 parent.inflateFully()
3576 else:
3577 parent.deflate()
3579 @override
3580 def _calcExtraSpaceNeeded(self, child, parent) -> int:
3581 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize()
3583 def _hasValidDevicePath(self, uuid):
3584 try:
3585 self._linstor.get_device_path(uuid)
3586 except Exception:
3587 # TODO: Maybe log exception.
3588 return False
3589 return True
3591 @override
3592 def _liveLeafCoalesce(self, vdi) -> bool:
3593 self.lock()
3594 try:
3595 self._linstor.ensure_volume_is_not_locked(
3596 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT
3597 )
3598 return super(LinstorSR, self)._liveLeafCoalesce(vdi)
3599 finally:
3600 self.unlock()
3602 @override
3603 def _handleInterruptedCoalesceLeaf(self) -> None:
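# A descriptive note (added): journal-based crash recovery. For each leaf-coalesce
# journal entry, undo the coalesce if the renamed parent (or the temporarily
# renamed child) still has a device path, otherwise finish it; then drop the
# journal entry and unpause the VDI.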
3604 entries = self.journaler.get_all(VDI.JRN_LEAF)
3605 for uuid, parentUuid in entries.items():
3606 if self._hasValidDevicePath(parentUuid) or \
3607 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
3608 self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
3609 else:
3610 self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
3611 self.journaler.remove(VDI.JRN_LEAF, uuid)
3612 vdi = self.getVDI(uuid)
3613 if vdi:
3614 vdi.ensureUnpaused()
3616 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3617 Util.log('*** UNDO LEAF-COALESCE')
3618 parent = self.getVDI(parentUuid)
3619 if not parent:
3620 parent = self.getVDI(childUuid)
3621 if not parent:
3622 raise util.SMException(
3623 'Neither {} nor {} found'.format(parentUuid, childUuid)
3624 )
3625 Util.log(
3626 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
3627 )
3628 parent.rename(parentUuid)
3630 child = self.getVDI(childUuid)
3631 if not child:
3632 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
3633 if not child:
3634 raise util.SMException(
3635 'Neither {} nor {} found'.format(
3636 childUuid, self.TMP_RENAME_PREFIX + childUuid
3637 )
3638 )
3639 Util.log('Renaming child back to {}'.format(childUuid))
3640 child.rename(childUuid)
3641 Util.log('Updating the VDI record')
3642 child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
3643 child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
3645 # TODO: Maybe deflate here.
3647 if child.isHidden():
3648 child._setHidden(False)
3649 if not parent.isHidden():
3650 parent._setHidden(True)
3651 self._updateSlavesOnUndoLeafCoalesce(parent, child)
3652 Util.log('*** leaf-coalesce undo successful')
3654 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
3655 Util.log('*** FINISH LEAF-COALESCE')
3656 vdi = self.getVDI(childUuid)
3657 if not vdi:
3658 raise util.SMException('VDI {} not found'.format(childUuid))
3659 # TODO: Maybe inflate.
3660 try:
3661 self.forgetVDI(parentUuid)
3662 except XenAPI.Failure:
3663 pass
3664 self._updateSlavesOnResize(vdi)
3665 Util.log('*** finished leaf-coalesce successfully')
3667 def _checkSlaves(self, vdi):
3668 try:
3669 all_openers = self._linstor.get_volume_openers(vdi.uuid)
3670 for openers in all_openers.values():
3671 for opener in openers.values():
3672 if opener['process-name'] != 'tapdisk':
3673 raise util.SMException(
3674 'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
3675 )
3676 except LinstorVolumeManagerError as e:
3677 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
3678 raise
3681################################################################################
3682#
3683# Helpers
3684#
3685def daemonize():
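# A descriptive note (added): classic double fork. The caller wait()s on the first
# child and returns False; that child calls setsid() and forks again before
# exiting, so the surviving grandchild is not a session leader, has fds 0-2
# reopened on /dev/null, and returns True to carry on as the detached GC process.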
3686 pid = os.fork()
3687 if pid:
3688 os.waitpid(pid, 0)
3689 Util.log("New PID [%d]" % pid)
3690 return False
3691 os.chdir("/")
3692 os.setsid()
3693 pid = os.fork()
3694 if pid:
3695 Util.log("Will finish as PID [%d]" % pid)
3696 os._exit(0)
3697 for fd in [0, 1, 2]:
3698 try:
3699 os.close(fd)
3700 except OSError:
3701 pass
3702 # we need to fill those special fd numbers or pread won't work
3703 sys.stdin = open("/dev/null", 'r')
3704 sys.stderr = open("/dev/null", 'w')
3705 sys.stdout = open("/dev/null", 'w')
3706 # As we're a new process we need to clear the lock objects
3707 lock.Lock.clearAll()
3708 return True
3711def normalizeType(type):
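# A descriptive note (added): map the many concrete SR driver names down to the
# generic cleanup classes (LVHD, file-based, LINSTOR); anything else is rejected.
# For illustration, normalizeType("lvmoiscsi") and normalizeType("lvm") both
# return SR.TYPE_LVHD, while an unknown type raises util.SMException.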
3712 if type in LVHDSR.SUBTYPES:
3713 type = SR.TYPE_LVHD
3714 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]:
3715 # temporary while LVHD is symlinked as LVM
3716 type = SR.TYPE_LVHD
3717 if type in [
3718 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
3719 "moosefs", "xfs", "zfs", "largeblock"
3720 ]:
3721 type = SR.TYPE_FILE
3722 if type in ["linstor"]:
3723 type = SR.TYPE_LINSTOR
3724 if type not in SR.TYPES:
3725 raise util.SMException("Unsupported SR type: %s" % type)
3726 return type
3728GCPAUSE_DEFAULT_SLEEP = 5 * 60
3731def _gc_init_file(sr_uuid):
3732 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init')
3735def _create_init_file(sr_uuid):
3736 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
3737 with open(os.path.join(
3738 NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init'), 'w+') as f:
3739 f.write('1')
3742def _gcLoopPause(sr, dryRun=False, immediate=False):
3743 if immediate:
3744 return
3746 # Check to see if the GCPAUSE_FISTPOINT is active. If so, the fist
3747 # point will just return. Otherwise, fall back on an abortable sleep.
3749 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):
3751 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,  [3751 ↛ exit: didn't jump to the function exit]
3752 lambda *args: None)
3753 elif os.path.exists(_gc_init_file(sr.uuid)):
3754 def abortTest():
3755 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)
3757 # If time.sleep hangs we are in deep trouble; however, for
3758 # completeness we set the timeout of the abort thread to
3759 # 110% of GCPAUSE_DEFAULT_SLEEP.
3760 Util.log("GC active, about to go quiet")
3761 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),  [3761 ↛ exit: didn't run the lambda on line 3761]
3762 None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
3763 GCPAUSE_DEFAULT_SLEEP * 1.1)
3764 Util.log("GC active, quiet period ended")
3767def _gcLoop(sr, dryRun=False, immediate=False):
3768 if not lockGCActive.acquireNoblock():  [3768 ↛ 3769: the condition on line 3768 was never true]
3769 Util.log("Another GC instance already active, exiting")
3770 return
3772 # Check we're still attached after acquiring locks
3773 if not sr.xapi.isPluggedHere():
3774 Util.log("SR no longer attached, exiting")
3775 return
3777 # Clean up IntelliCache files
3778 sr.cleanupCache()
3780 # Track how many we do
3781 coalesced = 0
3782 task_status = "success"
3783 try:
3784 # Check if any work needs to be done
3785 if not sr.xapi.isPluggedHere():  [3785 ↛ 3786: the condition on line 3785 was never true]
3786 Util.log("SR no longer attached, exiting")
3787 return
3788 sr.scanLocked()
3789 if not sr.hasWork():
3790 Util.log("No work, exiting")
3791 return
3792 sr.xapi.create_task(
3793 "Garbage Collection",
3794 "Garbage collection for SR %s" % sr.uuid)
3795 _gcLoopPause(sr, dryRun, immediate=immediate)
3796 while True:
3797 if SIGTERM:
3798 Util.log("Term requested")
3799 return
3801 if not sr.xapi.isPluggedHere():  [3801 ↛ 3802: the condition on line 3801 was never true]
3802 Util.log("SR no longer attached, exiting")
3803 break
3804 sr.scanLocked()
3805 if not sr.hasWork():
3806 Util.log("No work, exiting")
3807 break
3809 if not lockGCRunning.acquireNoblock():  [3809 ↛ 3810: the condition on line 3809 was never true]
3810 Util.log("Unable to acquire GC running lock.")
3811 return
3812 try:
3813 if not sr.gcEnabled():  [3813 ↛ 3814: the condition on line 3813 was never true]
3814 break
3816 sr.xapi.update_task_progress("done", coalesced)
3818 sr.cleanupCoalesceJournals()
3819 # Create the init file here in case startup is waiting on it
3820 _create_init_file(sr.uuid)
3821 sr.scanLocked()
3822 sr.updateBlockInfo()
3824 howmany = len(sr.findGarbage())
3825 if howmany > 0:
3826 Util.log("Found %d orphaned vdis" % howmany)
3827 sr.lock()
3828 try:
3829 sr.garbageCollect(dryRun)
3830 finally:
3831 sr.unlock()
3832 sr.xapi.srUpdate()
3834 candidate = sr.findCoalesceable()
3835 if candidate:
3836 util.fistpoint.activate(
3837 "LVHDRT_finding_a_suitable_pair", sr.uuid)
3838 sr.coalesce(candidate, dryRun)
3839 sr.xapi.srUpdate()
3840 coalesced += 1
3841 continue
3843 candidate = sr.findLeafCoalesceable()
3844 if candidate:  [3844 ↛ 3851: the condition on line 3844 was never false]
3845 sr.coalesceLeaf(candidate, dryRun)
3846 sr.xapi.srUpdate()
3847 coalesced += 1
3848 continue
3850 finally:
3851 lockGCRunning.release()  [3851 ↛ 3856: the break on line 3814 wasn't executed]
3852 except:
3853 task_status = "failure"
3854 raise
3855 finally:
3856 sr.xapi.set_task_status(task_status)
3857 Util.log("GC process exiting, no work left")
3858 _create_init_file(sr.uuid)
3859 lockGCActive.release()
3862def _gc(session, srUuid, dryRun=False, immediate=False):
3863 init(srUuid)
3864 sr = SR.getInstance(srUuid, session)
3865 if not sr.gcEnabled(False):  [3865 ↛ 3866: the condition on line 3865 was never true]
3866 return
3868 try:
3869 _gcLoop(sr, dryRun, immediate=immediate)
3870 finally:
3871 sr.check_no_space_candidates()
3872 sr.cleanup()
3873 sr.logFilter.logState()
3874 del sr.xapi
3877def _abort(srUuid, soft=False):
3878 """Aborts a GC/coalesce.
3880 srUuid: the UUID of the SR whose GC/coalesce must be aborted
3881 soft: If set to True and there is a pending abort signal, the function
3882 doesn't do anything. If set to False, a new abort signal is issued.
3884 returns: If soft is set to False, we return True holding lockGCActive. If
3885 soft is set to True and an abort signal is already pending, we return False
3886 without holding lockGCActive. An exception is raised in case of error."""
3887 Util.log("=== SR %s: abort ===" % (srUuid))
3888 init(srUuid)
3889 if not lockGCActive.acquireNoblock():
3890 gotLock = False
3891 Util.log("Aborting currently-running instance (SR %s)" % srUuid)
3892 abortFlag = IPCFlag(srUuid)
3893 if not abortFlag.set(FLAG_TYPE_ABORT, soft):
3894 return False
3895 for i in range(SR.LOCK_RETRY_ATTEMPTS):
3896 gotLock = lockGCActive.acquireNoblock()
3897 if gotLock:
3898 break
3899 time.sleep(SR.LOCK_RETRY_INTERVAL)
3900 abortFlag.clear(FLAG_TYPE_ABORT)
3901 if not gotLock:
3902 raise util.CommandException(code=errno.ETIMEDOUT,
3903 reason="SR %s: error aborting existing process" % srUuid)
3904 return True
3907def init(srUuid):
3908 global lockGCRunning
3909 if not lockGCRunning:  [3909 ↛ 3910: the condition on line 3909 was never true]
3910 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
3911 global lockGCActive
3912 if not lockGCActive:  [3912 ↛ 3913: the condition on line 3912 was never true]
3913 lockGCActive = LockActive(srUuid)
3916class LockActive:
3917 """
3918 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
3919 if another process holds the SR lock.
3920 """
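# A descriptive note (added): briefly taking the SR lock around the gc_active
# attempt is what prevents gc_active from being acquired while another process
# holds the SR lock, e.g. a caller of gc_force(), which must hold the SR lock.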
3921 def __init__(self, srUuid):
3922 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
3923 self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid)
3925 def acquireNoblock(self):
3926 self._srLock.acquire()
3928 try:
3929 return self._lock.acquireNoblock()
3930 finally:
3931 self._srLock.release()
3933 def release(self):
3934 self._lock.release()
3937def usage():
3938 output = """Garbage collect and/or coalesce VHDs in a VHD-based SR
3940Parameters:
3941 -u --uuid UUID SR UUID
3942 and one of:
3943 -g --gc garbage collect, coalesce, and repeat while there is work
3944 -G --gc_force garbage collect once, aborting any current operations
3945 -c --clean_cache <max_age> clean up IntelliCache cache files older than
3946 max_age hours
3947 -a --abort abort any currently running operation (GC or coalesce)
3948 -q --query query the current state (GC'ing, coalescing or not running)
3949 -x --disable disable GC/coalesce (will be in effect until you exit)
3950 -t --debug see Debug below
3952Options:
3953 -b --background run in background (return immediately) (valid for -g only)
3954 -f --force continue in the presence of VHDs with errors (when doing
3955 GC, this might cause removal of any such VHDs) (only valid
3956 for -G) (DANGEROUS)
3958Debug:
3959 The --debug parameter enables manipulation of LVHD VDIs for debugging
3960 purposes. ** NEVER USE IT ON A LIVE VM **
3961 The following parameters are required:
3962 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
3963 "deflate".
3964 -v --vdi_uuid VDI UUID
3965 """
3966 #-d --dry-run don't actually perform any SR-modifying operations
3967 print(output)
3968 Util.log("(Invalid usage)")
3969 sys.exit(1)
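# Illustrative invocations (added; the SR UUID is a placeholder):
#   cleanup.py -u <SR_UUID> -g -b     kick off GC/coalesce in the background
#   cleanup.py -u <SR_UUID> -q        query whether GC/coalesce is running
#   cleanup.py -u <SR_UUID> -a        abort any running GC/coalesce operation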
3972##############################################################################
3973#
3974# API
3975#
3976def abort(srUuid, soft=False):
3977 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
3978 """
3979 if _abort(srUuid, soft):
3980 stop_gc_service(srUuid)
3981 Util.log("abort: releasing the process lock")
3982 lockGCActive.release()
3983 return True
3984 else:
3985 return False
3988def run_gc(session, srUuid, dryRun, immediate=False):
3989 try:
3990 _gc(session, srUuid, dryRun, immediate=immediate)
3991 return 0
3992 except AbortException:
3993 Util.log("Aborted")
3994 return 2
3995 except Exception:
3996 Util.logException("gc")
3997 Util.log("* * * * * SR %s: ERROR\n" % srUuid)
3998 return 1
4001def gc(session, srUuid, inBackground, dryRun=False):
4002 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
4003 immediately if inBackground=True.
4005 The following algorithm is used:
4006 1. If we are already GC'ing in this SR, return
4007 2. If we are already coalescing a VDI pair:
4008 a. Scan the SR and determine if the VDI pair is GC'able
4009 b. If the pair is not GC'able, return
4010 c. If the pair is GC'able, abort coalesce
4011 3. Scan the SR
4012 4. If there is nothing to collect, nor to coalesce, return
4013 5. If there is something to collect, GC all, then goto 3
4014 6. If there is something to coalesce, coalesce one pair, then goto 3
4015 """
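# A descriptive note (added): the scan/collect/coalesce loop itself lives in
# _gcLoop() (reached via run_gc/_gc); this wrapper only installs the SIGTERM
# handler and optionally daemonizes first.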
4016 Util.log("=== SR %s: gc ===" % srUuid)
4018 signal.signal(signal.SIGTERM, receiveSignal)
4020 if inBackground:
4021 if daemonize():  [4021 ↛ exit: the condition on line 4021 was never false]
4022 # we are now running in the background. Catch & log any errors
4023 # because there is no other way to propagate them back at this
4024 # point
4026 run_gc(None, srUuid, dryRun)
4027 os._exit(0)
4028 else:
4029 os._exit(run_gc(session, srUuid, dryRun, immediate=True))
4032def start_gc(session, sr_uuid):
4033 """
4034 This function tries to start a backgrounded GC session by re-invoking this
4035 script as a child process. If using the systemd version, call start_gc_service() instead.
4036 """
4037 # don't bother if an instance is already running (this is just an
4038 # optimization to reduce the overhead of spawning a new process if we
4039 # don't have to, but the process will check the lock anyway)
4040 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
4041 if not lockRunning.acquireNoblock():
4042 if should_preempt(session, sr_uuid):
4043 util.SMlog("Aborting currently-running coalesce of garbage VDI")
4044 try:
4045 if not abort(sr_uuid, soft=True):
4046 util.SMlog("The GC has already been scheduled to re-start")
4047 except util.CommandException as e:
4048 if e.code != errno.ETIMEDOUT:
4049 raise
4050 util.SMlog('failed to abort the GC')
4051 else:
4052 util.SMlog("A GC instance already running, not kicking")
4053 return
4054 else:
4055 lockRunning.release()
4057 util.SMlog(f"Starting GC file is {__file__}")
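# A descriptive note (added): re-exec this script with '-b' so the child
# daemonizes itself; subprocess.run() therefore returns as soon as the forked
# GC process has detached.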
4058 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
4059 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4061def _gc_service_cmd(sr_uuid, action, extra_args=None):
4062 """
4063 Build and run the systemctl command for the GC service using util.doexec.
4064 """
4065 sr_uuid_esc = sr_uuid.replace("-", "\\x2d")
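# A descriptive note (added): systemd escapes '-' in template instance names as
# '\x2d', so the SR UUID must be escaped the same way to address the right unit.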
4066 cmd = ["/usr/bin/systemctl", "--quiet"]
4067 if extra_args:
4068 cmd.extend(extra_args)
4069 cmd += [action, f"SMGC@{sr_uuid_esc}"]
4070 return util.doexec(cmd)
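# For illustration (added; hypothetical UUID), a non-blocking start request
# expands to roughly:
#   /usr/bin/systemctl --quiet --no-block start SMGC@<uuid with '-' escaped as \x2d>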
4073def start_gc_service(sr_uuid, wait=False):
4074 """
4075 This starts the templated systemd service which runs GC on the given SR UUID.
4076 If the service was already started, this is a no-op.
4078 Because the service is a one-shot with RemainAfterExit=no, when called with
4079 wait=True this will run the service synchronously and will not return until the
4080 run has finished. This is used to force a run of the GC instead of just kicking it
4081 in the background.
4082 """
4083 util.SMlog(f"Kicking SMGC@{sr_uuid}...")
4084 _gc_service_cmd(sr_uuid, "start", extra_args=None if wait else ["--no-block"])
4087def stop_gc_service(sr_uuid):
4088 """
4089 Stops the templated systemd service which runs GC on the given SR UUID.
4090 """
4091 util.SMlog(f"Stopping SMGC@{sr_uuid}...")
4092 (rc, _stdout, stderr) = _gc_service_cmd(sr_uuid, "stop")
4093 if rc != 0:  [4093 ↛ exit: the condition on line 4093 was never false]
4094 util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`")
4097def wait_for_completion(sr_uuid):
4098 while get_state(sr_uuid):
4099 time.sleep(5)
4102def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
4103 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
4104 the SR lock is held.
4105 The following algorithm is used:
4106 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
4107 2. Scan the SR
4108 3. GC
4109 4. return
4110 """
4111 Util.log("=== SR %s: gc_force ===" % srUuid)
4112 init(srUuid)
4113 sr = SR.getInstance(srUuid, session, lockSR, True)
4114 if not lockGCActive.acquireNoblock():
4115 abort(srUuid)
4116 else:
4117 Util.log("Nothing was running, clear to proceed")
4119 if force:
4120 Util.log("FORCED: will continue even if there are VHD errors")
4121 sr.scanLocked(force)
4122 sr.cleanupCoalesceJournals()
4124 try:
4125 sr.cleanupCache()
4126 sr.garbageCollect(dryRun)
4127 finally:
4128 sr.cleanup()
4129 sr.logFilter.logState()
4130 lockGCActive.release()
4133def get_state(srUuid):
4134 """Return whether GC/coalesce is currently running or not. This asks systemd for
4135 the state of the templated SMGC service and will return True if it is "activating"
4136 or "running" (for completeness, as in practice it will never achieve the latter state).
4137 """
4138 sr_uuid_esc = srUuid.replace("-", "\\x2d")
4139 cmd = ["/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"]
4140 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
4141 state = result.stdout.decode('utf-8').rstrip()
4142 if state == "activating" or state == "running":
4143 return True
4144 return False
4147def should_preempt(session, srUuid):
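# A descriptive note (added): a running GC/coalesce is only worth preempting when
# the VDI currently being coalesced (per the JRN_COALESCE journal entry) has
# itself become garbage, in which case finishing the coalesce would be wasted work.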
4148 sr = SR.getInstance(srUuid, session)
4149 entries = sr.journaler.getAll(VDI.JRN_COALESCE)
4150 if len(entries) == 0:
4151 return False
4152 elif len(entries) > 1:
4153 raise util.SMException("More than one coalesce entry: " + str(entries))
4154 sr.scanLocked()
4155 coalescedUuid = entries.popitem()[0]
4156 garbage = sr.findGarbage()
4157 for vdi in garbage:
4158 if vdi.uuid == coalescedUuid:
4159 return True
4160 return False
4163def get_coalesceable_leaves(session, srUuid, vdiUuids):
4164 coalesceable = []
4165 sr = SR.getInstance(srUuid, session)
4166 sr.scanLocked()
4167 for uuid in vdiUuids:
4168 vdi = sr.getVDI(uuid)
4169 if not vdi:
4170 raise util.SMException("VDI %s not found" % uuid)
4171 if vdi.isLeafCoalesceable():
4172 coalesceable.append(uuid)
4173 return coalesceable
4176def cache_cleanup(session, srUuid, maxAge):
4177 sr = SR.getInstance(srUuid, session)
4178 return sr.cleanupCache(maxAge)
4181def debug(sr_uuid, cmd, vdi_uuid):
4182 Util.log("Debug command: %s" % cmd)
4183 sr = SR.getInstance(sr_uuid, None)
4184 if not isinstance(sr, LVHDSR):
4185 print("Error: not an LVHD SR")
4186 return
4187 sr.scanLocked()
4188 vdi = sr.getVDI(vdi_uuid)
4189 if not vdi:
4190 print("Error: VDI %s not found" % vdi_uuid)
4191 return
4192 print("Running %s on SR %s" % (cmd, sr))
4193 print("VDI before: %s" % vdi)
4194 if cmd == "activate":
4195 vdi._activate()
4196 print("VDI file: %s" % vdi.path)
4197 if cmd == "deactivate":
4198 ns = lvhdutil.NS_PREFIX_LVM + sr.uuid
4199 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
4200 if cmd == "inflate":
4201 vdi.inflateFully()
4202 sr.cleanup()
4203 if cmd == "deflate":
4204 vdi.deflate()
4205 sr.cleanup()
4206 sr.scanLocked()
4207 print("VDI after: %s" % vdi)
4210def abort_optional_reenable(uuid):
4211 print("Disabling GC/coalesce for %s" % uuid)
4212 ret = _abort(uuid)
4213 input("Press enter to re-enable...")
4214 print("GC/coalesce re-enabled")
4215 lockGCRunning.release()
4216 if ret:
4217 lockGCActive.release()
4220##############################################################################
4221#
4222# CLI
4223#
4224def main():
4225 action = ""
4226 maxAge = 0
4227 uuid = ""
4228 background = False
4229 force = False
4230 dryRun = False
4231 debug_cmd = ""
4232 vdi_uuid = ""
4233 shortArgs = "gGc:aqxu:bfdt:v:"
4234 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
4235 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]
4237 try:
4238 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
4239 except getopt.GetoptError:
4240 usage()
4241 for o, a in opts:
4242 if o in ("-g", "--gc"):
4243 action = "gc"
4244 if o in ("-G", "--gc_force"):
4245 action = "gc_force"
4246 if o in ("-c", "--clean_cache"):
4247 action = "clean_cache"
4248 maxAge = int(a)
4249 if o in ("-a", "--abort"):
4250 action = "abort"
4251 if o in ("-q", "--query"):
4252 action = "query"
4253 if o in ("-x", "--disable"):
4254 action = "disable"
4255 if o in ("-u", "--uuid"):
4256 uuid = a
4257 if o in ("-b", "--background"):
4258 background = True
4259 if o in ("-f", "--force"):
4260 force = True
4261 if o in ("-d", "--dry-run"):
4262 Util.log("Dry run mode")
4263 dryRun = True
4264 if o in ("-t", "--debug"):
4265 action = "debug"
4266 debug_cmd = a
4267 if o in ("-v", "--vdi_uuid"):
4268 vdi_uuid = a
4270 if not action or not uuid:
4271 usage()
4272 if action == "debug" and not (debug_cmd and vdi_uuid) or \
4273 action != "debug" and (debug_cmd or vdi_uuid):
4274 usage()
4276 if action != "query" and action != "debug":
4277 print("All output goes to log")
4279 if action == "gc":
4280 gc(None, uuid, background, dryRun)
4281 elif action == "gc_force":
4282 gc_force(None, uuid, force, dryRun, True)
4283 elif action == "clean_cache":
4284 cache_cleanup(None, uuid, maxAge)
4285 elif action == "abort":
4286 abort(uuid)
4287 elif action == "query":
4288 print("Currently running: %s" % get_state(uuid))
4289 elif action == "disable":
4290 abort_optional_reenable(uuid)
4291 elif action == "debug":
4292 debug(uuid, debug_cmd, vdi_uuid)
4295 if __name__ == '__main__':  [4295 ↛ 4296: the condition on line 4295 was never true]
4296 main()