1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect COW-based SR's in the background 

19# 

20 

21from sm_typing import Any, Optional, List, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import lvmcache 

41import journaler 

42import fjournaler 

43import lock 

44import blktap2 

45import xs_errors 

46from refcounter import RefCounter 

47from ipc import IPCFlag 

48from lvmanager import LVActivator 

49from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

50from functools import reduce 

51from time import monotonic as _time 

52 

53from constants import NS_PREFIX_LVM, VG_LOCATION, VG_PREFIX 

54from cowutil import CowImageInfo, CowUtil, getCowUtil 

55from lvmcowutil import LV_PREFIX, LvmCowUtil 

56from vditype import VdiType, VdiTypeExtension, VDI_COW_TYPES, VDI_TYPE_TO_EXTENSION 

57 

58try: 

59 from linstorcowutil import LinstorCowUtil, MultiLinstorCowUtil 

60 from linstorjournaler import LinstorJournaler 

61 from linstorvolumemanager import get_controller_uri 

62 from linstorvolumemanager import LinstorVolumeManager 

63 from linstorvolumemanager import LinstorVolumeManagerError 

64 from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX 

65 

66 LINSTOR_AVAILABLE = True 

67except ImportError: 

68 LINSTOR_AVAILABLE = False 

69 

70# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not 

71# possible due to lvhd_stop_using_() not working correctly. However, we leave 

72# this option available through the explicit LEAFCLSC_FORCE flag in the VDI 

73# record for use by the offline tool (which makes the operation safe by pausing 

74# the VM first) 

75AUTO_ONLINE_LEAF_COALESCE_ENABLED = True 

76 

77FLAG_TYPE_ABORT = "abort" # flag to request aborting of GC/coalesce 

78 

79# process "lock", used simply as an indicator that a process already exists 

80# that is doing GC/coalesce on this SR (such a process holds the lock, and we 

81# check for the fact by trying the lock). 

82lockGCRunning = None 

83 

84# process "lock" to indicate that the GC process has been activated but may not 

85# yet be running, stops a second process from being started. 

86LOCK_TYPE_GC_ACTIVE = "gc_active" 

87lockGCActive = None 
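
# Illustrative sketch (not part of the GC logic): another process can detect
# a running GC by attempting a non-blocking acquire of the same lock. This
# assumes lock.Lock exposes acquireNoblock()/release(); the exact API and the
# sr_uuid variable are placeholders here.
#
#     probe = lock.Lock(LOCK_TYPE_GC_ACTIVE, sr_uuid)
#     if probe.acquireNoblock():
#         probe.release()   # nobody holds it: no GC has been activated
#     else:
#         pass              # held by another process: GC is active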

88 

89# Default coalesce error rate limit, in messages per minute. A zero value 

90# disables throttling, and a negative value disables error reporting. 

91DEFAULT_COALESCE_ERR_RATE = 1.0 / 60 
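
# Worked example: with the default rate of 1.0 / 60 messages per minute, the
# throttling logic in _reportCoalesceError() spaces messages by
# (1.0 / rate) * 60 seconds, i.e. (1.0 / (1.0 / 60)) * 60 = 3600 seconds,
# so at most one XenCenter message per hour per SR.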

92 

93COALESCE_LAST_ERR_TAG = 'last-coalesce-error' 

94COALESCE_ERR_RATE_TAG = 'coalesce-error-rate' 

95VAR_RUN = "/var/run/" 

96SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log" 

97 

98N_RUNNING_AVERAGE = 10 

99 

100NON_PERSISTENT_DIR = '/run/nonpersistent/sm' 

101 

102# Signal Handler 

103SIGTERM = False 

104 

105 

106class AbortException(util.SMException): 

107 pass 

108 

109class CancelException(util.SMException): 

110 pass 

111 

112def receiveSignal(signalNumber, frame): 

113 global SIGTERM 

114 

115 util.SMlog("GC: received SIGTERM") 

116 SIGTERM = True 

117 return 

118 

119 

120################################################################################ 

121# 

122# Util 

123# 

124class Util: 

125 RET_RC = 1 

126 RET_STDOUT = 2 

127 RET_STDERR = 4 

128 

129 UUID_LEN = 36 

130 

131 PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024} 

132 

133 @staticmethod 

134 def log(text) -> None: 

135 util.SMlog(text, ident="SMGC") 

136 

137 @staticmethod 

138 def logException(tag): 

139 info = sys.exc_info() 

140 if info[0] == SystemExit: 

141 # this should not be happening when catching "Exception", but it is 

142 sys.exit(0) 

143 tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2])) 

144 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

145 Util.log(" ***********************") 

146 Util.log(" * E X C E P T I O N *") 

147 Util.log(" ***********************") 

148 Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1])) 

149 Util.log(tb) 

150 Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*") 

151 

152 @staticmethod 

153 def doexec(args, expectedRC, inputtext=None, ret=None, log=True): 

154 "Execute a subprocess, then return its return code, stdout, stderr" 

155 proc = subprocess.Popen(args, 

156 stdin=subprocess.PIPE, \ 

157 stdout=subprocess.PIPE, \ 

158 stderr=subprocess.PIPE, \ 

159 shell=True, \ 

160 close_fds=True) 

161 (stdout, stderr) = proc.communicate(inputtext) 

162 stdout = str(stdout) 

163 stderr = str(stderr) 

164 rc = proc.returncode 

165 if log: 

166 Util.log("`%s`: %s" % (args, rc)) 

167 if not isinstance(expectedRC, list): 

168 expectedRC = [expectedRC] 

169 if rc not in expectedRC: 

170 reason = stderr.strip() 

171 if stdout.strip(): 

172 reason = "%s (stdout: %s)" % (reason, stdout.strip()) 

173 Util.log("Failed: %s" % reason) 

174 raise util.CommandException(rc, args, reason) 

175 

176 if ret == Util.RET_RC: 

177 return rc 

178 if ret == Util.RET_STDERR: 

179 return stderr 

180 return stdout 
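
    # Illustrative usage (hedged): since doexec runs with shell=True, the
    # command is typically passed as a single string, as _runTapdiskDiff()
    # does below. The command shown here is only an example.
    #
    #     out = Util.doexec("ls /var/run", 0)                      # stdout
    #     rc = Util.doexec("ls /var/run", [0, 2], ret=Util.RET_RC) # exit code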

181 

182 @staticmethod 

183 def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut, prefSig=signal.SIGKILL): 

184 """execute func in a separate thread and kill it if abortTest signals 

185 so""" 

186 abortSignaled = abortTest() # check now before we clear resultFlag 

187 resultFlag = IPCFlag(ns) 

188 resultFlag.clearAll() 

189 pid = os.fork() 

190 if pid: 

191 startTime = _time() 

192 try: 

193 while True: 

194 if resultFlag.test("success"): 

195 Util.log(" Child process completed successfully") 

196 resultFlag.clear("success") 

197 return 

198 if resultFlag.test("failure"): 

199 resultFlag.clear("failure") 

200 raise util.SMException("Child process exited with error") 

201 if abortTest() or abortSignaled or SIGTERM: 

202 os.killpg(pid, prefSig) 

203 raise AbortException("Aborting due to signal") 

204 if timeOut and _time() - startTime > timeOut: 

205 os.killpg(pid, prefSig) 

206 resultFlag.clearAll() 

207 raise util.SMException("Timed out") 

208 time.sleep(pollInterval) 

209 finally: 

210 wait_pid = 0 

211 rc = -1 

212 count = 0 

213 while wait_pid == 0 and count < 10: 

214 wait_pid, rc = os.waitpid(pid, os.WNOHANG) 

215 if wait_pid == 0: 

216 time.sleep(2) 

217 count += 1 

218 

219 if wait_pid == 0: 

220 Util.log("runAbortable: wait for process completion timed out") 

221 else: 

222 os.setpgrp() 

223 try: 

224 if func() == ret: 

225 resultFlag.set("success") 

226 else: 

227 resultFlag.set("failure") 

228 except Exception as e: 

229 Util.log("Child process failed with: (%s)" % e) 

230 resultFlag.set("failure") 

231 Util.logException("This exception has occurred") 

232 os._exit(0) 
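
    # Illustrative usage (hedged), mirroring how runAbortable is called later
    # in this file: run a long operation in a child process while polling an
    # abort flag; 'func' must return 'ret' on success. path, offset, length
    # and sr_uuid are placeholders.
    #
    #     abortTest = lambda: IPCFlag(sr_uuid).test(FLAG_TYPE_ABORT)
    #     Util.runAbortable(lambda: util.zeroOut(path, offset, length), True,
    #                       sr_uuid, abortTest, VDI.POLL_INTERVAL, 0)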

233 

234 @staticmethod 

235 def num2str(number): 

236 for prefix in ("G", "M", "K"): 

237 if number >= Util.PREFIX[prefix]: 

238 return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix) 

239 return "%s" % number 
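
    # Worked examples: num2str picks the largest prefix that fits, so
    # num2str(3 * 1024 ** 3) == "3.000G", num2str(2048) == "2.000K" and
    # num2str(512) == "512".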

240 

241 @staticmethod 

242 def numBits(val): 

243 count = 0 

244 while val: 

245 count += val & 1 

246 val = val >> 1 

247 return count 

248 

249 @staticmethod 

250 def countBits(bitmap1, bitmap2): 

251 """return bit count in the bitmap produced by ORing the two bitmaps""" 

252 len1 = len(bitmap1) 

253 len2 = len(bitmap2) 

254 lenLong = len1 

255 lenShort = len2 

256 bitmapLong = bitmap1 

257 if len2 > len1: 

258 lenLong = len2 

259 lenShort = len1 

260 bitmapLong = bitmap2 

261 

262 count = 0 

263 for i in range(lenShort): 

264 val = bitmap1[i] | bitmap2[i] 

265 count += Util.numBits(val) 

266 

267 for i in range(lenShort, lenLong): 

268 val = bitmapLong[i] 

269 count += Util.numBits(val) 

270 return count 
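
    # Worked example: countBits(b"\x0f", b"\xf0\x01") ORs the overlapping
    # bytes (0x0f | 0xf0 == 0xff -> 8 bits) and then counts the bits in the
    # tail of the longer bitmap (0x01 -> 1 bit), giving 9 in total.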

271 

272 @staticmethod 

273 def getThisScript(): 

274 thisScript = util.get_real_path(__file__) 

275 if thisScript.endswith(".pyc"): 

276 thisScript = thisScript[:-1] 

277 return thisScript 

278 

279 

280################################################################################ 

281# 

282# XAPI 

283# 

284class XAPI: 

285 USER = "root" 

286 PLUGIN_ON_SLAVE = "on-slave" 

287 

288 CONFIG_SM = 0 

289 CONFIG_OTHER = 1 

290 CONFIG_ON_BOOT = 2 

291 CONFIG_ALLOW_CACHING = 3 

292 

293 CONFIG_NAME = { 

294 CONFIG_SM: "sm-config", 

295 CONFIG_OTHER: "other-config", 

296 CONFIG_ON_BOOT: "on-boot", 

297 CONFIG_ALLOW_CACHING: "allow_caching" 

298 } 

299 

300 class LookupError(util.SMException): 

301 pass 

302 

303 @staticmethod 

304 def getSession(): 

305 session = XenAPI.xapi_local() 

306 session.xenapi.login_with_password(XAPI.USER, '', '', 'SM') 

307 return session 

308 

309 def __init__(self, session, srUuid): 

310 self.sessionPrivate = False 

311 self.session = session 

312 if self.session is None: 

313 self.session = self.getSession() 

314 self.sessionPrivate = True 

315 self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid) 

316 self.srRecord = self.session.xenapi.SR.get_record(self._srRef) 

317 self.hostUuid = util.get_this_host() 

318 self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid) 

319 self.task = None 

320 self.task_progress = {"coalescable": 0, "done": 0} 

321 

322 def __del__(self): 

323 if self.sessionPrivate: 

324 self.session.xenapi.session.logout() 

325 

326 @property 

327 def srRef(self): 

328 return self._srRef 

329 

330 def isPluggedHere(self): 

331 pbds = self.getAttachedPBDs() 

332 for pbdRec in pbds: 

333 if pbdRec["host"] == self._hostRef: 

334 return True 

335 return False 

336 

337 def poolOK(self): 

338 host_recs = self.session.xenapi.host.get_all_records() 

339 for host_ref, host_rec in host_recs.items(): 

340 if not host_rec["enabled"]: 

341 Util.log("Host %s not enabled" % host_rec["uuid"]) 

342 return False 

343 return True 

344 

345 def isMaster(self): 

346 if self.srRecord["shared"]: 

347 pool = list(self.session.xenapi.pool.get_all_records().values())[0] 

348 return pool["master"] == self._hostRef 

349 else: 

350 pbds = self.getAttachedPBDs() 

351 if len(pbds) < 1: 

352 raise util.SMException("Local SR not attached") 

353 elif len(pbds) > 1: 

354 raise util.SMException("Local SR multiply attached") 

355 return pbds[0]["host"] == self._hostRef 

356 

357 def getAttachedPBDs(self): 

358 """Return PBD records for all PBDs of this SR that are currently 

359 attached""" 

360 attachedPBDs = [] 

361 pbds = self.session.xenapi.PBD.get_all_records() 

362 for pbdRec in pbds.values(): 

363 if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]: 

364 attachedPBDs.append(pbdRec) 

365 return attachedPBDs 

366 

367 def getOnlineHosts(self): 

368 return util.get_online_hosts(self.session) 

369 

370 def ensureInactive(self, hostRef, args): 

371 text = self.session.xenapi.host.call_plugin( \ 

372 hostRef, self.PLUGIN_ON_SLAVE, "multi", args) 

373 Util.log("call-plugin returned: '%s'" % text) 

374 

375 def getRecordHost(self, hostRef): 

376 return self.session.xenapi.host.get_record(hostRef) 

377 

378 def _getRefVDI(self, uuid): 

379 return self.session.xenapi.VDI.get_by_uuid(uuid) 

380 

381 def getRefVDI(self, vdi): 

382 return self._getRefVDI(vdi.uuid) 

383 

384 def getRecordVDI(self, uuid): 

385 try: 

386 ref = self._getRefVDI(uuid) 

387 return self.session.xenapi.VDI.get_record(ref) 

388 except XenAPI.Failure: 

389 return None 

390 

391 def singleSnapshotVDI(self, vdi): 

392 return self.session.xenapi.VDI.snapshot(vdi.getRef(), 

393 {"type": "internal"}) 

394 

395 def forgetVDI(self, srUuid, vdiUuid): 

396 """Forget the VDI, but handle the case where the VDI has already been 

397 forgotten (i.e. ignore errors)""" 

398 try: 

399 vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid) 

400 self.session.xenapi.VDI.forget(vdiRef) 

401 except XenAPI.Failure: 

402 pass 

403 

404 def getConfigVDI(self, vdi, key): 

405 kind = vdi.CONFIG_TYPE[key] 

406 if kind == self.CONFIG_SM: 

407 cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef()) 

408 elif kind == self.CONFIG_OTHER: 

409 cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef()) 

410 elif kind == self.CONFIG_ON_BOOT: 

411 cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef()) 

412 elif kind == self.CONFIG_ALLOW_CACHING: 

413 cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef()) 

414 else: 

415 assert(False) 

416 Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg))) 

417 return cfg 

418 

419 def removeFromConfigVDI(self, vdi, key): 

420 kind = vdi.CONFIG_TYPE[key] 

421 if kind == self.CONFIG_SM: 

422 self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key) 

423 elif kind == self.CONFIG_OTHER: 

424 self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key) 

425 else: 

426 assert(False) 

427 

428 def addToConfigVDI(self, vdi, key, val): 

429 kind = vdi.CONFIG_TYPE[key] 

430 if kind == self.CONFIG_SM: 

431 self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val) 

432 elif kind == self.CONFIG_OTHER: 

433 self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val) 

434 else: 

435 assert(False) 

436 

437 def isSnapshot(self, vdi): 

438 return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef()) 

439 

440 def markCacheSRsDirty(self): 

441 sr_refs = self.session.xenapi.SR.get_all_records_where( \ 

442 'field "local_cache_enabled" = "true"') 

443 for sr_ref in sr_refs: 

444 Util.log("Marking SR %s dirty" % sr_ref) 

445 util.set_dirty(self.session, sr_ref) 

446 

447 def srUpdate(self): 

448 Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"]) 

449 abortFlag = IPCFlag(self.srRecord["uuid"]) 

450 task = self.session.xenapi.Async.SR.update(self._srRef) 

451 cancelTask = True 

452 try: 

453 for i in range(60): 

454 status = self.session.xenapi.task.get_status(task) 

455 if not status == "pending": 

456 Util.log("SR.update_asynch status changed to [%s]" % status) 

457 cancelTask = False 

458 return 

459 if abortFlag.test(FLAG_TYPE_ABORT): 

460 Util.log("Abort signalled during srUpdate, cancelling task...") 

461 try: 

462 self.session.xenapi.task.cancel(task) 

463 cancelTask = False 

464 Util.log("Task cancelled") 

465 except: 

466 pass 

467 return 

468 time.sleep(1) 

469 finally: 

470 if cancelTask: 

471 self.session.xenapi.task.cancel(task) 

472 self.session.xenapi.task.destroy(task) 

473 Util.log("Asynch srUpdate still running, but timeout exceeded.") 

474 

475 def update_task(self): 

476 self.session.xenapi.task.set_other_config( 

477 self.task, 

478 { 

479 "applies_to": self._srRef 

480 }) 

481 total = self.task_progress['coalescable'] + self.task_progress['done'] 

482 if (total > 0): 

483 self.session.xenapi.task.set_progress( 

484 self.task, float(self.task_progress['done']) / total) 

485 

486 def create_task(self, label, description): 

487 self.task = self.session.xenapi.task.create(label, description) 

488 self.update_task() 

489 

490 def update_task_progress(self, key, value): 

491 self.task_progress[key] = value 

492 if self.task: 

493 self.update_task() 

494 

495 def set_task_status(self, status): 

496 if self.task: 

497 self.session.xenapi.task.set_status(self.task, status) 

498 

499 

500################################################################################ 

501# 

502# VDI 

503# 

504class VDI(object): 

505 """Object representing a VDI of a COW-based SR""" 

506 

507 POLL_INTERVAL = 1 

508 POLL_TIMEOUT = 30 

509 DEVICE_MAJOR = 202 

510 

511 # config keys & values 

512 DB_VDI_PARENT = "vhd-parent" 

513 DB_VDI_TYPE = "vdi_type" 

514 DB_VDI_BLOCKS = "vhd-blocks" 

515 DB_VDI_PAUSED = "paused" 

516 DB_VDI_RELINKING = "relinking" 

517 DB_VDI_ACTIVATING = "activating" 

518 DB_GC = "gc" 

519 DB_COALESCE = "coalesce" 

520 DB_LEAFCLSC = "leaf-coalesce" # config key 

521 DB_GC_NO_SPACE = "gc_no_space" 

522 LEAFCLSC_DISABLED = "false" # set by user; means do not leaf-coalesce 

523 LEAFCLSC_FORCE = "force" # set by user; means skip snap-coalesce 

524 LEAFCLSC_OFFLINE = "offline" # set here for informational purposes: means 

525 # no space to snap-coalesce or unable to keep 

526 # up with VDI. This is not used by the SM, it 

527 # might be used by external components. 

528 DB_ONBOOT = "on-boot" 

529 ONBOOT_RESET = "reset" 

530 DB_ALLOW_CACHING = "allow_caching" 

531 

532 CONFIG_TYPE = { 

533 DB_VDI_PARENT: XAPI.CONFIG_SM, 

534 DB_VDI_TYPE: XAPI.CONFIG_SM, 

535 DB_VDI_BLOCKS: XAPI.CONFIG_SM, 

536 DB_VDI_PAUSED: XAPI.CONFIG_SM, 

537 DB_VDI_RELINKING: XAPI.CONFIG_SM, 

538 DB_VDI_ACTIVATING: XAPI.CONFIG_SM, 

539 DB_GC: XAPI.CONFIG_OTHER, 

540 DB_COALESCE: XAPI.CONFIG_OTHER, 

541 DB_LEAFCLSC: XAPI.CONFIG_OTHER, 

542 DB_ONBOOT: XAPI.CONFIG_ON_BOOT, 

543 DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING, 

544 DB_GC_NO_SPACE: XAPI.CONFIG_SM 

545 } 

546 

547 LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024 # bytes 

548 LIVE_LEAF_COALESCE_TIMEOUT = 10 # seconds 

549 TIMEOUT_SAFETY_MARGIN = 0.5 # extra margin when calculating 

550 # feasibility of leaf coalesce 

551 

552 JRN_RELINK = "relink" # journal entry type for relinking children 

553 JRN_COALESCE = "coalesce" # to communicate which VDI is being coalesced 

554 JRN_LEAF = "leaf" # used in coalesce-leaf 

555 

556 STR_TREE_INDENT = 4 

557 

558 def __init__(self, sr, uuid, vdi_type): 

559 self.sr = sr 

560 self.scanError = True 

561 self.uuid = uuid 

562 self.vdi_type = vdi_type 

563 self.fileName = "" 

564 self.parentUuid = "" 

565 self.sizeVirt = -1 

566 self._sizePhys = -1 

567 self._sizeAllocated = -1 

568 self._hidden = False 

569 self.parent = None 

570 self.children = [] 

571 self._vdiRef = None 

572 self.cowutil = getCowUtil(vdi_type) 

573 self._clearRef() 

574 

575 @staticmethod 

576 def extractUuid(path): 

577 raise NotImplementedError("Implement in sub class") 

578 

579 def load(self, info=None) -> None: 

580 """Load VDI info""" 

581 pass 

582 

583 def getDriverName(self) -> str: 

584 return self.vdi_type 

585 

586 def getRef(self): 

587 if self._vdiRef is None: 

588 self._vdiRef = self.sr.xapi.getRefVDI(self) 

589 return self._vdiRef 

590 

591 def getConfig(self, key, default=None): 

592 config = self.sr.xapi.getConfigVDI(self, key) 

593 if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING: 

594 val = config 

595 else: 

596 val = config.get(key) 

597 if val: 

598 return val 

599 return default 

600 

601 def setConfig(self, key, val): 

602 self.sr.xapi.removeFromConfigVDI(self, key) 

603 self.sr.xapi.addToConfigVDI(self, key, val) 

604 Util.log("Set %s = %s for %s" % (key, val, self)) 

605 

606 def delConfig(self, key): 

607 self.sr.xapi.removeFromConfigVDI(self, key) 

608 Util.log("Removed %s from %s" % (key, self)) 

609 

610 def ensureUnpaused(self): 

611 if self.getConfig(self.DB_VDI_PAUSED) == "true": 

612 Util.log("Unpausing VDI %s" % self) 

613 self.unpause() 

614 

615 def pause(self, failfast=False) -> None: 

616 if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid, 

617 self.uuid, failfast): 

618 raise util.SMException("Failed to pause VDI %s" % self) 

619 

620 def _report_tapdisk_unpause_error(self): 

621 try: 

622 xapi = self.sr.xapi.session.xenapi 

623 sr_ref = xapi.SR.get_by_uuid(self.sr.uuid) 

624 msg_name = "failed to unpause tapdisk" 

625 msg_body = "Failed to unpause tapdisk for VDI %s, " \ 

626 "VMs using this tapdisk have lost access " \ 

627 "to the corresponding disk(s)" % self.uuid 

628 xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body) 

629 except Exception as e: 

630 util.SMlog("failed to generate message: %s" % e) 

631 

632 def unpause(self): 

633 if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid, 

634 self.uuid): 

635 self._report_tapdisk_unpause_error() 

636 raise util.SMException("Failed to unpause VDI %s" % self) 

637 

638 def refresh(self, ignoreNonexistent=True): 

639 """Pause-unpause in one step""" 

640 self.sr.lock() 

641 try: 

642 try: 

643 if not blktap2.VDI.tap_refresh(self.sr.xapi.session, 

644 self.sr.uuid, self.uuid): 

645 self._report_tapdisk_unpause_error() 

646 raise util.SMException("Failed to refresh %s" % self) 

647 except XenAPI.Failure as e: 

648 if util.isInvalidVDI(e) and ignoreNonexistent: 

649 Util.log("VDI %s not found, ignoring" % self) 

650 return 

651 raise 

652 finally: 

653 self.sr.unlock() 

654 

655 def isSnapshot(self): 

656 return self.sr.xapi.isSnapshot(self) 

657 

658 def isAttachedRW(self): 

659 return util.is_attached_rw( 

660 self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef())) 

661 

662 def getVDIBlocks(self): 

663 val = self.updateBlockInfo() 

664 bitmap = zlib.decompress(base64.b64decode(val)) 

665 return bitmap 
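
    # Note (assumption drawn from the read path above): the value stored in
    # DB_VDI_BLOCKS is expected to be zlib-compressed before the base64
    # encoding done in updateBlockInfo(), i.e. _queryCowBlocks() is assumed
    # to return already-compressed data from the cowutil layer, which is why
    # only zlib.decompress() is needed here.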

666 

667 def isCoalesceable(self): 

668 """A VDI is coalesceable if it has no siblings and is not a leaf""" 

669 return not self.scanError and \ 

670 self.parent and \ 

671 len(self.parent.children) == 1 and \ 

672 self.isHidden() and \ 

673 len(self.children) > 0 

674 

675 def isLeafCoalesceable(self): 

676 """A VDI is leaf-coalesceable if it has no siblings and is a leaf""" 

677 return not self.scanError and \ 

678 self.parent and \ 

679 len(self.parent.children) == 1 and \ 

680 not self.isHidden() and \ 

681 len(self.children) == 0 

682 

683 def canLiveCoalesce(self, speed): 

684 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

685 isLeafCoalesceable() already""" 

686 feasibleSize = False 

687 allowedDownTime = \ 

688 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

689 allocated_size = self.getAllocatedSize() 

690 if speed: 

691 feasibleSize = \ 

692 allocated_size // speed < allowedDownTime 

693 else: 

694 feasibleSize = \ 

695 allocated_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

696 

697 return (feasibleSize or 

698 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 
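
    # Worked example (illustrative figures): allowedDownTime is
    # 0.5 * 10 = 5 seconds. With a measured speed of 100 MiB/s and 256 MiB
    # allocated, 268435456 // 104857600 == 2 seconds, which is below 5, so
    # the live leaf-coalesce is considered feasible. Without a speed
    # estimate, 256 MiB exceeds LIVE_LEAF_COALESCE_MAX_SIZE (20 MiB), so the
    # coalesce would only proceed if LEAFCLSC_FORCE is set.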

699 

700 def getAllPrunable(self): 

701 if len(self.children) == 0: # base case 

702 # it is possible to have a hidden leaf that was recently coalesced 

703 # onto its parent, its children already relinked but not yet 

704 # reloaded - in which case it may not be garbage collected yet: 

705 # some tapdisks could still be using the file. 

706 if self.sr.journaler.get(self.JRN_RELINK, self.uuid): 

707 return [] 

708 if not self.scanError and self.isHidden(): 

709 return [self] 

710 return [] 

711 

712 thisPrunable = True 

713 vdiList = [] 

714 for child in self.children: 

715 childList = child.getAllPrunable() 

716 vdiList.extend(childList) 

717 if child not in childList: 

718 thisPrunable = False 

719 

720 # We can destroy the current VDI only if all of its children are hidden 

721 # AND the current VDI itself is hidden as well! 

722 # Example in this case (after a failed live leaf coalesce): 

723 # 

724 # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees): 

725 # SMGC: [32436] b5458d61(1.000G/4.127M) 

726 # SMGC: [32436] *OLD_b545(1.000G/4.129M) 

727 # 

728 # OLD_b545 is hidden and must be removed, but b5458d61 not. 

729 # Normally, when the delete action is executed, we are not in this 

730 # function but in `_liveLeafCoalesce`. 

731 

732 if not self.scanError and not self.isHidden() and thisPrunable: 

733 vdiList.append(self) 

734 return vdiList 

735 

736 def getSizePhys(self) -> int: 

737 return self._sizePhys 

738 

739 def getAllocatedSize(self) -> int: 

740 return self._sizeAllocated 

741 

742 def getTreeRoot(self): 

743 "Get the root of the tree that self belongs to" 

744 root = self 

745 while root.parent: 

746 root = root.parent 

747 return root 

748 

749 def getTreeHeight(self): 

750 "Get the height of the subtree rooted at self" 

751 if len(self.children) == 0: 

752 return 1 

753 

754 maxChildHeight = 0 

755 for child in self.children: 

756 childHeight = child.getTreeHeight() 

757 if childHeight > maxChildHeight: 

758 maxChildHeight = childHeight 

759 

760 return maxChildHeight + 1 

761 

762 def getAllLeaves(self) -> List["VDI"]: 

763 "Get all leaf nodes in the subtree rooted at self" 

764 if len(self.children) == 0: 

765 return [self] 

766 

767 leaves = [] 

768 for child in self.children: 

769 leaves.extend(child.getAllLeaves()) 

770 return leaves 

771 

772 def updateBlockInfo(self) -> Optional[str]: 

773 val = base64.b64encode(self._queryCowBlocks()).decode() 

774 try: 

775 self.setConfig(VDI.DB_VDI_BLOCKS, val) 

776 except Exception: 

777 if self.vdi_type != VdiType.QCOW2: 

778 raise 

779 # Sometimes with QCOW2 the allocation table is too big to be stored in XAPI; in that case we do not store it 

780 # and write `skipped` instead so that hasWork is satisfied (and the GC doesn't loop indefinitely). 

781 self.setConfig(VDI.DB_VDI_BLOCKS, "skipped") 

782 

783 return val 

784 

785 def rename(self, uuid) -> None: 

786 "Rename the VDI file" 

787 assert(not self.sr.vdis.get(uuid)) 

788 self._clearRef() 

789 oldUuid = self.uuid 

790 self.uuid = uuid 

791 self.children = [] 

792 # updating the children themselves is the responsibility of the caller 

793 del self.sr.vdis[oldUuid] 

794 self.sr.vdis[self.uuid] = self 

795 

796 def delete(self) -> None: 

797 "Physically delete the VDI" 

798 lock.Lock.cleanup(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

799 lock.Lock.cleanupAll(self.uuid) 

800 self._clear() 

801 

802 def getParent(self) -> str: 

803 return self.cowutil.getParent(self.path, lambda x: x.strip()) 

804 

805 def repair(self, parent) -> None: 

806 self.cowutil.repair(parent) 

807 

808 @override 

809 def __str__(self) -> str: 

810 strHidden = "" 

811 if self.isHidden(): 

812 strHidden = "*" 

813 strSizeVirt = "?" 

814 if self.sizeVirt > 0: 

815 strSizeVirt = Util.num2str(self.sizeVirt) 

816 strSizePhys = "?" 

817 if self._sizePhys > 0: 

818 strSizePhys = "/%s" % Util.num2str(self._sizePhys) 

819 strSizeAllocated = "?" 

820 if self._sizeAllocated >= 0: 

821 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

822 strType = "[{}]".format(self.vdi_type) 

823 

824 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

825 strSizePhys, strSizeAllocated, strType) 

826 

827 def validate(self, fast=False) -> None: 

828 if self.cowutil.check(self.path, fast=fast) != CowUtil.CheckResult.Success: 828 ↛ 829line 828 didn't jump to line 829, because the condition on line 828 was never true

829 raise util.SMException("COW image %s corrupted" % self) 

830 

831 def _clear(self): 

832 self.uuid = "" 

833 self.path = "" 

834 self.parentUuid = "" 

835 self.parent = None 

836 self._clearRef() 

837 

838 def _clearRef(self): 

839 self._vdiRef = None 

840 

841 @staticmethod 

842 def _cancel_exception(sig, frame): 

843 raise CancelException() 

844 

845 def _call_plugin_coalesce(self, hostRef): 

846 signal.signal(signal.SIGTERM, self._cancel_exception) 

847 args = {"path": self.path, "vdi_type": self.vdi_type} 

848 Util.log("Calling remote coalesce plugin with: {}".format(args)) 

849 try: 

850 ret = self.sr.xapi.session.xenapi.host.call_plugin( \ 

851 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_tapdisk", args) 

852 Util.log("Remote coalesce returned {}".format(ret)) 

853 except CancelException: 

854 Util.log(f"Cancelling online coalesce following signal {args}") 

855 self.sr.xapi.session.xenapi.host.call_plugin( \ 

856 hostRef, XAPI.PLUGIN_ON_SLAVE, "commit_cancel", args) 

857 raise 

858 except Exception: 

859 raise 

860 

861 def _doCoalesceOnHost(self, hostRef): 

862 self.validate() 

863 self.parent.validate(True) 

864 self.parent._increaseSizeVirt(self.sizeVirt) 

865 self.sr._updateSlavesOnResize(self.parent) 

866 #TODO: We might need to make the LV RW on the slave directly for coalesce? 

867 # Children and parent need to be RW for QCOW2 coalesce, otherwise tapdisk(libqcow) will crash trying to access them 

868 

869 def abortTest(): 

870 file = self.sr._gc_running_file(self) 

871 try: 

872 with open(file, "r") as f: 

873 if not f.read(): 

874 Util.log("abortTest: Cancelling coalesce") 

875 return True 

876 except OSError as e: 

877 if e.errno == errno.ENOENT: 

878 Util.log("File {} does not exist".format(file)) 

879 else: 

880 Util.log("IOError: {}".format(e)) 

881 return True 

882 return False 

883 

884 Util.runAbortable(lambda: self._call_plugin_coalesce(hostRef), \ 

885 None, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0, prefSig=signal.SIGTERM) 

886 

887 self.parent.validate(True) 

888 #self._verifyContents(0) 

889 self.parent.updateBlockInfo() 

890 

891 def _isOpenOnHosts(self) -> Optional[str]: 

892 for pbdRecord in self.sr.xapi.getAttachedPBDs(): 

893 hostRef = pbdRecord["host"] 

894 args = {"path": self.path} 

895 is_openers = util.strtobool(self.sr.xapi.session.xenapi.host.call_plugin( \ 

896 hostRef, XAPI.PLUGIN_ON_SLAVE, "is_openers", args)) 

897 if is_openers: 

898 return hostRef 

899 return None 

900 

901 def _doCoalesce(self) -> None: 

902 """Coalesce self onto parent. Only perform the actual coalescing of 

903 an image, but not the subsequent relinking. We'll do that as the next step, 

904 after reloading the entire SR in case things have changed while we 

905 were coalescing""" 

906 self.validate() 

907 self.parent.validate(True) 

908 self.parent._increaseSizeVirt(self.sizeVirt) 

909 self.sr._updateSlavesOnResize(self.parent) 

910 self._coalesceCowImage(0) 

911 self.parent.validate(True) 

912 #self._verifyContents(0) 

913 self.parent.updateBlockInfo() 

914 

915 def _verifyContents(self, timeOut): 

916 Util.log(" Coalesce verification on %s" % self) 

917 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

918 Util.runAbortable(lambda: self._runTapdiskDiff(), True, 

919 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

920 Util.log(" Coalesce verification succeeded") 

921 

922 def _runTapdiskDiff(self): 

923 cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \ 

924 (self.getDriverName(), self.path, \ 

925 self.parent.getDriverName(), self.parent.path) 

926 Util.doexec(cmd, 0) 

927 return True 

928 

929 @staticmethod 

930 def _reportCoalesceError(vdi, ce): 

931 """Reports a coalesce error to XenCenter. 

932 

933 vdi: the VDI object on which the coalesce error occurred 

934 ce: the CommandException that was raised""" 

935 

936 msg_name = os.strerror(ce.code) 

937 if ce.code == errno.ENOSPC: 

938 # TODO We could add more information here, e.g. exactly how much 

939 # space is required for the particular coalesce, as well as actions 

940 # to be taken by the user and consequences of not taking these 

941 # actions. 

942 msg_body = 'Ran out of space while coalescing.' 

943 elif ce.code == errno.EIO: 

944 msg_body = 'I/O error while coalescing.' 

945 else: 

946 msg_body = '' 

947 util.SMlog('Coalesce failed on SR %s: %s (%s)' 

948 % (vdi.sr.uuid, msg_name, msg_body)) 

949 

950 # Create a XenCenter message, but don't spam. 

951 xapi = vdi.sr.xapi.session.xenapi 

952 sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid) 

953 oth_cfg = xapi.SR.get_other_config(sr_ref) 

954 if COALESCE_ERR_RATE_TAG in oth_cfg: 

955 coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG]) 

956 else: 

957 coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE 

958 

959 xcmsg = False 

960 if coalesce_err_rate == 0: 

961 xcmsg = True 

962 elif coalesce_err_rate > 0: 

963 now = datetime.datetime.now() 

964 sm_cfg = xapi.SR.get_sm_config(sr_ref) 

965 if COALESCE_LAST_ERR_TAG in sm_cfg: 

966 # seconds per message (minimum distance in time between two 

967 # messages in seconds) 

968 spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60) 

969 last = datetime.datetime.fromtimestamp( 

970 float(sm_cfg[COALESCE_LAST_ERR_TAG])) 

971 if now - last >= spm: 

972 xapi.SR.remove_from_sm_config(sr_ref, 

973 COALESCE_LAST_ERR_TAG) 

974 xcmsg = True 

975 else: 

976 xcmsg = True 

977 if xcmsg: 

978 xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG, 

979 str(now.strftime('%s'))) 

980 if xcmsg: 

981 xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body) 

982 

983 def coalesce(self) -> int: 

984 return self.cowutil.coalesce(self.path) 

985 

986 @staticmethod 

987 def _doCoalesceCowImage(vdi: "VDI"): 

988 try: 

989 startTime = time.time() 

990 allocated_size = vdi.getAllocatedSize() 

991 coalesced_size = vdi.coalesce() 

992 endTime = time.time() 

993 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

994 except util.CommandException as ce: 

995 # We use try/except for the following piece of code because it runs 

996 # in a separate process context and errors will not be caught and 

997 # reported by anyone. 

998 try: 

999 # Report coalesce errors back to user via XC 

1000 VDI._reportCoalesceError(vdi, ce) 

1001 except Exception as e: 

1002 util.SMlog('failed to create XenCenter message: %s' % e) 

1003 raise ce 

1004 except: 

1005 raise 

1006 

1007 def _vdi_is_raw(self, vdi_path): 

1008 """ 

1009 Given path to vdi determine if it is raw 

1010 """ 

1011 uuid = self.extractUuid(vdi_path) 

1012 return self.sr.vdis[uuid].vdi_type == VdiType.RAW 

1013 

1014 def _coalesceCowImage(self, timeOut): 

1015 Util.log(" Running COW coalesce on %s" % self) 

1016 def abortTest(): 

1017 if self.cowutil.isCoalesceableOnRemote(): 

1018 file = self.sr._gc_running_file(self) 

1019 try: 

1020 with open(file, "r") as f: 

1021 if not f.read(): 

1022 return True 

1023 except OSError as e: 

1024 if e.errno == errno.ENOENT: 

1025 util.SMlog("File {} does not exist".format(file)) 

1026 else: 

1027 util.SMlog("IOError: {}".format(e)) 

1028 return True 

1029 return IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1030 

1031 try: 

1032 util.fistpoint.activate_custom_fn( 

1033 "cleanup_coalesceVHD_inject_failure", 

1034 util.inject_failure) 

1035 Util.runAbortable(lambda: VDI._doCoalesceCowImage(self), None, 

1036 self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut) 

1037 except: 

1038 # Exception at this phase could indicate a failure in COW coalesce 

1039 # or a kill of COW coalesce by runAbortable due to timeOut 

1040 # Try a repair and reraise the exception 

1041 parent = "" 

1042 try: 

1043 parent = self.getParent() 

1044 if not self._vdi_is_raw(parent): 

1045 # Repair error is logged and ignored. Error reraised later 

1046 util.SMlog('Coalesce failed on %s, attempting repair on ' \ 

1047 'parent %s' % (self.uuid, parent)) 

1048 self.repair(parent) 

1049 except Exception as e: 

1050 util.SMlog('(error ignored) Failed to repair parent %s ' \ 

1051 'after failed coalesce on %s, err: %s' % 

1052 (parent, self.path, e)) 

1053 raise 

1054 

1055 util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid) 

1056 

1057 def _relinkSkip(self) -> None: 

1058 """Relink children of this VDI to point to the parent of this VDI""" 

1059 abortFlag = IPCFlag(self.sr.uuid) 

1060 for child in self.children: 

1061 if abortFlag.test(FLAG_TYPE_ABORT): 

1062 raise AbortException("Aborting due to signal") 

1063 Util.log(" Relinking %s from %s to %s" % \ 

1064 (child, self, self.parent)) 

1065 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

1066 child._setParent(self.parent) 

1067 self.children = [] 

1068 

1069 def _reloadChildren(self, vdiSkip): 

1070 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

1071 the COW image metadata for this file in any online VDI""" 

1072 abortFlag = IPCFlag(self.sr.uuid) 

1073 for child in self.children: 

1074 if child == vdiSkip: 

1075 continue 

1076 if abortFlag.test(FLAG_TYPE_ABORT): 

1077 raise AbortException("Aborting due to signal") 

1078 Util.log(" Reloading VDI %s" % child) 

1079 child._reload() 

1080 

1081 def _reload(self): 

1082 """Pause & unpause to cause blktap to reload the image metadata""" 

1083 for child in self.children: 

1084 child._reload() 

1085 

1086 # only leaves can be attached 

1087 if len(self.children) == 0: 

1088 try: 

1089 self.delConfig(VDI.DB_VDI_RELINKING) 

1090 except XenAPI.Failure as e: 

1091 if not util.isInvalidVDI(e): 

1092 raise 

1093 self.refresh() 

1094 

1095 def _needRelink(self, list_not_to_relink): 

1096 """ 

1097 When coalescing up the chain we shouldn't need to relink at all; the relink is only needed on children whose direct parent is the VDI being coalesced. 

1098 """ 

1099 if not list_not_to_relink: 

1100 return True 

1101 if self.uuid in list_not_to_relink: 

1102 return False 

1103 else: 

1104 return True 

1105 

1106 def _tagChildrenForRelink(self, list_not_to_relink=None): 

1107 if len(self.children) == 0: 

1108 retries = 0 

1109 try: 

1110 while retries < 15: 

1111 retries += 1 

1112 if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None: 

1113 Util.log("VDI %s is activating, wait to relink" % 

1114 self.uuid) 

1115 else: 

1116 if self._needRelink(list_not_to_relink): 

1117 self.setConfig(VDI.DB_VDI_RELINKING, "True") 

1118 

1119 if self.getConfig(VDI.DB_VDI_ACTIVATING): 

1120 self.delConfig(VDI.DB_VDI_RELINKING) 

1121 Util.log("VDI %s started activating while tagging" % 

1122 self.uuid) 

1123 else: 

1124 return 

1125 else: 

1126 Util.log(f"Not adding relinking tag to VDI {self.uuid}") 

1127 return 

1128 time.sleep(2) 

1129 

1130 raise util.SMException("Failed to tag vdi %s for relink" % self) 

1131 except XenAPI.Failure as e: 

1132 if not util.isInvalidVDI(e): 

1133 raise 

1134 

1135 for child in self.children: 

1136 child._tagChildrenForRelink(list_not_to_relink) 

1137 

1138 def _loadInfoParent(self): 

1139 ret = self.cowutil.getParent(self.path, LvmCowUtil.extractUuid) 

1140 if ret: 

1141 self.parentUuid = ret 

1142 

1143 def _setParent(self, parent) -> None: 

1144 self.cowutil.setParent(self.path, parent.path, False) 

1145 self.parent = parent 

1146 self.parentUuid = parent.uuid 

1147 parent.children.append(self) 

1148 try: 

1149 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1150 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1151 (self.uuid, self.parentUuid)) 

1152 except: 

1153 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1154 (self.uuid, self.parentUuid)) 

1155 

1156 def _ensureParentActiveForRelink(self) -> None: 

1157 pass 

1158 

1159 def _update_vhd_parent(self, real_parent_uuid): 

1160 try: 

1161 self.setConfig(self.DB_VDI_PARENT, real_parent_uuid) 

1162 Util.log("Updated the vhd-parent field for child %s with real parent %s following an online coalesce" % \ 

1163 (self.uuid, real_parent_uuid)) 

1164 except: 

1165 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1166 (self.uuid, real_parent_uuid)) 

1167 

1168 def isHidden(self) -> bool: 

1169 if self._hidden is None: 

1170 self._loadInfoHidden() 

1171 return self._hidden 

1172 

1173 def _loadInfoHidden(self) -> None: 

1174 hidden = self.cowutil.getHidden(self.path) 

1175 self._hidden = (hidden != 0) 

1176 

1177 def _setHidden(self, hidden=True) -> None: 

1178 self._hidden = None 

1179 self.cowutil.setHidden(self.path, hidden) 

1180 self._hidden = hidden 

1181 

1182 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1183 """ensure the virtual size of 'self' is at least 'size'. Note that 

1184 resizing a COW image must always be offline and atomically: the file must 

1185 not be open by anyone and no concurrent operations may take place. 

1186 Thus we use the Agent API call for performing paused atomic 

1187 operations. If the caller is already in the atomic context, it must 

1188 call with atomic = False""" 

1189 if self.sizeVirt >= size: 

1190 return 

1191 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1192 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1193 

1194 msize = self.cowutil.getMaxResizeSize(self.path) 

1195 if (size <= msize): 

1196 self.cowutil.setSizeVirtFast(self.path, size) 

1197 else: 

1198 if atomic: 

1199 vdiList = self._getAllSubtree() 

1200 self.sr.lock() 

1201 try: 

1202 self.sr.pauseVDIs(vdiList) 

1203 try: 

1204 self._setSizeVirt(size) 

1205 finally: 

1206 self.sr.unpauseVDIs(vdiList) 

1207 finally: 

1208 self.sr.unlock() 

1209 else: 

1210 self._setSizeVirt(size) 

1211 

1212 self.sizeVirt = self.cowutil.getSizeVirt(self.path) 

1213 

1214 def _setSizeVirt(self, size) -> None: 

1215 """WARNING: do not call this method directly unless all VDIs in the 

1216 subtree are guaranteed to be unplugged (and remain so for the duration 

1217 of the operation): this operation is only safe for offline COW images""" 

1218 jFile = os.path.join(self.sr.path, self.uuid) 

1219 self.cowutil.setSizeVirt(self.path, size, jFile) 

1220 

1221 def _queryCowBlocks(self) -> bytes: 

1222 return self.cowutil.getBlockBitmap(self.path) 

1223 

1224 def _getCoalescedSizeData(self): 

1225 """Get the data size of the resulting image if we coalesce self onto 

1226 parent. We calculate the actual size by using the image block allocation 

1227 information (as opposed to just adding up the two image sizes to get an 

1228 upper bound)""" 

1229 # make sure we don't use stale BAT info from vdi_rec since the child 

1230 # was writable all this time 

1231 self.delConfig(VDI.DB_VDI_BLOCKS) 

1232 blocksChild = self.getVDIBlocks() 

1233 blocksParent = self.parent.getVDIBlocks() 

1234 numBlocks = Util.countBits(blocksChild, blocksParent) 

1235 Util.log("Num combined blocks = %d" % numBlocks) 

1236 sizeData = numBlocks * self.cowutil.getBlockSize(self.path) 

1237 assert(sizeData <= self.sizeVirt) 

1238 return sizeData 

1239 

1240 def _calcExtraSpaceForCoalescing(self) -> int: 

1241 sizeData = self._getCoalescedSizeData() 

1242 sizeCoalesced = sizeData + self.cowutil.calcOverheadBitmap(sizeData) + \ 

1243 self.cowutil.calcOverheadEmpty(self.sizeVirt) 

1244 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1245 return sizeCoalesced - self.parent.getSizePhys() 
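
    # Illustrative example (hypothetical figures): if the combined block
    # bitmaps cover 1000 blocks of 2 MiB each, sizeData is about 2000 MiB;
    # add the bitmap and empty-image overheads reported by cowutil, then
    # subtract the parent's current physical size to obtain the extra space
    # the SR must provide. A negative result means the parent already has
    # enough room.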

1246 

1247 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1248 """How much extra space in the SR will be required to 

1249 [live-]leaf-coalesce this VDI""" 

1250 # the space requirements are the same as for inline coalesce 

1251 return self._calcExtraSpaceForCoalescing() 

1252 

1253 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1254 """How much extra space in the SR will be required to 

1255 snapshot-coalesce this VDI""" 

1256 return self._calcExtraSpaceForCoalescing() + \ 

1257 self.cowutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1258 

1259 def _getAllSubtree(self): 

1260 """Get self and all VDIs in the subtree of self as a flat list""" 

1261 vdiList = [self] 

1262 for child in self.children: 

1263 vdiList.extend(child._getAllSubtree()) 

1264 return vdiList 

1265 

1266 

1267class FileVDI(VDI): 

1268 """Object representing a VDI in a file-based SR (EXT or NFS)""" 

1269 

1270 @override 

1271 @staticmethod 

1272 def extractUuid(path): 

1273 fileName = os.path.basename(path) 

1274 return os.path.splitext(fileName)[0] 

1275 

1276 def __init__(self, sr, uuid, vdi_type): 

1277 VDI.__init__(self, sr, uuid, vdi_type) 

1278 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]) 

1279 

1280 @override 

1281 def load(self, info=None) -> None: 

1282 if not info: 

1283 if not util.pathexists(self.path): 

1284 raise util.SMException("%s not found" % self.path) 

1285 try: 

1286 info = self.cowutil.getInfo(self.path, self.extractUuid) 

1287 except util.SMException: 

1288 Util.log(" [VDI %s: failed to read COW image metadata]" % self.uuid) 

1289 return 

1290 self.parent = None 

1291 self.children = [] 

1292 self.parentUuid = info.parentUuid 

1293 self.sizeVirt = info.sizeVirt 

1294 self._sizePhys = info.sizePhys 

1295 self._sizeAllocated = info.sizeAllocated 

1296 self._hidden = info.hidden 

1297 self.scanError = False 

1298 self.path = os.path.join(self.sr.path, "%s%s" % \ 

1299 (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type])) 

1300 

1301 @override 

1302 def rename(self, uuid) -> None: 

1303 oldPath = self.path 

1304 VDI.rename(self, uuid) 

1305 self.fileName = "%s%s" % (self.uuid, VDI_TYPE_TO_EXTENSION[self.vdi_type]) 

1306 self.path = os.path.join(self.sr.path, self.fileName) 

1307 assert(not util.pathexists(self.path)) 

1308 Util.log("Renaming %s -> %s" % (oldPath, self.path)) 

1309 os.rename(oldPath, self.path) 

1310 

1311 @override 

1312 def delete(self) -> None: 

1313 if len(self.children) > 0: 

1314 raise util.SMException("VDI %s has children, can't delete" % \ 

1315 self.uuid) 

1316 try: 

1317 self.sr.lock() 

1318 try: 

1319 os.unlink(self.path) 

1320 self.sr.forgetVDI(self.uuid) 

1321 finally: 

1322 self.sr.unlock() 

1323 except OSError: 

1324 raise util.SMException("os.unlink(%s) failed" % self.path) 

1325 VDI.delete(self) 

1326 

1327 @override 

1328 def getAllocatedSize(self) -> int: 

1329 if self._sizeAllocated == -1: 

1330 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1331 return self._sizeAllocated 

1332 

1333 

1334class LVMVDI(VDI): 

1335 """Object representing a VDI in an LVM SR""" 

1336 

1337 JRN_ZERO = "zero" # journal entry type for zeroing out end of parent 

1338 

1339 @override 

1340 def load(self, info=None) -> None: 

1341 # `info` is always set. `None` default value is only here to match parent method. 

1342 assert info, "No info given to LVMVDI.load" 

1343 self.parent = None 

1344 self.children = [] 

1345 self._sizePhys = -1 

1346 self._sizeAllocated = -1 

1347 self.scanError = info.scanError 

1348 self.sizeLV = info.sizeLV 

1349 self.sizeVirt = info.sizeVirt 

1350 self.fileName = info.lvName 

1351 self.lvActive = info.lvActive 

1352 self.lvOpen = info.lvOpen 

1353 self.lvReadonly = info.lvReadonly 

1354 self._hidden = info.hidden 

1355 self.parentUuid = info.parentUuid 

1356 self.path = os.path.join(self.sr.path, self.fileName) 

1357 self.lvmcowutil = LvmCowUtil(self.cowutil) 

1358 

1359 @override 

1360 @staticmethod 

1361 def extractUuid(path): 

1362 return LvmCowUtil.extractUuid(path) 

1363 

1364 def inflate(self, size): 

1365 """inflate the LV containing the COW image to 'size'""" 

1366 if not VdiType.isCowImage(self.vdi_type): 

1367 return 

1368 self._activate() 

1369 self.sr.lock() 

1370 try: 

1371 self.lvmcowutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size) 

1372 util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid) 

1373 finally: 

1374 self.sr.unlock() 

1375 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1376 self._sizePhys = -1 

1377 self._sizeAllocated = -1 

1378 

1379 def deflate(self): 

1380 """deflate the LV containing the image to minimum""" 

1381 if not VdiType.isCowImage(self.vdi_type): 

1382 return 

1383 self._activate() 

1384 self.sr.lock() 

1385 try: 

1386 self.lvmcowutil.deflate(self.sr.lvmCache, self.fileName, self.getSizePhys()) 

1387 finally: 

1388 self.sr.unlock() 

1389 self.sizeLV = self.sr.lvmCache.getSize(self.fileName) 

1390 self._sizePhys = -1 

1391 self._sizeAllocated = -1 

1392 

1393 def inflateFully(self): 

1394 self.inflate(self.lvmcowutil.calcVolumeSize(self.sizeVirt)) 

1395 

1396 def inflateParentForCoalesce(self): 

1397 """Inflate the parent only as much as needed for the purposes of 

1398 coalescing""" 

1399 if not VdiType.isCowImage(self.parent.vdi_type): 

1400 return 

1401 inc = self._calcExtraSpaceForCoalescing() 

1402 if inc > 0: 

1403 util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid) 

1404 self.parent.inflate(self.parent.sizeLV + inc) 

1405 

1406 @override 

1407 def updateBlockInfo(self) -> Optional[str]: 

1408 if VdiType.isCowImage(self.vdi_type): 

1409 return VDI.updateBlockInfo(self) 

1410 return None 

1411 

1412 @override 

1413 def rename(self, uuid) -> None: 

1414 oldUuid = self.uuid 

1415 oldLVName = self.fileName 

1416 VDI.rename(self, uuid) 

1417 self.fileName = LV_PREFIX[self.vdi_type] + self.uuid 

1418 self.path = os.path.join(self.sr.path, self.fileName) 

1419 assert(not self.sr.lvmCache.checkLV(self.fileName)) 

1420 

1421 self.sr.lvmCache.rename(oldLVName, self.fileName) 

1422 if self.sr.lvActivator.get(oldUuid, False): 

1423 self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False) 

1424 

1425 ns = NS_PREFIX_LVM + self.sr.uuid 

1426 (cnt, bcnt) = RefCounter.check(oldUuid, ns) 

1427 RefCounter.set(self.uuid, cnt, bcnt, ns) 

1428 RefCounter.reset(oldUuid, ns) 

1429 

1430 @override 

1431 def delete(self) -> None: 

1432 if len(self.children) > 0: 

1433 raise util.SMException("VDI %s has children, can't delete" % \ 

1434 self.uuid) 

1435 self.sr.lock() 

1436 try: 

1437 self.sr.lvmCache.remove(self.fileName) 

1438 self.sr.forgetVDI(self.uuid) 

1439 finally: 

1440 self.sr.unlock() 

1441 RefCounter.reset(self.uuid, NS_PREFIX_LVM + self.sr.uuid) 

1442 VDI.delete(self) 

1443 

1444 @override 

1445 def getSizePhys(self) -> int: 

1446 if self._sizePhys == -1: 

1447 self._loadInfoSizePhys() 

1448 return self._sizePhys 

1449 

1450 def _loadInfoSizePhys(self): 

1451 """Get the physical utilization of the COW image file. We do it individually 

1452 (and not using the COW batch scanner) as an optimization: this info is 

1453 relatively expensive and we need it only for VDIs involved in 

1454 coalescing.""" 

1455 if not VdiType.isCowImage(self.vdi_type): 

1456 return 

1457 self._activate() 

1458 self._sizePhys = self.cowutil.getSizePhys(self.path) 

1459 if self._sizePhys <= 0: 

1460 raise util.SMException("phys size of %s = %d" % \ 

1461 (self, self._sizePhys)) 

1462 

1463 @override 

1464 def getAllocatedSize(self) -> int: 

1465 if self._sizeAllocated == -1: 

1466 self._loadInfoSizeAllocated() 

1467 return self._sizeAllocated 

1468 

1469 def _loadInfoSizeAllocated(self): 

1470 """ 

1471 Get the allocated size of the COW volume. 

1472 """ 

1473 if not VdiType.isCowImage(self.vdi_type): 

1474 return 

1475 self._activate() 

1476 self._sizeAllocated = self.cowutil.getAllocatedSize(self.path) 

1477 

1478 @override 

1479 def _loadInfoHidden(self) -> None: 

1480 if not VdiType.isCowImage(self.vdi_type): 

1481 self._hidden = self.sr.lvmCache.getHidden(self.fileName) 

1482 else: 

1483 VDI._loadInfoHidden(self) 

1484 

1485 @override 

1486 def _setHidden(self, hidden=True) -> None: 

1487 if not VdiType.isCowImage(self.vdi_type): 

1488 self._hidden = None 

1489 self.sr.lvmCache.setHidden(self.fileName, hidden) 

1490 self._hidden = hidden 

1491 else: 

1492 VDI._setHidden(self, hidden) 

1493 

1494 @override 

1495 def __str__(self) -> str: 

1496 strType = self.vdi_type 

1497 if self.vdi_type == VdiType.RAW: 

1498 strType = "RAW" 

1499 strHidden = "" 

1500 if self.isHidden(): 

1501 strHidden = "*" 

1502 strSizePhys = "" 

1503 if self._sizePhys > 0: 

1504 strSizePhys = Util.num2str(self._sizePhys) 

1505 strSizeAllocated = "" 

1506 if self._sizeAllocated >= 0: 

1507 strSizeAllocated = Util.num2str(self._sizeAllocated) 

1508 strActive = "n" 

1509 if self.lvActive: 

1510 strActive = "a" 

1511 if self.lvOpen: 

1512 strActive += "o" 

1513 return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType, 

1514 Util.num2str(self.sizeVirt), strSizePhys, strSizeAllocated, 

1515 Util.num2str(self.sizeLV), strActive) 

1516 

1517 @override 

1518 def validate(self, fast=False) -> None: 

1519 if VdiType.isCowImage(self.vdi_type): 

1520 VDI.validate(self, fast) 

1521 

1522 def _setChainRw(self) -> List[str]: 

1523 """ 

1524 Make the read-only LV and its children writable. 

1525 This is needed because the coalesce can be done by tapdisk directly, 

1526 which needs to write parent information into the children. 

1527 The VDI we want to coalesce into its parent also needs to be writable for the libqcow coalesce step. 

1528 Return the list of LVs that were previously read-only so they can be made read-only again after the coalesce. 

1529 """ 

1530 was_ro = [] 

1531 if self.lvReadonly: 

1532 self.sr.lvmCache.setReadonly(self.fileName, False) 

1533 was_ro.append(self.fileName) 

1534 

1535 for child in self.children: 

1536 if child.lvReadonly: 

1537 self.sr.lvmCache.setReadonly(child.fileName, False) 

1538 was_ro.append(child.fileName) 

1539 

1540 return was_ro 

1541 

1542 def _setChainRo(self, was_ro: List[str]) -> None: 

1543 """Set the list of LV in parameters to readonly""" 

1544 for lvName in was_ro: 

1545 self.sr.lvmCache.setReadonly(lvName, True) 

1546 

1547 @override 

1548 def _doCoalesce(self) -> None: 

1549 """LVMVDI parents must first be activated, inflated, and made writable""" 

1550 was_ro = [] 

1551 try: 

1552 self._activateChain() 

1553 self.sr.lvmCache.setReadonly(self.parent.fileName, False) 

1554 self.parent.validate() 

1555 self.inflateParentForCoalesce() 

1556 was_ro = self._setChainRw() 

1557 VDI._doCoalesce(self) 

1558 finally: 

1559 self.parent._loadInfoSizePhys() 

1560 self.parent.deflate() 

1561 self.sr.lvmCache.setReadonly(self.parent.fileName, True) 

1562 self._setChainRo(was_ro) 

1563 

1564 @override 

1565 def _setParent(self, parent) -> None: 

1566 self._activate() 

1567 if self.lvReadonly: 

1568 self.sr.lvmCache.setReadonly(self.fileName, False) 

1569 

1570 try: 

1571 self.cowutil.setParent(self.path, parent.path, parent.vdi_type == VdiType.RAW) 

1572 finally: 

1573 if self.lvReadonly: 

1574 self.sr.lvmCache.setReadonly(self.fileName, True) 

1575 self._deactivate() 

1576 self.parent = parent 

1577 self.parentUuid = parent.uuid 

1578 parent.children.append(self) 

1579 try: 

1580 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1581 Util.log("Updated the VDI-parent field for child %s with %s" % \ 

1582 (self.uuid, self.parentUuid)) 

1583 except: 

1584 Util.log("Failed to update the VDI-parent with %s for child %s" % \ 

1585 (self.parentUuid, self.uuid)) 

1586 

1587 def _activate(self): 

1588 self.sr.lvActivator.activate(self.uuid, self.fileName, False) 

1589 

1590 def _activateChain(self): 

1591 vdi = self 

1592 while vdi: 

1593 vdi._activate() 

1594 vdi = vdi.parent 

1595 

1596 def _deactivate(self): 

1597 self.sr.lvActivator.deactivate(self.uuid, False) 

1598 

1599 @override 

1600 def _ensureParentActiveForRelink(self) -> None: 

1601 self.parent._activate() 

1602 

1603 @override 

1604 def _increaseSizeVirt(self, size, atomic=True) -> None: 

1605 "ensure the virtual size of 'self' is at least 'size'" 

1606 self._activate() 

1607 if VdiType.isCowImage(self.vdi_type): 

1608 VDI._increaseSizeVirt(self, size, atomic) 

1609 return 

1610 

1611 # raw VDI case 
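# For a raw LV the virtual size equals the LV size, so growing means
# extending the LV and then zeroing the newly added extent. A JRN_ZERO
# journal entry records the zeroing start offset so that an interrupted
# zero-out can be resumed on the next GC run (see the unfinishedZero
# handling below).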

1612 offset = self.sizeLV 

1613 if self.sizeVirt < size: 

1614 oldSize = self.sizeLV 

1615 self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size) 

1616 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV)) 

1617 self.sr.lvmCache.setSize(self.fileName, self.sizeLV) 

1618 offset = oldSize 

1619 unfinishedZero = False 

1620 jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid) 

1621 if jval: 

1622 unfinishedZero = True 

1623 offset = int(jval) 

1624 length = self.sizeLV - offset 

1625 if not length: 

1626 return 

1627 

1628 if unfinishedZero: 

1629 Util.log(" ==> Redoing unfinished zeroing out") 

1630 else: 

1631 self.sr.journaler.create(self.JRN_ZERO, self.uuid, \ 

1632 str(offset)) 

1633 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1634 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1635 func = lambda: util.zeroOut(self.path, offset, length) 

1636 Util.runAbortable(func, True, self.sr.uuid, abortTest, 

1637 VDI.POLL_INTERVAL, 0) 

1638 self.sr.journaler.remove(self.JRN_ZERO, self.uuid) 

1639 

1640 @override 

1641 def _setSizeVirt(self, size) -> None: 

1642 """WARNING: do not call this method directly unless all VDIs in the 

1643 subtree are guaranteed to be unplugged (and remain so for the duration 

1644 of the operation): this operation is only safe for offline COW images.""" 

1645 self._activate() 

1646 jFile = self.lvmcowutil.createResizeJournal(self.sr.lvmCache, self.uuid) 

1647 try: 

1648 self.lvmcowutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid, self.vdi_type, size, jFile) 

1649 finally: 

1650 self.lvmcowutil.destroyResizeJournal(self.sr.lvmCache, self.uuid) 

1651 

1652 @override 

1653 def _queryCowBlocks(self) -> bytes: 

1654 self._activate() 

1655 return VDI._queryCowBlocks(self) 

1656 

1657 @override 

1658 def _calcExtraSpaceForCoalescing(self) -> int: 

1659 if not VdiType.isCowImage(self.parent.vdi_type): 

1660 return 0 # raw parents are never deflated in the first place 

1661 sizeCoalesced = self.lvmcowutil.calcVolumeSize(self._getCoalescedSizeData()) 

1662 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1663 return sizeCoalesced - self.parent.sizeLV 

1664 

1665 @override 

1666 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1667 """How much extra space in the SR will be required to 

1668 [live-]leaf-coalesce this VDI""" 

1669 # we can deflate the leaf to minimize the space requirements 

1670 deflateDiff = self.sizeLV - lvutil.calcSizeLV(self.getSizePhys()) 

1671 return self._calcExtraSpaceForCoalescing() - deflateDiff 

1672 

1673 @override 

1674 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1675 return self._calcExtraSpaceForCoalescing() + \ 

1676 lvutil.calcSizeLV(self.getSizePhys()) 

1677 

1678 

1679class LinstorVDI(VDI): 

1680 """Object representing a VDI in a LINSTOR SR""" 

1681 

1682 VOLUME_LOCK_TIMEOUT = 30 

1683 

1684 @override 

1685 def load(self, info=None) -> None: 

1686 self.parentUuid = info.parentUuid 

1687 self.scanError = True 

1688 self.parent = None 

1689 self.children = [] 

1690 

1691 self.fileName = self.sr._linstor.get_volume_name(self.uuid) 

1692 self.path = self.sr._linstor.build_device_path(self.fileName) 

1693 self.linstorcowutil = LinstorCowUtil(self.sr.xapi.session, self.sr._linstor, info.vdiType) 

1694 

1695 if not info: 

1696 try: 

1697 info = self.linstorcowutil.get_info(self.uuid) 

1698 except util.SMException: 

1699 Util.log( 

1700 ' [VDI {}: failed to read COW image metadata]'.format(self.uuid) 

1701 ) 

1702 return 

1703 

1704 self.parentUuid = info.parentUuid 

1705 self.sizeVirt = info.sizeVirt 

1706 self._sizePhys = -1 

1707 self._sizeAllocated = -1 

1708 self.drbd_size = -1 

1709 self._hidden = info.hidden 

1710 self.scanError = False 

1711 

1712 @override 

1713 def getSizePhys(self, fetch=False) -> int: 

1714 if self._sizePhys < 0 or fetch: 

1715 self._sizePhys = self.linstorcowutil.get_size_phys(self.uuid) 

1716 return self._sizePhys 

1717 

1718 def getDrbdSize(self, fetch=False): 

1719 if self.drbd_size < 0 or fetch: 

1720 self.drbd_size = self.linstorcowutil.get_drbd_size(self.uuid) 

1721 return self.drbd_size 

1722 

1723 @override 

1724 def getAllocatedSize(self) -> int: 

1725 if self._sizeAllocated == -1: 

1726 if VdiType.isCowImage(self.vdi_type): 

1727 self._sizeAllocated = self.linstorcowutil.get_allocated_size(self.uuid) 

1728 return self._sizeAllocated 

1729 

1730 def inflate(self, size): 

1731 if not VdiType.isCowImage(self.vdi_type): 

1732 return 

1733 self.sr.lock() 

1734 try: 

1735 # Ensure we use the real DRBD size and not the cached one:

1736 # this attribute can change if the volume is resized by the user.

1737 self.drbd_size = self.getDrbdSize(fetch=True) 

1738 self.linstorcowutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size) 

1739 finally: 

1740 self.sr.unlock() 

1741 self.drbd_size = -1 

1742 self._sizePhys = -1 

1743 self._sizeAllocated = -1 

1744 

1745 def deflate(self): 

1746 if not VdiType.isCowImage(self.vdi_type): 

1747 return 

1748 self.sr.lock() 

1749 try: 

1750 # Ensure we use the real sizes and not the cached info. 

1751 self.drbd_size = self.getDrbdSize(fetch=True) 

1752 self._sizePhys = self.getSizePhys(fetch=True) 

1753 self.linstorcowutil.force_deflate(self.path, self._sizePhys, self.drbd_size, zeroize=False) 

1754 finally: 

1755 self.sr.unlock() 

1756 self.drbd_size = -1 

1757 self._sizePhys = -1 

1758 self._sizeAllocated = -1 

1759 

1760 def inflateFully(self): 

1761 if VdiType.isCowImage(self.vdi_type): 

1762 self.inflate(self.linstorcowutil.compute_volume_size(self.sizeVirt)) 

1763 

1764 @override 

1765 def rename(self, uuid) -> None: 

1766 Util.log('Renaming {} -> {} (path={})'.format( 

1767 self.uuid, uuid, self.path 

1768 )) 

1769 self.sr._linstor.update_volume_uuid(self.uuid, uuid) 

1770 VDI.rename(self, uuid) 

1771 

1772 @override 

1773 def delete(self) -> None: 

1774 if len(self.children) > 0: 

1775 raise util.SMException( 

1776 'VDI {} has children, can\'t delete'.format(self.uuid) 

1777 ) 

1778 self.sr.lock() 

1779 try: 

1780 self.sr._linstor.destroy_volume(self.uuid) 

1781 self.sr.forgetVDI(self.uuid) 

1782 finally: 

1783 self.sr.unlock() 

1784 VDI.delete(self) 

1785 

1786 @override 

1787 def validate(self, fast=False) -> None: 

1788 if VdiType.isCowImage(self.vdi_type) and self.linstorcowutil.check(self.uuid, fast=fast) != CowUtil.CheckResult.Success: 

1789 raise util.SMException('COW image {} corrupted'.format(self)) 

1790 

1791 @override 

1792 def pause(self, failfast=False) -> None: 

1793 self.sr._linstor.ensure_volume_is_not_locked( 

1794 self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1795 ) 

1796 return super(LinstorVDI, self).pause(failfast) 

1797 

1798 @override 

1799 def coalesce(self) -> int: 

1800 # Note: `SMException` is raised here so that a failure only skips the current coalesce.

1801 # With any other exception type the remaining coalesce calls would not be executed.

1802 return self.linstorcowutil.force_coalesce(self.path) 

1803 

1804 @override 

1805 def getParent(self) -> str: 

1806 return self.linstorcowutil.get_parent( 

1807 self.sr._linstor.get_volume_uuid_from_device_path(self.path) 

1808 ) 

1809 

1810 @override 

1811 def repair(self, parent_uuid) -> None: 

1812 self.linstorcowutil.force_repair( 

1813 self.sr._linstor.get_device_path(parent_uuid) 

1814 ) 

1815 

1816 @override 

1817 def _relinkSkip(self) -> None: 

1818 abortFlag = IPCFlag(self.sr.uuid) 

1819 for child in self.children: 

1820 if abortFlag.test(FLAG_TYPE_ABORT): 

1821 raise AbortException('Aborting due to signal') 

1822 Util.log( 

1823 ' Relinking {} from {} to {}'.format( 

1824 child, self, self.parent 

1825 ) 

1826 ) 

1827 

1828 session = child.sr.xapi.session 

1829 sr_uuid = child.sr.uuid 

1830 vdi_uuid = child.uuid 

1831 try: 

1832 self.sr._linstor.ensure_volume_is_not_locked( 

1833 vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT 

1834 ) 

1835 blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid) 

1836 child._setParent(self.parent) 

1837 finally: 

1838 blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid) 

1839 self.children = [] 

1840 

1841 @override 

1842 def _setParent(self, parent) -> None: 

1843 self.sr._linstor.get_device_path(self.uuid) 

1844 self.linstorcowutil.force_parent(self.path, parent.path) 

1845 self.parent = parent 

1846 self.parentUuid = parent.uuid 

1847 parent.children.append(self) 

1848 try: 

1849 self.setConfig(self.DB_VDI_PARENT, self.parentUuid) 

1850 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1851 (self.uuid, self.parentUuid)) 

1852 except: 

1853 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1854 (self.uuid, self.parentUuid)) 

1855 

1856 @override 

1857 def _doCoalesce(self) -> None: 

1858 try: 

1859 self._activateChain() 

1860 self.parent.validate() 

1861 self._inflateParentForCoalesce() 

1862 VDI._doCoalesce(self) 

1863 finally: 

1864 self.parent.deflate() 

1865 

1866 def _activateChain(self): 

1867 vdi = self 

1868 while vdi: 

1869 try: 

1870 p = self.sr._linstor.get_device_path(vdi.uuid) 

1871 except Exception as e: 

1872 # Raise SMException so that only this coalesce is skipped;

1873 # any other exception would stop the GC entirely.

1874 raise util.SMException(str(e)) 

1875 vdi = vdi.parent 

1876 

1877 @override 

1878 def _setHidden(self, hidden=True) -> None: 

1879 HIDDEN_TAG = 'hidden' 

1880 

1881 if not VdiType.isCowImage(self.vdi_type): 

1882 self._hidden = None 

1883 self.sr._linstor.update_volume_metadata(self.uuid, { 

1884 HIDDEN_TAG: hidden 

1885 }) 

1886 self._hidden = hidden 

1887 else: 

1888 VDI._setHidden(self, hidden) 

1889 

1890 @override 

1891 def _increaseSizeVirt(self, size, atomic=True): 

1892 if self.vdi_type == VdiType.RAW: 

1893 offset = self.drbd_size 

1894 if self.sizeVirt < size: 

1895 oldSize = self.drbd_size 

1896 self.drbd_size = LinstorVolumeManager.round_up_volume_size(size) 

1897 Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size)) 

1898 self.sr._linstor.resize_volume(self.uuid, self.drbd_size) 

1899 offset = oldSize 

1900 unfinishedZero = False 

1901 jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid) 

1902 if jval: 

1903 unfinishedZero = True 

1904 offset = int(jval) 

1905 length = self.drbd_size - offset 

1906 if not length: 

1907 return 

1908 

1909 if unfinishedZero: 

1910 Util.log(" ==> Redoing unfinished zeroing out") 

1911 else: 

1912 self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset)) 

1913 Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length)) 

1914 abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT) 

1915 func = lambda: util.zeroOut(self.path, offset, length) 

1916 Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0) 

1917 self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid) 

1918 return 

1919 

1920 if self.sizeVirt >= size: 

1921 return 

1922 Util.log(" Expanding COW image virt size for VDI %s: %s -> %s" % \ 

1923 (self, Util.num2str(self.sizeVirt), Util.num2str(size))) 

1924 
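# If the requested size fits within the image's current resize headroom
# (get_max_resize_size, reported in MiB), a fast in-place resize is enough.
# Otherwise a full journaled resize is needed, which requires pausing every
# VDI in the subtree when 'atomic' is set.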

1925 msize = self.linstorcowutil.get_max_resize_size(self.uuid) * 1024 * 1024 

1926 if (size <= msize): 

1927 self.linstorcowutil.set_size_virt_fast(self.path, size) 

1928 else: 

1929 if atomic: 

1930 vdiList = self._getAllSubtree() 

1931 self.sr.lock() 

1932 try: 

1933 self.sr.pauseVDIs(vdiList) 

1934 try: 

1935 self._setSizeVirt(size) 

1936 finally: 

1937 self.sr.unpauseVDIs(vdiList) 

1938 finally: 

1939 self.sr.unlock() 

1940 else: 

1941 self._setSizeVirt(size) 

1942 

1943 self.sizeVirt = self.linstorcowutil.get_size_virt(self.uuid) 

1944 

1945 @override 

1946 def _setSizeVirt(self, size) -> None: 

1947 jfile = self.uuid + '-jvhd' 

1948 self.sr._linstor.create_volume( 

1949 jfile, self.cowutil.getResizeJournalSize(), persistent=False, volume_name=jfile 

1950 ) 

1951 try: 

1952 self.inflate(self.linstorcowutil.compute_volume_size(size)) 

1953 self.linstorcowutil.set_size_virt(self.path, size, jfile) 

1954 finally: 

1955 try: 

1956 self.sr._linstor.destroy_volume(jfile) 

1957 except Exception: 

1958 # We can ignore it, in any case this volume is not persistent. 

1959 pass 

1960 

1961 @override 

1962 def _queryCowBlocks(self) -> bytes: 

1963 return self.linstorcowutil.get_block_bitmap(self.uuid) 

1964 

1965 def _inflateParentForCoalesce(self): 

1966 if not VdiType.isCowImage(self.parent.vdi_type): 

1967 return 

1968 inc = self._calcExtraSpaceForCoalescing() 

1969 if inc > 0: 

1970 self.parent.inflate(self.parent.getDrbdSize() + inc) 

1971 

1972 @override 

1973 def _calcExtraSpaceForCoalescing(self) -> int: 

1974 if not VdiType.isCowImage(self.parent.vdi_type): 

1975 return 0 

1976 size_coalesced = self.linstorcowutil.compute_volume_size(self._getCoalescedSizeData()) 

1977 Util.log("Coalesced size = %s" % Util.num2str(size_coalesced)) 

1978 return size_coalesced - self.parent.getDrbdSize() 

1979 

1980 @override 

1981 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1982 assert self.getDrbdSize() > 0 

1983 assert self.getSizePhys() > 0 

1984 deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1985 assert deflate_diff >= 0 

1986 return self._calcExtraSpaceForCoalescing() - deflate_diff 

1987 

1988 @override 

1989 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1990 assert self.getSizePhys() > 0 

1991 return self._calcExtraSpaceForCoalescing() + \ 

1992 LinstorVolumeManager.round_up_volume_size(self.getSizePhys()) 

1993 

1994################################################################################ 

1995# 

1996# SR 

1997# 

1998class SR(object): 

1999 class LogFilter: 

2000 def __init__(self, sr): 

2001 self.sr = sr 

2002 self.stateLogged = False 

2003 self.prevState = {} 

2004 self.currState = {} 

2005 

2006 def logState(self): 

2007 changes = "" 

2008 self.currState.clear() 

2009 for vdi in self.sr.vdiTrees: 

2010 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

2011 if not self.prevState.get(vdi.uuid) or \ 

2012 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

2013 changes += self.currState[vdi.uuid] 

2014 

2015 for uuid in self.prevState: 

2016 if not self.currState.get(uuid): 

2017 changes += "Tree %s gone\n" % uuid 

2018 

2019 result = "SR %s (%d VDIs in %d COW trees): " % \ 

2020 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

2021 

2022 if len(changes) > 0: 

2023 if self.stateLogged: 

2024 result += "showing only COW trees that changed:" 

2025 result += "\n%s" % changes 

2026 else: 

2027 result += "no changes" 

2028 

2029 for line in result.split("\n"): 

2030 Util.log("%s" % line) 

2031 self.prevState.clear() 

2032 for key, val in self.currState.items(): 

2033 self.prevState[key] = val 

2034 self.stateLogged = True 

2035 

2036 def logNewVDI(self, uuid): 

2037 if self.stateLogged: 

2038 Util.log("Found new VDI when scanning: %s" % uuid) 

2039 

2040 def _getTreeStr(self, vdi, indent=8): 

2041 treeStr = "%s%s\n" % (" " * indent, vdi) 

2042 for child in vdi.children: 

2043 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

2044 return treeStr 

2045 

2046 TYPE_FILE = "file" 

2047 TYPE_LVHD = "lvhd" 

2048 TYPE_LINSTOR = "linstor" 

2049 TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR] 

2050 

2051 LOCK_RETRY_INTERVAL = 3 

2052 LOCK_RETRY_ATTEMPTS = 20 

2053 LOCK_RETRY_ATTEMPTS_LOCK = 100 

2054 

2055 SCAN_RETRY_ATTEMPTS = 3 

2056 

2057 JRN_CLONE = "clone" # journal entry type for the clone operation (from SM) 

2058 TMP_RENAME_PREFIX = "OLD_" 

2059 

2060 KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline" 

2061 KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override" 

2062 
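# Factory: instantiate the SR subclass matching the XAPI SR type
# (file-based, LVM-based or LINSTOR-based).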

2063 @staticmethod 

2064 def getInstance(uuid, xapiSession, createLock=True, force=False): 

2065 xapi = XAPI(xapiSession, uuid) 

2066 type = normalizeType(xapi.srRecord["type"]) 

2067 if type == SR.TYPE_FILE: 

2068 return FileSR(uuid, xapi, createLock, force) 

2069 elif type == SR.TYPE_LVHD: 

2070 return LVMSR(uuid, xapi, createLock, force) 

2071 elif type == SR.TYPE_LINSTOR: 

2072 return LinstorSR(uuid, xapi, createLock, force) 

2073 raise util.SMException("SR type %s not recognized" % type) 

2074 

2075 def __init__(self, uuid, xapi, createLock, force): 

2076 self.logFilter = self.LogFilter(self) 

2077 self.uuid = uuid 

2078 self.path = "" 

2079 self.name = "" 

2080 self.vdis = {} 

2081 self.vdiTrees = [] 

2082 self.journaler = None 

2083 self.xapi = xapi 

2084 self._locked = 0 

2085 self._srLock = None 

2086 if createLock:

2087 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, self.uuid) 

2088 else: 

2089 Util.log("Requested no SR locking") 

2090 self.name = self.xapi.srRecord["name_label"] 

2091 self._failedCoalesceTargets = [] 

2092 

2093 if not self.xapi.isPluggedHere(): 

2094 if force:

2095 Util.log("SR %s not attached on this host, ignoring" % uuid) 

2096 else: 

2097 if not self.wait_for_plug(): 

2098 raise util.SMException("SR %s not attached on this host" % uuid) 

2099 

2100 if force:

2101 Util.log("Not checking if we are Master (SR %s)" % uuid) 

2102 elif not self.xapi.isMaster():

2103 raise util.SMException("This host is NOT master, will not run") 

2104 

2105 self.no_space_candidates = {} 

2106 

2107 def msg_cleared(self, xapi_session, msg_ref): 

2108 try: 

2109 msg = xapi_session.xenapi.message.get_record(msg_ref) 

2110 except XenAPI.Failure: 

2111 return True 

2112 

2113 return msg is None 

2114 

2115 def check_no_space_candidates(self): 

2116 xapi_session = self.xapi.getSession() 

2117 

2118 msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE) 

2119 if self.no_space_candidates: 

2120 if msg_id is None or self.msg_cleared(xapi_session, msg_id): 

2121 util.SMlog("Could not coalesce due to a lack of space " 

2122 f"in SR {self.uuid}") 

2123 msg_body = ("Unable to perform data coalesce due to a lack " 

2124 f"of space in SR {self.uuid}") 

2125 msg_id = xapi_session.xenapi.message.create( 

2126 'SM_GC_NO_SPACE', 

2127 3, 

2128 "SR", 

2129 self.uuid, 

2130 msg_body) 

2131 xapi_session.xenapi.SR.remove_from_sm_config( 

2132 self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2133 xapi_session.xenapi.SR.add_to_sm_config( 

2134 self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id) 

2135 

2136 for candidate in self.no_space_candidates.values(): 

2137 candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id) 

2138 elif msg_id is not None: 

2139 # Everything was coalescable, remove the message 

2140 xapi_session.xenapi.SR.remove_from_sm_config(self.xapi.srRef, VDI.DB_GC_NO_SPACE) 

2141 xapi_session.xenapi.message.destroy(msg_id) 

2142 

2143 def clear_no_space_msg(self, vdi): 

2144 msg_id = None 

2145 try: 

2146 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2147 except XenAPI.Failure: 

2148 pass 

2149 

2150 self.no_space_candidates.pop(vdi.uuid, None) 

2151 if msg_id is not None:

2152 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2153 

2154 

2155 def wait_for_plug(self): 

2156 for _ in range(1, 10): 

2157 time.sleep(2) 

2158 if self.xapi.isPluggedHere(): 

2159 return True 

2160 return False 

2161 

2162 def gcEnabled(self, refresh=True): 

2163 if refresh: 

2164 self.xapi.srRecord = \ 

2165 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2166 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2167 Util.log("GC is disabled for this SR, abort") 

2168 return False 

2169 return True 

2170 

2171 def scan(self, force=False) -> None: 

2172 """Scan the SR and load VDI info for each VDI. If called repeatedly, 

2173 update VDI objects if they already exist""" 

2174 pass 

2175 

2176 def scanLocked(self, force=False): 

2177 self.lock() 

2178 try: 

2179 self.scan(force) 

2180 finally: 

2181 self.unlock() 

2182 

2183 def getVDI(self, uuid): 

2184 return self.vdis.get(uuid) 

2185 

2186 def hasWork(self): 

2187 if len(self.findGarbage()) > 0: 

2188 return True 

2189 if self.findCoalesceable(): 

2190 return True 

2191 if self.findLeafCoalesceable(): 

2192 return True 

2193 if self.needUpdateBlockInfo(): 

2194 return True 

2195 return False 

2196 

2197 def findCoalesceable(self): 

2198 """Find a coalesceable VDI. Return a vdi that should be coalesced 

2199 (choosing one among all coalesceable candidates according to some 

2200 criteria) or None if there is no VDI that could be coalesced""" 

2201 

2202 candidates = [] 

2203 

2204 srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE) 

2205 if srSwitch == "false": 

2206 Util.log("Coalesce disabled for this SR") 

2207 return candidates 

2208 

2209 # finish any VDI for which a relink journal entry exists first 

2210 journals = self.journaler.getAll(VDI.JRN_RELINK) 

2211 for uuid in journals: 

2212 vdi = self.getVDI(uuid) 

2213 if vdi and vdi not in self._failedCoalesceTargets: 

2214 return vdi 

2215 

2216 for vdi in self.vdis.values(): 

2217 if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets: 

2218 candidates.append(vdi) 

2219 Util.log("%s is coalescable" % vdi.uuid) 

2220 

2221 self.xapi.update_task_progress("coalescable", len(candidates)) 

2222 

2223 # pick one in the tallest tree 

2224 treeHeight = dict() 

2225 for c in candidates: 

2226 height = c.getTreeRoot().getTreeHeight() 

2227 if treeHeight.get(height): 

2228 treeHeight[height].append(c) 

2229 else: 

2230 treeHeight[height] = [c] 

2231 

2232 freeSpace = self.getFreeSpace() 

2233 heights = list(treeHeight.keys()) 

2234 heights.sort(reverse=True) 

2235 for h in heights: 

2236 for c in treeHeight[h]: 

2237 spaceNeeded = c._calcExtraSpaceForCoalescing() 

2238 if spaceNeeded <= freeSpace: 

2239 Util.log("Coalesce candidate: %s (tree height %d)" % (c, h)) 

2240 self.clear_no_space_msg(c) 

2241 return c 

2242 else: 

2243 self.no_space_candidates[c.uuid] = c 

2244 Util.log("No space to coalesce %s (free space: %d)" % \ 

2245 (c, freeSpace)) 

2246 return None 

2247 

2248 def getSwitch(self, key): 

2249 return self.xapi.srRecord["other_config"].get(key) 

2250 

2251 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2252 srSwitch = self.getSwitch(switch) 

2253 ret = False 

2254 if srSwitch: 

2255 ret = srSwitch == condition 

2256 

2257 if ret: 

2258 Util.log(fail_msg) 

2259 

2260 return ret 

2261 

2262 def leafCoalesceForbidden(self): 

2263 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2264 "false", 

2265 "Coalesce disabled for this SR") or 

2266 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2267 VDI.LEAFCLSC_DISABLED, 

2268 "Leaf-coalesce disabled for this SR")) 

2269 

2270 def findLeafCoalesceable(self): 

2271 """Find leaf-coalesceable VDIs in each COW tree""" 

2272 

2273 candidates = [] 

2274 if self.leafCoalesceForbidden(): 

2275 return candidates 

2276 

2277 self.gatherLeafCoalesceable(candidates) 

2278 

2279 self.xapi.update_task_progress("coalescable", len(candidates)) 

2280 

2281 freeSpace = self.getFreeSpace() 

2282 for candidate in candidates: 

2283 # check the space constraints to see if leaf-coalesce is actually 

2284 # feasible for this candidate 
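# Snapshot-coalesce needs room for an extra snapshot of the leaf; if that
# does not fit, fall back to the live leaf-coalesce estimate (which can
# deflate the leaf) provided the VDI is small enough to coalesce live at
# the measured storage speed.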

2285 spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing() 

2286 spaceNeededLive = spaceNeeded 

2287 if spaceNeeded > freeSpace: 

2288 spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing() 

2289 if candidate.canLiveCoalesce(self.getStorageSpeed()): 

2290 spaceNeeded = spaceNeededLive 

2291 

2292 if spaceNeeded <= freeSpace: 

2293 Util.log("Leaf-coalesce candidate: %s" % candidate) 

2294 self.clear_no_space_msg(candidate) 

2295 return candidate 

2296 else: 

2297 Util.log("No space to leaf-coalesce %s (free space: %d)" % \ 

2298 (candidate, freeSpace)) 

2299 if spaceNeededLive <= freeSpace: 

2300 Util.log("...but enough space if skip snap-coalesce") 

2301 candidate.setConfig(VDI.DB_LEAFCLSC, 

2302 VDI.LEAFCLSC_OFFLINE) 

2303 self.no_space_candidates[candidate.uuid] = candidate 

2304 

2305 return None 

2306 

2307 def gatherLeafCoalesceable(self, candidates): 

2308 for vdi in self.vdis.values(): 

2309 if not vdi.isLeafCoalesceable(): 

2310 continue 

2311 if vdi in self._failedCoalesceTargets: 

2312 continue 

2313 if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET: 

2314 Util.log("Skipping reset-on-boot %s" % vdi) 

2315 continue 

2316 if vdi.getConfig(vdi.DB_ALLOW_CACHING): 

2317 Util.log("Skipping allow_caching=true %s" % vdi) 

2318 continue 

2319 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED: 

2320 Util.log("Leaf-coalesce disabled for %s" % vdi) 

2321 continue 

2322 if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or 

2323 vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE): 

2324 continue 

2325 candidates.append(vdi) 

2326 

2327 def coalesce(self, vdi, dryRun=False): 

2328 """Coalesce vdi onto parent""" 

2329 Util.log("Coalescing %s -> %s" % (vdi, vdi.parent)) 

2330 if dryRun:

2331 return 

2332 

2333 try: 

2334 self._coalesce(vdi) 

2335 except util.SMException as e: 

2336 if isinstance(e, AbortException):

2337 self.cleanup() 

2338 raise 

2339 else: 

2340 self._failedCoalesceTargets.append(vdi) 

2341 Util.logException("coalesce") 

2342 Util.log("Coalesce failed, skipping") 

2343 self.cleanup() 

2344 

2345 def coalesceLeaf(self, vdi, dryRun=False): 

2346 """Leaf-coalesce vdi onto parent""" 

2347 Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent)) 

2348 if dryRun: 

2349 return 

2350 

2351 try: 

2352 uuid = vdi.uuid 

2353 try: 

2354 # "vdi" object will no longer be valid after this call 

2355 self._coalesceLeaf(vdi) 

2356 finally: 

2357 vdi = self.getVDI(uuid) 

2358 if vdi: 

2359 vdi.delConfig(vdi.DB_LEAFCLSC) 

2360 except AbortException: 

2361 self.cleanup() 

2362 raise 

2363 except (util.SMException, XenAPI.Failure) as e: 

2364 self._failedCoalesceTargets.append(vdi) 

2365 Util.logException("leaf-coalesce") 

2366 Util.log("Leaf-coalesce failed on %s, skipping" % vdi) 

2367 self.cleanup() 

2368 

2369 def garbageCollect(self, dryRun=False): 

2370 vdiList = self.findGarbage() 

2371 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2372 for vdi in vdiList: 

2373 Util.log(" %s" % vdi) 

2374 if not dryRun: 

2375 self.deleteVDIs(vdiList) 

2376 self.cleanupJournals(dryRun) 

2377 

2378 def findGarbage(self): 

2379 vdiList = [] 

2380 for vdi in self.vdiTrees: 

2381 vdiList.extend(vdi.getAllPrunable()) 

2382 return vdiList 

2383 

2384 def deleteVDIs(self, vdiList) -> None: 

2385 for vdi in vdiList: 

2386 if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT): 

2387 raise AbortException("Aborting due to signal") 

2388 Util.log("Deleting unlinked VDI %s" % vdi) 

2389 self.deleteVDI(vdi) 

2390 

2391 def deleteVDI(self, vdi) -> None: 

2392 assert(len(vdi.children) == 0) 

2393 del self.vdis[vdi.uuid] 

2394 if vdi.parent:

2395 vdi.parent.children.remove(vdi) 

2396 if vdi in self.vdiTrees:

2397 self.vdiTrees.remove(vdi) 

2398 vdi.delete() 

2399 

2400 def forgetVDI(self, vdiUuid) -> None: 

2401 self.xapi.forgetVDI(self.uuid, vdiUuid) 

2402 

2403 def pauseVDIs(self, vdiList) -> None: 

2404 paused = [] 

2405 failed = False 

2406 for vdi in vdiList: 

2407 try: 

2408 vdi.pause() 

2409 paused.append(vdi) 

2410 except: 

2411 Util.logException("pauseVDIs") 

2412 failed = True 

2413 break 

2414 

2415 if failed: 

2416 self.unpauseVDIs(paused) 

2417 raise util.SMException("Failed to pause VDIs") 

2418 

2419 def unpauseVDIs(self, vdiList): 

2420 failed = False 

2421 for vdi in vdiList: 

2422 try: 

2423 vdi.unpause() 

2424 except: 

2425 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2426 failed = True 

2427 if failed: 

2428 raise util.SMException("Failed to unpause VDIs") 

2429 

2430 def getFreeSpace(self) -> int: 

2431 return 0 

2432 

2433 def cleanup(self): 

2434 Util.log("In cleanup") 

2435 return 

2436 

2437 @override 

2438 def __str__(self) -> str: 

2439 if self.name: 

2440 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2441 else: 

2442 ret = "%s" % self.uuid 

2443 return ret 

2444 

2445 def lock(self): 

2446 """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort 

2447 signal to avoid deadlocking (trying to acquire the SR lock while the 

2448 lock is held by a process that is trying to abort us)""" 

2449 if not self._srLock: 

2450 return 

2451 

2452 if self._locked == 0: 

2453 abortFlag = IPCFlag(self.uuid) 

2454 for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK): 

2455 if self._srLock.acquireNoblock(): 

2456 self._locked += 1 

2457 return 

2458 if abortFlag.test(FLAG_TYPE_ABORT): 

2459 raise AbortException("Abort requested") 

2460 time.sleep(SR.LOCK_RETRY_INTERVAL) 

2461 raise util.SMException("Unable to acquire the SR lock") 

2462 

2463 self._locked += 1 

2464 

2465 def unlock(self): 

2466 if not self._srLock:

2467 return 

2468 assert(self._locked > 0) 

2469 self._locked -= 1 

2470 if self._locked == 0: 

2471 self._srLock.release() 

2472 

2473 def needUpdateBlockInfo(self) -> bool: 

2474 for vdi in self.vdis.values(): 

2475 if vdi.scanError or len(vdi.children) == 0: 

2476 continue 

2477 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2478 return True 

2479 return False 

2480 

2481 def updateBlockInfo(self) -> None: 

2482 for vdi in self.vdis.values(): 

2483 if vdi.scanError or len(vdi.children) == 0: 

2484 continue 

2485 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

2486 vdi.updateBlockInfo() 

2487 

2488 def cleanupCoalesceJournals(self): 

2489 """Remove stale coalesce VDI indicators""" 

2490 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2491 for uuid, jval in entries.items(): 

2492 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2493 

2494 def cleanupJournals(self, dryRun=False): 

2495 """delete journal entries for non-existing VDIs""" 

2496 for t in [LVMVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]: 

2497 entries = self.journaler.getAll(t) 

2498 for uuid, jval in entries.items(): 

2499 if self.getVDI(uuid): 

2500 continue 

2501 if t == SR.JRN_CLONE: 

2502 baseUuid, clonUuid = jval.split("_") 

2503 if self.getVDI(baseUuid): 

2504 continue 

2505 Util.log(" Deleting stale '%s' journal entry for %s " 

2506 "(%s)" % (t, uuid, jval)) 

2507 if not dryRun: 

2508 self.journaler.remove(t, uuid) 

2509 

2510 def cleanupCache(self, maxAge=-1) -> int: 

2511 return 0 

2512 

2513 def _hasLeavesAttachedOn(self, vdi: VDI): 

2514 leaves = vdi.getAllLeaves() 

2515 leaves_vdi = [leaf.uuid for leaf in leaves] 

2516 return util.get_hosts_attached_on(self.xapi.session, leaves_vdi) 

2517 
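# Per-VDI "gc_running" marker files, kept under NON_PERSISTENT_DIR, flag that
# a coalesce is in progress for that VDI; they are created before the COW
# coalesce step and removed once it finishes (or fails).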

2518 def _gc_running_file(self, vdi: VDI): 

2519 run_file = "gc_running_{}".format(vdi.uuid) 

2520 return os.path.join(NON_PERSISTENT_DIR, str(self.uuid), run_file) 

2521 

2522 def _create_running_file(self, vdi: VDI): 

2523 with open(self._gc_running_file(vdi), "w") as f: 

2524 f.write("1") 

2525 

2526 def _delete_running_file(self, vdi: VDI): 

2527 os.unlink(self._gc_running_file(vdi)) 

2528 

2529 def _coalesce(self, vdi: VDI): 

2530 list_not_to_relink = None 

2531 if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):

2532 # this means we had done the actual coalescing already and just 

2533 # need to finish relinking and/or refreshing the children 

2534 Util.log("==> Coalesce apparently already done: skipping") 

2535 

2536 # The parent volume must be active for the parent change to occur. 

2537 # The parent volume may become inactive if the host is rebooted. 

2538 vdi._ensureParentActiveForRelink() 

2539 else: 

2540 # JRN_COALESCE is used to check which VDI is being coalesced in 

2541 # order to decide whether to abort the coalesce. We remove the 

2542 # journal as soon as the COW coalesce step is done, because we 

2543 # don't expect the rest of the process to take long 

2544 

2545 if os.path.exists(self._gc_running_file(vdi)):

2546 util.SMlog("gc_running already exist for {}. Ignoring...".format(self.uuid)) 

2547 

2548 self._create_running_file(vdi) 

2549 

2550 self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1") 

2551 host_refs = self._hasLeavesAttachedOn(vdi) 

2552 #TODO: this check of multiple host_refs should be done earlier in `is_coalesceable` to avoid stopping this late every time 

2553 if len(host_refs) > 1:

2554 Util.log("Not coalesceable, chain activated more than once") 

2555 raise Exception("Not coalesceable, chain activated more than once") #TODO: Use correct error 

2556 

2557 try: 

2558 if host_refs and vdi.cowutil.isCoalesceableOnRemote():

2559 #Leaf opened on another host, we need to call online coalesce 

2560 Util.log("Remote coalesce for {}".format(vdi.path)) 

2561 vdi._doCoalesceOnHost(list(host_refs)[0]) 

2562 # If we use a host OpaqueRef to do an online coalesce, this VDI does not need to be relinked since tapdisk already did it.

2563 # When coalescing up the chain we shouldn't need to relink at all; only the children whose direct parent was the VDI being coalesced need relinking.

2564 for child in vdi.children: 

2565 real_parent_uuid = child.extractUuid(child.getParent()) 

2566 if real_parent_uuid == vdi.parent.uuid: 

2567 child._update_vhd_parent(real_parent_uuid) # We update the sm-config:vhd-parent value for this VDI since it has already been relinked 

2568 list_not_to_relink = [leaf.uuid for leaf in child.getAllLeaves()] 

2569 else: 

2570 Util.log("Offline coalesce for {}".format(vdi.path)) 

2571 vdi._doCoalesce() 

2572 except Exception as e: 

2573 Util.log("EXCEPTION while coalescing: {}".format(e)) 

2574 self._delete_running_file(vdi) 

2575 raise 

2576 

2577 self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid) 

2578 self._delete_running_file(vdi) 

2579 

2580 util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid) 

2581 

2582 # we now need to relink the children: lock the SR to prevent ops 

2583 # like SM.clone from manipulating the VDIs we'll be relinking and 

2584 # rescan the SR first in case the children changed since the last 

2585 # scan 

2586 self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1") 

2587 

2588 self.lock() 

2589 try: 

2590 vdi.parent._tagChildrenForRelink(list_not_to_relink) 

2591 self.scan() 

2592 vdi._relinkSkip() 

2593 finally: 

2594 self.unlock() 

2595 # Reload the children to leave things consistent 

2596 vdi.parent._reloadChildren(vdi) 

2597 self.journaler.remove(vdi.JRN_RELINK, vdi.uuid) 

2598 

2599 self.deleteVDI(vdi) 

2600 

2601 class CoalesceTracker: 

2602 GRACE_ITERATIONS = 2 

2603 MAX_ITERATIONS_NO_PROGRESS = 3 

2604 MAX_ITERATIONS = 20 

2605 MAX_INCREASE_FROM_MINIMUM = 1.2 

2606 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2607 " --> Final size {finSize}" 

2608 

2609 def __init__(self, sr): 

2610 self.itsNoProgress = 0 

2611 self.its = 0 

2612 self.minSize = float("inf") 

2613 self._history = [] 

2614 self.reason = "" 

2615 self.startSize = None 

2616 self.finishSize = None 

2617 self.sr = sr 

2618 self.grace_remaining = self.GRACE_ITERATIONS 

2619 

2620 @property 

2621 def history(self): 

2622 return [x['msg'] for x in self._history] 

2623 

2624 def moving_average(self): 

2625 """ 

2626 Calculate the moving average of the coalesced sizes recorded so far (requires at least three data points)

2627 """ 

2628 assert len(self._history) >= 3 

2629 

2630 mv_average = sum([x['finalsize'] for x in self._history]) / len(self._history) 

2631 util.SMlog(f'Calculated moving average as {mv_average}') 

2632 return mv_average 

2633 
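# Decide whether to abort a leaf-coalesce loop. The first three iterations
# never abort; an iteration counts as progress when the size did not grow or
# stayed below the moving average. The coalesce is abandoned after
# MAX_ITERATIONS in total, after MAX_ITERATIONS_NO_PROGRESS unproductive
# iterations, or after GRACE_ITERATIONS iterations in which the size exceeds
# MAX_INCREASE_FROM_MINIMUM times the smallest size achieved.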

2634 def abortCoalesce(self, prevSize, curSize): 

2635 self.its += 1 

2636 self._history.append( 

2637 { 

2638 'finalsize': curSize, 

2639 'msg': self.HISTORY_STRING.format(its=self.its, 

2640 initSize=prevSize, 

2641 finSize=curSize) 

2642 } 

2643 ) 

2644 

2645 self.finishSize = curSize 

2646 

2647 if self.startSize is None: 

2648 self.startSize = prevSize 

2649 

2650 if curSize < self.minSize: 

2651 self.minSize = curSize 

2652 

2653 if prevSize < self.minSize: 

2654 self.minSize = prevSize 

2655 

2656 if self.its < 4: 

2657 # Perform at least three iterations 

2658 return False 

2659 

2660 if prevSize >= curSize or curSize < self.moving_average(): 

2661 # We made progress 

2662 return False 

2663 else: 

2664 self.itsNoProgress += 1 

2665 Util.log("No progress, attempt:" 

2666 " {attempt}".format(attempt=self.itsNoProgress)) 

2667 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2668 

2669 if self.its > self.MAX_ITERATIONS: 

2670 max = self.MAX_ITERATIONS 

2671 self.reason = \ 

2672 "Max iterations ({max}) exceeded".format(max=max) 

2673 return True 

2674 

2675 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2676 max = self.MAX_ITERATIONS_NO_PROGRESS 

2677 self.reason = \ 

2678 "No progress made for {max} iterations".format(max=max) 

2679 return True 

2680 

2681 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2682 if curSize > maxSizeFromMin: 

2683 self.grace_remaining -= 1 

2684 if self.grace_remaining == 0: 

2685 self.reason = "Unexpected bump in size," \ 

2686 " compared to minimum achieved" 

2687 

2688 return True 

2689 

2690 return False 

2691 

2692 def printSizes(self): 

2693 Util.log("Starting size was {size}" 

2694 .format(size=self.startSize)) 

2695 Util.log("Final size was {size}" 

2696 .format(size=self.finishSize)) 

2697 Util.log("Minimum size achieved was {size}" 

2698 .format(size=self.minSize)) 

2699 

2700 def printReasoning(self): 

2701 Util.log("Aborted coalesce") 

2702 for hist in self.history: 

2703 Util.log(hist) 

2704 Util.log(self.reason) 

2705 self.printSizes() 

2706 

2707 def printSummary(self): 

2708 if self.its == 0: 

2709 return 

2710 

2711 if self.reason:

2712 Util.log("Aborted coalesce") 

2713 Util.log(self.reason) 

2714 else: 

2715 Util.log("Coalesce summary") 

2716 

2717 Util.log(f"Performed {self.its} iterations") 

2718 self.printSizes() 

2719 

2720 
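# Leaf-coalesce driver: repeatedly single-snapshot the leaf and coalesce the
# temporary snapshot into the parent so that the leaf's unique data keeps
# shrinking; once the leaf is small enough to coalesce live (given the
# measured storage speed), finish with a paused, live leaf-coalesce.
# CoalesceTracker aborts the loop if it stops making progress.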

2721 def _coalesceLeaf(self, vdi): 

2722 """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot 

2723 complete due to external changes, namely vdi_delete and vdi_snapshot 

2724 that alter leaf-coalescibility of vdi""" 

2725 tracker = self.CoalesceTracker(self) 

2726 while not vdi.canLiveCoalesce(self.getStorageSpeed()): 

2727 prevSizePhys = vdi.getSizePhys() 

2728 if not self._snapshotCoalesce(vdi):

2729 return False 

2730 if tracker.abortCoalesce(prevSizePhys, vdi.getSizePhys()): 

2731 tracker.printReasoning() 

2732 raise util.SMException("VDI {uuid} could not be coalesced" 

2733 .format(uuid=vdi.uuid)) 

2734 tracker.printSummary() 

2735 return self._liveLeafCoalesce(vdi) 

2736 

2737 def calcStorageSpeed(self, startTime, endTime, coalescedSize): 

2738 speed = None 

2739 total_time = endTime - startTime 

2740 if total_time > 0: 

2741 speed = float(coalescedSize) / float(total_time) 

2742 return speed 

2743 
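# Coalesce throughput is persisted per SR in SPEED_LOG_ROOT: each completed
# coalesce appends its speed figure (size coalesced over elapsed time) and
# only the last N_RUNNING_AVERAGE entries are kept, so getStorageSpeed()
# returns a simple running average used to judge whether a leaf can be
# coalesced live.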

2744 def writeSpeedToFile(self, speed): 

2745 content = [] 

2746 speedFile = None 

2747 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2748 self.lock() 

2749 try: 

2750 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2751 lines = "" 

2752 if not os.path.isfile(path): 

2753 lines = str(speed) + "\n" 

2754 else: 

2755 speedFile = open(path, "r+") 

2756 content = speedFile.readlines() 

2757 content.append(str(speed) + "\n") 

2758 if len(content) > N_RUNNING_AVERAGE: 

2759 del content[0] 

2760 lines = "".join(content) 

2761 

2762 util.atomicFileWrite(path, VAR_RUN, lines) 

2763 finally: 

2764 if speedFile is not None: 

2765 speedFile.close() 

2766 Util.log("Closing file: {myfile}".format(myfile=path)) 

2767 self.unlock() 

2768 

2769 def recordStorageSpeed(self, startTime, endTime, coalescedSize): 

2770 speed = self.calcStorageSpeed(startTime, endTime, coalescedSize) 

2771 if speed is None: 

2772 return 

2773 

2774 self.writeSpeedToFile(speed) 

2775 

2776 def getStorageSpeed(self): 

2777 speedFile = None 

2778 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2779 self.lock() 

2780 try: 

2781 speed = None 

2782 if os.path.isfile(path): 

2783 speedFile = open(path) 

2784 content = speedFile.readlines() 

2785 try: 

2786 content = [float(i) for i in content] 

2787 except ValueError: 

2788 Util.log("Something bad in the speed log:{log}". 

2789 format(log=speedFile.readlines())) 

2790 return speed 

2791 

2792 if len(content): 

2793 speed = sum(content) / float(len(content)) 

2794 if speed <= 0:

2795 # Defensive, should be impossible. 

2796 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2797 format(speed=speed, uuid=self.uuid)) 

2798 speed = None 

2799 else: 

2800 Util.log("Speed file empty for SR: {uuid}". 

2801 format(uuid=self.uuid)) 

2802 else: 

2803 Util.log("Speed log missing for SR: {uuid}". 

2804 format(uuid=self.uuid)) 

2805 return speed 

2806 finally: 

2807 if speedFile is not None:

2808 speedFile.close() 

2809 self.unlock() 

2810 

2811 def _snapshotCoalesce(self, vdi): 

2812 # Note that because we are not holding any locks here, concurrent SM 

2813 # operations may change this tree under our feet. In particular, vdi 

2814 # can be deleted, or it can be snapshotted. 

2815 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2816 Util.log("Single-snapshotting %s" % vdi) 

2817 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2818 try: 

2819 ret = self.xapi.singleSnapshotVDI(vdi) 

2820 Util.log("Single-snapshot returned: %s" % ret) 

2821 except XenAPI.Failure as e: 

2822 if util.isInvalidVDI(e): 

2823 Util.log("The VDI appears to have been concurrently deleted") 

2824 return False 

2825 raise 

2826 self.scanLocked() 

2827 tempSnap = vdi.parent 

2828 if not tempSnap.isCoalesceable(): 

2829 Util.log("The VDI appears to have been concurrently snapshotted") 

2830 return False 

2831 Util.log("Coalescing parent %s" % tempSnap) 

2832 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2833 sizePhys = vdi.getSizePhys() 

2834 self._coalesce(tempSnap) 

2835 if not vdi.isLeafCoalesceable(): 

2836 Util.log("The VDI tree appears to have been altered since") 

2837 return False 

2838 return True 

2839 

2840 def _liveLeafCoalesce(self, vdi: VDI) -> bool: 

2841 util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid) 

2842 self.lock() 

2843 try: 

2844 self.scan() 

2845 if not self.getVDI(vdi.uuid): 

2846 Util.log("The VDI appears to have been deleted meanwhile") 

2847 return False 

2848 if not vdi.isLeafCoalesceable(): 

2849 Util.log("The VDI is no longer leaf-coalesceable") 

2850 return False 

2851 

2852 uuid = vdi.uuid 

2853 vdi.pause(failfast=True) 

2854 try: 

2855 try: 

2856 # "vdi" object will no longer be valid after this call 

2857 self._create_running_file(vdi) 

2858 self._doCoalesceLeaf(vdi) 

2859 except: 

2860 Util.logException("_doCoalesceLeaf") 

2861 self._handleInterruptedCoalesceLeaf() 

2862 raise 

2863 finally: 

2864 vdi = self.getVDI(uuid) 

2865 if vdi: 

2866 vdi.ensureUnpaused() 

2867 self._delete_running_file(vdi) 

2868 vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid) 

2869 if vdiOld: 

2870 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2871 self.deleteVDI(vdiOld) 

2872 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2873 finally: 

2874 self.cleanup() 

2875 self.unlock() 

2876 self.logFilter.logState() 

2877 return True 

2878 

2879 def _doCoalesceLeaf(self, vdi: VDI): 

2880 """Actual coalescing of a leaf VDI onto parent. Must be called in an 

2881 offline/atomic context""" 

2882 self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid) 

2883 self._prepareCoalesceLeaf(vdi) 

2884 vdi.parent._setHidden(False) 

2885 vdi.parent._increaseSizeVirt(vdi.sizeVirt, False) 

2886 vdi.validate(True) 

2887 vdi.parent.validate(True) 

2888 util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid) 

2889 timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT 

2890 if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:

2891 Util.log("Leaf-coalesce forced, will not use timeout") 

2892 timeout = 0 

2893 vdi._coalesceCowImage(timeout) 

2894 util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid) 

2895 vdi.parent.validate(True) 

2896 #vdi._verifyContents(timeout / 2) 

2897 

2898 # rename 

2899 vdiUuid = vdi.uuid 

2900 oldName = vdi.fileName 

2901 origParentUuid = vdi.parent.uuid 

2902 vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid) 

2903 util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid) 

2904 vdi.parent.rename(vdiUuid) 

2905 util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid) 

2906 self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid) 

2907 

2908 # Note that "vdi.parent" is now the single remaining leaf and "vdi" is 

2909 # garbage 

2910 

2911 # update the VDI record 

2912 if vdi.parent.vdi_type == VdiType.RAW:

2913 vdi.parent.setConfig(VDI.DB_VDI_TYPE, VdiType.RAW) 

2914 vdi.parent.delConfig(VDI.DB_VDI_BLOCKS) 

2915 util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid) 

2916 

2917 self._updateNode(vdi) 

2918 

2919 # delete the obsolete leaf & inflate the parent (in that order, to 

2920 # minimize free space requirements) 

2921 parent = vdi.parent 

2922 vdi._setHidden(True) 

2923 vdi.parent.children = [] 

2924 vdi.parent = None 

2925 

2926 if parent.parent is None: 

2927 parent.delConfig(VDI.DB_VDI_PARENT) 

2928 

2929 extraSpace = self._calcExtraSpaceNeeded(vdi, parent) 

2930 freeSpace = self.getFreeSpace() 

2931 if freeSpace < extraSpace:

2932 # don't delete unless we need the space: deletion is time-consuming 

2933 # because it requires contacting the slaves, and we're paused here 

2934 util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid) 

2935 self.deleteVDI(vdi) 

2936 util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid) 

2937 

2938 util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid) 

2939 self.journaler.remove(VDI.JRN_LEAF, vdiUuid) 

2940 

2941 self.forgetVDI(origParentUuid) 

2942 self._finishCoalesceLeaf(parent) 

2943 self._updateSlavesOnResize(parent) 

2944 

2945 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2946 assert(VdiType.isCowImage(parent.vdi_type)) 

2947 extra = child.getSizePhys() - parent.getSizePhys() 

2948 if extra < 0:

2949 extra = 0 

2950 return extra 

2951 

2952 def _prepareCoalesceLeaf(self, vdi) -> None: 

2953 pass 

2954 

2955 def _updateNode(self, vdi) -> None: 

2956 pass 

2957 

2958 def _finishCoalesceLeaf(self, parent) -> None: 

2959 pass 

2960 

2961 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2962 pass 

2963 

2964 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2965 pass 

2966 

2967 def _updateSlavesOnResize(self, vdi) -> None: 

2968 pass 

2969 

2970 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2971 for uuid in list(self.vdis.keys()): 

2972 if not uuid in uuidsPresent: 

2973 Util.log("VDI %s disappeared since last scan" % \ 

2974 self.vdis[uuid]) 

2975 del self.vdis[uuid] 

2976 

2977 def _handleInterruptedCoalesceLeaf(self) -> None: 

2978 """An interrupted leaf-coalesce operation may leave the COW tree in an 

2979 inconsistent state. If the old-leaf VDI is still present, we revert the 

2980 operation (in case the original error is persistent); otherwise we must 

2981 finish the operation""" 

2982 pass 

2983 

2984 def _buildTree(self, force): 

2985 self.vdiTrees = [] 

2986 for vdi in self.vdis.values(): 

2987 if vdi.parentUuid: 

2988 parent = self.getVDI(vdi.parentUuid) 

2989 if not parent: 

2990 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2991 self.vdiTrees.append(vdi) 

2992 continue 

2993 if force: 

2994 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2995 (vdi.parentUuid, vdi.uuid)) 

2996 self.vdiTrees.append(vdi) 

2997 continue 

2998 else: 

2999 raise util.SMException("Parent VDI %s of %s not " \ 

3000 "found" % (vdi.parentUuid, vdi.uuid)) 

3001 vdi.parent = parent 

3002 parent.children.append(vdi) 

3003 else: 

3004 self.vdiTrees.append(vdi) 

3005 

3006 

3007class FileSR(SR): 

3008 TYPE = SR.TYPE_FILE 

3009 CACHE_FILE_EXT = ".vhdcache" 

3010 # cache cleanup actions 

3011 CACHE_ACTION_KEEP = 0 

3012 CACHE_ACTION_REMOVE = 1 

3013 CACHE_ACTION_REMOVE_IF_INACTIVE = 2 

3014 

3015 def __init__(self, uuid, xapi, createLock, force): 

3016 SR.__init__(self, uuid, xapi, createLock, force) 

3017 self.path = "/var/run/sr-mount/%s" % self.uuid 

3018 self.journaler = fjournaler.Journaler(self.path) 

3019 

3020 @override 

3021 def scan(self, force=False) -> None: 

3022 if not util.pathexists(self.path): 

3023 raise util.SMException("directory %s not found!" % self.uuid) 

3024 

3025 uuidsPresent: List[str] = [] 

3026 

3027 for vdi_type in VDI_COW_TYPES: 

3028 scan_result = self._scan(vdi_type, force) 

3029 for uuid, image_info in scan_result.items(): 

3030 vdi = self.getVDI(uuid) 

3031 if not vdi: 

3032 self.logFilter.logNewVDI(uuid) 

3033 vdi = FileVDI(self, uuid, vdi_type) 

3034 self.vdis[uuid] = vdi 

3035 vdi.load(image_info) 

3036 uuidsPresent.extend(scan_result.keys()) 

3037 

3038 rawList = [x for x in os.listdir(self.path) if x.endswith(VdiTypeExtension.RAW)] 

3039 for rawName in rawList: 

3040 uuid = FileVDI.extractUuid(rawName) 

3041 uuidsPresent.append(uuid) 

3042 vdi = self.getVDI(uuid) 

3043 if not vdi: 

3044 self.logFilter.logNewVDI(uuid) 

3045 vdi = FileVDI(self, uuid, VdiType.RAW) 

3046 self.vdis[uuid] = vdi 

3047 self._removeStaleVDIs(uuidsPresent) 

3048 self._buildTree(force) 

3049 self.logFilter.logState() 

3050 self._handleInterruptedCoalesceLeaf() 

3051 

3052 @override 

3053 def getFreeSpace(self) -> int: 

3054 return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path) 

3055 

3056 @override 

3057 def deleteVDIs(self, vdiList) -> None: 

3058 rootDeleted = False 

3059 for vdi in vdiList: 

3060 if not vdi.parent: 

3061 rootDeleted = True 

3062 break 

3063 SR.deleteVDIs(self, vdiList) 

3064 if self.xapi.srRecord["type"] == "nfs" and rootDeleted: 

3065 self.xapi.markCacheSRsDirty() 

3066 

3067 @override 

3068 def cleanupCache(self, maxAge=-1) -> int: 

3069 """Clean up IntelliCache cache files. Caches for leaf nodes are 

3070 removed when the leaf node no longer exists or its allow-caching 

3071 attribute is not set. Caches for parent nodes are removed when the 

3072 parent node no longer exists or it hasn't been used in more than 

3073 <maxAge> hours. 

3074 Return number of caches removed. 

3075 """ 

3076 numRemoved = 0 

3077 cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)] 

3078 Util.log("Found %d cache files" % len(cacheFiles)) 

3079 cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge) 

3080 for cacheFile in cacheFiles: 

3081 uuid = cacheFile[:-len(self.CACHE_FILE_EXT)] 

3082 action = self.CACHE_ACTION_KEEP 

3083 rec = self.xapi.getRecordVDI(uuid) 

3084 if not rec: 

3085 Util.log("Cache %s: VDI doesn't exist" % uuid) 

3086 action = self.CACHE_ACTION_REMOVE 

3087 elif rec["managed"] and not rec["allow_caching"]: 

3088 Util.log("Cache %s: caching disabled" % uuid) 

3089 action = self.CACHE_ACTION_REMOVE 

3090 elif not rec["managed"] and maxAge >= 0: 

3091 lastAccess = datetime.datetime.fromtimestamp( \ 

3092 os.path.getatime(os.path.join(self.path, cacheFile))) 

3093 if lastAccess < cutoff: 

3094 Util.log("Cache %s: older than %d hrs" % (uuid, maxAge)) 

3095 action = self.CACHE_ACTION_REMOVE_IF_INACTIVE 

3096 

3097 if action == self.CACHE_ACTION_KEEP: 

3098 Util.log("Keeping cache %s" % uuid) 

3099 continue 

3100 

3101 lockId = uuid 

3102 parentUuid = None 

3103 if rec and rec["managed"]: 

3104 parentUuid = rec["sm_config"].get("vhd-parent") 

3105 if parentUuid: 

3106 lockId = parentUuid 

3107 

3108 cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId) 

3109 cacheLock.acquire() 

3110 try: 

3111 if self._cleanupCache(uuid, action): 

3112 numRemoved += 1 

3113 finally: 

3114 cacheLock.release() 

3115 return numRemoved 

3116 

3117 def _cleanupCache(self, uuid, action): 

3118 assert(action != self.CACHE_ACTION_KEEP) 

3119 rec = self.xapi.getRecordVDI(uuid) 

3120 if rec and rec["allow_caching"]: 

3121 Util.log("Cache %s appears to have become valid" % uuid) 

3122 return False 

3123 

3124 fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT) 

3125 tapdisk = blktap2.Tapdisk.find_by_path(fullPath) 

3126 if tapdisk: 

3127 if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE: 

3128 Util.log("Cache %s still in use" % uuid) 

3129 return False 

3130 Util.log("Shutting down tapdisk for %s" % fullPath) 

3131 tapdisk.shutdown() 

3132 

3133 Util.log("Deleting file %s" % fullPath) 

3134 os.unlink(fullPath) 

3135 return True 

3136 

3137 def _isCacheFileName(self, name): 

3138 return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \ 

3139 name.endswith(self.CACHE_FILE_EXT) 

3140 

3141 def _scan(self, vdi_type, force): 

3142 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3143 error = False 

3144 pattern = os.path.join(self.path, "*%s" % VDI_TYPE_TO_EXTENSION[vdi_type]) 

3145 scan_result = getCowUtil(vdi_type).getAllInfoFromVG(pattern, FileVDI.extractUuid) 

3146 for uuid, vdiInfo in scan_result.items(): 

3147 if vdiInfo.error: 

3148 error = True 

3149 break 

3150 if not error: 

3151 return scan_result 

3152 Util.log("Scan error on attempt %d" % i) 

3153 if force: 

3154 return scan_result 

3155 raise util.SMException("Scan error") 

3156 

3157 @override 

3158 def deleteVDI(self, vdi) -> None: 

3159 self._checkSlaves(vdi) 

3160 SR.deleteVDI(self, vdi) 

3161 

3162 def _checkSlaves(self, vdi): 

3163 onlineHosts = self.xapi.getOnlineHosts() 

3164 abortFlag = IPCFlag(self.uuid) 

3165 for pbdRecord in self.xapi.getAttachedPBDs(): 

3166 hostRef = pbdRecord["host"] 

3167 if hostRef == self.xapi._hostRef: 

3168 continue 

3169 if abortFlag.test(FLAG_TYPE_ABORT): 

3170 raise AbortException("Aborting due to signal") 

3171 try: 

3172 self._checkSlave(hostRef, vdi) 

3173 except XenAPI.Failure: 

3174 if hostRef in onlineHosts: 

3175 raise 

3176 

3177 def _checkSlave(self, hostRef, vdi): 

3178 call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path}) 

3179 Util.log("Checking with slave: %s" % repr(call)) 

3180 _host = self.xapi.session.xenapi.host 

3181 text = _host.call_plugin(*call) 

3182 

3183 @override 

3184 def _handleInterruptedCoalesceLeaf(self) -> None: 

3185 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3186 for uuid, parentUuid in entries.items(): 

3187 fileList = os.listdir(self.path) 

3188 childName = uuid + VdiTypeExtension.VHD 

3189 tmpChildName = self.TMP_RENAME_PREFIX + uuid + VdiTypeExtension.VHD 

3190 parentName1 = parentUuid + VdiTypeExtension.VHD 

3191 parentName2 = parentUuid + VdiTypeExtension.RAW 

3192 parentPresent = (parentName1 in fileList or parentName2 in fileList) 

3193 if parentPresent or tmpChildName in fileList: 

3194 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3195 else: 

3196 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3197 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3198 vdi = self.getVDI(uuid) 

3199 if vdi: 

3200 vdi.ensureUnpaused() 

3201 

3202 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3203 Util.log("*** UNDO LEAF-COALESCE") 

3204 parent = self.getVDI(parentUuid) 

3205 if not parent: 

3206 parent = self.getVDI(childUuid) 

3207 if not parent: 

3208 raise util.SMException("Neither %s nor %s found" % \ 

3209 (parentUuid, childUuid)) 

3210 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3211 parent.rename(parentUuid) 

3212 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3213 

3214 child = self.getVDI(childUuid) 

3215 if not child: 

3216 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3217 if not child: 

3218 raise util.SMException("Neither %s nor %s found" % \ 

3219 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3220 Util.log("Renaming child back to %s" % childUuid) 

3221 child.rename(childUuid) 

3222 Util.log("Updating the VDI record") 

3223 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3224 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3225 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3226 

3227 if child.isHidden(): 

3228 child._setHidden(False) 

3229 if not parent.isHidden(): 

3230 parent._setHidden(True) 

3231 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3232 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3233 Util.log("*** leaf-coalesce undo successful") 

3234 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3235 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3236 

3237 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3238 Util.log("*** FINISH LEAF-COALESCE") 

3239 vdi = self.getVDI(childUuid) 

3240 if not vdi: 

3241 Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting") 

3242 raise util.SMException("VDI %s not found" % childUuid) 

3243 try: 

3244 self.forgetVDI(parentUuid) 

3245 except XenAPI.Failure: 

3246 Util.logException('_finishInterruptedCoalesceLeaf') 

3247 pass 

3248 self._updateSlavesOnResize(vdi) 

3249 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3250 Util.log("*** finished leaf-coalesce successfully") 

3251 

3252 

3253class LVMSR(SR): 

3254 TYPE = SR.TYPE_LVHD 

3255 SUBTYPES = ["lvhdoiscsi", "lvhdohba"] 

3256 

3257 def __init__(self, uuid, xapi, createLock, force): 

3258 SR.__init__(self, uuid, xapi, createLock, force) 

3259 self.vgName = "%s%s" % (VG_PREFIX, self.uuid) 

3260 self.path = os.path.join(VG_LOCATION, self.vgName) 

3261 

3262 sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid) 

3263 other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref) 

3264 lvm_conf = other_conf.get('lvm-conf') if other_conf else None 

3265 self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf) 

3266 

3267 self.lvActivator = LVActivator(self.uuid, self.lvmCache) 

3268 self.journaler = journaler.Journaler(self.lvmCache) 

3269 

3270 @override 

3271 def deleteVDI(self, vdi) -> None: 

3272 if self.lvActivator.get(vdi.uuid, False): 

3273 self.lvActivator.deactivate(vdi.uuid, False) 

3274 self._checkSlaves(vdi) 

3275 SR.deleteVDI(self, vdi) 

3276 

3277 @override 

3278 def forgetVDI(self, vdiUuid) -> None: 

3279 SR.forgetVDI(self, vdiUuid) 

3280 mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME) 

3281 LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid) 

3282 

3283 @override 

3284 def getFreeSpace(self) -> int: 

3285 stats = lvutil._getVGstats(self.vgName) 

3286 return stats['physical_size'] - stats['physical_utilisation'] 

3287 

3288 @override 

3289 def cleanup(self): 

3290 if not self.lvActivator.deactivateAll(): 

3291 Util.log("ERROR deactivating LVs while cleaning up") 

3292 

3293 @override 

3294 def needUpdateBlockInfo(self) -> bool: 

3295 for vdi in self.vdis.values(): 

3296 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3297 continue 

3298 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3299 return True 

3300 return False 

3301 

3302 @override 

3303 def updateBlockInfo(self) -> None: 

3304 numUpdated = 0 

3305 for vdi in self.vdis.values(): 

3306 if vdi.scanError or not VdiType.isCowImage(vdi.vdi_type) or len(vdi.children) == 0: 

3307 continue 

3308 if not vdi.getConfig(vdi.DB_VDI_BLOCKS): 

3309 vdi.updateBlockInfo() 

3310 numUpdated += 1 

3311 if numUpdated: 

3312 # deactivate the LVs sooner rather than later. If we don't do it 

3313 # now, by the time this thread gets to the deactivations, another one 

3314 # might have leaf-coalesced a node and deleted it, making the child 

3315 # inherit the refcount value and preventing the correct decrement 

3316 self.cleanup() 

3317 

3318 @override 

3319 def scan(self, force=False) -> None: 

3320 vdis = self._scan(force) 

3321 for uuid, vdiInfo in vdis.items(): 

3322 vdi = self.getVDI(uuid) 

3323 if not vdi: 

3324 self.logFilter.logNewVDI(uuid) 

3325 vdi = LVMVDI(self, uuid, vdiInfo.vdiType) 

3326 self.vdis[uuid] = vdi 

3327 vdi.load(vdiInfo) 

3328 self._removeStaleVDIs(vdis.keys()) 

3329 self._buildTree(force) 

3330 self.logFilter.logState() 

3331 self._handleInterruptedCoalesceLeaf() 

3332 

3333 def _scan(self, force): 

3334 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3335 error = False 

3336 self.lvmCache.refresh() 

3337 vdis = LvmCowUtil.getVDIInfo(self.lvmCache) 

3338 for uuid, vdiInfo in vdis.items(): 

3339 if vdiInfo.scanError: 

3340 error = True 

3341 break 

3342 if not error: 

3343 return vdis 

3344 Util.log("Scan error, retrying (%d)" % i) 

3345 if force: 

3346 return vdis 

3347 raise util.SMException("Scan error") 

3348 

3349 @override 

3350 def _removeStaleVDIs(self, uuidsPresent) -> None: 

3351 for uuid in list(self.vdis.keys()): 

3352 if uuid not in uuidsPresent: 

3353 Util.log("VDI %s disappeared since last scan" % \ 

3354 self.vdis[uuid]) 

3355 del self.vdis[uuid] 

3356 if self.lvActivator.get(uuid, False): 

3357 self.lvActivator.remove(uuid, False) 

3358 

3359 @override 

3360 def _liveLeafCoalesce(self, vdi) -> bool: 

3361 """If the parent is raw and the child was resized (virt. size), then 

3362 we'll need to resize the parent, which can take a while due to zeroing 

3363 out of the extended portion of the LV. Do it before pausing the child 

3364 to avoid a protracted downtime""" 

3365 if not VdiType.isCowImage(vdi.parent.vdi_type) and vdi.sizeVirt > vdi.parent.sizeVirt: 

3366 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3367 vdi.parent._increaseSizeVirt(vdi.sizeVirt) 

3368 

3369 return SR._liveLeafCoalesce(self, vdi) 

3370 

3371 @override 

3372 def _prepareCoalesceLeaf(self, vdi) -> None: 

3373 vdi._activateChain() 

3374 self.lvmCache.setReadonly(vdi.parent.fileName, False) 

3375 vdi.deflate() 

3376 vdi.inflateParentForCoalesce() 

3377 

3378 @override 

3379 def _updateNode(self, vdi) -> None: 

3380 # fix the refcounts: the remaining node should inherit the binary 

3381 # refcount from the leaf (because if it was online, it should remain 

3382 # refcounted as such), but the normal refcount from the parent (because 

3383 # this node is really the parent node) - minus 1 if it is online (since 

3384 # non-leaf nodes increment their normal counts when they are online and 

3385 # we are now a leaf, storing that 1 in the binary refcount). 

3386 ns = NS_PREFIX_LVM + self.uuid 

3387 cCnt, cBcnt = RefCounter.check(vdi.uuid, ns) 

3388 pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns) 

3389 pCnt = pCnt - cBcnt 

3390 assert(pCnt >= 0) 

3391 RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns) 
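# Worked example (illustrative, not in the original source): if the leaf was
# online, its binary count is cBcnt == 1 and the parent's normal count pCnt
# includes the extra +1 that non-leaf nodes take while online. The surviving
# node is therefore given (pCnt - 1) as its normal count and 1 as its binary
# count, i.e. the "online" reference moves into the binary slot now that the
# node has become a leaf.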

3392 

3393 @override 

3394 def _finishCoalesceLeaf(self, parent) -> None: 

3395 if not parent.isSnapshot() or parent.isAttachedRW(): 

3396 parent.inflateFully() 

3397 else: 

3398 parent.deflate() 

3399 

3400 @override 

3401 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3402 return parent.lvmcowutil.calcVolumeSize(parent.sizeVirt) - parent.sizeLV 

3403 

3404 @override 

3405 def _handleInterruptedCoalesceLeaf(self) -> None: 

3406 entries = self.journaler.getAll(VDI.JRN_LEAF) 

3407 for uuid, parentUuid in entries.items(): 

3408 undo = False 

3409 for prefix in LV_PREFIX.values(): 

3410 parentLV = prefix + parentUuid 

3411 undo = self.lvmCache.checkLV(parentLV) 

3412 if undo: 

3413 break 

3414 

3415 if not undo: 

3416 for prefix in LV_PREFIX.values(): 

3417 tmpChildLV = prefix + uuid 

3418 undo = self.lvmCache.checkLV(tmpChildLV) 

3419 if undo: 

3420 break 

3421 

3422 if undo: 

3423 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3424 else: 

3425 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3426 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3427 vdi = self.getVDI(uuid) 

3428 if vdi: 

3429 vdi.ensureUnpaused() 

3430 

3431 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3432 Util.log("*** UNDO LEAF-COALESCE") 

3433 parent = self.getVDI(parentUuid) 

3434 if not parent: 

3435 parent = self.getVDI(childUuid) 

3436 if not parent: 

3437 raise util.SMException("Neither %s nor %s found" % \ 

3438 (parentUuid, childUuid)) 

3439 Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid)) 

3440 parent.rename(parentUuid) 

3441 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid) 

3442 

3443 child = self.getVDI(childUuid) 

3444 if not child: 

3445 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3446 if not child: 

3447 raise util.SMException("Neither %s nor %s found" % \ 

3448 (childUuid, self.TMP_RENAME_PREFIX + childUuid)) 

3449 Util.log("Renaming child back to %s" % childUuid) 

3450 child.rename(childUuid) 

3451 Util.log("Updating the VDI record") 

3452 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3453 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3454 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid) 

3455 

3456 # refcount (best effort - assume that it had succeeded if the 

3457 # second rename succeeded; if not, this adjustment will be wrong, 

3458 # leading to a non-deactivation of the LV) 

3459 ns = NS_PREFIX_LVM + self.uuid 

3460 cCnt, cBcnt = RefCounter.check(child.uuid, ns) 

3461 pCnt, pBcnt = RefCounter.check(parent.uuid, ns) 

3462 pCnt = pCnt + cBcnt 

3463 RefCounter.set(parent.uuid, pCnt, 0, ns) 

3464 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid) 

3465 

3466 parent.deflate() 

3467 child.inflateFully() 

3468 util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid) 

3469 if child.isHidden(): 

3470 child._setHidden(False) 

3471 if not parent.isHidden(): 

3472 parent._setHidden(True) 

3473 if not parent.lvReadonly: 

3474 self.lvmCache.setReadonly(parent.fileName, True) 

3475 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3476 util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid) 

3477 Util.log("*** leaf-coalesce undo successful") 

3478 if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"): 

3479 child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED) 

3480 

3481 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3482 Util.log("*** FINISH LEAF-COALESCE") 

3483 vdi = self.getVDI(childUuid) 

3484 if not vdi: 

3485 raise util.SMException("VDI %s not found" % childUuid) 

3486 vdi.inflateFully() 

3487 util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid) 

3488 try: 

3489 self.forgetVDI(parentUuid) 

3490 except XenAPI.Failure: 

3491 pass 

3492 self._updateSlavesOnResize(vdi) 

3493 util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid) 

3494 Util.log("*** finished leaf-coalesce successfully") 

3495 

3496 def _checkSlaves(self, vdi): 

3497 """Confirm with all slaves in the pool that 'vdi' is not in use. We 

3498 try to check all slaves, including those that the Agent believes are 

3499 offline, but ignore failures for offline hosts. This is to avoid cases 

3500 where the Agent thinks a host is offline but the host is up.""" 

3501 args = {"vgName": self.vgName, 

3502 "action1": "deactivateNoRefcount", 

3503 "lvName1": vdi.fileName, 

3504 "action2": "cleanupLockAndRefcount", 

3505 "uuid2": vdi.uuid, 

3506 "ns2": NS_PREFIX_LVM + self.uuid} 

3507 onlineHosts = self.xapi.getOnlineHosts() 

3508 abortFlag = IPCFlag(self.uuid) 

3509 for pbdRecord in self.xapi.getAttachedPBDs(): 

3510 hostRef = pbdRecord["host"] 

3511 if hostRef == self.xapi._hostRef: 

3512 continue 

3513 if abortFlag.test(FLAG_TYPE_ABORT): 

3514 raise AbortException("Aborting due to signal") 

3515 Util.log("Checking with slave %s (path %s)" % ( 

3516 self.xapi.getRecordHost(hostRef)['hostname'], vdi.path)) 

3517 try: 

3518 self.xapi.ensureInactive(hostRef, args) 

3519 except XenAPI.Failure: 

3520 if hostRef in onlineHosts: 

3521 raise 

3522 

3523 @override 

3524 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

3525 slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid]) 

3526 if not slaves: 

3527 Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \ 

3528 child) 

3529 return 

3530 

3531 tmpName = LV_PREFIX[child.vdi_type] + self.TMP_RENAME_PREFIX + child.uuid 

3532 args = {"vgName": self.vgName, 

3533 "action1": "deactivateNoRefcount", 

3534 "lvName1": tmpName, 

3535 "action2": "deactivateNoRefcount", 

3536 "lvName2": child.fileName, 

3537 "action3": "refresh", 

3538 "lvName3": child.fileName, 

3539 "action4": "refresh", 

3540 "lvName4": parent.fileName} 

3541 for slave in slaves: 

3542 Util.log("Updating %s, %s, %s on slave %s" % \ 

3543 (tmpName, child.fileName, parent.fileName, 

3544 self.xapi.getRecordHost(slave)['hostname'])) 

3545 text = self.xapi.session.xenapi.host.call_plugin( \ 

3546 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3547 Util.log("call-plugin returned: '%s'" % text) 

3548 

3549 @override 

3550 def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None: 

3551 slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid]) 

3552 if not slaves: 

3553 Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi) 

3554 return 

3555 

3556 args = {"vgName": self.vgName, 

3557 "action1": "deactivateNoRefcount", 

3558 "lvName1": oldNameLV, 

3559 "action2": "refresh", 

3560 "lvName2": vdi.fileName, 

3561 "action3": "cleanupLockAndRefcount", 

3562 "uuid3": origParentUuid, 

3563 "ns3": NS_PREFIX_LVM + self.uuid} 

3564 for slave in slaves: 

3565 Util.log("Updating %s to %s on slave %s" % \ 

3566 (oldNameLV, vdi.fileName, 

3567 self.xapi.getRecordHost(slave)['hostname'])) 

3568 text = self.xapi.session.xenapi.host.call_plugin( \ 

3569 slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args) 

3570 Util.log("call-plugin returned: '%s'" % text) 

3571 

3572 @override 

3573 def _updateSlavesOnResize(self, vdi) -> None: 

3574 uuids = [x.uuid for x in vdi.getAllLeaves()] 

3575 slaves = util.get_slaves_attached_on(self.xapi.session, uuids) 

3576 if not slaves: 

3577 util.SMlog("Update-on-resize: %s not attached on any slave" % vdi) 

3578 return 

3579 LvmCowUtil.refreshVolumeOnSlaves(self.xapi.session, self.uuid, self.vgName, 

3580 vdi.fileName, vdi.uuid, slaves) 

3581 

3582 

3583class LinstorSR(SR): 

3584 TYPE = SR.TYPE_LINSTOR 

3585 

3586 def __init__(self, uuid, xapi, createLock, force): 

3587 if not LINSTOR_AVAILABLE: 

3588 raise util.SMException( 

3589 'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing' 

3590 ) 

3591 

3592 SR.__init__(self, uuid, xapi, createLock, force) 

3593 self.path = LinstorVolumeManager.DEV_ROOT_PATH 

3594 

3595 class LinstorProxy: 

3596 def __init__(self, sr: LinstorSR) -> None: 

3597 self.sr = sr 

3598 

3599 def __getattr__(self, attr: str) -> Any: 

3600 assert self.sr, "Cannot use `LinstorProxy` without valid `LinstorVolumeManager` instance" 

3601 return getattr(self.sr._linstor, attr) 

3602 

3603 self._linstor_proxy = LinstorProxy(self) 

3604 self._reloadLinstor(journaler_only=True) 
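# Note (not in the original source, based on the code above): LinstorProxy
# forwards every attribute access to self.sr._linstor, which is (re)created
# by _reloadLinstor() during scans. Code holding self._linstor_proxy thus
# always talks to the most recently constructed LinstorVolumeManager, and the
# assert fires if no manager has been created yet.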

3605 

3606 @override 

3607 def deleteVDI(self, vdi) -> None: 

3608 self._checkSlaves(vdi) 

3609 SR.deleteVDI(self, vdi) 

3610 

3611 @override 

3612 def getFreeSpace(self) -> int: 

3613 return self._linstor.max_volume_size_allowed 

3614 

3615 @override 

3616 def scan(self, force=False) -> None: 

3617 all_vdi_info = self._scan(force) 

3618 for uuid, vdiInfo in all_vdi_info.items(): 

3619 # When vdiInfo is None, the VDI is RAW. 

3620 vdi = self.getVDI(uuid) 

3621 if not vdi: 

3622 self.logFilter.logNewVDI(uuid) 

3623 vdi = LinstorVDI(self, uuid, vdiInfo.vdiType if vdiInfo else VdiType.RAW) 

3624 self.vdis[uuid] = vdi 

3625 if vdiInfo: 

3626 vdi.load(vdiInfo) 

3627 self._removeStaleVDIs(all_vdi_info.keys()) 

3628 self._buildTree(force) 

3629 self.logFilter.logState() 

3630 self._handleInterruptedCoalesceLeaf() 

3631 

3632 @override 

3633 def pauseVDIs(self, vdiList) -> None: 

3634 self._linstor.ensure_volume_list_is_not_locked( 

3635 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3636 ) 

3637 return super(LinstorSR, self).pauseVDIs(vdiList) 

3638 

3639 def _reloadLinstor(self, journaler_only=False): 

3640 session = self.xapi.session 

3641 host_ref = util.get_this_host_ref(session) 

3642 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3643 

3644 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3645 if pbd is None: 

3646 raise util.SMException('Failed to find PBD') 

3647 

3648 dconf = session.xenapi.PBD.get_device_config(pbd) 

3649 group_name = dconf['group-name'] 

3650 

3651 controller_uri = get_controller_uri() 

3652 self.journaler = LinstorJournaler( 

3653 controller_uri, group_name, logger=util.SMlog 

3654 ) 

3655 

3656 if journaler_only: 

3657 return 

3658 

3659 self._linstor = LinstorVolumeManager( 

3660 controller_uri, 

3661 group_name, 

3662 repair=True, 

3663 logger=util.SMlog 

3664 ) 

3665 

3666 def _scan(self, force): 

3667 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3668 self._reloadLinstor() 

3669 error = False 

3670 try: 

3671 all_vdi_info = self._load_vdi_info() 

3672 for uuid, vdiInfo in all_vdi_info.items(): 

3673 if vdiInfo and vdiInfo.error: 

3674 error = True 

3675 break 

3676 if not error: 

3677 return all_vdi_info 

3678 Util.log('Scan error, retrying ({})'.format(i)) 

3679 except Exception as e: 

3680 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3681 Util.log(traceback.format_exc()) 

3682 

3683 if force: 

3684 return all_vdi_info 

3685 raise util.SMException('Scan error') 

3686 

3687 def _load_vdi_info(self): 

3688 all_volume_info = self._linstor.get_volumes_with_info() 

3689 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3690 

3691 all_vdi_info = {} 

3692 pending_vdis = [] 

3693 

3694 def handle_fail(vdi_uuid, e): 

3695 Util.log(f" [VDI {vdi_uuid}: failed to load VDI info]: {e}") 

3696 info = CowImageInfo(vdi_uuid) 

3697 info.error = 1 

3698 return info 

3699 

3700 for vdi_uuid, volume_info in all_volume_info.items(): 

3701 vdi_type = VdiType.RAW 

3702 try: 

3703 volume_metadata = volumes_metadata[vdi_uuid] 

3704 if not volume_info.name and not list(volume_metadata.items()): 

3705 continue # Ignore it, probably deleted. 

3706 

3707 if vdi_uuid.startswith('DELETED_'): 

3708 # Assume it's really a RAW volume of a failed snap without COW header/footer. 

3709 # We must remove this VDI now without adding it in the VDI list. 

3710 # Otherwise `Relinking` calls and other actions can be launched on it. 

3711 # We don't want that... 

3712 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3713 

3714 self.lock() 

3715 try: 

3716 self._linstor.destroy_volume(vdi_uuid) 

3717 try: 

3718 self.forgetVDI(vdi_uuid) 

3719 except: 

3720 pass 

3721 except Exception as e: 

3722 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3723 finally: 

3724 self.unlock() 

3725 continue 

3726 

3727 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3728 if VdiType.isCowImage(vdi_type): 

3729 pending_vdis.append((vdi_uuid, vdi_type)) 

3730 else: 

3731 all_vdi_info[vdi_uuid] = None 

3732 except Exception as e: 

3733 all_vdi_info[vdi_uuid] = handle_fail(vdi_uuid, e) 

3734 

3735 multi_cowutil = MultiLinstorCowUtil(self._linstor.uri, self._linstor.group_name) 

3736 

3737 def load_info(vdi, multi_cowutil): 

3738 vdi_uuid, vdi_type = vdi 

3739 try: 

3740 vdiInfo = multi_cowutil.get_local_cowutil(vdi_type).get_info(vdi_uuid) 

3741 except Exception as e: 

3742 vdiInfo = handle_fail(vdi_uuid, e) 

3743 vdiInfo.vdiType = vdi_type 

3744 return vdiInfo 

3745 

3746 try: 

3747 for vdiInfo in multi_cowutil.run(load_info, pending_vdis): 

3748 all_vdi_info[vdiInfo.uuid] = vdiInfo 

3749 finally: 

3750 del multi_cowutil 

3751 

3752 return all_vdi_info 

3753 

3754 @override 

3755 def _prepareCoalesceLeaf(self, vdi) -> None: 

3756 vdi._activateChain() 

3757 vdi.deflate() 

3758 vdi._inflateParentForCoalesce() 

3759 

3760 @override 

3761 def _finishCoalesceLeaf(self, parent) -> None: 

3762 if not parent.isSnapshot() or parent.isAttachedRW(): 

3763 parent.inflateFully() 

3764 else: 

3765 parent.deflate() 

3766 

3767 @override 

3768 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3769 return LinstorCowUtil( 

3770 self.xapi.session, self._linstor, parent.vdi_type 

3771 ).compute_volume_size(parent.sizeVirt) - parent.getDrbdSize() 

3772 

3773 def _hasValidDevicePath(self, uuid): 

3774 try: 

3775 self._linstor.get_device_path(uuid) 

3776 except Exception: 

3777 # TODO: Maybe log exception. 

3778 return False 

3779 return True 

3780 

3781 @override 

3782 def _liveLeafCoalesce(self, vdi) -> bool: 

3783 self.lock() 

3784 try: 

3785 self._linstor.ensure_volume_is_not_locked( 

3786 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3787 ) 

3788 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3789 finally: 

3790 self.unlock() 

3791 

3792 @override 

3793 def _handleInterruptedCoalesceLeaf(self) -> None: 

3794 entries = self.journaler.get_all(VDI.JRN_LEAF) 

3795 for uuid, parentUuid in entries.items(): 

3796 if self._hasValidDevicePath(parentUuid) or \ 

3797 self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid): 

3798 self._undoInterruptedCoalesceLeaf(uuid, parentUuid) 

3799 else: 

3800 self._finishInterruptedCoalesceLeaf(uuid, parentUuid) 

3801 self.journaler.remove(VDI.JRN_LEAF, uuid) 

3802 vdi = self.getVDI(uuid) 

3803 if vdi: 

3804 vdi.ensureUnpaused() 

3805 

3806 def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3807 Util.log('*** UNDO LEAF-COALESCE') 

3808 parent = self.getVDI(parentUuid) 

3809 if not parent: 

3810 parent = self.getVDI(childUuid) 

3811 if not parent: 

3812 raise util.SMException( 

3813 'Neither {} nor {} found'.format(parentUuid, childUuid) 

3814 ) 

3815 Util.log( 

3816 'Renaming parent back: {} -> {}'.format(childUuid, parentUuid) 

3817 ) 

3818 parent.rename(parentUuid) 

3819 

3820 child = self.getVDI(childUuid) 

3821 if not child: 

3822 child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid) 

3823 if not child: 

3824 raise util.SMException( 

3825 'Neither {} nor {} found'.format( 

3826 childUuid, self.TMP_RENAME_PREFIX + childUuid 

3827 ) 

3828 ) 

3829 Util.log('Renaming child back to {}'.format(childUuid)) 

3830 child.rename(childUuid) 

3831 Util.log('Updating the VDI record') 

3832 child.setConfig(VDI.DB_VDI_PARENT, parentUuid) 

3833 child.setConfig(VDI.DB_VDI_TYPE, child.vdi_type) 

3834 

3835 # TODO: Maybe deflate here. 

3836 

3837 if child.isHidden(): 

3838 child._setHidden(False) 

3839 if not parent.isHidden(): 

3840 parent._setHidden(True) 

3841 self._updateSlavesOnUndoLeafCoalesce(parent, child) 

3842 Util.log('*** leaf-coalesce undo successful') 

3843 

3844 def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid): 

3845 Util.log('*** FINISH LEAF-COALESCE') 

3846 vdi = self.getVDI(childUuid) 

3847 if not vdi: 

3848 raise util.SMException('VDI {} not found'.format(childUuid)) 

3849 # TODO: Maybe inflate. 

3850 try: 

3851 self.forgetVDI(parentUuid) 

3852 except XenAPI.Failure: 

3853 pass 

3854 self._updateSlavesOnResize(vdi) 

3855 Util.log('*** finished leaf-coalesce successfully') 

3856 

3857 def _checkSlaves(self, vdi): 

3858 try: 

3859 all_openers = self._linstor.get_volume_openers(vdi.uuid) 

3860 for openers in all_openers.values(): 

3861 for opener in openers.values(): 

3862 if opener['process-name'] != 'tapdisk': 

3863 raise util.SMException( 

3864 'VDI {} is in use: {}'.format(vdi.uuid, all_openers) 

3865 ) 

3866 except LinstorVolumeManagerError as e: 

3867 if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS: 

3868 raise 

3869 

3870 

3871################################################################################ 

3872# 

3873# Helpers 

3874# 

3875def daemonize(): 

3876 pid = os.fork() 

3877 if pid: 

3878 os.waitpid(pid, 0) 

3879 Util.log("New PID [%d]" % pid) 

3880 return False 

3881 os.chdir("/") 

3882 os.setsid() 

3883 pid = os.fork() 

3884 if pid: 

3885 Util.log("Will finish as PID [%d]" % pid) 

3886 os._exit(0) 

3887 for fd in [0, 1, 2]: 

3888 try: 

3889 os.close(fd) 

3890 except OSError: 

3891 pass 

3892 # we need to fill those special fd numbers or pread won't work 

3893 sys.stdin = open("/dev/null", 'r') 

3894 sys.stderr = open("/dev/null", 'w') 

3895 sys.stdout = open("/dev/null", 'w') 

3896 # As we're a new process we need to clear the lock objects 

3897 lock.Lock.clearAll() 

3898 return True 
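# Note (not in the original source): this is the classic double-fork. The
# original parent waits on its child and returns False so the caller can
# exit; the intermediate child calls setsid() to drop the controlling
# terminal, forks again and exits; only the grandchild returns True and
# carries on, with stdin/stdout/stderr redirected to /dev/null and all lock
# objects cleared because it is a brand-new process.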

3899 

3900 

3901def normalizeType(type): 

3902 if type in LVMSR.SUBTYPES: 

3903 type = SR.TYPE_LVHD 

3904 if type in ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]: 

3905 # temporary while LVHD is symlinked as LVM 

3906 type = SR.TYPE_LVHD 

3907 if type in [ 

3908 "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs", 

3909 "moosefs", "xfs", "zfs", "largeblock" 

3910 ]: 

3911 type = SR.TYPE_FILE 

3912 if type in ["linstor"]: 

3913 type = SR.TYPE_LINSTOR 

3914 if type not in SR.TYPES: 

3915 raise util.SMException("Unsupported SR type: %s" % type) 

3916 return type 
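# Example (illustrative, not in the original source): normalizeType("lvmoiscsi")
# and normalizeType("lvhdohba") both map to SR.TYPE_LVHD, normalizeType("nfs")
# maps to SR.TYPE_FILE, "linstor" maps to SR.TYPE_LINSTOR, and an unknown
# string such as "foo" raises util.SMException("Unsupported SR type: foo").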

3917 

3918GCPAUSE_DEFAULT_SLEEP = 5 * 60 

3919 

3920 

3921def _gc_init_file(sr_uuid): 

3922 return os.path.join(NON_PERSISTENT_DIR, str(sr_uuid), 'gc_init') 

3923 

3924 

3925def _create_init_file(sr_uuid): 

3926 util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))) 

3927 with open(_gc_init_file(sr_uuid), 'w+') as f: 

3928 f.write('1') 

3929 

3930 

3931def _gcLoopPause(sr, dryRun=False, immediate=False): 

3932 if immediate: 

3933 return 

3934 

3935 # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist 

3936 # point will just return. Otherwise, fall back on an abortable sleep. 

3937 

3938 if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT): 

3939 

3940 util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT, 

3941 lambda *args: None) 

3942 elif os.path.exists(_gc_init_file(sr.uuid)): 

3943 def abortTest(): 

3944 return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT) 

3945 

3946 # If time.sleep hangs we are in deep trouble, however for 

3947 # completeness we set the timeout of the abort thread to 

3948 # 110% of GCPAUSE_DEFAULT_SLEEP. 

3949 Util.log("GC active, about to go quiet") 

3950 Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP), 

3951 None, sr.uuid, abortTest, VDI.POLL_INTERVAL, 

3952 GCPAUSE_DEFAULT_SLEEP * 1.1) 

3953 Util.log("GC active, quiet period ended") 

3954 

3955 

3956def _gcLoop(sr, dryRun=False, immediate=False): 

3957 if not lockGCActive.acquireNoblock(): 

3958 Util.log("Another GC instance already active, exiting") 

3959 return 

3960 

3961 # Check we're still attached after acquiring locks 

3962 if not sr.xapi.isPluggedHere(): 

3963 Util.log("SR no longer attached, exiting") 

3964 return 

3965 

3966 # Clean up Intellicache files 

3967 sr.cleanupCache() 

3968 

3969 # Track how many we do 

3970 coalesced = 0 

3971 task_status = "success" 

3972 try: 

3973 # Check if any work needs to be done 

3974 if not sr.xapi.isPluggedHere(): 

3975 Util.log("SR no longer attached, exiting") 

3976 return 

3977 sr.scanLocked() 

3978 if not sr.hasWork(): 

3979 Util.log("No work, exiting") 

3980 return 

3981 sr.xapi.create_task( 

3982 "Garbage Collection", 

3983 "Garbage collection for SR %s" % sr.uuid) 

3984 _gcLoopPause(sr, dryRun, immediate=immediate) 

3985 while True: 

3986 if SIGTERM: 

3987 Util.log("Term requested") 

3988 return 

3989 

3990 if not sr.xapi.isPluggedHere(): 

3991 Util.log("SR no longer attached, exiting") 

3992 break 

3993 sr.scanLocked() 

3994 if not sr.hasWork(): 

3995 Util.log("No work, exiting") 

3996 break 

3997 

3998 if not lockGCRunning.acquireNoblock(): 

3999 Util.log("Unable to acquire GC running lock.") 

4000 return 

4001 try: 

4002 if not sr.gcEnabled(): 

4003 break 

4004 

4005 sr.xapi.update_task_progress("done", coalesced) 

4006 

4007 sr.cleanupCoalesceJournals() 

4008 # Create the init file here in case startup is waiting on it 

4009 _create_init_file(sr.uuid) 

4010 sr.scanLocked() 

4011 sr.updateBlockInfo() 

4012 

4013 howmany = len(sr.findGarbage()) 

4014 if howmany > 0: 

4015 Util.log("Found %d orphaned vdis" % howmany) 

4016 sr.lock() 

4017 try: 

4018 sr.garbageCollect(dryRun) 

4019 finally: 

4020 sr.unlock() 

4021 sr.xapi.srUpdate() 

4022 

4023 candidate = sr.findCoalesceable() 

4024 if candidate: 

4025 util.fistpoint.activate( 

4026 "LVHDRT_finding_a_suitable_pair", sr.uuid) 

4027 sr.coalesce(candidate, dryRun) 

4028 sr.xapi.srUpdate() 

4029 coalesced += 1 

4030 continue 

4031 

4032 candidate = sr.findLeafCoalesceable() 

4033 if candidate: 

4034 sr.coalesceLeaf(candidate, dryRun) 

4035 sr.xapi.srUpdate() 

4036 coalesced += 1 

4037 continue 

4038 

4039 finally: 

4040 lockGCRunning.release() 

4041 except: 

4042 task_status = "failure" 

4043 raise 

4044 finally: 

4045 sr.xapi.set_task_status(task_status) 

4046 Util.log("GC process exiting, no work left") 

4047 _create_init_file(sr.uuid) 

4048 lockGCActive.release() 

4049 

4050 

4051def _gc(session, srUuid, dryRun=False, immediate=False): 

4052 init(srUuid) 

4053 sr = SR.getInstance(srUuid, session) 

4054 if not sr.gcEnabled(False): 

4055 return 

4056 

4057 try: 

4058 _gcLoop(sr, dryRun, immediate=immediate) 

4059 finally: 

4060 sr.check_no_space_candidates() 

4061 sr.cleanup() 

4062 sr.logFilter.logState() 

4063 del sr.xapi 

4064 

4065 

4066def _abort(srUuid, soft=False): 

4067 """Aborts an GC/coalesce. 

4068 

4069 srUuid: the UUID of the SR whose GC/coalesce must be aborted 

4070 soft: If set to True and there is a pending abort signal, the function 

4071 doesn't do anything. If set to False, a new abort signal is issued. 

4072 

4073 returns: If soft is set to False, we return True holding lockGCActive. If 

4074 soft is set to True and an abort signal is pending, we return False 

4075 without holding lockGCActive. An exception is raised in case of error.""" 

4076 Util.log("=== SR %s: abort ===" % (srUuid)) 

4077 init(srUuid) 

4078 if not lockGCActive.acquireNoblock(): 

4079 gotLock = False 

4080 Util.log("Aborting currently-running instance (SR %s)" % srUuid) 

4081 abortFlag = IPCFlag(srUuid) 

4082 if not abortFlag.set(FLAG_TYPE_ABORT, soft): 

4083 return False 

4084 for i in range(SR.LOCK_RETRY_ATTEMPTS): 

4085 gotLock = lockGCActive.acquireNoblock() 

4086 if gotLock: 

4087 break 

4088 time.sleep(SR.LOCK_RETRY_INTERVAL) 

4089 abortFlag.clear(FLAG_TYPE_ABORT) 

4090 if not gotLock: 

4091 raise util.CommandException(code=errno.ETIMEDOUT, 

4092 reason="SR %s: error aborting existing process" % srUuid) 

4093 return True 
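# Usage note (not in the original source): start_gc() calls _abort() with
# soft=True so that, if an abort request is already pending, it simply gets
# False back and logs that a GC restart is already scheduled; abort() calls
# it with soft=False and, on True, must release lockGCActive itself.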

4094 

4095 

4096def init(srUuid): 

4097 global lockGCRunning 

4098 if not lockGCRunning: 

4099 lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid) 

4100 global lockGCActive 

4101 if not lockGCActive: 

4102 lockGCActive = LockActive(srUuid) 

4103 

4104 

4105class LockActive: 

4106 """ 

4107 Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired 

4108 if another process holds the SR lock. 

4109 """ 

4110 def __init__(self, srUuid): 

4111 self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid) 

4112 self._srLock = lock.Lock(lock.LOCK_TYPE_SR, srUuid) 

4113 

4114 def acquireNoblock(self): 

4115 self._srLock.acquire() 

4116 

4117 try: 

4118 return self._lock.acquireNoblock() 

4119 finally: 

4120 self._srLock.release() 

4121 

4122 def release(self): 

4123 self._lock.release() 
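# Note (not in the original source): acquireNoblock() first waits for the SR
# lock, so the GC cannot declare itself active while another process holds
# the SR lock; the SR lock is released again immediately, whether or not the
# GC-active lock was obtained, so the GC never holds it for longer than the
# non-blocking attempt.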

4124 

4125 

4126def usage(): 

4127 output = """Garbage collect and/or coalesce COW images in a COW-based SR 

4128 

4129Parameters: 

4130 -u --uuid UUID SR UUID 

4131 and one of: 

4132 -g --gc garbage collect, coalesce, and repeat while there is work 

4133 -G --gc_force garbage collect once, aborting any current operations 

4134 -c --clean_cache <max_age> clean up IntelliCache cache files older than 

4135 max_age hours 

4136 -a --abort abort any currently running operation (GC or coalesce) 

4137 -q --query query the current state (GC'ing, coalescing or not running) 

4138 -x --disable disable GC/coalesce (will be in effect until you exit) 

4139 -t --debug see Debug below 

4140 

4141Options: 

4142 -b --background run in background (return immediately) (valid for -g only) 

4143 -f --force continue in the presence of COW images with errors (when doing 

4144 GC, this might cause removal of any such images) (only valid 

4145 for -G) (DANGEROUS) 

4146 

4147Debug: 

4148 The --debug parameter enables manipulation of LVHD VDIs for debugging 

4149 purposes. ** NEVER USE IT ON A LIVE VM ** 

4150 The following parameters are required: 

4151 -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate", 

4152 "deflate". 

4153 -v --vdi_uuid VDI UUID 

4154 """ 

4155 #-d --dry-run don't actually perform any SR-modifying operations 

4156 print(output) 

4157 Util.log("(Invalid usage)") 

4158 sys.exit(1) 

4159 

4160 

4161############################################################################## 

4162# 

4163# API 

4164# 

4165def abort(srUuid, soft=False): 

4166 """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair. 

4167 """ 

4168 if _abort(srUuid, soft): 

4169 stop_gc_service(srUuid) 

4170 Util.log("abort: releasing the process lock") 

4171 lockGCActive.release() 

4172 return True 

4173 else: 

4174 return False 

4175 

4176 

4177def run_gc(session, srUuid, dryRun, immediate=False): 

4178 try: 

4179 _gc(session, srUuid, dryRun, immediate=immediate) 

4180 return 0 

4181 except AbortException: 

4182 Util.log("Aborted") 

4183 return 2 

4184 except Exception: 

4185 Util.logException("gc") 

4186 Util.log("* * * * * SR %s: ERROR\n" % srUuid) 

4187 return 1 

4188 

4189 

4190def gc(session, srUuid, inBackground, dryRun=False): 

4191 """Garbage collect all deleted VDIs in SR "srUuid". Fork & return 

4192 immediately if inBackground=True. 

4193 

4194 The following algorithm is used: 

4195 1. If we are already GC'ing in this SR, return 

4196 2. If we are already coalescing a VDI pair: 

4197 a. Scan the SR and determine if the VDI pair is GC'able 

4198 b. If the pair is not GC'able, return 

4199 c. If the pair is GC'able, abort coalesce 

4200 3. Scan the SR 

4201 4. If there is nothing to collect, nor to coalesce, return 

4202 5. If there is something to collect, GC all, then goto 3 

4203 6. If there is something to coalesce, coalesce one pair, then goto 3 

4204 """ 

4205 Util.log("=== SR %s: gc ===" % srUuid) 

4206 

4207 signal.signal(signal.SIGTERM, receiveSignal) 

4208 

4209 if inBackground: 

4210 if daemonize(): 

4211 # we are now running in the background. Catch & log any errors 

4212 # because there is no other way to propagate them back at this 

4213 # point 

4214 

4215 run_gc(None, srUuid, dryRun) 

4216 os._exit(0) 

4217 else: 

4218 os._exit(run_gc(session, srUuid, dryRun, immediate=True)) 

4219 

4220 

4221def start_gc(session, sr_uuid): 

4222 """ 

4223 This function is used to try to start a backgrounded GC session by forking 

4224 the current process. If using the systemd version, call start_gc_service() instead. 

4225 """ 

4226 # don't bother if an instance already running (this is just an 

4227 # optimization to reduce the overhead of forking a new process if we 

4228 # don't have to, but the process will check the lock anyway) 

4229 lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid) 

4230 if not lockRunning.acquireNoblock(): 

4231 if should_preempt(session, sr_uuid): 

4232 util.SMlog("Aborting currently-running coalesce of garbage VDI") 

4233 try: 

4234 if not abort(sr_uuid, soft=True): 

4235 util.SMlog("The GC has already been scheduled to re-start") 

4236 except util.CommandException as e: 

4237 if e.code != errno.ETIMEDOUT: 

4238 raise 

4239 util.SMlog('failed to abort the GC') 

4240 else: 

4241 util.SMlog("A GC instance already running, not kicking") 

4242 return 

4243 else: 

4244 lockRunning.release() 

4245 

4246 util.SMlog(f"Starting GC file is {__file__}") 

4247 subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'], 

4248 stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4249 

4250def _gc_service_cmd(sr_uuid, action, extra_args=None): 

4251 """ 

4252 Build and run the systemctl command for the GC service using util.doexec. 

4253 """ 

4254 sr_uuid_esc = sr_uuid.replace("-", "\\x2d") 

4255 cmd=["/usr/bin/systemctl", "--quiet"] 

4256 if extra_args: 

4257 cmd.extend(extra_args) 

4258 cmd += [action, f"SMGC@{sr_uuid_esc}"] 

4259 return util.doexec(cmd) 
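# Example (illustrative, not in the original source): for an SR UUID such as
# "aaaa-bbbb" the systemd instance name becomes "SMGC@aaaa\x2dbbbb", since "-"
# must be escaped as "\x2d" in template instance names; a non-blocking start
# therefore runs roughly:
#     /usr/bin/systemctl --quiet --no-block start SMGC@aaaa\x2dbbbb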

4260 

4261 

4262def start_gc_service(sr_uuid, wait=False): 

4263 """ 

4264 This starts the templated systemd service which runs GC on the given SR UUID. 

4265 If the service was already started, this is a no-op. 

4266 

4267 Because the service is a one-shot with RemainAfterExit=no, when called with 

4268 wait=True this will run the service synchronously and will not return until the 

4269 run has finished. This is used to force a run of the GC instead of just kicking it 

4270 in the background. 

4271 """ 

4272 util.SMlog(f"Kicking SMGC@{sr_uuid}...") 

4273 _gc_service_cmd(sr_uuid, "start", extra_args=None if wait else ["--no-block"]) 

4274 

4275 

4276def stop_gc_service(sr_uuid): 

4277 """ 

4278 Stops the templated systemd service which runs GC on the given SR UUID. 

4279 """ 

4280 util.SMlog(f"Stopping SMGC@{sr_uuid}...") 

4281 (rc, _stdout, stderr) = _gc_service_cmd(sr_uuid, "stop") 

4282 if rc != 0: 

4283 util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`") 

4284 

4285 

4286def wait_for_completion(sr_uuid): 

4287 while get_state(sr_uuid): 

4288 time.sleep(5) 

4289 

4290 

4291def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False): 

4292 """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure 

4293 the SR lock is held. 

4294 The following algorithm is used: 

4295 1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce 

4296 2. Scan the SR 

4297 3. GC 

4298 4. return 

4299 """ 

4300 Util.log("=== SR %s: gc_force ===" % srUuid) 

4301 init(srUuid) 

4302 sr = SR.getInstance(srUuid, session, lockSR, True) 

4303 if not lockGCActive.acquireNoblock(): 

4304 abort(srUuid) 

4305 else: 

4306 Util.log("Nothing was running, clear to proceed") 

4307 

4308 if force: 

4309 Util.log("FORCED: will continue even if there are COW image errors") 

4310 sr.scanLocked(force) 

4311 sr.cleanupCoalesceJournals() 

4312 

4313 try: 

4314 sr.cleanupCache() 

4315 sr.garbageCollect(dryRun) 

4316 finally: 

4317 sr.cleanup() 

4318 sr.logFilter.logState() 

4319 lockGCActive.release() 

4320 

4321 

4322def get_state(srUuid): 

4323 """Return whether GC/coalesce is currently running or not. This asks systemd for 

4324 the state of the templated SMGC service and will return True if it is "activating" 

4325 or "running" (for completeness, as in practice it will never achieve the latter state) 

4326 """ 

4327 sr_uuid_esc = srUuid.replace("-", "\\x2d") 

4328 cmd=[ "/usr/bin/systemctl", "is-active", f"SMGC@{sr_uuid_esc}"] 

4329 result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) 

4330 state = result.stdout.decode('utf-8').rstrip() 

4331 if state == "activating" or state == "running": 

4332 return True 

4333 return False 
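# Example (illustrative, not in the original source): `systemctl is-active`
# prints states such as "activating", "active", "inactive" or "failed". While
# the one-shot GC unit is executing it reports "activating", which is what
# this check keys on; "running" is accepted too purely for completeness, as
# the docstring above notes.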

4334 

4335 

4336def should_preempt(session, srUuid): 

4337 sr = SR.getInstance(srUuid, session) 

4338 entries = sr.journaler.getAll(VDI.JRN_COALESCE) 

4339 if len(entries) == 0: 

4340 return False 

4341 elif len(entries) > 1: 

4342 raise util.SMException("More than one coalesce entry: " + str(entries)) 

4343 sr.scanLocked() 

4344 coalescedUuid = entries.popitem()[0] 

4345 garbage = sr.findGarbage() 

4346 for vdi in garbage: 

4347 if vdi.uuid == coalescedUuid: 

4348 return True 

4349 return False 

4350 

4351 

4352def get_coalesceable_leaves(session, srUuid, vdiUuids): 

4353 coalesceable = [] 

4354 sr = SR.getInstance(srUuid, session) 

4355 sr.scanLocked() 

4356 for uuid in vdiUuids: 

4357 vdi = sr.getVDI(uuid) 

4358 if not vdi: 

4359 raise util.SMException("VDI %s not found" % uuid) 

4360 if vdi.isLeafCoalesceable(): 

4361 coalesceable.append(uuid) 

4362 return coalesceable 

4363 

4364 

4365def cache_cleanup(session, srUuid, maxAge): 

4366 sr = SR.getInstance(srUuid, session) 

4367 return sr.cleanupCache(maxAge) 

4368 

4369 

4370def debug(sr_uuid, cmd, vdi_uuid): 

4371 Util.log("Debug command: %s" % cmd) 

4372 sr = SR.getInstance(sr_uuid, None) 

4373 if not isinstance(sr, LVMSR): 

4374 print("Error: not an LVHD SR") 

4375 return 

4376 sr.scanLocked() 

4377 vdi = sr.getVDI(vdi_uuid) 

4378 if not vdi: 

4379 print("Error: VDI %s not found") 

4380 return 

4381 print("Running %s on SR %s" % (cmd, sr)) 

4382 print("VDI before: %s" % vdi) 

4383 if cmd == "activate": 

4384 vdi._activate() 

4385 print("VDI file: %s" % vdi.path) 

4386 if cmd == "deactivate": 

4387 ns = NS_PREFIX_LVM + sr.uuid 

4388 sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False) 

4389 if cmd == "inflate": 

4390 vdi.inflateFully() 

4391 sr.cleanup() 

4392 if cmd == "deflate": 

4393 vdi.deflate() 

4394 sr.cleanup() 

4395 sr.scanLocked() 

4396 print("VDI after: %s" % vdi) 

4397 

4398 

4399def abort_optional_reenable(uuid): 

4400 print("Disabling GC/coalesce for %s" % uuid) 

4401 ret = _abort(uuid) 

4402 input("Press enter to re-enable...") 

4403 print("GC/coalesce re-enabled") 

4404 lockGCRunning.release() 

4405 if ret: 

4406 lockGCActive.release() 

4407 

4408 

4409############################################################################## 

4410# 

4411# CLI 

4412# 

4413def main(): 

4414 action = "" 

4415 maxAge = 0 

4416 uuid = "" 

4417 background = False 

4418 force = False 

4419 dryRun = False 

4420 debug_cmd = "" 

4421 vdi_uuid = "" 

4422 shortArgs = "gGc:aqxu:bfdt:v:" 

4423 longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable", 

4424 "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="] 

4425 

4426 try: 

4427 opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs) 

4428 except getopt.GetoptError: 

4429 usage() 

4430 for o, a in opts: 

4431 if o in ("-g", "--gc"): 

4432 action = "gc" 

4433 if o in ("-G", "--gc_force"): 

4434 action = "gc_force" 

4435 if o in ("-c", "--clean_cache"): 

4436 action = "clean_cache" 

4437 maxAge = int(a) 

4438 if o in ("-a", "--abort"): 

4439 action = "abort" 

4440 if o in ("-q", "--query"): 

4441 action = "query" 

4442 if o in ("-x", "--disable"): 

4443 action = "disable" 

4444 if o in ("-u", "--uuid"): 

4445 uuid = a 

4446 if o in ("-b", "--background"): 

4447 background = True 

4448 if o in ("-f", "--force"): 

4449 force = True 

4450 if o in ("-d", "--dry-run"): 

4451 Util.log("Dry run mode") 

4452 dryRun = True 

4453 if o in ("-t", "--debug"): 

4454 action = "debug" 

4455 debug_cmd = a 

4456 if o in ("-v", "--vdi_uuid"): 

4457 vdi_uuid = a 

4458 

4459 if not action or not uuid: 

4460 usage() 

4461 if action == "debug" and not (debug_cmd and vdi_uuid) or \ 

4462 action != "debug" and (debug_cmd or vdi_uuid): 

4463 usage() 

4464 

4465 if action != "query" and action != "debug": 

4466 print("All output goes to log") 

4467 

4468 if action == "gc": 

4469 gc(None, uuid, background, dryRun) 

4470 elif action == "gc_force": 

4471 gc_force(None, uuid, force, dryRun, True) 

4472 elif action == "clean_cache": 

4473 cache_cleanup(None, uuid, maxAge) 

4474 elif action == "abort": 

4475 abort(uuid) 

4476 elif action == "query": 

4477 print("Currently running: %s" % get_state(uuid)) 

4478 elif action == "disable": 

4479 abort_optional_reenable(uuid) 

4480 elif action == "debug": 

4481 debug(uuid, debug_cmd, vdi_uuid) 

4482 

4483 

4484if __name__ == '__main__': 

4485 main()