Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/python3 

2# 

3# Copyright (C) Citrix Systems Inc. 

4# 

5# This program is free software; you can redistribute it and/or modify 

6# it under the terms of the GNU Lesser General Public License as published 

7# by the Free Software Foundation; version 2.1 only. 

8# 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU Lesser General Public License for more details. 

13# 

14# You should have received a copy of the GNU Lesser General Public License 

15# along with this program; if not, write to the Free Software Foundation, Inc., 

16# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 

17# 

18# Script to coalesce and garbage collect VHD-based SR's in the background 

19# 

20 

21from sm_typing import Optional, override 

22 

23import os 

24import os.path 

25import sys 

26import time 

27import signal 

28import subprocess 

29import getopt 

30import datetime 

31import traceback 

32import base64 

33import zlib 

34import errno 

35import stat 

36 

37import XenAPI # pylint: disable=import-error 

38import util 

39import lvutil 

40import vhdutil 

41import lvhdutil 

42import lvmcache 

43import journaler 

44import fjournaler 

45import lock 

46import blktap2 

47import xs_errors 

48from refcounter import RefCounter 

49from ipc import IPCFlag 

50from lvmanager import LVActivator 

51from srmetadata import LVMMetadataHandler, VDI_TYPE_TAG 

52from functools import reduce 

53from time import monotonic as _time 

54 

# Optional LINSTOR support: these modules only exist when the LINSTOR driver
# packages are installed, so an ImportError simply disables the feature
# (checked via LINSTOR_AVAILABLE at the call sites).
try:
    from linstorjournaler import LinstorJournaler
    from linstorvhdutil import LinstorVhdUtil, MultiLinstorVhdUtil
    from linstorvolumemanager import get_controller_uri
    from linstorvolumemanager import LinstorVolumeManager
    from linstorvolumemanager import LinstorVolumeManagerError
    from linstorvolumemanager import PERSISTENT_PREFIX as LINSTOR_PERSISTENT_PREFIX

    LINSTOR_AVAILABLE = True
except ImportError:
    LINSTOR_AVAILABLE = False

66 

# Disable automatic leaf-coalescing. Online leaf-coalesce is currently not
# possible due to lvhd_stop_using_() not working correctly. However, we leave
# this option available through the explicit LEAFCLSC_FORCE flag in the VDI
# record for use by the offline tool (which makes the operation safe by pausing
# the VM first)
# NOTE(review): the comment above says leaf-coalescing is disabled, but the
# value is True -- confirm which statement is current.
AUTO_ONLINE_LEAF_COALESCE_ENABLED = True

FLAG_TYPE_ABORT = "abort"  # flag to request aborting of GC/coalesce

# process "lock", used simply as an indicator that a process already exists
# that is doing GC/coalesce on this SR (such a process holds the lock, and we
# check for the fact by trying the lock).
lockGCRunning = None

# process "lock" to indicate that the GC process has been activated but may not
# yet be running, stops a second process from being started.
LOCK_TYPE_GC_ACTIVE = "gc_active"
lockGCActive = None

# Default coalesce error rate limit, in messages per minute. A zero value
# disables throttling, and a negative value disables error reporting.
DEFAULT_COALESCE_ERR_RATE = 1.0 / 60

# SR sm-config/other-config keys used to rate-limit coalesce error messages
COALESCE_LAST_ERR_TAG = 'last-coalesce-error'
COALESCE_ERR_RATE_TAG = 'coalesce-error-rate'
VAR_RUN = "/var/run/"
# Per-SR file recording observed coalesce speed samples
SPEED_LOG_ROOT = VAR_RUN + "{uuid}.speed_log"

# Number of speed samples kept for the running average
N_RUNNING_AVERAGE = 10

NON_PERSISTENT_DIR = '/run/nonpersistent/sm'

# Signal Handler
# Set to True by receiveSignal() when SIGTERM arrives; polled by worker loops.
SIGTERM = False

101 

102 

class AbortException(util.SMException):
    """Raised when a GC/coalesce operation is aborted (abort flag or SIGTERM)."""
    pass

105 

106 

def receiveSignal(signalNumber, frame):
    """SIGTERM handler: record that termination was requested.

    Only sets the module-level SIGTERM flag, which long-running loops
    (e.g. Util.runAbortable) poll so they can stop cleanly; no other work
    is safe inside a signal handler.
    """
    global SIGTERM

    # Fix: the original message misspelled "received" as "recieved".
    util.SMlog("GC: received SIGTERM")
    SIGTERM = True

113 

114 

115################################################################################ 

116# 

117# Util 

118# 

class Util:
    """Assorted helpers for the GC/coalesce code: logging, subprocess
    execution, abortable child tasks, and size/bitmap arithmetic."""

    # selectors for the 'ret' argument of doexec()
    RET_RC = 1
    RET_STDOUT = 2
    RET_STDERR = 4

    UUID_LEN = 36

    # unit multipliers used by num2str()
    PREFIX = {"G": 1024 * 1024 * 1024, "M": 1024 * 1024, "K": 1024}

    @staticmethod
    def log(text) -> None:
        """Log 'text' under the SMGC identity."""
        util.SMlog(text, ident="SMGC")

    @staticmethod
    def logException(tag):
        """Log the exception currently being handled, prefixed with 'tag'."""
        info = sys.exc_info()
        if info[0] == SystemExit:
            # this should not be happening when catching "Exception", but it is
            sys.exit(0)
        tb = reduce(lambda a, b: "%s%s" % (a, b), traceback.format_tb(info[2]))
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")
        Util.log(" ***********************")
        Util.log(" * E X C E P T I O N *")
        Util.log(" ***********************")
        Util.log("%s: EXCEPTION %s, %s" % (tag, info[0], info[1]))
        Util.log(tb)
        Util.log("*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*")

    @staticmethod
    def doexec(args, expectedRC, inputtext=None, ret=None, log=True):
        """Execute a subprocess, then return its return code, stdout, stderr.

        Raises util.CommandException if the return code is not in
        'expectedRC' (a single code or a list of acceptable codes).
        NOTE(review): shell=True, so 'args' is a shell command string;
        the streams are bytes and str() yields their "b'...'" repr --
        callers appear to only log/inspect these, verify before changing
        to .decode().
        """
        proc = subprocess.Popen(args,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                shell=True,
                                close_fds=True)
        (stdout, stderr) = proc.communicate(inputtext)
        stdout = str(stdout)
        stderr = str(stderr)
        rc = proc.returncode
        if log:
            Util.log("`%s`: %s" % (args, rc))
        # accept either a single expected code or a list of them
        if not isinstance(expectedRC, list):
            expectedRC = [expectedRC]
        if rc not in expectedRC:
            reason = stderr.strip()
            if stdout.strip():
                reason = "%s (stdout: %s)" % (reason, stdout.strip())
            Util.log("Failed: %s" % reason)
            raise util.CommandException(rc, args, reason)

        if ret == Util.RET_RC:
            return rc
        if ret == Util.RET_STDERR:
            return stderr
        return stdout

    @staticmethod
    def runAbortable(func, ret, ns, abortTest, pollInterval, timeOut):
        """execute func in a separate thread and kill it if abortTest signals
        so.

        The child (a forked process, despite the docstring) runs func() and
        reports success/failure through an IPCFlag in namespace 'ns'; the
        parent polls the flag, the abort condition and the timeout.
        """
        abortSignaled = abortTest()  # check now before we clear resultFlag
        resultFlag = IPCFlag(ns)
        resultFlag.clearAll()
        pid = os.fork()
        if pid:
            # parent: poll for child result, abort request, or timeout
            startTime = _time()
            try:
                while True:
                    if resultFlag.test("success"):
                        Util.log("  Child process completed successfully")
                        resultFlag.clear("success")
                        return
                    if resultFlag.test("failure"):
                        resultFlag.clear("failure")
                        raise util.SMException("Child process exited with error")
                    if abortTest() or abortSignaled or SIGTERM:
                        # kill the child's whole process group (see os.setpgrp
                        # in the child branch)
                        os.killpg(pid, signal.SIGKILL)
                        raise AbortException("Aborting due to signal")
                    if timeOut and _time() - startTime > timeOut:
                        os.killpg(pid, signal.SIGKILL)
                        resultFlag.clearAll()
                        raise util.SMException("Timed out")
                    time.sleep(pollInterval)
            finally:
                # always reap the child to avoid leaving a zombie; give it
                # up to ~20s to disappear
                wait_pid = 0
                rc = -1
                count = 0
                while wait_pid == 0 and count < 10:
                    wait_pid, rc = os.waitpid(pid, os.WNOHANG)
                    if wait_pid == 0:
                        time.sleep(2)
                        count += 1

                if wait_pid == 0:
                    Util.log("runAbortable: wait for process completion timed out")
        else:
            # child: new process group so the parent can SIGKILL all of it
            os.setpgrp()
            try:
                if func() == ret:
                    resultFlag.set("success")
                else:
                    resultFlag.set("failure")
            except Exception as e:
                Util.log("Child process failed with : (%s)" % e)
                resultFlag.set("failure")
                Util.logException("This exception has occured")
            os._exit(0)

    @staticmethod
    def num2str(number):
        """Render a byte count with the largest fitting G/M/K suffix."""
        for prefix in ("G", "M", "K"):
            if number >= Util.PREFIX[prefix]:
                return "%.3f%s" % (float(number) / Util.PREFIX[prefix], prefix)
        return "%s" % number

    @staticmethod
    def numBits(val):
        """Return the number of set bits in integer 'val'."""
        count = 0
        while val:
            count += val & 1
            val = val >> 1
        return count

    @staticmethod
    def countBits(bitmap1, bitmap2):
        """return bit count in the bitmap produced by ORing the two bitmaps

        The bitmaps may have different lengths; the tail of the longer one
        is counted as-is.
        """
        lenShort = min(len(bitmap1), len(bitmap2))
        bitmapLong = bitmap1
        if len(bitmap2) > len(bitmap1):
            bitmapLong = bitmap2

        count = 0
        for i in range(lenShort):
            count += Util.numBits(bitmap1[i] | bitmap2[i])

        # Fix: start the tail loop at lenShort explicitly. The original
        # reused the first loop's index variable (range(i + 1, lenLong)),
        # which raised NameError when the shorter bitmap was empty.
        for i in range(lenShort, len(bitmapLong)):
            count += Util.numBits(bitmapLong[i])
        return count

    @staticmethod
    def getThisScript():
        """Return the absolute path of this script (.py, not .pyc)."""
        thisScript = util.get_real_path(__file__)
        if thisScript.endswith(".pyc"):
            thisScript = thisScript[:-1]
        return thisScript

273 

274 

275################################################################################ 

276# 

277# XAPI 

278# 

class XAPI:
    """Thin wrapper around a XenAPI session, scoped to a single SR."""

    USER = "root"
    PLUGIN_ON_SLAVE = "on-slave"

    # selectors for the kind of XAPI config field a VDI config key lives in
    CONFIG_SM = 0
    CONFIG_OTHER = 1
    CONFIG_ON_BOOT = 2
    CONFIG_ALLOW_CACHING = 3

    CONFIG_NAME = {
        CONFIG_SM: "sm-config",
        CONFIG_OTHER: "other-config",
        CONFIG_ON_BOOT: "on-boot",
        CONFIG_ALLOW_CACHING: "allow_caching"
    }

    class LookupError(util.SMException):
        # NOTE: deliberately shadows the builtin LookupError inside this class
        pass

    @staticmethod
    def getSession():
        """Open and return a new local XAPI session logged in as root."""
        session = XenAPI.xapi_local()
        session.xenapi.login_with_password(XAPI.USER, '', '', 'SM')
        return session

    def __init__(self, session, srUuid):
        """Bind to SR 'srUuid', creating a private session if 'session' is None."""
        self.sessionPrivate = False
        self.session = session
        if self.session is None:
            self.session = self.getSession()
            # we own this session and must log it out in __del__
            self.sessionPrivate = True
        self._srRef = self.session.xenapi.SR.get_by_uuid(srUuid)
        self.srRecord = self.session.xenapi.SR.get_record(self._srRef)
        self.hostUuid = util.get_this_host()
        self._hostRef = self.session.xenapi.host.get_by_uuid(self.hostUuid)
        # XAPI task ref created by create_task(), if any
        self.task = None
        # counters used to derive the task progress fraction in update_task()
        self.task_progress = {"coalescable": 0, "done": 0}

    def __del__(self):
        # only log out sessions we created ourselves
        if self.sessionPrivate:
            self.session.xenapi.session.logout()

    @property
    def srRef(self):
        """Opaque XAPI ref of the SR this object is bound to."""
        return self._srRef

    def isPluggedHere(self):
        """True if this SR has a currently-attached PBD on the local host."""
        pbds = self.getAttachedPBDs()
        for pbdRec in pbds:
            if pbdRec["host"] == self._hostRef:
                return True
        return False

    def poolOK(self):
        """True if every host in the pool is enabled."""
        host_recs = self.session.xenapi.host.get_all_records()
        for host_ref, host_rec in host_recs.items():
            if not host_rec["enabled"]:
                Util.log("Host %s not enabled" % host_rec["uuid"])
                return False
        return True

    def isMaster(self):
        """True if the local host is responsible for this SR: the pool master
        for a shared SR, otherwise the single host the SR is attached to.

        Raises util.SMException if a local SR is attached to zero or
        multiple hosts.
        """
        if self.srRecord["shared"]:
            pool = list(self.session.xenapi.pool.get_all_records().values())[0]
            return pool["master"] == self._hostRef
        else:
            pbds = self.getAttachedPBDs()
            if len(pbds) < 1:
                raise util.SMException("Local SR not attached")
            elif len(pbds) > 1:
                raise util.SMException("Local SR multiply attached")
            return pbds[0]["host"] == self._hostRef

    def getAttachedPBDs(self):
        """Return PBD records for all PBDs of this SR that are currently
        attached"""
        attachedPBDs = []
        pbds = self.session.xenapi.PBD.get_all_records()
        for pbdRec in pbds.values():
            if pbdRec["SR"] == self._srRef and pbdRec["currently_attached"]:
                attachedPBDs.append(pbdRec)
        return attachedPBDs

    def getOnlineHosts(self):
        """Return the refs of all online hosts in the pool."""
        return util.get_online_hosts(self.session)

    def ensureInactive(self, hostRef, args):
        """Run the on-slave 'multi' plugin on 'hostRef' to deactivate VDIs."""
        text = self.session.xenapi.host.call_plugin( \
            hostRef, self.PLUGIN_ON_SLAVE, "multi", args)
        Util.log("call-plugin returned: '%s'" % text)

    def getRecordHost(self, hostRef):
        """Return the host record for 'hostRef'."""
        return self.session.xenapi.host.get_record(hostRef)

    def _getRefVDI(self, uuid):
        """Return the XAPI VDI ref for 'uuid'."""
        return self.session.xenapi.VDI.get_by_uuid(uuid)

    def getRefVDI(self, vdi):
        """Return the XAPI VDI ref for a cleanup.VDI object."""
        return self._getRefVDI(vdi.uuid)

    def getRecordVDI(self, uuid):
        """Return the VDI record for 'uuid', or None if XAPI lookup fails."""
        try:
            ref = self._getRefVDI(uuid)
            return self.session.xenapi.VDI.get_record(ref)
        except XenAPI.Failure:
            return None

    def singleSnapshotVDI(self, vdi):
        """Take an 'internal'-type snapshot of 'vdi' and return the new ref."""
        return self.session.xenapi.VDI.snapshot(vdi.getRef(),
                                                {"type": "internal"})

    def forgetVDI(self, srUuid, vdiUuid):
        """Forget the VDI, but handle the case where the VDI has already been
        forgotten (i.e. ignore errors)"""
        # NOTE(review): 'srUuid' is unused here; kept for interface
        # compatibility with callers.
        try:
            vdiRef = self.session.xenapi.VDI.get_by_uuid(vdiUuid)
            self.session.xenapi.VDI.forget(vdiRef)
        except XenAPI.Failure:
            pass

    def getConfigVDI(self, vdi, key):
        """Fetch the whole config blob that 'key' lives in for 'vdi'.

        Returns a dict for sm-config/other-config, or a scalar value for
        on-boot / allow_caching.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            cfg = self.session.xenapi.VDI.get_sm_config(vdi.getRef())
        elif kind == self.CONFIG_OTHER:
            cfg = self.session.xenapi.VDI.get_other_config(vdi.getRef())
        elif kind == self.CONFIG_ON_BOOT:
            cfg = self.session.xenapi.VDI.get_on_boot(vdi.getRef())
        elif kind == self.CONFIG_ALLOW_CACHING:
            cfg = self.session.xenapi.VDI.get_allow_caching(vdi.getRef())
        else:
            assert(False)
        Util.log("Got %s for %s: %s" % (self.CONFIG_NAME[kind], vdi, repr(cfg)))
        return cfg

    def removeFromConfigVDI(self, vdi, key):
        """Remove 'key' from the appropriate config dict of 'vdi'.

        Only sm-config and other-config keys are removable.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.remove_from_sm_config(vdi.getRef(), key)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.remove_from_other_config(vdi.getRef(), key)
        else:
            assert(False)

    def addToConfigVDI(self, vdi, key, val):
        """Add key=val to the appropriate config dict of 'vdi'.

        Only sm-config and other-config keys are settable.
        """
        kind = vdi.CONFIG_TYPE[key]
        if kind == self.CONFIG_SM:
            self.session.xenapi.VDI.add_to_sm_config(vdi.getRef(), key, val)
        elif kind == self.CONFIG_OTHER:
            self.session.xenapi.VDI.add_to_other_config(vdi.getRef(), key, val)
        else:
            assert(False)

    def isSnapshot(self, vdi):
        """True if 'vdi' is a snapshot according to XAPI."""
        return self.session.xenapi.VDI.get_is_a_snapshot(vdi.getRef())

    def markCacheSRsDirty(self):
        """Flag every SR with local caching enabled as dirty."""
        sr_refs = self.session.xenapi.SR.get_all_records_where( \
            'field "local_cache_enabled" = "true"')
        # iterating the records dict yields SR refs
        for sr_ref in sr_refs:
            Util.log("Marking SR %s dirty" % sr_ref)
            util.set_dirty(self.session, sr_ref)

    def srUpdate(self):
        """Kick off an async SR.update and wait up to ~60s for it to finish.

        The task is cancelled if the SR abort flag is raised or the wait
        times out.
        """
        Util.log("Starting asynch srUpdate for SR %s" % self.srRecord["uuid"])
        abortFlag = IPCFlag(self.srRecord["uuid"])
        task = self.session.xenapi.Async.SR.update(self._srRef)
        # cancelTask stays True only on the abnormal paths (timeout)
        cancelTask = True
        try:
            for i in range(60):
                status = self.session.xenapi.task.get_status(task)
                if not status == "pending":
                    Util.log("SR.update_asynch status changed to [%s]" % status)
                    cancelTask = False
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    Util.log("Abort signalled during srUpdate, cancelling task...")
                    try:
                        self.session.xenapi.task.cancel(task)
                        cancelTask = False
                        Util.log("Task cancelled")
                    except:
                        # best-effort cancel; task may have just completed
                        pass
                    return
                time.sleep(1)
        finally:
            if cancelTask:
                self.session.xenapi.task.cancel(task)
            self.session.xenapi.task.destroy(task)
        Util.log("Asynch srUpdate still running, but timeout exceeded.")

    def update_task(self):
        """Refresh our XAPI task's other-config and progress fraction."""
        self.session.xenapi.task.set_other_config(
            self.task,
            {
                "applies_to": self._srRef
            })
        total = self.task_progress['coalescable'] + self.task_progress['done']
        if (total > 0):
            self.session.xenapi.task.set_progress(
                self.task, float(self.task_progress['done']) / total)

    def create_task(self, label, description):
        """Create the XAPI task used to report GC progress."""
        self.task = self.session.xenapi.task.create(label, description)
        self.update_task()

    def update_task_progress(self, key, value):
        """Set a progress counter ('coalescable' or 'done') and push it."""
        self.task_progress[key] = value
        if self.task:
            self.update_task()

    def set_task_status(self, status):
        """Set the final status on our XAPI task, if one was created."""
        if self.task:
            self.session.xenapi.task.set_status(self.task, status)

493 

494 

495################################################################################ 

496# 

497# VDI 

498# 

class VDI(object):
    """Object representing a VDI of a VHD-based SR"""

    POLL_INTERVAL = 1
    POLL_TIMEOUT = 30
    DEVICE_MAJOR = 202
    DRIVER_NAME_VHD = "vhd"

    # config keys & values
    DB_VHD_PARENT = "vhd-parent"
    DB_VDI_TYPE = "vdi_type"
    DB_VHD_BLOCKS = "vhd-blocks"
    DB_VDI_PAUSED = "paused"
    DB_VDI_RELINKING = "relinking"
    DB_VDI_ACTIVATING = "activating"
    DB_GC = "gc"
    DB_COALESCE = "coalesce"
    DB_LEAFCLSC = "leaf-coalesce"  # config key
    DB_GC_NO_SPACE = "gc_no_space"
    LEAFCLSC_DISABLED = "false"  # set by user; means do not leaf-coalesce
    LEAFCLSC_FORCE = "force"  # set by user; means skip snap-coalesce
    LEAFCLSC_OFFLINE = "offline"  # set here for informational purposes: means
                                  # no space to snap-coalesce or unable to keep
                                  # up with VDI. This is not used by the SM, it
                                  # might be used by external components.
    DB_ONBOOT = "on-boot"
    ONBOOT_RESET = "reset"
    DB_ALLOW_CACHING = "allow_caching"

    # maps each config key to the kind of XAPI config field that stores it
    # (consumed by XAPI.getConfigVDI / addToConfigVDI / removeFromConfigVDI)
    CONFIG_TYPE = {
        DB_VHD_PARENT: XAPI.CONFIG_SM,
        DB_VDI_TYPE: XAPI.CONFIG_SM,
        DB_VHD_BLOCKS: XAPI.CONFIG_SM,
        DB_VDI_PAUSED: XAPI.CONFIG_SM,
        DB_VDI_RELINKING: XAPI.CONFIG_SM,
        DB_VDI_ACTIVATING: XAPI.CONFIG_SM,
        DB_GC: XAPI.CONFIG_OTHER,
        DB_COALESCE: XAPI.CONFIG_OTHER,
        DB_LEAFCLSC: XAPI.CONFIG_OTHER,
        DB_ONBOOT: XAPI.CONFIG_ON_BOOT,
        DB_ALLOW_CACHING: XAPI.CONFIG_ALLOW_CACHING,
        DB_GC_NO_SPACE: XAPI.CONFIG_SM
    }

    LIVE_LEAF_COALESCE_MAX_SIZE = 20 * 1024 * 1024  # bytes
    LIVE_LEAF_COALESCE_TIMEOUT = 10  # seconds
    TIMEOUT_SAFETY_MARGIN = 0.5  # extra margin when calculating
                                 # feasibility of leaf coalesce

    JRN_RELINK = "relink"  # journal entry type for relinking children
    JRN_COALESCE = "coalesce"  # to communicate which VDI is being coalesced
    JRN_LEAF = "leaf"  # used in coalesce-leaf

    # indent step (spaces) used when printing VHD trees
    STR_TREE_INDENT = 4

553 

    def __init__(self, sr, uuid, raw):
        """Create an in-memory VDI record; real state is filled in by load()/scan."""
        self.sr = sr
        # assume the worst until a scan succeeds
        self.scanError = True
        self.uuid = uuid
        self.raw = raw  # True for raw (non-VHD) volumes
        self.fileName = ""
        self.parentUuid = ""
        self.sizeVirt = -1          # virtual size; -1 = unknown
        self._sizeVHD = -1          # physical VHD file size; -1 = unknown
        self._sizeAllocated = -1    # allocated size; -1 = unknown
        self._hidden = False
        self.parent = None          # parent VDI object in the VHD tree
        self.children = []          # child VDI objects in the VHD tree
        self._vdiRef = None         # cached XAPI ref (see getRef/_clearRef)
        self._clearRef()

569 

    @staticmethod
    def extractUuid(path):
        """Derive a VDI uuid from its backing path (SR-type specific)."""
        raise NotImplementedError("Implement in sub class")

573 

    def load(self, info=None) -> None:
        """Load VDI info"""
        # no-op in the base class; subclasses populate size/parent/etc.
        pass

577 

    def getDriverName(self) -> str:
        """Return the tapdisk driver name for this VDI type."""
        return self.DRIVER_NAME_VHD

580 

    def getRef(self):
        """Return the XAPI VDI ref, looked up once and then cached."""
        if self._vdiRef is None:
            self._vdiRef = self.sr.xapi.getRefVDI(self)
        return self._vdiRef

585 

    def getConfig(self, key, default=None):
        """Return the value of 'key' from the appropriate XAPI config field,
        or 'default' if the value is missing/falsy.

        on-boot and allow_caching are scalar fields; everything else lives
        in a dict keyed by 'key'.
        """
        config = self.sr.xapi.getConfigVDI(self, key)
        if key == self.DB_ONBOOT or key == self.DB_ALLOW_CACHING:
            val = config
        else:
            val = config.get(key)
        if val:
            return val
        return default

595 

    def setConfig(self, key, val):
        """Set key=val in the VDI's config (remove-then-add, as XAPI requires)."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        self.sr.xapi.addToConfigVDI(self, key, val)
        Util.log("Set %s = %s for %s" % (key, val, self))

600 

    def delConfig(self, key):
        """Remove 'key' from the VDI's config."""
        self.sr.xapi.removeFromConfigVDI(self, key)
        Util.log("Removed %s from %s" % (key, self))

604 

    def ensureUnpaused(self):
        """Unpause the VDI if its config says it is currently paused."""
        if self.getConfig(self.DB_VDI_PAUSED) == "true":
            Util.log("Unpausing VDI %s" % self)
            self.unpause()

609 

    def pause(self, failfast=False) -> None:
        """Pause the VDI's tapdisk; raises util.SMException on failure."""
        if not blktap2.VDI.tap_pause(self.sr.xapi.session, self.sr.uuid,
                                     self.uuid, failfast):
            raise util.SMException("Failed to pause VDI %s" % self)

614 

    def _report_tapdisk_unpause_error(self):
        """Best-effort XenCenter message about a failed tapdisk unpause."""
        try:
            xapi = self.sr.xapi.session.xenapi
            sr_ref = xapi.SR.get_by_uuid(self.sr.uuid)
            msg_name = "failed to unpause tapdisk"
            msg_body = "Failed to unpause tapdisk for VDI %s, " \
                "VMs using this tapdisk have lost access " \
                "to the corresponding disk(s)" % self.uuid
            xapi.message.create(msg_name, "4", "SR", self.sr.uuid, msg_body)
        except Exception as e:
            # reporting must never mask the original failure
            util.SMlog("failed to generate message: %s" % e)

626 

    def unpause(self):
        """Unpause the VDI's tapdisk; reports to XenCenter and raises on failure."""
        if not blktap2.VDI.tap_unpause(self.sr.xapi.session, self.sr.uuid,
                                       self.uuid):
            self._report_tapdisk_unpause_error()
            raise util.SMException("Failed to unpause VDI %s" % self)

632 

    def refresh(self, ignoreNonexistent=True):
        """Pause-unpause in one step"""
        # hold the SR lock across the refresh so a concurrent GC/scan cannot
        # race with the tapdisk pause/unpause
        self.sr.lock()
        try:
            try:
                if not blktap2.VDI.tap_refresh(self.sr.xapi.session,
                                               self.sr.uuid, self.uuid):
                    self._report_tapdisk_unpause_error()
                    raise util.SMException("Failed to refresh %s" % self)
            except XenAPI.Failure as e:
                # the VDI may have been deleted under us; that is fine when
                # ignoreNonexistent is set
                if util.isInvalidVDI(e) and ignoreNonexistent:
                    Util.log("VDI %s not found, ignoring" % self)
                    return
                raise
        finally:
            self.sr.unlock()

649 

    def isSnapshot(self):
        """True if XAPI considers this VDI a snapshot."""
        return self.sr.xapi.isSnapshot(self)

652 

    def isAttachedRW(self):
        """True if the VDI is currently attached read-write (per sm-config)."""
        return util.is_attached_rw(
            self.sr.xapi.session.xenapi.VDI.get_sm_config(self.getRef()))

656 

    def getVHDBlocks(self):
        """Return the VHD allocation bitmap (raw bytes).

        Refreshes the base64+zlib encoded bitmap stored in sm-config first.
        """
        val = self.updateBlockInfo()
        bitmap = zlib.decompress(base64.b64decode(val))
        return bitmap

661 

    def isCoalesceable(self):
        """A VDI is coalesceable if it has no siblings and is not a leaf"""
        # NOTE: returns the value of the 'and' chain, so callers should only
        # rely on its truthiness
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                self.isHidden() and \
                len(self.children) > 0

669 

    def isLeafCoalesceable(self):
        """A VDI is leaf-coalesceable if it has no siblings and is a leaf"""
        # NOTE: returns the value of the 'and' chain, so callers should only
        # rely on its truthiness
        return not self.scanError and \
                self.parent and \
                len(self.parent.children) == 1 and \
                not self.isHidden() and \
                len(self.children) == 0

677 

678 def canLiveCoalesce(self, speed): 

679 """Can we stop-and-leaf-coalesce this VDI? The VDI must be 

680 isLeafCoalesceable() already""" 

681 feasibleSize = False 

682 allowedDownTime = \ 

683 self.TIMEOUT_SAFETY_MARGIN * self.LIVE_LEAF_COALESCE_TIMEOUT 

684 vhd_size = self.getAllocatedSize() 

685 if speed: 

686 feasibleSize = \ 

687 vhd_size // speed < allowedDownTime 

688 else: 

689 feasibleSize = \ 

690 vhd_size < self.LIVE_LEAF_COALESCE_MAX_SIZE 

691 

692 return (feasibleSize or 

693 self.getConfig(self.DB_LEAFCLSC) == self.LEAFCLSC_FORCE) 

694 

    def getAllPrunable(self):
        """Return the VDIs in this subtree that can be garbage collected."""
        if len(self.children) == 0:  # base case
            # it is possible to have a hidden leaf that was recently coalesced
            # onto its parent, its children already relinked but not yet
            # reloaded - in which case it may not be garbage collected yet:
            # some tapdisks could still be using the file.
            if self.sr.journaler.get(self.JRN_RELINK, self.uuid):
                return []
            if not self.scanError and self.isHidden():
                return [self]
            return []

        thisPrunable = True
        vdiList = []
        for child in self.children:
            childList = child.getAllPrunable()
            vdiList.extend(childList)
            if child not in childList:
                # at least one child survives, so this node must stay
                thisPrunable = False

        # We can destroy the current VDI if all childs are hidden BUT the
        # current VDI must be hidden too to do that!
        # Example in this case (after a failed live leaf coalesce):
        #
        # SMGC: [32436] SR 07ed ('linstor-nvme-sr') (2 VDIs in 1 VHD trees):
        # SMGC: [32436]         b5458d61(1.000G/4.127M)
        # SMGC: [32436]         *OLD_b545(1.000G/4.129M)
        #
        # OLD_b545 is hidden and must be removed, but b5458d61 not.
        # Normally we are not in this function when the delete action is
        # executed but in `_liveLeafCoalesce`.
        #
        # NOTE(review): the condition below uses "not self.isHidden()", which
        # appears to contradict the comment above ("must be hidden too") --
        # confirm against upstream history before changing either.

        if not self.scanError and not self.isHidden() and thisPrunable:
            vdiList.append(self)
        return vdiList

730 

    def getSizeVHD(self) -> int:
        """Physical size of the VHD file (bytes); -1 if unknown."""
        return self._sizeVHD

733 

    def getAllocatedSize(self) -> int:
        """Allocated data size (bytes); -1 if unknown."""
        return self._sizeAllocated

736 

737 def getTreeRoot(self): 

738 "Get the root of the tree that self belongs to" 

739 root = self 

740 while root.parent: 

741 root = root.parent 

742 return root 

743 

744 def getTreeHeight(self): 

745 "Get the height of the subtree rooted at self" 

746 if len(self.children) == 0: 

747 return 1 

748 

749 maxChildHeight = 0 

750 for child in self.children: 

751 childHeight = child.getTreeHeight() 

752 if childHeight > maxChildHeight: 

753 maxChildHeight = childHeight 

754 

755 return maxChildHeight + 1 

756 

757 def getAllLeaves(self): 

758 "Get all leaf nodes in the subtree rooted at self" 

759 if len(self.children) == 0: 

760 return [self] 

761 

762 leaves = [] 

763 for child in self.children: 

764 leaves.extend(child.getAllLeaves()) 

765 return leaves 

766 

    def updateBlockInfo(self) -> Optional[str]:
        """Re-query the VHD allocation bitmap, store it (base64) in sm-config
        and return the encoded value."""
        val = base64.b64encode(self._queryVHDBlocks()).decode()
        self.setConfig(VDI.DB_VHD_BLOCKS, val)
        return val

771 

    def rename(self, uuid) -> None:
        "Rename the VDI file"
        # the new uuid must not already exist in this SR
        assert(not self.sr.vdis.get(uuid))
        self._clearRef()
        oldUuid = self.uuid
        self.uuid = uuid
        self.children = []
        # updating the children themselves is the responsibility of the caller
        del self.sr.vdis[oldUuid]
        self.sr.vdis[self.uuid] = self

782 

    def delete(self) -> None:
        "Physically delete the VDI"
        # drop any per-VDI locks before clearing our state
        lock.Lock.cleanup(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        lock.Lock.cleanupAll(self.uuid)
        self._clear()

788 

    def getParent(self) -> str:
        """Read the parent path/uuid out of the VHD header (whitespace-stripped)."""
        return vhdutil.getParent(self.path, lambda x: x.strip())

791 

    def repair(self, parent) -> None:
        """Run a VHD repair on the given parent path."""
        vhdutil.repair(parent)

794 

795 @override 

796 def __str__(self) -> str: 

797 strHidden = "" 

798 if self.isHidden(): 798 ↛ 799line 798 didn't jump to line 799, because the condition on line 798 was never true

799 strHidden = "*" 

800 strSizeVirt = "?" 

801 if self.sizeVirt > 0: 801 ↛ 802line 801 didn't jump to line 802, because the condition on line 801 was never true

802 strSizeVirt = Util.num2str(self.sizeVirt) 

803 strSizeVHD = "?" 

804 if self._sizeVHD > 0: 804 ↛ 805line 804 didn't jump to line 805, because the condition on line 804 was never true

805 strSizeVHD = "/%s" % Util.num2str(self._sizeVHD) 

806 strSizeAllocated = "?" 

807 if self._sizeAllocated >= 0: 

808 strSizeAllocated = "/%s" % Util.num2str(self._sizeAllocated) 

809 strType = "" 

810 if self.raw: 

811 strType = "[RAW]" 

812 strSizeVHD = "" 

813 

814 return "%s%s(%s%s%s)%s" % (strHidden, self.uuid[0:8], strSizeVirt, 

815 strSizeVHD, strSizeAllocated, strType) 

816 

    def validate(self, fast=False) -> None:
        """Check VHD integrity; raises util.SMException if corrupted."""
        if not vhdutil.check(self.path, fast=fast):
            raise util.SMException("VHD %s corrupted" % self)

820 

    def _clear(self):
        """Reset identifying state after the VDI has been destroyed."""
        self.uuid = ""
        self.path = ""
        self.parentUuid = ""
        self.parent = None
        self._clearRef()

827 

    def _clearRef(self):
        """Invalidate the cached XAPI ref (see getRef)."""
        self._vdiRef = None

830 

    def _doCoalesce(self) -> None:
        """Coalesce self onto parent. Only perform the actual coalescing of
        VHD, but not the subsequent relinking. We'll do that as the next step,
        after reloading the entire SR in case things have changed while we
        were coalescing"""
        self.validate()
        self.parent.validate(True)
        # make sure the parent is large enough to hold our data
        self.parent._increaseSizeVirt(self.sizeVirt)
        self.sr._updateSlavesOnResize(self.parent)
        self._coalesceVHD(0)
        self.parent.validate(True)
        #self._verifyContents(0)
        self.parent.updateBlockInfo()

844 

    def _verifyContents(self, timeOut):
        """Verify the coalesce result with tapdisk-diff (abortable, with timeout).

        Currently only referenced from commented-out code in _doCoalesce.
        """
        Util.log("  Coalesce verification on %s" % self)
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        Util.runAbortable(lambda: self._runTapdiskDiff(), True,
                          self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        Util.log("  Coalesce verification succeeded")

851 

    def _runTapdiskDiff(self):
        """Run tapdisk-diff between this VDI and its parent; raises on mismatch."""
        cmd = "tapdisk-diff -n %s:%s -m %s:%s" % \
              (self.getDriverName(), self.path, \
               self.parent.getDriverName(), self.parent.path)
        # Util.doexec raises util.CommandException on a non-zero exit
        Util.doexec(cmd, 0)
        return True

858 

    @staticmethod
    def _reportCoalesceError(vdi, ce):
        """Reports a coalesce error to XenCenter.

        vdi: the VDI object on which the coalesce error occured
        ce:  the CommandException that was raised"""

        msg_name = os.strerror(ce.code)
        if ce.code == errno.ENOSPC:
            # TODO We could add more information here, e.g. exactly how much
            # space is required for the particular coalesce, as well as actions
            # to be taken by the user and consequences of not taking these
            # actions.
            msg_body = 'Run out of space while coalescing.'
        elif ce.code == errno.EIO:
            msg_body = 'I/O error while coalescing.'
        else:
            msg_body = ''
        util.SMlog('Coalesce failed on SR %s: %s (%s)'
                   % (vdi.sr.uuid, msg_name, msg_body))

        # Create a XenCenter message, but don't spam.
        xapi = vdi.sr.xapi.session.xenapi
        sr_ref = xapi.SR.get_by_uuid(vdi.sr.uuid)
        # the admin can tune/disable throttling via SR other-config
        oth_cfg = xapi.SR.get_other_config(sr_ref)
        if COALESCE_ERR_RATE_TAG in oth_cfg:
            coalesce_err_rate = float(oth_cfg[COALESCE_ERR_RATE_TAG])
        else:
            coalesce_err_rate = DEFAULT_COALESCE_ERR_RATE

        xcmsg = False
        if coalesce_err_rate == 0:
            # zero rate == no throttling: always send
            xcmsg = True
        elif coalesce_err_rate > 0:
            now = datetime.datetime.now()
            sm_cfg = xapi.SR.get_sm_config(sr_ref)
            if COALESCE_LAST_ERR_TAG in sm_cfg:
                # seconds per message (minimum distance in time between two
                # messages in seconds)
                spm = datetime.timedelta(seconds=(1.0 / coalesce_err_rate) * 60)
                last = datetime.datetime.fromtimestamp(
                    float(sm_cfg[COALESCE_LAST_ERR_TAG]))
                if now - last >= spm:
                    xapi.SR.remove_from_sm_config(sr_ref,
                                                  COALESCE_LAST_ERR_TAG)
                    xcmsg = True
            else:
                xcmsg = True
            if xcmsg:
                # record when we last sent, for the throttling check above
                xapi.SR.add_to_sm_config(sr_ref, COALESCE_LAST_ERR_TAG,
                                         str(now.strftime('%s')))
        # NOTE: a negative rate leaves xcmsg False, i.e. reporting disabled
        if xcmsg:
            xapi.message.create(msg_name, "3", "SR", vdi.sr.uuid, msg_body)

912 

    def coalesce(self) -> int:
        """Coalesce this VHD onto its parent; return bytes coalesced."""
        # size is returned in sectors
        return vhdutil.coalesce(self.path) * 512

916 

917 @staticmethod 

918 def _doCoalesceVHD(vdi): 

919 try: 

920 startTime = time.time() 

921 vhdSize = vdi.getAllocatedSize() 

922 coalesced_size = vdi.coalesce() 

923 endTime = time.time() 

924 vdi.sr.recordStorageSpeed(startTime, endTime, coalesced_size) 

925 except util.CommandException as ce: 

926 # We use try/except for the following piece of code because it runs 

927 # in a separate process context and errors will not be caught and 

928 # reported by anyone. 

929 try: 

930 # Report coalesce errors back to user via XC 

931 VDI._reportCoalesceError(vdi, ce) 

932 except Exception as e: 

933 util.SMlog('failed to create XenCenter message: %s' % e) 

934 raise ce 

935 except: 

936 raise 

937 

    def _vdi_is_raw(self, vdi_path):
        """
        Given path to vdi determine if it is raw
        """
        uuid = self.extractUuid(vdi_path)
        return self.sr.vdis[uuid].raw

944 

    def _coalesceVHD(self, timeOut):
        """Coalesce self onto its parent inside an abortable child process.

        timeOut: maximum seconds allowed for the coalesce (0 presumably
        means unlimited — runAbortable semantics, confirm there).
        On any failure (coalesce error, abort signal or timeout kill) a
        best-effort repair of the parent VHD is attempted before the
        exception is re-raised, since an interrupted coalesce can leave the
        parent inconsistent.
        """
        Util.log(" Running VHD coalesce on %s" % self)
        # Aborts as soon as the per-SR IPC abort flag is raised.
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        try:
            # Test hook: optionally inject a failure right before coalescing.
            util.fistpoint.activate_custom_fn(
                "cleanup_coalesceVHD_inject_failure",
                util.inject_failure)
            Util.runAbortable(lambda: VDI._doCoalesceVHD(self), None,
                    self.sr.uuid, abortTest, VDI.POLL_INTERVAL, timeOut)
        except:
            # exception at this phase could indicate a failure in vhd coalesce
            # or a kill of vhd coalesce by runAbortable due to timeOut
            # Try a repair and reraise the exception
            parent = ""
            try:
                parent = self.getParent()
                if not self._vdi_is_raw(parent):
                    # Repair error is logged and ignored. Error reraised later
                    util.SMlog('Coalesce failed on %s, attempting repair on ' \
                            'parent %s' % (self.uuid, parent))
                    self.repair(parent)
            except Exception as e:
                util.SMlog('(error ignored) Failed to repair parent %s ' \
                        'after failed coalesce on %s, err: %s' %
                        (parent, self.path, e))
            raise

        # Only reached on success: the except path above always re-raises.
        util.fistpoint.activate("LVHDRT_coalescing_VHD_data", self.sr.uuid)

973 

974 def _relinkSkip(self) -> None: 

975 """Relink children of this VDI to point to the parent of this VDI""" 

976 abortFlag = IPCFlag(self.sr.uuid) 

977 for child in self.children: 

978 if abortFlag.test(FLAG_TYPE_ABORT): 978 ↛ 979line 978 didn't jump to line 979, because the condition on line 978 was never true

979 raise AbortException("Aborting due to signal") 

980 Util.log(" Relinking %s from %s to %s" % \ 

981 (child, self, self.parent)) 

982 util.fistpoint.activate("LVHDRT_relinking_grandchildren", self.sr.uuid) 

983 child._setParent(self.parent) 

984 self.children = [] 

985 

986 def _reloadChildren(self, vdiSkip): 

987 """Pause & unpause all VDIs in the subtree to cause blktap to reload 

988 the VHD metadata for this file in any online VDI""" 

989 abortFlag = IPCFlag(self.sr.uuid) 

990 for child in self.children: 

991 if child == vdiSkip: 

992 continue 

993 if abortFlag.test(FLAG_TYPE_ABORT): 993 ↛ 994line 993 didn't jump to line 994, because the condition on line 993 was never true

994 raise AbortException("Aborting due to signal") 

995 Util.log(" Reloading VDI %s" % child) 

996 child._reload() 

997 

998 def _reload(self): 

999 """Pause & unpause to cause blktap to reload the VHD metadata""" 

1000 for child in self.children: 1000 ↛ 1001line 1000 didn't jump to line 1001, because the loop on line 1000 never started

1001 child._reload() 

1002 

1003 # only leaves can be attached 

1004 if len(self.children) == 0: 1004 ↛ exitline 1004 didn't return from function '_reload', because the condition on line 1004 was never false

1005 try: 

1006 self.delConfig(VDI.DB_VDI_RELINKING) 

1007 except XenAPI.Failure as e: 

1008 if not util.isInvalidVDI(e): 

1009 raise 

1010 self.refresh() 

1011 

    def _tagChildrenForRelink(self):
        """Recursively mark every leaf of the subtree with DB_VDI_RELINKING.

        A VDI that is currently activating (DB_VDI_ACTIVATING set) wins over
        us: we back off and retry (15 attempts, 2s apart) before giving up.
        The flag is presumably consulted on the activation path (blktap2) to
        keep activation and relinking mutually exclusive — confirm there.
        """
        if len(self.children) == 0:
            retries = 0
            try:
                while retries < 15:
                    retries += 1
                    if self.getConfig(VDI.DB_VDI_ACTIVATING) is not None:
                        Util.log("VDI %s is activating, wait to relink" %
                                self.uuid)
                    else:
                        self.setConfig(VDI.DB_VDI_RELINKING, "True")

                        # Re-check: activation may have started between the
                        # check above and our tag — if so, undo and retry.
                        if self.getConfig(VDI.DB_VDI_ACTIVATING):
                            self.delConfig(VDI.DB_VDI_RELINKING)
                            Util.log("VDI %s started activating while tagging" %
                                    self.uuid)
                        else:
                            return
                    time.sleep(2)

                raise util.SMException("Failed to tag vdi %s for relink" % self)
            except XenAPI.Failure as e:
                # VDI deleted concurrently: nothing left to tag; any other
                # XAPI failure propagates.
                if not util.isInvalidVDI(e):
                    raise

        for child in self.children:
            child._tagChildrenForRelink()

1039 

1040 def _loadInfoParent(self): 

1041 ret = vhdutil.getParent(self.path, lvhdutil.extractUuid) 

1042 if ret: 

1043 self.parentUuid = ret 

1044 

1045 def _setParent(self, parent) -> None: 

1046 vhdutil.setParent(self.path, parent.path, False) 

1047 self.parent = parent 

1048 self.parentUuid = parent.uuid 

1049 parent.children.append(self) 

1050 try: 

1051 self.setConfig(self.DB_VHD_PARENT, self.parentUuid) 

1052 Util.log("Updated the vhd-parent field for child %s with %s" % \ 

1053 (self.uuid, self.parentUuid)) 

1054 except: 

1055 Util.log("Failed to update %s with vhd-parent field %s" % \ 

1056 (self.uuid, self.parentUuid)) 

1057 

    def _ensureParentActiveForRelink(self) -> None:
        """Hook: make sure the parent volume is usable before children are
        relinked onto it. No-op in the base class; LVHDVDI overrides this to
        activate the parent LV."""
        pass

1060 

1061 def isHidden(self) -> bool: 

1062 if self._hidden is None: 1062 ↛ 1063line 1062 didn't jump to line 1063, because the condition on line 1062 was never true

1063 self._loadInfoHidden() 

1064 return self._hidden 

1065 

1066 def _loadInfoHidden(self) -> None: 

1067 hidden = vhdutil.getHidden(self.path) 

1068 self._hidden = (hidden != 0) 

1069 

1070 def _setHidden(self, hidden=True) -> None: 

1071 self._hidden = None 

1072 vhdutil.setHidden(self.path, hidden) 

1073 self._hidden = hidden 

1074 

    def _increaseSizeVirt(self, size, atomic=True) -> None:
        """ensure the virtual size of 'self' is at least 'size'. Note that
        resizing a VHD must always be offline and atomically: the file must
        not be open by anyone and no concurrent operations may take place.
        Thus we use the Agent API call for performing paused atomic
        operations. If the caller is already in the atomic context, it must
        call with atomic = False"""
        if self.sizeVirt >= size:
            return
        Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # getMaxResizeSize is in MiB — the largest size reachable with the
        # fast in-place resize (no metadata relocation).
        msize = vhdutil.getMaxResizeSize(self.path) * 1024 * 1024
        if (size <= msize):
            # Fast path: safe without pausing the subtree.
            vhdutil.setSizeVirtFast(self.path, size)
        else:
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    # Pause every VDI in the subtree so nothing writes to
                    # the chain while the offline resize runs.
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the resulting size from the VHD rather than trusting the
        # requested value.
        self.sizeVirt = vhdutil.getSizeVirt(self.path)

1106 

1107 def _setSizeVirt(self, size) -> None: 

1108 """WARNING: do not call this method directly unless all VDIs in the 

1109 subtree are guaranteed to be unplugged (and remain so for the duration 

1110 of the operation): this operation is only safe for offline VHDs""" 

1111 jFile = os.path.join(self.sr.path, self.uuid) 

1112 vhdutil.setSizeVirt(self.path, size, jFile) 

1113 

1114 def _queryVHDBlocks(self) -> bytes: 

1115 return vhdutil.getBlockBitmap(self.path) 

1116 

1117 def _getCoalescedSizeData(self): 

1118 """Get the data size of the resulting VHD if we coalesce self onto 

1119 parent. We calculate the actual size by using the VHD block allocation 

1120 information (as opposed to just adding up the two VHD sizes to get an 

1121 upper bound)""" 

1122 # make sure we don't use stale BAT info from vdi_rec since the child 

1123 # was writable all this time 

1124 self.delConfig(VDI.DB_VHD_BLOCKS) 

1125 blocksChild = self.getVHDBlocks() 

1126 blocksParent = self.parent.getVHDBlocks() 

1127 numBlocks = Util.countBits(blocksChild, blocksParent) 

1128 Util.log("Num combined blocks = %d" % numBlocks) 

1129 sizeData = numBlocks * vhdutil.VHD_BLOCK_SIZE 

1130 assert(sizeData <= self.sizeVirt) 

1131 return sizeData 

1132 

1133 def _calcExtraSpaceForCoalescing(self) -> int: 

1134 sizeData = self._getCoalescedSizeData() 

1135 sizeCoalesced = sizeData + vhdutil.calcOverheadBitmap(sizeData) + \ 

1136 vhdutil.calcOverheadEmpty(self.sizeVirt) 

1137 Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced)) 

1138 return sizeCoalesced - self.parent.getSizeVHD() 

1139 

1140 def _calcExtraSpaceForLeafCoalescing(self) -> int: 

1141 """How much extra space in the SR will be required to 

1142 [live-]leaf-coalesce this VDI""" 

1143 # the space requirements are the same as for inline coalesce 

1144 return self._calcExtraSpaceForCoalescing() 

1145 

1146 def _calcExtraSpaceForSnapshotCoalescing(self) -> int: 

1147 """How much extra space in the SR will be required to 

1148 snapshot-coalesce this VDI""" 

1149 return self._calcExtraSpaceForCoalescing() + \ 

1150 vhdutil.calcOverheadEmpty(self.sizeVirt) # extra snap leaf 

1151 

1152 def _getAllSubtree(self): 

1153 """Get self and all VDIs in the subtree of self as a flat list""" 

1154 vdiList = [self] 

1155 for child in self.children: 

1156 vdiList.extend(child._getAllSubtree()) 

1157 return vdiList 

1158 

1159 

class FileVDI(VDI):
    """Object representing a VDI in a file-based SR (EXT or NFS)"""

    @override
    @staticmethod
    def extractUuid(path):
        """Return the VDI UUID encoded in a .vhd/.raw file name, or None if
        the path carries neither extension."""
        path = os.path.basename(path.strip())
        if not (path.endswith(vhdutil.FILE_EXTN_VHD) or \
                path.endswith(vhdutil.FILE_EXTN_RAW)):
            return None
        uuid = path.replace(vhdutil.FILE_EXTN_VHD, "").replace( \
                vhdutil.FILE_EXTN_RAW, "")
        # TODO: validate UUID format
        return uuid

    def __init__(self, sr, uuid, raw):
        """raw: True for a .raw image, False for a VHD."""
        VDI.__init__(self, sr, uuid, raw)
        # The file-name extension reflects the image format.
        if self.raw:
            self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_RAW)
        else:
            self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)

    @override
    def load(self, info=None) -> None:
        """Populate parent/size/hidden state; scans the on-disk VHD when no
        pre-fetched 'info' record is supplied. On a failed scan, returns
        early leaving scanError at its previous value."""
        if not info:
            if not util.pathexists(self.path):
                raise util.SMException("%s not found" % self.path)
            try:
                info = vhdutil.getVHDInfo(self.path, self.extractUuid)
            except util.SMException:
                Util.log(" [VDI %s: failed to read VHD metadata]" % self.uuid)
                return
        self.parent = None
        self.children = []
        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        self._sizeVHD = info.sizePhys
        self._sizeAllocated = info.sizeAllocated
        self._hidden = info.hidden
        self.scanError = False
        # NOTE(review): the path is rebuilt with the VHD extension even when
        # self.raw is True — confirm raw VDIs never reach this point.
        self.path = os.path.join(self.sr.path, "%s%s" % \
                (self.uuid, vhdutil.FILE_EXTN_VHD))

    @override
    def rename(self, uuid) -> None:
        """Rename to 'uuid': update bookkeeping via VDI.rename, then rename
        the backing file on disk."""
        oldPath = self.path
        VDI.rename(self, uuid)
        self.fileName = "%s%s" % (self.uuid, vhdutil.FILE_EXTN_VHD)
        self.path = os.path.join(self.sr.path, self.fileName)
        # The target name must not already exist (fresh UUIDs only).
        assert(not util.pathexists(self.path))
        Util.log("Renaming %s -> %s" % (oldPath, self.path))
        os.rename(oldPath, self.path)

    @override
    def delete(self) -> None:
        """Unlink the backing file and forget the VDI in XAPI (leaf-only).

        Raises util.SMException if the VDI still has children or the unlink
        fails.
        """
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        try:
            # Hold the SR lock across unlink + forget so a concurrent scan
            # cannot observe a half-deleted VDI.
            self.sr.lock()
            try:
                os.unlink(self.path)
                self.sr.forgetVDI(self.uuid)
            finally:
                self.sr.unlock()
        except OSError:
            raise util.SMException("os.unlink(%s) failed" % self.path)
        VDI.delete(self)

    @override
    def getAllocatedSize(self) -> int:
        """Return the allocated size in bytes, reading it lazily from the
        VHD and caching the result (-1 means "not fetched yet")."""
        if self._sizeAllocated == -1:
            self._sizeAllocated = vhdutil.getAllocatedSize(self.path)
        return self._sizeAllocated

1234 

1235 

class LVHDVDI(VDI):
    """Object representing a VDI in an LVHD SR"""

    JRN_ZERO = "zero"  # journal entry type for zeroing out end of parent
    DRIVER_NAME_RAW = "aio"  # tapdisk driver used for raw (non-VHD) LVs

    @override
    def load(self, info=None) -> None:
        """Initialize from an LVHD scan record ('info' is required; the
        default exists only to match the parent signature)."""
        # `info` is always set. `None` default value is only here to match parent method.
        assert info, "No info given to LVHDVDI.load"
        self.parent = None
        self.children = []
        # Physical/allocated sizes are fetched lazily; -1 = not fetched yet.
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self.scanError = info.scanError
        self.sizeLV = info.sizeLV
        self.sizeVirt = info.sizeVirt
        self.fileName = info.lvName
        self.lvActive = info.lvActive
        self.lvOpen = info.lvOpen
        self.lvReadonly = info.lvReadonly
        self._hidden = info.hidden
        self.parentUuid = info.parentUuid
        self.path = os.path.join(self.sr.path, self.fileName)

    @override
    @staticmethod
    def extractUuid(path):
        """Derive the VDI UUID from an LV path."""
        return lvhdutil.extractUuid(path)

    @override
    def getDriverName(self) -> str:
        """Raw LVs attach via 'aio'; VHD LVs via the VHD driver."""
        if self.raw:
            return self.DRIVER_NAME_RAW
        return self.DRIVER_NAME_VHD

    def inflate(self, size):
        """inflate the LV containing the VHD to 'size'"""
        if self.raw:
            # Raw LVs are fully provisioned; nothing to inflate.
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.inflate(self.sr.journaler, self.sr.uuid, self.uuid, size)
            util.fistpoint.activate("LVHDRT_inflating_the_parent", self.sr.uuid)
        finally:
            self.sr.unlock()
        # Cached sizes are stale after the resize; force a re-read.
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """deflate the LV containing the VHD to minimum"""
        if self.raw:
            return
        self._activate()
        self.sr.lock()
        try:
            lvhdutil.deflate(self.sr.lvmCache, self.fileName, self.getSizeVHD())
        finally:
            self.sr.unlock()
        self.sizeLV = self.sr.lvmCache.getSize(self.fileName)
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate the LV to the size of a fully-allocated VHD."""
        self.inflate(lvhdutil.calcSizeVHDLV(self.sizeVirt))

    def inflateParentForCoalesce(self):
        """Inflate the parent only as much as needed for the purposes of
        coalescing"""
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            util.fistpoint.activate("LVHDRT_coalescing_before_inflate_grandparent", self.sr.uuid)
            self.parent.inflate(self.parent.sizeLV + inc)

    @override
    def updateBlockInfo(self) -> Optional[str]:
        # Raw VDIs have no VHD block bitmap to record.
        if not self.raw:
            return VDI.updateBlockInfo(self)
        return None

    @override
    def rename(self, uuid) -> None:
        """Rename the LV and carry over activation state and ref-counts."""
        oldUuid = self.uuid
        oldLVName = self.fileName
        VDI.rename(self, uuid)
        self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + self.uuid
        if self.raw:
            self.fileName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + self.uuid
        self.path = os.path.join(self.sr.path, self.fileName)
        # The target LV name must not already exist.
        assert(not self.sr.lvmCache.checkLV(self.fileName))

        self.sr.lvmCache.rename(oldLVName, self.fileName)
        if self.sr.lvActivator.get(oldUuid, False):
            self.sr.lvActivator.replace(oldUuid, self.uuid, self.fileName, False)

        # Move the LVM ref-count bookkeeping over to the new UUID.
        ns = lvhdutil.NS_PREFIX_LVM + self.sr.uuid
        (cnt, bcnt) = RefCounter.check(oldUuid, ns)
        RefCounter.set(self.uuid, cnt, bcnt, ns)
        RefCounter.reset(oldUuid, ns)

    @override
    def delete(self) -> None:
        """Remove the LV, forget the VDI in XAPI and clear its ref-counts
        (leaf-only: raises if children exist)."""
        if len(self.children) > 0:
            raise util.SMException("VDI %s has children, can't delete" % \
                    self.uuid)
        self.sr.lock()
        try:
            self.sr.lvmCache.remove(self.fileName)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        RefCounter.reset(self.uuid, lvhdutil.NS_PREFIX_LVM + self.sr.uuid)
        VDI.delete(self)

    @override
    def getSizeVHD(self) -> int:
        """Physical VHD utilization, loaded lazily (-1 = not fetched)."""
        if self._sizeVHD == -1:
            self._loadInfoSizeVHD()
        return self._sizeVHD

    def _loadInfoSizeVHD(self):
        """Get the physical utilization of the VHD file. We do it individually
        (and not using the VHD batch scanner) as an optimization: this info is
        relatively expensive and we need it only for VDI's involved in
        coalescing."""
        if self.raw:
            # NOTE(review): leaves _sizeVHD at -1 for raw VDIs.
            return
        self._activate()
        self._sizeVHD = vhdutil.getSizePhys(self.path)
        if self._sizeVHD <= 0:
            raise util.SMException("phys size of %s = %d" % \
                    (self, self._sizeVHD))

    @override
    def getAllocatedSize(self) -> int:
        """Allocated VHD size, loaded lazily (-1 = not fetched)."""
        if self._sizeAllocated == -1:
            self._loadInfoSizeAllocated()
        return self._sizeAllocated

    def _loadInfoSizeAllocated(self):
        """
        Get the allocated size of the VHD volume.
        """
        if self.raw:
            # NOTE(review): leaves _sizeAllocated at -1 for raw VDIs.
            return
        self._activate()
        self._sizeAllocated = vhdutil.getAllocatedSize(self.path)

    @override
    def _loadInfoHidden(self) -> None:
        if self.raw:
            # Raw LVs keep the hidden flag in LVM metadata, not a VHD header.
            self._hidden = self.sr.lvmCache.getHidden(self.fileName)
        else:
            VDI._loadInfoHidden(self)

    @override
    def _setHidden(self, hidden=True) -> None:
        if self.raw:
            # Invalidate the cache before the write so a failure forces a
            # re-read instead of leaving a stale value.
            self._hidden = None
            self.sr.lvmCache.setHidden(self.fileName, hidden)
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def __str__(self) -> str:
        """Compact summary: [*]uuid8[TYPE](virt/phys/alloc/LV|activity)."""
        strType = "VHD"
        if self.raw:
            strType = "RAW"
        strHidden = ""
        if self.isHidden():
            strHidden = "*"
        strSizeVHD = ""
        if self._sizeVHD > 0:
            strSizeVHD = Util.num2str(self._sizeVHD)
        strSizeAllocated = ""
        if self._sizeAllocated >= 0:
            strSizeAllocated = Util.num2str(self._sizeAllocated)
        strActive = "n"
        if self.lvActive:
            strActive = "a"
        if self.lvOpen:
            strActive += "o"
        return "%s%s[%s](%s/%s/%s/%s|%s)" % (strHidden, self.uuid[0:8], strType,
                Util.num2str(self.sizeVirt), strSizeVHD, strSizeAllocated,
                Util.num2str(self.sizeLV), strActive)

    @override
    def validate(self, fast=False) -> None:
        # Raw volumes have no VHD structure to validate.
        if not self.raw:
            VDI.validate(self, fast)

    @override
    def _doCoalesce(self) -> None:
        """LVHD parents must first be activated, inflated, and made writable"""
        try:
            self._activateChain()
            self.sr.lvmCache.setReadonly(self.parent.fileName, False)
            self.parent.validate()
            self.inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            # Always shrink the parent back and restore read-only, even if
            # the coalesce failed.
            self.parent._loadInfoSizeVHD()
            self.parent.deflate()
            self.sr.lvmCache.setReadonly(self.parent.fileName, True)

    @override
    def _setParent(self, parent) -> None:
        """Re-parent the VHD; the LV must be active and temporarily made
        writable for the header rewrite."""
        self._activate()
        if self.lvReadonly:
            self.sr.lvmCache.setReadonly(self.fileName, False)

        try:
            vhdutil.setParent(self.path, parent.path, parent.raw)
        finally:
            if self.lvReadonly:
                self.sr.lvmCache.setReadonly(self.fileName, True)
        self._deactivate()
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # Best effort: the sm-config vhd-parent field is advisory.
            Util.log("Failed to update the vhd-parent with %s for child %s" % \
                    (self.parentUuid, self.uuid))

    def _activate(self):
        """Ensure the LV is active (ref-counted via the SR's LVActivator)."""
        self.sr.lvActivator.activate(self.uuid, self.fileName, False)

    def _activateChain(self):
        """Activate this LV and every ancestor up to the root."""
        vdi = self
        while vdi:
            vdi._activate()
            vdi = vdi.parent

    def _deactivate(self):
        """Drop our activation ref-count on the LV."""
        self.sr.lvActivator.deactivate(self.uuid, False)

    @override
    def _ensureParentActiveForRelink(self) -> None:
        # Children are about to be re-pointed at the parent: its LV must be
        # active for the header rewrites.
        self.parent._activate()

    @override
    def _increaseSizeVirt(self, size, atomic=True) -> None:
        "ensure the virtual size of 'self' is at least 'size'"
        self._activate()
        if not self.raw:
            VDI._increaseSizeVirt(self, size, atomic)
            return

        # raw VDI case: grow the LV and zero out the newly-added tail,
        # journaling the zeroing offset so an interrupted pass can resume.
        offset = self.sizeLV
        if self.sizeVirt < size:
            oldSize = self.sizeLV
            self.sizeLV = util.roundup(lvutil.LVM_SIZE_INCREMENT, size)
            Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.sizeLV))
            self.sr.lvmCache.setSize(self.fileName, self.sizeLV)
            offset = oldSize
        unfinishedZero = False
        jval = self.sr.journaler.get(self.JRN_ZERO, self.uuid)
        if jval:
            # A previous zeroing attempt was interrupted: resume from the
            # journaled offset.
            unfinishedZero = True
            offset = int(jval)
        length = self.sizeLV - offset
        if not length:
            return

        if unfinishedZero:
            Util.log(" ==> Redoing unfinished zeroing out")
        else:
            self.sr.journaler.create(self.JRN_ZERO, self.uuid, \
                    str(offset))
        Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
        abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
        func = lambda: util.zeroOut(self.path, offset, length)
        Util.runAbortable(func, True, self.sr.uuid, abortTest,
                VDI.POLL_INTERVAL, 0)
        self.sr.journaler.remove(self.JRN_ZERO, self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """WARNING: do not call this method directly unless all VDIs in the
        subtree are guaranteed to be unplugged (and remain so for the duration
        of the operation): this operation is only safe for offline VHDs"""
        self._activate()
        # The resize journal lives in its own LV so it survives crashes.
        jFile = lvhdutil.createVHDJournalLV(self.sr.lvmCache, self.uuid,
                vhdutil.MAX_VHD_JOURNAL_SIZE)
        try:
            lvhdutil.setSizeVirt(self.sr.journaler, self.sr.uuid, self.uuid,
                    size, jFile)
        finally:
            lvhdutil.deleteVHDJournalLV(self.sr.lvmCache, self.uuid)

    @override
    def _queryVHDBlocks(self) -> bytes:
        # The LV must be active before the bitmap can be read.
        self._activate()
        return VDI._queryVHDBlocks(self)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if self.parent.raw:
            return 0  # raw parents are never deflated in the first place
        sizeCoalesced = lvhdutil.calcSizeVHDLV(self._getCoalescedSizeData())
        Util.log("Coalesced size = %s" % Util.num2str(sizeCoalesced))
        return sizeCoalesced - self.parent.sizeLV

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        """How much extra space in the SR will be required to
        [live-]leaf-coalesce this VDI"""
        # we can deflate the leaf to minimize the space requirements
        deflateDiff = self.sizeLV - lvhdutil.calcSizeLV(self.getSizeVHD())
        return self._calcExtraSpaceForCoalescing() - deflateDiff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        # Extra room for the snapshot leaf that remains after coalescing.
        return self._calcExtraSpaceForCoalescing() + \
                lvhdutil.calcSizeLV(self.getSizeVHD())

1561 

1562 

class LinstorVDI(VDI):
    """Object representing a VDI in a LINSTOR SR"""

    # Max seconds to wait for a LINSTOR volume lock to be released.
    VOLUME_LOCK_TIMEOUT = 30

    @override
    def load(self, info=None) -> None:
        """Initialize from a LINSTOR VHD info record, fetching one from the
        controller when none is supplied."""
        # NOTE(review): info.parentUuid is read before the `if not info`
        # fallback below, so calling load() with info=None would raise
        # AttributeError here — confirm callers always pass info.
        self.parentUuid = info.parentUuid
        self.scanError = True
        self.parent = None
        self.children = []

        self.fileName = self.sr._linstor.get_volume_name(self.uuid)
        self.path = self.sr._linstor.build_device_path(self.fileName)

        if not info:
            try:
                info = self.sr._vhdutil.get_vhd_info(self.uuid)
            except util.SMException:
                # Leave scanError=True so the SR treats this VDI as broken.
                Util.log(
                    ' [VDI {}: failed to read VHD metadata]'.format(self.uuid)
                )
                return

        self.parentUuid = info.parentUuid
        self.sizeVirt = info.sizeVirt
        # Lazily fetched sizes: -1 means "not fetched yet".
        self._sizeVHD = -1
        self._sizeAllocated = -1
        self.drbd_size = -1
        self._hidden = info.hidden
        self.scanError = False
        self.vdi_type = vhdutil.VDI_TYPE_VHD

    @override
    def getSizeVHD(self, fetch=False) -> int:
        """Physical VHD size; fetch=True bypasses the cached value."""
        if self._sizeVHD < 0 or fetch:
            self._sizeVHD = self.sr._vhdutil.get_size_phys(self.uuid)
        return self._sizeVHD

    def getDrbdSize(self, fetch=False):
        """Size of the backing DRBD device; fetch=True bypasses the cache."""
        if self.drbd_size < 0 or fetch:
            self.drbd_size = self.sr._vhdutil.get_drbd_size(self.uuid)
        return self.drbd_size

    @override
    def getAllocatedSize(self) -> int:
        # NOTE(review): stays at -1 for raw VDIs.
        if self._sizeAllocated == -1:
            if not self.raw:
                self._sizeAllocated = self.sr._vhdutil.get_allocated_size(self.uuid)
        return self._sizeAllocated

    def inflate(self, size):
        """Grow the DRBD volume backing the VHD to 'size' bytes."""
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real DRBD size and not the cached one.
            # Why? Because this attribute can be changed if volume is resized by user.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self.sr._vhdutil.inflate(self.sr.journaler, self.uuid, self.path, size, self.drbd_size)
        finally:
            self.sr.unlock()
        # Invalidate all cached sizes after the resize.
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def deflate(self):
        """Shrink the DRBD volume down to the VHD's physical utilization."""
        if self.raw:
            return
        self.sr.lock()
        try:
            # Ensure we use the real sizes and not the cached info.
            self.drbd_size = self.getDrbdSize(fetch=True)
            self._sizeVHD = self.getSizeVHD(fetch=True)
            self.sr._vhdutil.force_deflate(self.path, self._sizeVHD, self.drbd_size, zeroize=False)
        finally:
            self.sr.unlock()
        self.drbd_size = -1
        self._sizeVHD = -1
        self._sizeAllocated = -1

    def inflateFully(self):
        """Inflate to the size of a fully-allocated VHD of sizeVirt."""
        if not self.raw:
            self.inflate(LinstorVhdUtil.compute_volume_size(self.sizeVirt, self.vdi_type))

    @override
    def rename(self, uuid) -> None:
        """Rename the LINSTOR volume, then update local bookkeeping."""
        Util.log('Renaming {} -> {} (path={})'.format(
            self.uuid, uuid, self.path
        ))
        self.sr._linstor.update_volume_uuid(self.uuid, uuid)
        VDI.rename(self, uuid)

    @override
    def delete(self) -> None:
        """Destroy the LINSTOR volume and forget the VDI (leaf-only)."""
        if len(self.children) > 0:
            raise util.SMException(
                'VDI {} has children, can\'t delete'.format(self.uuid)
            )
        self.sr.lock()
        try:
            self.sr._linstor.destroy_volume(self.uuid)
            self.sr.forgetVDI(self.uuid)
        finally:
            self.sr.unlock()
        VDI.delete(self)

    @override
    def validate(self, fast=False) -> None:
        if not self.raw and not self.sr._vhdutil.check(self.uuid, fast=fast):
            raise util.SMException('VHD {} corrupted'.format(self))

    @override
    def pause(self, failfast=False) -> None:
        # Wait out any in-flight LINSTOR operation on this volume first.
        self.sr._linstor.ensure_volume_is_not_locked(
            self.uuid, timeout=self.VOLUME_LOCK_TIMEOUT
        )
        return super(LinstorVDI, self).pause(failfast)

    @override
    def coalesce(self) -> int:
        """Coalesce onto parent; returns bytes coalesced (sectors * 512)."""
        # Note: We raise `SMException` here to skip the current coalesce in case of failure.
        # Using another exception we can't execute the next coalesce calls.
        return self.sr._vhdutil.force_coalesce(self.path) * 512

    @override
    def getParent(self) -> str:
        return self.sr._vhdutil.get_parent(
            self.sr._linstor.get_volume_uuid_from_device_path(self.path)
        )

    @override
    def repair(self, parent_uuid) -> None:
        """Force a VHD repair on the given parent volume."""
        self.sr._vhdutil.force_repair(
            self.sr._linstor.get_device_path(parent_uuid)
        )

    @override
    def _relinkSkip(self) -> None:
        """Relink children onto this VDI's parent, tap-pausing each child so
        its running tapdisk picks up the new parent."""
        abortFlag = IPCFlag(self.sr.uuid)
        for child in self.children:
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException('Aborting due to signal')
            Util.log(
                ' Relinking {} from {} to {}'.format(
                    child, self, self.parent
                )
            )

            session = child.sr.xapi.session
            sr_uuid = child.sr.uuid
            vdi_uuid = child.uuid
            try:
                self.sr._linstor.ensure_volume_is_not_locked(
                    vdi_uuid, timeout=self.VOLUME_LOCK_TIMEOUT
                )
                blktap2.VDI.tap_pause(session, sr_uuid, vdi_uuid)
                child._setParent(self.parent)
            finally:
                # Unpause even if the pause/relink failed part-way.
                blktap2.VDI.tap_unpause(session, sr_uuid, vdi_uuid)
        self.children = []

    @override
    def _setParent(self, parent) -> None:
        """Rewrite the VHD parent pointer and update the in-memory tree."""
        # Ensure the device path exists locally before the header rewrite.
        self.sr._linstor.get_device_path(self.uuid)
        self.sr._vhdutil.force_parent(self.path, parent.path)
        self.parent = parent
        self.parentUuid = parent.uuid
        parent.children.append(self)
        try:
            self.setConfig(self.DB_VHD_PARENT, self.parentUuid)
            Util.log("Updated the vhd-parent field for child %s with %s" % \
                    (self.uuid, self.parentUuid))
        except:
            # Best effort: the sm-config vhd-parent field is advisory.
            Util.log("Failed to update %s with vhd-parent field %s" % \
                    (self.uuid, self.parentUuid))

    @override
    def _doCoalesce(self) -> None:
        try:
            self._activateChain()
            self.parent.validate()
            self._inflateParentForCoalesce()
            VDI._doCoalesce(self)
        finally:
            # Always return the parent to its minimal size.
            self.parent.deflate()

    def _activateChain(self):
        """Ensure device paths exist for this VDI and all its ancestors."""
        vdi = self
        while vdi:
            try:
                # Called for its side effect only; the path is discarded.
                p = self.sr._linstor.get_device_path(vdi.uuid)
            except Exception as e:
                # Use SMException to skip coalesce.
                # Otherwise the GC is stopped...
                raise util.SMException(str(e))
            vdi = vdi.parent

    @override
    def _setHidden(self, hidden=True) -> None:
        HIDDEN_TAG = 'hidden'

        if self.raw:
            # Raw volumes keep the flag in LINSTOR volume metadata.
            self._hidden = None
            self.sr._linstor.update_volume_metadata(self.uuid, {
                HIDDEN_TAG: hidden
            })
            self._hidden = hidden
        else:
            VDI._setHidden(self, hidden)

    @override
    def _increaseSizeVirt(self, size, atomic=True):
        """Grow virtual size to at least 'size'.

        Raw volumes: grow the DRBD device and zero the new tail, journaling
        the offset so an interrupted zeroing pass can resume. VHD volumes:
        fast in-place resize when possible, otherwise a journaled offline
        resize with the subtree paused (unless atomic=False, meaning the
        caller already holds the atomic context).
        """
        if self.raw:
            offset = self.drbd_size
            if self.sizeVirt < size:
                oldSize = self.drbd_size
                self.drbd_size = LinstorVolumeManager.round_up_volume_size(size)
                Util.log(" Growing %s: %d->%d" % (self.path, oldSize, self.drbd_size))
                self.sr._linstor.resize_volume(self.uuid, self.drbd_size)
                offset = oldSize
            unfinishedZero = False
            jval = self.sr.journaler.get(LinstorJournaler.ZERO, self.uuid)
            if jval:
                # Resume an interrupted zeroing pass from the journaled offset.
                unfinishedZero = True
                offset = int(jval)
            length = self.drbd_size - offset
            if not length:
                return

            if unfinishedZero:
                Util.log(" ==> Redoing unfinished zeroing out")
            else:
                self.sr.journaler.create(LinstorJournaler.ZERO, self.uuid, str(offset))
            Util.log(" Zeroing %s: from %d, %dB" % (self.path, offset, length))
            abortTest = lambda: IPCFlag(self.sr.uuid).test(FLAG_TYPE_ABORT)
            func = lambda: util.zeroOut(self.path, offset, length)
            Util.runAbortable(func, True, self.sr.uuid, abortTest, VDI.POLL_INTERVAL, 0)
            self.sr.journaler.remove(LinstorJournaler.ZERO, self.uuid)
            return

        if self.sizeVirt >= size:
            return
        Util.log(" Expanding VHD virt size for VDI %s: %s -> %s" % \
                (self, Util.num2str(self.sizeVirt), Util.num2str(size)))

        # get_max_resize_size is in MiB: the limit of the fast resize path.
        msize = self.sr._vhdutil.get_max_resize_size(self.uuid) * 1024 * 1024
        if (size <= msize):
            self.sr._vhdutil.set_size_virt_fast(self.path, size)
        else:
            if atomic:
                vdiList = self._getAllSubtree()
                self.sr.lock()
                try:
                    # Pause the whole subtree for the offline resize.
                    self.sr.pauseVDIs(vdiList)
                    try:
                        self._setSizeVirt(size)
                    finally:
                        self.sr.unpauseVDIs(vdiList)
                finally:
                    self.sr.unlock()
            else:
                self._setSizeVirt(size)

        # Re-read the resulting size rather than trusting the request.
        self.sizeVirt = self.sr._vhdutil.get_size_virt(self.uuid)

    @override
    def _setSizeVirt(self, size) -> None:
        """Journaled offline resize; see VDI._setSizeVirt for the warning
        about the subtree having to be unplugged."""
        jfile = self.uuid + '-jvhd'
        # Non-persistent journal volume: discarded automatically if we die.
        self.sr._linstor.create_volume(
            jfile, vhdutil.MAX_VHD_JOURNAL_SIZE, persistent=False, volume_name=jfile
        )
        try:
            self.inflate(LinstorVhdUtil.compute_volume_size(size, self.vdi_type))
            self.sr._vhdutil.set_size_virt(size, jfile)
        finally:
            try:
                self.sr._linstor.destroy_volume(jfile)
            except Exception:
                # We can ignore it, in any case this volume is not persistent.
                pass

    @override
    def _queryVHDBlocks(self) -> bytes:
        return self.sr._vhdutil.get_block_bitmap(self.uuid)

    def _inflateParentForCoalesce(self):
        """Grow the parent just enough to receive the coalesced data."""
        if self.parent.raw:
            return
        inc = self._calcExtraSpaceForCoalescing()
        if inc > 0:
            self.parent.inflate(self.parent.getDrbdSize() + inc)

    @override
    def _calcExtraSpaceForCoalescing(self) -> int:
        if self.parent.raw:
            return 0
        size_coalesced = LinstorVhdUtil.compute_volume_size(
            self._getCoalescedSizeData(), self.vdi_type
        )
        Util.log("Coalesced size = %s" % Util.num2str(size_coalesced))
        return size_coalesced - self.parent.getDrbdSize()

    @override
    def _calcExtraSpaceForLeafCoalescing(self) -> int:
        assert self.getDrbdSize() > 0
        assert self.getSizeVHD() > 0
        # The leaf can first be deflated, recovering that much space.
        deflate_diff = self.getDrbdSize() - LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())
        assert deflate_diff >= 0
        return self._calcExtraSpaceForCoalescing() - deflate_diff

    @override
    def _calcExtraSpaceForSnapshotCoalescing(self) -> int:
        assert self.getSizeVHD() > 0
        return self._calcExtraSpaceForCoalescing() + \
                LinstorVolumeManager.round_up_volume_size(self.getSizeVHD())

1879 

1880################################################################################ 

1881# 

1882# SR 

1883# 

1884class SR(object): 

1885 class LogFilter: 

1886 def __init__(self, sr): 

1887 self.sr = sr 

1888 self.stateLogged = False 

1889 self.prevState = {} 

1890 self.currState = {} 

1891 

1892 def logState(self): 

1893 changes = "" 

1894 self.currState.clear() 

1895 for vdi in self.sr.vdiTrees: 

1896 self.currState[vdi.uuid] = self._getTreeStr(vdi) 

1897 if not self.prevState.get(vdi.uuid) or \ 

1898 self.prevState[vdi.uuid] != self.currState[vdi.uuid]: 

1899 changes += self.currState[vdi.uuid] 

1900 

1901 for uuid in self.prevState: 

1902 if not self.currState.get(uuid): 

1903 changes += "Tree %s gone\n" % uuid 

1904 

1905 result = "SR %s (%d VDIs in %d VHD trees): " % \ 

1906 (self.sr, len(self.sr.vdis), len(self.sr.vdiTrees)) 

1907 

1908 if len(changes) > 0: 

1909 if self.stateLogged: 

1910 result += "showing only VHD trees that changed:" 

1911 result += "\n%s" % changes 

1912 else: 

1913 result += "no changes" 

1914 

1915 for line in result.split("\n"): 

1916 Util.log("%s" % line) 

1917 self.prevState.clear() 

1918 for key, val in self.currState.items(): 

1919 self.prevState[key] = val 

1920 self.stateLogged = True 

1921 

1922 def logNewVDI(self, uuid): 

1923 if self.stateLogged: 

1924 Util.log("Found new VDI when scanning: %s" % uuid) 

1925 

1926 def _getTreeStr(self, vdi, indent=8): 

1927 treeStr = "%s%s\n" % (" " * indent, vdi) 

1928 for child in vdi.children: 

1929 treeStr += self._getTreeStr(child, indent + VDI.STR_TREE_INDENT) 

1930 return treeStr 

1931 

    # Recognized SR backend types (see getInstance()).
    TYPE_FILE = "file"
    TYPE_LVHD = "lvhd"
    TYPE_LINSTOR = "linstor"
    TYPES = [TYPE_LVHD, TYPE_FILE, TYPE_LINSTOR]

    # SR-lock acquisition retry policy: seconds between attempts and the
    # number of attempts (the *_LOCK variant is used by lock()).
    LOCK_RETRY_INTERVAL = 3
    LOCK_RETRY_ATTEMPTS = 20
    LOCK_RETRY_ATTEMPTS_LOCK = 100

    SCAN_RETRY_ATTEMPTS = 3

    JRN_CLONE = "clone"  # journal entry type for the clone operation (from SM)
    TMP_RENAME_PREFIX = "OLD_"  # prefix given to the obsolete leaf during leaf-coalesce

    # other-config keys used to coordinate offline leaf-coalesce with SM.
    KEY_OFFLINE_COALESCE_NEEDED = "leaf_coalesce_need_offline"
    KEY_OFFLINE_COALESCE_OVERRIDE = "leaf_coalesce_offline_override"

1948 

1949 @staticmethod 

1950 def getInstance(uuid, xapiSession, createLock=True, force=False): 

1951 xapi = XAPI(xapiSession, uuid) 

1952 type = normalizeType(xapi.srRecord["type"]) 

1953 if type == SR.TYPE_FILE: 

1954 return FileSR(uuid, xapi, createLock, force) 

1955 elif type == SR.TYPE_LVHD: 

1956 return LVHDSR(uuid, xapi, createLock, force) 

1957 elif type == SR.TYPE_LINSTOR: 

1958 return LinstorSR(uuid, xapi, createLock, force) 

1959 raise util.SMException("SR type %s not recognized" % type) 

1960 

    def __init__(self, uuid, xapi, createLock, force):
        """Initialize SR state and verify it is attached and we are master.

        Raises util.SMException if the SR is not plugged on this host (unless
        `force`) or if this host is not the pool master (unless `force`).
        """
        self.logFilter = self.LogFilter(self)
        self.uuid = uuid
        self.path = ""
        self.name = ""
        self.vdis = {}          # uuid -> VDI object, as of the last scan
        self.vdiTrees = []      # root VDIs of the VHD trees
        self.journaler = None   # presumably set by subclasses — None until then
        self.xapi = xapi
        self._locked = 0        # nesting depth of the SR lock
        self._srLock = None
        if createLock:
            self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, self.uuid)
        else:
            Util.log("Requested no SR locking")
        self.name = self.xapi.srRecord["name_label"]
        self._failedCoalesceTargets = []

        if not self.xapi.isPluggedHere():
            if force:
                Util.log("SR %s not attached on this host, ignoring" % uuid)
            else:
                # Give the SR a chance to finish plugging before giving up.
                if not self.wait_for_plug():
                    raise util.SMException("SR %s not attached on this host" % uuid)

        if force:
            Util.log("Not checking if we are Master (SR %s)" % uuid)
        elif not self.xapi.isMaster():
            raise util.SMException("This host is NOT master, will not run")

        # VDIs that could not be coalesced for lack of space this run.
        self.no_space_candidates = {}

1992 

1993 def msg_cleared(self, xapi_session, msg_ref): 

1994 try: 

1995 msg = xapi_session.xenapi.message.get_record(msg_ref) 

1996 except XenAPI.Failure: 

1997 return True 

1998 

1999 return msg is None 

2000 

    def check_no_space_candidates(self):
        """Raise or clear the SM_GC_NO_SPACE XAPI message depending on
        whether any VDI could not be coalesced for lack of space.

        The message id is persisted in the SR's sm_config under
        VDI.DB_GC_NO_SPACE so later GC runs can find and clear it.
        """
        xapi_session = self.xapi.getSession()

        msg_id = self.xapi.srRecord["sm_config"].get(VDI.DB_GC_NO_SPACE)
        if self.no_space_candidates:
            # Only (re)create the message when none exists or the operator
            # already dismissed the previous one.
            if msg_id is None or self.msg_cleared(xapi_session, msg_id):
                util.SMlog("Could not coalesce due to a lack of space "
                           f"in SR {self.uuid}")
                msg_body = ("Unable to perform data coalesce due to a lack "
                            f"of space in SR {self.uuid}")
                msg_id = xapi_session.xenapi.message.create(
                    'SM_GC_NO_SPACE',
                    3,
                    "SR",
                    self.uuid,
                    msg_body)
                # Replace any stale sm_config entry with the new message id.
                xapi_session.xenapi.SR.remove_from_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE)
                xapi_session.xenapi.SR.add_to_sm_config(
                    self.xapi.srRef, VDI.DB_GC_NO_SPACE, msg_id)

            # Tag each starved VDI with the message id as well.
            for candidate in self.no_space_candidates.values():
                candidate.setConfig(VDI.DB_GC_NO_SPACE, msg_id)
        elif msg_id is not None:
            # Everything was coalescable, remove the message
            xapi_session.xenapi.SR.remove_from_sm_config(self.xapi.srRef, VDI.DB_GC_NO_SPACE)
            xapi_session.xenapi.message.destroy(msg_id)

2028 

2029 def clear_no_space_msg(self, vdi): 

2030 msg_id = None 

2031 try: 

2032 msg_id = vdi.getConfig(VDI.DB_GC_NO_SPACE) 

2033 except XenAPI.Failure: 

2034 pass 

2035 

2036 self.no_space_candidates.pop(vdi.uuid, None) 

2037 if msg_id is not None: 2037 ↛ exitline 2037 didn't return from function 'clear_no_space_msg', because the condition on line 2037 was never false

2038 vdi.delConfig(VDI.DB_GC_NO_SPACE) 

2039 

2040 

2041 def wait_for_plug(self): 

2042 for _ in range(1, 10): 

2043 time.sleep(2) 

2044 if self.xapi.isPluggedHere(): 

2045 return True 

2046 return False 

2047 

2048 def gcEnabled(self, refresh=True): 

2049 if refresh: 

2050 self.xapi.srRecord = \ 

2051 self.xapi.session.xenapi.SR.get_record(self.xapi._srRef) 

2052 if self.xapi.srRecord["other_config"].get(VDI.DB_GC) == "false": 

2053 Util.log("GC is disabled for this SR, abort") 

2054 return False 

2055 return True 

2056 

    def scan(self, force=False) -> None:
        """Scan the SR and load VDI info for each VDI. If called repeatedly,
        update VDI objects if they already exist.

        No-op in the base class; concrete SR types provide the implementation.
        """
        pass

2061 

    def scanLocked(self, force=False):
        """Run scan() while holding the SR lock."""
        self.lock()
        try:
            self.scan(force)
        finally:
            self.unlock()

2068 

    def getVDI(self, uuid):
        """Look up a VDI object by uuid; None if not known to this SR."""
        return self.vdis.get(uuid)

2071 

2072 def hasWork(self): 

2073 if len(self.findGarbage()) > 0: 

2074 return True 

2075 if self.findCoalesceable(): 

2076 return True 

2077 if self.findLeafCoalesceable(): 

2078 return True 

2079 if self.needUpdateBlockInfo(): 

2080 return True 

2081 return False 

2082 

    def findCoalesceable(self):
        """Find a coalesceable VDI. Return a vdi that should be coalesced
        (choosing one among all coalesceable candidates according to some
        criteria) or None if there is no VDI that could be coalesced"""

        candidates = []

        srSwitch = self.xapi.srRecord["other_config"].get(VDI.DB_COALESCE)
        if srSwitch == "false":
            Util.log("Coalesce disabled for this SR")
            return candidates

        # finish any VDI for which a relink journal entry exists first
        journals = self.journaler.getAll(VDI.JRN_RELINK)
        for uuid in journals:
            vdi = self.getVDI(uuid)
            if vdi and vdi not in self._failedCoalesceTargets:
                return vdi

        for vdi in self.vdis.values():
            if vdi.isCoalesceable() and vdi not in self._failedCoalesceTargets:
                candidates.append(vdi)
                Util.log("%s is coalescable" % vdi.uuid)

        self.xapi.update_task_progress("coalescable", len(candidates))

        # pick one in the tallest tree
        treeHeight = dict()
        for c in candidates:
            height = c.getTreeRoot().getTreeHeight()
            if treeHeight.get(height):
                treeHeight[height].append(c)
            else:
                treeHeight[height] = [c]

        freeSpace = self.getFreeSpace()
        heights = list(treeHeight.keys())
        heights.sort(reverse=True)
        # Walk candidates tallest-tree-first and return the first one whose
        # coalesce fits in free space; remember the ones that do not fit so
        # the no-space message can be raised later.
        for h in heights:
            for c in treeHeight[h]:
                spaceNeeded = c._calcExtraSpaceForCoalescing()
                if spaceNeeded <= freeSpace:
                    Util.log("Coalesce candidate: %s (tree height %d)" % (c, h))
                    self.clear_no_space_msg(c)
                    return c
                else:
                    self.no_space_candidates[c.uuid] = c
                    Util.log("No space to coalesce %s (free space: %d)" % \
                            (c, freeSpace))
        return None

2133 

    def getSwitch(self, key):
        """Read a GC tuning switch from the SR's other-config."""
        return self.xapi.srRecord["other_config"].get(key)

2136 

2137 def forbiddenBySwitch(self, switch, condition, fail_msg): 

2138 srSwitch = self.getSwitch(switch) 

2139 ret = False 

2140 if srSwitch: 

2141 ret = srSwitch == condition 

2142 

2143 if ret: 

2144 Util.log(fail_msg) 

2145 

2146 return ret 

2147 

2148 def leafCoalesceForbidden(self): 

2149 return (self.forbiddenBySwitch(VDI.DB_COALESCE, 

2150 "false", 

2151 "Coalesce disabled for this SR") or 

2152 self.forbiddenBySwitch(VDI.DB_LEAFCLSC, 

2153 VDI.LEAFCLSC_DISABLED, 

2154 "Leaf-coalesce disabled for this SR")) 

2155 

    def findLeafCoalesceable(self):
        """Find leaf-coalesceable VDIs in each VHD tree.

        Returns the first candidate that fits in the SR's free space, or
        None; candidates that do not fit are recorded in no_space_candidates.
        """

        candidates = []
        if self.leafCoalesceForbidden():
            return candidates

        self.gatherLeafCoalesceable(candidates)

        self.xapi.update_task_progress("coalescable", len(candidates))

        freeSpace = self.getFreeSpace()
        for candidate in candidates:
            # check the space constraints to see if leaf-coalesce is actually
            # feasible for this candidate
            spaceNeeded = candidate._calcExtraSpaceForSnapshotCoalescing()
            spaceNeededLive = spaceNeeded
            if spaceNeeded > freeSpace:
                # Snapshot-coalesce does not fit; a live leaf-coalesce needs
                # less space, so consider that when the VDI qualifies for it.
                spaceNeededLive = candidate._calcExtraSpaceForLeafCoalescing()
                if candidate.canLiveCoalesce(self.getStorageSpeed()):
                    spaceNeeded = spaceNeededLive

            if spaceNeeded <= freeSpace:
                Util.log("Leaf-coalesce candidate: %s" % candidate)
                self.clear_no_space_msg(candidate)
                return candidate
            else:
                Util.log("No space to leaf-coalesce %s (free space: %d)" % \
                        (candidate, freeSpace))
                if spaceNeededLive <= freeSpace:
                    Util.log("...but enough space if skip snap-coalesce")
                    candidate.setConfig(VDI.DB_LEAFCLSC,
                                        VDI.LEAFCLSC_OFFLINE)
                self.no_space_candidates[candidate.uuid] = candidate

        return None

2192 

    def gatherLeafCoalesceable(self, candidates):
        """Append to `candidates` every VDI eligible for leaf-coalesce."""
        for vdi in self.vdis.values():
            if not vdi.isLeafCoalesceable():
                continue
            if vdi in self._failedCoalesceTargets:
                continue
            if vdi.getConfig(vdi.DB_ONBOOT) == vdi.ONBOOT_RESET:
                Util.log("Skipping reset-on-boot %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_ALLOW_CACHING):
                Util.log("Skipping allow_caching=true %s" % vdi)
                continue
            if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_DISABLED:
                Util.log("Leaf-coalesce disabled for %s" % vdi)
                continue
            # Leaf-coalesce must be globally enabled or forced per-VDI.
            if not (AUTO_ONLINE_LEAF_COALESCE_ENABLED or
                    vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE):
                continue
            candidates.append(vdi)

2212 

    def coalesce(self, vdi, dryRun=False):
        """Coalesce vdi onto parent.

        AbortException is cleaned up and re-raised; any other SMException is
        logged, the VDI is blacklisted for this GC run, and we move on.
        """
        Util.log("Coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            self._coalesce(vdi)
        except util.SMException as e:
            # NOTE(review): AbortException appears to be a util.SMException
            # subclass, hence the isinstance() discrimination here.
            if isinstance(e, AbortException):
                self.cleanup()
                raise
            else:
                self._failedCoalesceTargets.append(vdi)
                Util.logException("coalesce")
                Util.log("Coalesce failed, skipping")
        self.cleanup()

2230 

    def coalesceLeaf(self, vdi, dryRun=False):
        """Leaf-coalesce vdi onto parent.

        On AbortException: clean up and re-raise. On any other failure the
        VDI is blacklisted for this GC run and we carry on.
        """
        Util.log("Leaf-coalescing %s -> %s" % (vdi, vdi.parent))
        if dryRun:
            return

        try:
            uuid = vdi.uuid
            try:
                # "vdi" object will no longer be valid after this call
                self._coalesceLeaf(vdi)
            finally:
                # Re-fetch: the object may have been replaced during coalesce.
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.delConfig(vdi.DB_LEAFCLSC)
        except AbortException:
            self.cleanup()
            raise
        except (util.SMException, XenAPI.Failure) as e:
            self._failedCoalesceTargets.append(vdi)
            Util.logException("leaf-coalesce")
            Util.log("Leaf-coalesce failed on %s, skipping" % vdi)
        self.cleanup()

2254 

2255 def garbageCollect(self, dryRun=False): 

2256 vdiList = self.findGarbage() 

2257 Util.log("Found %d VDIs for deletion:" % len(vdiList)) 

2258 for vdi in vdiList: 

2259 Util.log(" %s" % vdi) 

2260 if not dryRun: 

2261 self.deleteVDIs(vdiList) 

2262 self.cleanupJournals(dryRun) 

2263 

2264 def findGarbage(self): 

2265 vdiList = [] 

2266 for vdi in self.vdiTrees: 

2267 vdiList.extend(vdi.getAllPrunable()) 

2268 return vdiList 

2269 

    def deleteVDIs(self, vdiList) -> None:
        """Delete the given unlinked VDIs, re-checking the abort flag before
        each deletion (flag is re-read every iteration on purpose)."""
        for vdi in vdiList:
            if IPCFlag(self.uuid).test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Deleting unlinked VDI %s" % vdi)
            self.deleteVDI(vdi)

2276 

    def deleteVDI(self, vdi) -> None:
        """Unhook a childless VDI from the in-memory tree and destroy it."""
        assert(len(vdi.children) == 0)
        del self.vdis[vdi.uuid]
        if vdi.parent:
            vdi.parent.children.remove(vdi)
        if vdi in self.vdiTrees:
            # It was a tree root.
            self.vdiTrees.remove(vdi)
        vdi.delete()

2285 

    def forgetVDI(self, vdiUuid) -> None:
        """Remove the VDI record from the XAPI database."""
        self.xapi.forgetVDI(self.uuid, vdiUuid)

2288 

2289 def pauseVDIs(self, vdiList) -> None: 

2290 paused = [] 

2291 failed = False 

2292 for vdi in vdiList: 

2293 try: 

2294 vdi.pause() 

2295 paused.append(vdi) 

2296 except: 

2297 Util.logException("pauseVDIs") 

2298 failed = True 

2299 break 

2300 

2301 if failed: 

2302 self.unpauseVDIs(paused) 

2303 raise util.SMException("Failed to pause VDIs") 

2304 

2305 def unpauseVDIs(self, vdiList): 

2306 failed = False 

2307 for vdi in vdiList: 

2308 try: 

2309 vdi.unpause() 

2310 except: 

2311 Util.log("ERROR: Failed to unpause VDI %s" % vdi) 

2312 failed = True 

2313 if failed: 

2314 raise util.SMException("Failed to unpause VDIs") 

2315 

    def getFreeSpace(self) -> int:
        """Free space in the SR, in bytes; 0 here, subclasses override."""
        return 0

2318 

2319 def cleanup(self): 

2320 Util.log("In cleanup") 

2321 return 

2322 

2323 @override 

2324 def __str__(self) -> str: 

2325 if self.name: 

2326 ret = "%s ('%s')" % (self.uuid[0:4], self.name) 

2327 else: 

2328 ret = "%s" % self.uuid 

2329 return ret 

2330 

    def lock(self):
        """Acquire the SR lock. Nested acquire()'s are ok. Check for Abort
        signal to avoid deadlocking (trying to acquire the SR lock while the
        lock is held by a process that is trying to abort us)"""
        if not self._srLock:
            # Locking disabled for this SR instance (createLock=False).
            return

        if self._locked == 0:
            abortFlag = IPCFlag(self.uuid)
            # Poll instead of blocking so the abort flag can interrupt us.
            for i in range(SR.LOCK_RETRY_ATTEMPTS_LOCK):
                if self._srLock.acquireNoblock():
                    self._locked += 1
                    return
                if abortFlag.test(FLAG_TYPE_ABORT):
                    raise AbortException("Abort requested")
                time.sleep(SR.LOCK_RETRY_INTERVAL)
            raise util.SMException("Unable to acquire the SR lock")

        # Already held by us: just bump the nesting count.
        self._locked += 1

2350 

    def unlock(self):
        """Release one level of the nested SR lock (no-op when lockless)."""
        if not self._srLock:
            return
        assert(self._locked > 0)
        self._locked -= 1
        if self._locked == 0:
            self._srLock.release()

2358 

2359 def needUpdateBlockInfo(self) -> bool: 

2360 for vdi in self.vdis.values(): 

2361 if vdi.scanError or len(vdi.children) == 0: 

2362 continue 

2363 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2364 return True 

2365 return False 

2366 

2367 def updateBlockInfo(self) -> None: 

2368 for vdi in self.vdis.values(): 

2369 if vdi.scanError or len(vdi.children) == 0: 

2370 continue 

2371 if not vdi.getConfig(vdi.DB_VHD_BLOCKS): 

2372 vdi.updateBlockInfo() 

2373 

2374 def cleanupCoalesceJournals(self): 

2375 """Remove stale coalesce VDI indicators""" 

2376 entries = self.journaler.getAll(VDI.JRN_COALESCE) 

2377 for uuid, jval in entries.items(): 

2378 self.journaler.remove(VDI.JRN_COALESCE, uuid) 

2379 

    def cleanupJournals(self, dryRun=False):
        """delete journal entries for non-existing VDIs"""
        for t in [LVHDVDI.JRN_ZERO, VDI.JRN_RELINK, SR.JRN_CLONE]:
            entries = self.journaler.getAll(t)
            for uuid, jval in entries.items():
                if self.getVDI(uuid):
                    # The VDI still exists: entry is live, keep it.
                    continue
                if t == SR.JRN_CLONE:
                    # Clone journals encode "<baseUuid>_<cloneUuid>"; keep
                    # the entry while the base VDI is still present.
                    baseUuid, clonUuid = jval.split("_")
                    if self.getVDI(baseUuid):
                        continue
                Util.log("  Deleting stale '%s' journal entry for %s "
                         "(%s)" % (t, uuid, jval))
                if not dryRun:
                    self.journaler.remove(t, uuid)

2395 

    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up local cache files; returns number removed (0 in base)."""
        return 0

2398 

    def _coalesce(self, vdi):
        """Coalesce `vdi` into its parent and relink vdi's children onto the
        parent, driving the whole operation through crash-safe journals."""
        if self.journaler.get(vdi.JRN_RELINK, vdi.uuid):
            # this means we had done the actual coalescing already and just
            # need to finish relinking and/or refreshing the children
            Util.log("==> Coalesce apparently already done: skipping")

            # The parent volume must be active for the parent change to occur.
            # The parent volume may become inactive if the host is rebooted.
            vdi._ensureParentActiveForRelink()
        else:
            # JRN_COALESCE is used to check which VDI is being coalesced in
            # order to decide whether to abort the coalesce. We remove the
            # journal as soon as the VHD coalesce step is done, because we
            # don't expect the rest of the process to take long
            self.journaler.create(vdi.JRN_COALESCE, vdi.uuid, "1")
            vdi._doCoalesce()
            self.journaler.remove(vdi.JRN_COALESCE, vdi.uuid)

            util.fistpoint.activate("LVHDRT_before_create_relink_journal", self.uuid)

            # we now need to relink the children: lock the SR to prevent ops
            # like SM.clone from manipulating the VDIs we'll be relinking and
            # rescan the SR first in case the children changed since the last
            # scan
            self.journaler.create(vdi.JRN_RELINK, vdi.uuid, "1")

        self.lock()
        try:
            vdi.parent._tagChildrenForRelink()
            self.scan()
            vdi._relinkSkip()
        finally:
            self.unlock()
            # Reload the children to leave things consistent
            vdi.parent._reloadChildren(vdi)

        self.journaler.remove(vdi.JRN_RELINK, vdi.uuid)
        self.deleteVDI(vdi)

2437 

2438 class CoalesceTracker: 

2439 GRACE_ITERATIONS = 2 

2440 MAX_ITERATIONS_NO_PROGRESS = 3 

2441 MAX_ITERATIONS = 20 

2442 MAX_INCREASE_FROM_MINIMUM = 1.2 

2443 HISTORY_STRING = "Iteration: {its} -- Initial size {initSize}" \ 

2444 " --> Final size {finSize}" 

2445 

2446 def __init__(self, sr): 

2447 self.itsNoProgress = 0 

2448 self.its = 0 

2449 self.minSize = float("inf") 

2450 self._history = [] 

2451 self.reason = "" 

2452 self.startSize = None 

2453 self.finishSize = None 

2454 self.sr = sr 

2455 self.grace_remaining = self.GRACE_ITERATIONS 

2456 

2457 @property 

2458 def history(self): 

2459 return [x['msg'] for x in self._history] 

2460 

2461 def moving_average(self): 

2462 """ 

2463 Calculate a three point moving average 

2464 """ 

2465 assert len(self._history) >= 3 

2466 

2467 mv_average = sum([x['finalsize'] for x in self._history]) / len(self._history) 

2468 util.SMlog(f'Calculated moving average as {mv_average}') 

2469 return mv_average 

2470 

2471 def abortCoalesce(self, prevSize, curSize): 

2472 self.its += 1 

2473 self._history.append( 

2474 { 

2475 'finalsize': curSize, 

2476 'msg': self.HISTORY_STRING.format(its=self.its, 

2477 initSize=prevSize, 

2478 finSize=curSize) 

2479 } 

2480 ) 

2481 

2482 self.finishSize = curSize 

2483 

2484 if self.startSize is None: 

2485 self.startSize = prevSize 

2486 

2487 if curSize < self.minSize: 

2488 self.minSize = curSize 

2489 

2490 if prevSize < self.minSize: 

2491 self.minSize = prevSize 

2492 

2493 if self.its < 4: 

2494 # Perform at least three iterations 

2495 return False 

2496 

2497 if prevSize >= curSize or curSize < self.moving_average(): 

2498 # We made progress 

2499 return False 

2500 else: 

2501 self.itsNoProgress += 1 

2502 Util.log("No progress, attempt:" 

2503 " {attempt}".format(attempt=self.itsNoProgress)) 

2504 util.fistpoint.activate("cleanup_tracker_no_progress", self.sr.uuid) 

2505 

2506 if self.its > self.MAX_ITERATIONS: 

2507 max = self.MAX_ITERATIONS 

2508 self.reason = \ 

2509 "Max iterations ({max}) exceeded".format(max=max) 

2510 return True 

2511 

2512 if self.itsNoProgress > self.MAX_ITERATIONS_NO_PROGRESS: 

2513 max = self.MAX_ITERATIONS_NO_PROGRESS 

2514 self.reason = \ 

2515 "No progress made for {max} iterations".format(max=max) 

2516 return True 

2517 

2518 maxSizeFromMin = self.MAX_INCREASE_FROM_MINIMUM * self.minSize 

2519 if curSize > maxSizeFromMin: 

2520 self.grace_remaining -= 1 

2521 if self.grace_remaining == 0: 

2522 self.reason = "Unexpected bump in size," \ 

2523 " compared to minimum achieved" 

2524 

2525 return True 

2526 

2527 return False 

2528 

2529 def printSizes(self): 

2530 Util.log("Starting size was {size}" 

2531 .format(size=self.startSize)) 

2532 Util.log("Final size was {size}" 

2533 .format(size=self.finishSize)) 

2534 Util.log("Minimum size achieved was {size}" 

2535 .format(size=self.minSize)) 

2536 

2537 def printReasoning(self): 

2538 Util.log("Aborted coalesce") 

2539 for hist in self.history: 

2540 Util.log(hist) 

2541 Util.log(self.reason) 

2542 self.printSizes() 

2543 

2544 def printSummary(self): 

2545 if self.its == 0: 

2546 return 

2547 

2548 if self.reason: 2548 ↛ 2549line 2548 didn't jump to line 2549, because the condition on line 2548 was never true

2549 Util.log("Aborted coalesce") 

2550 Util.log(self.reason) 

2551 else: 

2552 Util.log("Coalesce summary") 

2553 

2554 Util.log(f"Performed {self.its} iterations") 

2555 self.printSizes() 

2556 

2557 

    def _coalesceLeaf(self, vdi):
        """Leaf-coalesce VDI vdi. Return true if we succeed, false if we cannot
        complete due to external changes, namely vdi_delete and vdi_snapshot
        that alter leaf-coalescibility of vdi"""
        tracker = self.CoalesceTracker(self)
        # Shrink the leaf with repeated snapshot-coalesce rounds until it is
        # small enough to be live-coalesced within the timeout.
        while not vdi.canLiveCoalesce(self.getStorageSpeed()):
            prevSizeVHD = vdi.getSizeVHD()
            if not self._snapshotCoalesce(vdi):
                return False
            if tracker.abortCoalesce(prevSizeVHD, vdi.getSizeVHD()):
                tracker.printReasoning()
                raise util.SMException("VDI {uuid} could not be coalesced"
                                       .format(uuid=vdi.uuid))
        tracker.printSummary()
        return self._liveLeafCoalesce(vdi)

2573 

2574 def calcStorageSpeed(self, startTime, endTime, vhdSize): 

2575 speed = None 

2576 total_time = endTime - startTime 

2577 if total_time > 0: 

2578 speed = float(vhdSize) / float(total_time) 

2579 return speed 

2580 

2581 def writeSpeedToFile(self, speed): 

2582 content = [] 

2583 speedFile = None 

2584 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2585 self.lock() 

2586 try: 

2587 Util.log("Writing to file: {myfile}".format(myfile=path)) 

2588 lines = "" 

2589 if not os.path.isfile(path): 

2590 lines = str(speed) + "\n" 

2591 else: 

2592 speedFile = open(path, "r+") 

2593 content = speedFile.readlines() 

2594 content.append(str(speed) + "\n") 

2595 if len(content) > N_RUNNING_AVERAGE: 

2596 del content[0] 

2597 lines = "".join(content) 

2598 

2599 util.atomicFileWrite(path, VAR_RUN, lines) 

2600 finally: 

2601 if speedFile is not None: 

2602 speedFile.close() 

2603 Util.log("Closing file: {myfile}".format(myfile=path)) 

2604 self.unlock() 

2605 

2606 def recordStorageSpeed(self, startTime, endTime, vhdSize): 

2607 speed = self.calcStorageSpeed(startTime, endTime, vhdSize) 

2608 if speed is None: 

2609 return 

2610 

2611 self.writeSpeedToFile(speed) 

2612 

2613 def getStorageSpeed(self): 

2614 speedFile = None 

2615 path = SPEED_LOG_ROOT.format(uuid=self.uuid) 

2616 self.lock() 

2617 try: 

2618 speed = None 

2619 if os.path.isfile(path): 

2620 speedFile = open(path) 

2621 content = speedFile.readlines() 

2622 try: 

2623 content = [float(i) for i in content] 

2624 except ValueError: 

2625 Util.log("Something bad in the speed log:{log}". 

2626 format(log=speedFile.readlines())) 

2627 return speed 

2628 

2629 if len(content): 

2630 speed = sum(content) / float(len(content)) 

2631 if speed <= 0: 2631 ↛ 2633line 2631 didn't jump to line 2633, because the condition on line 2631 was never true

2632 # Defensive, should be impossible. 

2633 Util.log("Bad speed: {speed} calculated for SR: {uuid}". 

2634 format(speed=speed, uuid=self.uuid)) 

2635 speed = None 

2636 else: 

2637 Util.log("Speed file empty for SR: {uuid}". 

2638 format(uuid=self.uuid)) 

2639 else: 

2640 Util.log("Speed log missing for SR: {uuid}". 

2641 format(uuid=self.uuid)) 

2642 return speed 

2643 finally: 

2644 if not (speedFile is None): 

2645 speedFile.close() 

2646 self.unlock() 

2647 

2648 def _snapshotCoalesce(self, vdi): 

2649 # Note that because we are not holding any locks here, concurrent SM 

2650 # operations may change this tree under our feet. In particular, vdi 

2651 # can be deleted, or it can be snapshotted. 

2652 assert(AUTO_ONLINE_LEAF_COALESCE_ENABLED) 

2653 Util.log("Single-snapshotting %s" % vdi) 

2654 util.fistpoint.activate("LVHDRT_coaleaf_delay_1", self.uuid) 

2655 try: 

2656 ret = self.xapi.singleSnapshotVDI(vdi) 

2657 Util.log("Single-snapshot returned: %s" % ret) 

2658 except XenAPI.Failure as e: 

2659 if util.isInvalidVDI(e): 

2660 Util.log("The VDI appears to have been concurrently deleted") 

2661 return False 

2662 raise 

2663 self.scanLocked() 

2664 tempSnap = vdi.parent 

2665 if not tempSnap.isCoalesceable(): 

2666 Util.log("The VDI appears to have been concurrently snapshotted") 

2667 return False 

2668 Util.log("Coalescing parent %s" % tempSnap) 

2669 util.fistpoint.activate("LVHDRT_coaleaf_delay_2", self.uuid) 

2670 vhdSize = vdi.getSizeVHD() 

2671 self._coalesce(tempSnap) 

2672 if not vdi.isLeafCoalesceable(): 

2673 Util.log("The VDI tree appears to have been altered since") 

2674 return False 

2675 return True 

2676 

    def _liveLeafCoalesce(self, vdi) -> bool:
        """Pause `vdi` and coalesce it into its parent under the SR lock.

        Returns False if the VDI disappeared or stopped being leaf-
        coalesceable before we got the lock; True when done.
        """
        util.fistpoint.activate("LVHDRT_coaleaf_delay_3", self.uuid)
        self.lock()
        try:
            self.scan()
            if not self.getVDI(vdi.uuid):
                Util.log("The VDI appears to have been deleted meanwhile")
                return False
            if not vdi.isLeafCoalesceable():
                Util.log("The VDI is no longer leaf-coalesceable")
                return False

            uuid = vdi.uuid
            vdi.pause(failfast=True)
            try:
                try:
                    # "vdi" object will no longer be valid after this call
                    self._doCoalesceLeaf(vdi)
                except:
                    Util.logException("_doCoalesceLeaf")
                    self._handleInterruptedCoalesceLeaf()
                    raise
            finally:
                vdi = self.getVDI(uuid)
                if vdi:
                    vdi.ensureUnpaused()
                # Remove the renamed-away old leaf if it is still around.
                vdiOld = self.getVDI(self.TMP_RENAME_PREFIX + uuid)
                if vdiOld:
                    util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
                    self.deleteVDI(vdiOld)
                    util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)
        finally:
            self.cleanup()
            self.unlock()
            self.logFilter.logState()
        return True

2713 

    def _doCoalesceLeaf(self, vdi):
        """Actual coalescing of a leaf VDI onto parent. Must be called in an
        offline/atomic context"""
        # Journal first so an interrupted leaf-coalesce can be recovered.
        self.journaler.create(VDI.JRN_LEAF, vdi.uuid, vdi.parent.uuid)
        self._prepareCoalesceLeaf(vdi)
        vdi.parent._setHidden(False)
        vdi.parent._increaseSizeVirt(vdi.sizeVirt, False)
        vdi.validate(True)
        vdi.parent.validate(True)
        util.fistpoint.activate("LVHDRT_coaleaf_before_coalesce", self.uuid)
        timeout = vdi.LIVE_LEAF_COALESCE_TIMEOUT
        if vdi.getConfig(vdi.DB_LEAFCLSC) == vdi.LEAFCLSC_FORCE:
            Util.log("Leaf-coalesce forced, will not use timeout")
            timeout = 0
        vdi._coalesceVHD(timeout)
        util.fistpoint.activate("LVHDRT_coaleaf_after_coalesce", self.uuid)
        vdi.parent.validate(True)
        #vdi._verifyContents(timeout / 2)

        # rename
        vdiUuid = vdi.uuid
        oldName = vdi.fileName
        origParentUuid = vdi.parent.uuid
        vdi.rename(self.TMP_RENAME_PREFIX + vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_one_renamed", self.uuid)
        vdi.parent.rename(vdiUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_both_renamed", self.uuid)
        self._updateSlavesOnRename(vdi.parent, oldName, origParentUuid)

        # Note that "vdi.parent" is now the single remaining leaf and "vdi" is
        # garbage

        # update the VDI record
        if vdi.parent.raw:
            vdi.parent.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_RAW)
        vdi.parent.delConfig(VDI.DB_VHD_BLOCKS)
        util.fistpoint.activate("LVHDRT_coaleaf_after_vdirec", self.uuid)

        self._updateNode(vdi)

        # delete the obsolete leaf & inflate the parent (in that order, to
        # minimize free space requirements)
        parent = vdi.parent
        vdi._setHidden(True)
        vdi.parent.children = []
        vdi.parent = None

        if parent.parent is None:
            # The parent became a tree root: drop its dangling parent link.
            parent.delConfig(VDI.DB_VHD_PARENT)

        extraSpace = self._calcExtraSpaceNeeded(vdi, parent)
        freeSpace = self.getFreeSpace()
        if freeSpace < extraSpace:
            # don't delete unless we need the space: deletion is time-consuming
            # because it requires contacting the slaves, and we're paused here
            util.fistpoint.activate("LVHDRT_coaleaf_before_delete", self.uuid)
            self.deleteVDI(vdi)
            util.fistpoint.activate("LVHDRT_coaleaf_after_delete", self.uuid)

        util.fistpoint.activate("LVHDRT_coaleaf_before_remove_j", self.uuid)
        self.journaler.remove(VDI.JRN_LEAF, vdiUuid)

        self.forgetVDI(origParentUuid)
        self._finishCoalesceLeaf(parent)
        self._updateSlavesOnResize(parent)

2779 

2780 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

2781 assert(not parent.raw) # raw parents not supported 

2782 extra = child.getSizeVHD() - parent.getSizeVHD() 

2783 if extra < 0: 2783 ↛ 2784line 2783 didn't jump to line 2784, because the condition on line 2783 was never true

2784 extra = 0 

2785 return extra 

2786 

2787 def _prepareCoalesceLeaf(self, vdi) -> None: 

2788 pass 

2789 

2790 def _updateNode(self, vdi) -> None: 

2791 pass 

2792 

2793 def _finishCoalesceLeaf(self, parent) -> None: 

2794 pass 

2795 

2796 def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None: 

2797 pass 

2798 

2799 def _updateSlavesOnRename(self, vdi, oldName, origParentUuid) -> None: 

2800 pass 

2801 

2802 def _updateSlavesOnResize(self, vdi) -> None: 

2803 pass 

2804 

2805 def _removeStaleVDIs(self, uuidsPresent) -> None: 

2806 for uuid in list(self.vdis.keys()): 

2807 if not uuid in uuidsPresent: 

2808 Util.log("VDI %s disappeared since last scan" % \ 

2809 self.vdis[uuid]) 

2810 del self.vdis[uuid] 

2811 

2812 def _handleInterruptedCoalesceLeaf(self) -> None: 

2813 """An interrupted leaf-coalesce operation may leave the VHD tree in an 

2814 inconsistent state. If the old-leaf VDI is still present, we revert the 

2815 operation (in case the original error is persistent); otherwise we must 

2816 finish the operation""" 

2817 pass 

2818 

2819 def _buildTree(self, force): 

2820 self.vdiTrees = [] 

2821 for vdi in self.vdis.values(): 

2822 if vdi.parentUuid: 

2823 parent = self.getVDI(vdi.parentUuid) 

2824 if not parent: 

2825 if vdi.uuid.startswith(self.TMP_RENAME_PREFIX): 

2826 self.vdiTrees.append(vdi) 

2827 continue 

2828 if force: 

2829 Util.log("ERROR: Parent VDI %s not found! (for %s)" % \ 

2830 (vdi.parentUuid, vdi.uuid)) 

2831 self.vdiTrees.append(vdi) 

2832 continue 

2833 else: 

2834 raise util.SMException("Parent VDI %s of %s not " \ 

2835 "found" % (vdi.parentUuid, vdi.uuid)) 

2836 vdi.parent = parent 

2837 parent.children.append(vdi) 

2838 else: 

2839 self.vdiTrees.append(vdi) 

2840 

2841 

class FileSR(SR):
    """SR backed by a mounted file system (local ext / NFS); VDIs are VHD or
    raw image files under /var/run/sr-mount/<uuid>."""

    TYPE = SR.TYPE_FILE
    CACHE_FILE_EXT = ".vhdcache"
    # cache cleanup actions
    CACHE_ACTION_KEEP = 0
    CACHE_ACTION_REMOVE = 1
    CACHE_ACTION_REMOVE_IF_INACTIVE = 2

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = "/var/run/sr-mount/%s" % self.uuid
        self.journaler = fjournaler.Journaler(self.path)

    @override
    def scan(self, force=False) -> None:
        """Scan the SR directory and sync self.vdis with the files on disk.

        Raises util.SMException if the mount point is missing or the scan
        keeps failing (unless 'force' is set).
        """
        if not util.pathexists(self.path):
            raise util.SMException("directory %s not found!" % self.uuid)
        vhds = self._scan(force)
        for uuid, vhdInfo in vhds.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, False)
                self.vdis[uuid] = vdi
            vdi.load(vhdInfo)
        uuidsPresent = list(vhds.keys())
        # raw files carry no VHD metadata; find them with a directory listing
        rawList = [x for x in os.listdir(self.path) if x.endswith(vhdutil.FILE_EXTN_RAW)]
        for rawName in rawList:
            uuid = FileVDI.extractUuid(rawName)
            uuidsPresent.append(uuid)
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = FileVDI(self, uuid, True)
                self.vdis[uuid] = vdi
        self._removeStaleVDIs(uuidsPresent)
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    @override
    def getFreeSpace(self) -> int:
        """Return the free bytes on the SR's file system."""
        return util.get_fs_size(self.path) - util.get_fs_utilisation(self.path)

    @override
    def deleteVDIs(self, vdiList) -> None:
        """Delete the given VDIs; if a tree root on an NFS SR was removed,
        mark IntelliCache SRs dirty so stale caches get cleaned up."""
        # idiomatic any() instead of a manual search loop
        rootDeleted = any(not vdi.parent for vdi in vdiList)
        SR.deleteVDIs(self, vdiList)
        if self.xapi.srRecord["type"] == "nfs" and rootDeleted:
            self.xapi.markCacheSRsDirty()

    @override
    def cleanupCache(self, maxAge=-1) -> int:
        """Clean up IntelliCache cache files. Caches for leaf nodes are
        removed when the leaf node no longer exists or its allow-caching
        attribute is not set. Caches for parent nodes are removed when the
        parent node no longer exists or it hasn't been used in more than
        <maxAge> hours.
        Return number of caches removed.
        """
        numRemoved = 0
        cacheFiles = [x for x in os.listdir(self.path) if self._isCacheFileName(x)]
        Util.log("Found %d cache files" % len(cacheFiles))
        # cutoff is only consulted when maxAge >= 0 (see below)
        cutoff = datetime.datetime.now() - datetime.timedelta(hours=maxAge)
        for cacheFile in cacheFiles:
            uuid = cacheFile[:-len(self.CACHE_FILE_EXT)]
            action = self.CACHE_ACTION_KEEP
            rec = self.xapi.getRecordVDI(uuid)
            if not rec:
                Util.log("Cache %s: VDI doesn't exist" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif rec["managed"] and not rec["allow_caching"]:
                Util.log("Cache %s: caching disabled" % uuid)
                action = self.CACHE_ACTION_REMOVE
            elif not rec["managed"] and maxAge >= 0:
                lastAccess = datetime.datetime.fromtimestamp( \
                        os.path.getatime(os.path.join(self.path, cacheFile)))
                if lastAccess < cutoff:
                    Util.log("Cache %s: older than %d hrs" % (uuid, maxAge))
                    action = self.CACHE_ACTION_REMOVE_IF_INACTIVE

            if action == self.CACHE_ACTION_KEEP:
                Util.log("Keeping cache %s" % uuid)
                continue

            # serialize with cache setup: lock on the parent when we have one
            lockId = uuid
            parentUuid = None
            if rec and rec["managed"]:
                parentUuid = rec["sm_config"].get("vhd-parent")
            if parentUuid:
                lockId = parentUuid

            cacheLock = lock.Lock(blktap2.VDI.LOCK_CACHE_SETUP, lockId)
            cacheLock.acquire()
            try:
                if self._cleanupCache(uuid, action):
                    numRemoved += 1
            finally:
                cacheLock.release()
        return numRemoved

    def _cleanupCache(self, uuid, action):
        """Remove a single cache file if still appropriate; return True if
        the file was deleted."""
        assert(action != self.CACHE_ACTION_KEEP)
        rec = self.xapi.getRecordVDI(uuid)
        if rec and rec["allow_caching"]:
            Util.log("Cache %s appears to have become valid" % uuid)
            return False

        fullPath = os.path.join(self.path, uuid + self.CACHE_FILE_EXT)
        tapdisk = blktap2.Tapdisk.find_by_path(fullPath)
        if tapdisk:
            if action == self.CACHE_ACTION_REMOVE_IF_INACTIVE:
                Util.log("Cache %s still in use" % uuid)
                return False
            Util.log("Shutting down tapdisk for %s" % fullPath)
            tapdisk.shutdown()

        Util.log("Deleting file %s" % fullPath)
        os.unlink(fullPath)
        return True

    def _isCacheFileName(self, name):
        """True if 'name' looks like "<uuid>.vhdcache"."""
        return (len(name) == Util.UUID_LEN + len(self.CACHE_FILE_EXT)) and \
                name.endswith(self.CACHE_FILE_EXT)

    def _scan(self, force):
        """Scan all VHD files in the SR, retrying on VHD errors.

        Returns {uuid: vhdInfo}; raises util.SMException after
        SCAN_RETRY_ATTEMPTS failures unless 'force' is set.
        """
        vhds = {}  # keep the force-return below safe even if the loop never ran
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            pattern = os.path.join(self.path, "*%s" % vhdutil.FILE_EXTN_VHD)
            vhds = vhdutil.getAllVHDs(pattern, FileVDI.extractUuid)
            for uuid, vhdInfo in vhds.items():
                if vhdInfo.error:
                    error = True
                    break
            if not error:
                return vhds
            Util.log("Scan error on attempt %d" % i)
        if force:
            return vhds
        raise util.SMException("Scan error")

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI after confirming no slave host still uses it."""
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    def _checkSlaves(self, vdi):
        """Check with every slave holding an attached PBD that 'vdi' is not
        in use; failures from hosts believed offline are ignored."""
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            try:
                self._checkSlave(hostRef, vdi)
            except XenAPI.Failure:
                if hostRef in onlineHosts:
                    raise

    def _checkSlave(self, hostRef, vdi):
        """Ask a single slave (nfs-on-slave plugin) to verify 'vdi' is not
        in use; raises XenAPI.Failure on a negative answer."""
        call = (hostRef, "nfs-on-slave", "check", {'path': vdi.path})
        Util.log("Checking with slave: %s" % repr(call))
        _host = self.xapi.session.xenapi.host
        # log the plugin's reply instead of dropping it into an unused local,
        # consistent with the other call_plugin sites in this file
        text = _host.call_plugin(*call)
        Util.log("call-plugin returned: '%s'" % text)

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover leaf-coalesce journal entries: undo if the old parent (or
        the temp-renamed child) still exists, otherwise finish the
        interrupted operation."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            fileList = os.listdir(self.path)
            childName = uuid + vhdutil.FILE_EXTN_VHD
            tmpChildName = self.TMP_RENAME_PREFIX + uuid + vhdutil.FILE_EXTN_VHD
            parentName1 = parentUuid + vhdutil.FILE_EXTN_VHD
            parentName2 = parentUuid + vhdutil.FILE_EXTN_RAW
            parentPresent = (parentName1 in fileList or parentName2 in fileList)
            if parentPresent or tmpChildName in fileList:
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: restore both names, re-link the
        child to the parent and restore the hidden flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce that got past the point of no return:
        forget the old parent and refresh slave hosts."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            Util.log(f"_finishInterruptedCoalesceLeaf, vdi {childUuid} not found, aborting")
            raise util.SMException("VDI %s not found" % childUuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # best effort: the parent record may already be gone
            Util.logException('_finishInterruptedCoalesceLeaf')
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

3081 

3082 

class LVHDSR(SR):
    """SR backed by LVM: each VDI is a logical volume holding a VHD (or raw)
    image inside the SR's volume group."""

    TYPE = SR.TYPE_LVHD
    SUBTYPES = ["lvhdoiscsi", "lvhdohba"]

    def __init__(self, uuid, xapi, createLock, force):
        SR.__init__(self, uuid, xapi, createLock, force)
        self.vgName = "%s%s" % (lvhdutil.VG_PREFIX, self.uuid)
        self.path = os.path.join(lvhdutil.VG_LOCATION, self.vgName)

        # optional per-SR LVM configuration stored in the SR's other-config
        sr_ref = self.xapi.session.xenapi.SR.get_by_uuid(self.uuid)
        other_conf = self.xapi.session.xenapi.SR.get_other_config(sr_ref)
        lvm_conf = other_conf.get('lvm-conf') if other_conf else None
        self.lvmCache = lvmcache.LVMCache(self.vgName, lvm_conf)

        self.lvActivator = LVActivator(self.uuid, self.lvmCache)
        self.journaler = journaler.Journaler(self.lvmCache)

    @override
    def deleteVDI(self, vdi) -> None:
        """Delete a VDI, deactivating its LV locally first and verifying no
        slave host still has it active."""
        if self.lvActivator.get(vdi.uuid, False):
            self.lvActivator.deactivate(vdi.uuid, False)
        self._checkSlaves(vdi)
        SR.deleteVDI(self, vdi)

    @override
    def forgetVDI(self, vdiUuid) -> None:
        """Forget the VDI in XAPI and remove it from the SR's on-disk LVM
        metadata volume."""
        SR.forgetVDI(self, vdiUuid)
        mdpath = os.path.join(self.path, lvutil.MDVOLUME_NAME)
        LVMMetadataHandler(mdpath).deleteVdiFromMetadata(vdiUuid)

    @override
    def getFreeSpace(self) -> int:
        """Return the free space (bytes) in the volume group."""
        stats = lvutil._getVGstats(self.vgName)
        return stats['physical_size'] - stats['physical_utilisation']

    @override
    def cleanup(self):
        """Deactivate every LV this GC run activated."""
        if not self.lvActivator.deactivateAll():
            Util.log("ERROR deactivating LVs while cleaning up")

    @override
    def needUpdateBlockInfo(self) -> bool:
        """True if any coalesceable VHD node lacks cached block-allocation
        info (DB_VHD_BLOCKS)."""
        for vdi in self.vdis.values():
            if vdi.scanError or vdi.raw or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
                return True
        return False

    @override
    def updateBlockInfo(self) -> None:
        """Populate missing VHD block-allocation info on coalesceable nodes."""
        numUpdated = 0
        for vdi in self.vdis.values():
            if vdi.scanError or vdi.raw or len(vdi.children) == 0:
                continue
            if not vdi.getConfig(vdi.DB_VHD_BLOCKS):
                vdi.updateBlockInfo()
                numUpdated += 1
        if numUpdated:
            # deactivate the LVs back sooner rather than later. If we don't
            # now, by the time this thread gets to deactivations, another one
            # might have leaf-coalesced a node and deleted it, making the child
            # inherit the refcount value and preventing the correct decrement
            self.cleanup()

    @override
    def scan(self, force=False) -> None:
        """Scan the volume group and sync self.vdis with the LVs found."""
        vdis = self._scan(force)
        for uuid, vdiInfo in vdis.items():
            vdi = self.getVDI(uuid)
            if not vdi:
                self.logFilter.logNewVDI(uuid)
                vdi = LVHDVDI(self, uuid,
                        vdiInfo.vdiType == vhdutil.VDI_TYPE_RAW)
                self.vdis[uuid] = vdi
            vdi.load(vdiInfo)
        self._removeStaleVDIs(vdis.keys())
        self._buildTree(force)
        self.logFilter.logState()
        self._handleInterruptedCoalesceLeaf()

    def _scan(self, force):
        """Collect VDI info from LVM, retrying on scan errors.

        Returns {uuid: vdiInfo}; raises util.SMException after
        SCAN_RETRY_ATTEMPTS failures unless 'force' is set.
        """
        vdis = {}  # keep the force-return below safe even if the loop never ran
        for i in range(SR.SCAN_RETRY_ATTEMPTS):
            error = False
            self.lvmCache.refresh()
            vdis = lvhdutil.getVDIInfo(self.lvmCache)
            for uuid, vdiInfo in vdis.items():
                if vdiInfo.scanError:
                    error = True
                    break
            if not error:
                return vdis
            Util.log("Scan error, retrying (%d)" % i)
        if force:
            return vdis
        raise util.SMException("Scan error")

    @override
    def _removeStaleVDIs(self, uuidsPresent) -> None:
        """Drop VDIs that disappeared since the last scan and release any
        pending LV activation bookkeeping for them."""
        for uuid in list(self.vdis.keys()):
            # PEP 8: "uuid not in x" rather than "not uuid in x"
            if uuid not in uuidsPresent:
                Util.log("VDI %s disappeared since last scan" % \
                        self.vdis[uuid])
                del self.vdis[uuid]
                if self.lvActivator.get(uuid, False):
                    self.lvActivator.remove(uuid, False)

    @override
    def _liveLeafCoalesce(self, vdi) -> bool:
        """If the parent is raw and the child was resized (virt. size), then
        we'll need to resize the parent, which can take a while due to zeroing
        out of the extended portion of the LV. Do it before pausing the child
        to avoid a protracted downtime"""
        if vdi.parent.raw and vdi.sizeVirt > vdi.parent.sizeVirt:
            self.lvmCache.setReadonly(vdi.parent.fileName, False)
            vdi.parent._increaseSizeVirt(vdi.sizeVirt)

        return SR._liveLeafCoalesce(self, vdi)

    @override
    def _prepareCoalesceLeaf(self, vdi) -> None:
        """Activate the chain and make room: deflate the child, inflate the
        parent enough to receive the coalesced data."""
        vdi._activateChain()
        self.lvmCache.setReadonly(vdi.parent.fileName, False)
        vdi.deflate()
        vdi.inflateParentForCoalesce()

    @override
    def _updateNode(self, vdi) -> None:
        # fix the refcounts: the remaining node should inherit the binary
        # refcount from the leaf (because if it was online, it should remain
        # refcounted as such), but the normal refcount from the parent (because
        # this node is really the parent node) - minus 1 if it is online (since
        # non-leaf nodes increment their normal counts when they are online and
        # we are now a leaf, storing that 1 in the binary refcount).
        ns = lvhdutil.NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(vdi.uuid, ns)
        pCnt, pBcnt = RefCounter.check(vdi.parent.uuid, ns)
        pCnt = pCnt - cBcnt
        assert(pCnt >= 0)
        RefCounter.set(vdi.parent.uuid, pCnt, cBcnt, ns)

    @override
    def _finishCoalesceLeaf(self, parent) -> None:
        """Leave the surviving node in the right inflation state: fully
        inflated if writable, deflated if it is a read-only snapshot."""
        if not parent.isSnapshot() or parent.isAttachedRW():
            parent.inflateFully()
        else:
            parent.deflate()

    @override
    def _calcExtraSpaceNeeded(self, child, parent) -> int:
        """Extra VG space needed to fully inflate the parent's LV."""
        return lvhdutil.calcSizeVHDLV(parent.sizeVirt) - parent.sizeLV

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover leaf-coalesce journal entries: undo if the old parent LV
        (or the temp-renamed child LV) still exists, otherwise finish the
        interrupted operation."""
        entries = self.journaler.getAll(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            childLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + uuid
            tmpChildLV = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
                    self.TMP_RENAME_PREFIX + uuid
            parentLV1 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + parentUuid
            parentLV2 = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_RAW] + parentUuid
            parentPresent = (self.lvmCache.checkLV(parentLV1) or \
                    self.lvmCache.checkLV(parentLV2))
            if parentPresent or self.lvmCache.checkLV(tmpChildLV):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Revert a half-done leaf-coalesce: restore names, refcounts,
        inflation states and hidden flags."""
        Util.log("*** UNDO LEAF-COALESCE")
        parent = self.getVDI(parentUuid)
        if not parent:
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException("Neither %s nor %s found" % \
                        (parentUuid, childUuid))
            Util.log("Renaming parent back: %s -> %s" % (childUuid, parentUuid))
            parent.rename(parentUuid)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename", self.uuid)

        child = self.getVDI(childUuid)
        if not child:
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException("Neither %s nor %s found" % \
                        (childUuid, self.TMP_RENAME_PREFIX + childUuid))
            Util.log("Renaming child back to %s" % childUuid)
            child.rename(childUuid)
            Util.log("Updating the VDI record")
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)
            util.fistpoint.activate("LVHDRT_coaleaf_undo_after_rename2", self.uuid)

        # refcount (best effort - assume that it had succeeded if the
        # second rename succeeded; if not, this adjustment will be wrong,
        # leading to a non-deactivation of the LV)
        ns = lvhdutil.NS_PREFIX_LVM + self.uuid
        cCnt, cBcnt = RefCounter.check(child.uuid, ns)
        pCnt, pBcnt = RefCounter.check(parent.uuid, ns)
        pCnt = pCnt + cBcnt
        RefCounter.set(parent.uuid, pCnt, 0, ns)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_refcount", self.uuid)

        parent.deflate()
        child.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_undo_after_deflate", self.uuid)
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        if not parent.lvReadonly:
            self.lvmCache.setReadonly(parent.fileName, True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        util.fistpoint.activate("LVHDRT_coaleaf_undo_end", self.uuid)
        Util.log("*** leaf-coalesce undo successful")
        if util.fistpoint.is_active("LVHDRT_coaleaf_stop_after_recovery"):
            child.setConfig(VDI.DB_LEAFCLSC, VDI.LEAFCLSC_DISABLED)

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce that got past the point of no return:
        re-inflate the survivor, forget the old parent, refresh slaves."""
        Util.log("*** FINISH LEAF-COALESCE")
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException("VDI %s not found" % childUuid)
        vdi.inflateFully()
        util.fistpoint.activate("LVHDRT_coaleaf_finish_after_inflate", self.uuid)
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # best effort: the parent record may already be gone
            pass
        self._updateSlavesOnResize(vdi)
        util.fistpoint.activate("LVHDRT_coaleaf_finish_end", self.uuid)
        Util.log("*** finished leaf-coalesce successfully")

    def _checkSlaves(self, vdi):
        """Confirm with all slaves in the pool that 'vdi' is not in use. We
        try to check all slaves, including those that the Agent believes are
        offline, but ignore failures for offline hosts. This is to avoid cases
        where the Agent thinks a host is offline but the host is up."""
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": vdi.fileName,
                "action2": "cleanupLockAndRefcount",
                "uuid2": vdi.uuid,
                "ns2": lvhdutil.NS_PREFIX_LVM + self.uuid}
        onlineHosts = self.xapi.getOnlineHosts()
        abortFlag = IPCFlag(self.uuid)
        for pbdRecord in self.xapi.getAttachedPBDs():
            hostRef = pbdRecord["host"]
            if hostRef == self.xapi._hostRef:
                continue
            if abortFlag.test(FLAG_TYPE_ABORT):
                raise AbortException("Aborting due to signal")
            Util.log("Checking with slave %s (path %s)" % (
                self.xapi.getRecordHost(hostRef)['hostname'], vdi.path))
            try:
                self.xapi.ensureInactive(hostRef, args)
            except XenAPI.Failure:
                if hostRef in onlineHosts:
                    raise

    @override
    def _updateSlavesOnUndoLeafCoalesce(self, parent, child) -> None:
        """Refresh the LVs on every slave that has the child attached after a
        leaf-coalesce undo."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [child.uuid])
        if not slaves:
            Util.log("Update-on-leaf-undo: VDI %s not attached on any slave" % \
                    child)
            return

        tmpName = lvhdutil.LV_PREFIX[vhdutil.VDI_TYPE_VHD] + \
                self.TMP_RENAME_PREFIX + child.uuid
        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": tmpName,
                "action2": "deactivateNoRefcount",
                "lvName2": child.fileName,
                "action3": "refresh",
                "lvName3": child.fileName,
                "action4": "refresh",
                "lvName4": parent.fileName}
        for slave in slaves:
            Util.log("Updating %s, %s, %s on slave %s" % \
                    (tmpName, child.fileName, parent.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnRename(self, vdi, oldNameLV, origParentUuid) -> None:
        """Refresh the renamed LV on every slave that has it attached and
        clean up the old parent's lock/refcount state there."""
        slaves = util.get_slaves_attached_on(self.xapi.session, [vdi.uuid])
        if not slaves:
            Util.log("Update-on-rename: VDI %s not attached on any slave" % vdi)
            return

        args = {"vgName": self.vgName,
                "action1": "deactivateNoRefcount",
                "lvName1": oldNameLV,
                "action2": "refresh",
                "lvName2": vdi.fileName,
                "action3": "cleanupLockAndRefcount",
                "uuid3": origParentUuid,
                "ns3": lvhdutil.NS_PREFIX_LVM + self.uuid}
        for slave in slaves:
            Util.log("Updating %s to %s on slave %s" % \
                    (oldNameLV, vdi.fileName,
                     self.xapi.getRecordHost(slave)['hostname']))
            text = self.xapi.session.xenapi.host.call_plugin( \
                    slave, self.xapi.PLUGIN_ON_SLAVE, "multi", args)
            Util.log("call-plugin returned: '%s'" % text)

    @override
    def _updateSlavesOnResize(self, vdi) -> None:
        """Refresh the LV on every slave that has any leaf of 'vdi' attached
        so they pick up the new size."""
        uuids = [x.uuid for x in vdi.getAllLeaves()]
        slaves = util.get_slaves_attached_on(self.xapi.session, uuids)
        if not slaves:
            util.SMlog("Update-on-resize: %s not attached on any slave" % vdi)
            return
        lvhdutil.lvRefreshOnSlaves(self.xapi.session, self.uuid, self.vgName,
                vdi.fileName, vdi.uuid, slaves)

3406 

3407 

class LinstorSR(SR):
    TYPE = SR.TYPE_LINSTOR

    def __init__(self, uuid, xapi, createLock, force):
        # Refuse to construct when the LINSTOR python bindings are absent.
        if not LINSTOR_AVAILABLE:
            raise util.SMException(
                'Can\'t load cleanup LinstorSR: LINSTOR libraries are missing'
            )

        SR.__init__(self, uuid, xapi, createLock, force)
        self.path = LinstorVolumeManager.DEV_ROOT_PATH
        # Only the journaler is needed at construction time; the full
        # LINSTOR connection is (re)established by each scan.
        self._reloadLinstor(journaler_only=True)

3420 

3421 @override 

3422 def deleteVDI(self, vdi) -> None: 

3423 self._checkSlaves(vdi) 

3424 SR.deleteVDI(self, vdi) 

3425 

3426 @override 

3427 def getFreeSpace(self) -> int: 

3428 return self._linstor.max_volume_size_allowed 

3429 

3430 @override 

3431 def scan(self, force=False) -> None: 

3432 all_vdi_info = self._scan(force) 

3433 for uuid, vdiInfo in all_vdi_info.items(): 

3434 # When vdiInfo is None, the VDI is RAW. 

3435 vdi = self.getVDI(uuid) 

3436 if not vdi: 

3437 self.logFilter.logNewVDI(uuid) 

3438 vdi = LinstorVDI(self, uuid, not vdiInfo) 

3439 self.vdis[uuid] = vdi 

3440 if vdiInfo: 

3441 vdi.load(vdiInfo) 

3442 self._removeStaleVDIs(all_vdi_info.keys()) 

3443 self._buildTree(force) 

3444 self.logFilter.logState() 

3445 self._handleInterruptedCoalesceLeaf() 

3446 

3447 @override 

3448 def pauseVDIs(self, vdiList) -> None: 

3449 self._linstor.ensure_volume_list_is_not_locked( 

3450 vdiList, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3451 ) 

3452 return super(LinstorSR, self).pauseVDIs(vdiList) 

3453 

3454 def _reloadLinstor(self, journaler_only=False): 

3455 session = self.xapi.session 

3456 host_ref = util.get_this_host_ref(session) 

3457 sr_ref = session.xenapi.SR.get_by_uuid(self.uuid) 

3458 

3459 pbd = util.find_my_pbd(session, host_ref, sr_ref) 

3460 if pbd is None: 

3461 raise util.SMException('Failed to find PBD') 

3462 

3463 dconf = session.xenapi.PBD.get_device_config(pbd) 

3464 group_name = dconf['group-name'] 

3465 

3466 controller_uri = get_controller_uri() 

3467 self.journaler = LinstorJournaler( 

3468 controller_uri, group_name, logger=util.SMlog 

3469 ) 

3470 

3471 if journaler_only: 

3472 return 

3473 

3474 self._linstor = LinstorVolumeManager( 

3475 controller_uri, 

3476 group_name, 

3477 repair=True, 

3478 logger=util.SMlog 

3479 ) 

3480 self._vhdutil = LinstorVhdUtil(session, self._linstor) 

3481 

3482 def _scan(self, force): 

3483 for i in range(SR.SCAN_RETRY_ATTEMPTS): 

3484 self._reloadLinstor() 

3485 error = False 

3486 try: 

3487 all_vdi_info = self._load_vdi_info() 

3488 for uuid, vdiInfo in all_vdi_info.items(): 

3489 if vdiInfo and vdiInfo.error: 

3490 error = True 

3491 break 

3492 if not error: 

3493 return all_vdi_info 

3494 Util.log('Scan error, retrying ({})'.format(i)) 

3495 except Exception as e: 

3496 Util.log('Scan exception, retrying ({}): {}'.format(i, e)) 

3497 Util.log(traceback.format_exc()) 

3498 

3499 if force: 

3500 return all_vdi_info 

3501 raise util.SMException('Scan error') 

3502 

3503 def _load_vdi_info(self): 

3504 all_volume_info = self._linstor.get_volumes_with_info() 

3505 volumes_metadata = self._linstor.get_volumes_with_metadata() 

3506 

3507 all_vdi_info = {} 

3508 pending_vdi_uuids = [] 

3509 

3510 def handle_fail(vdi_uuid, e): 

3511 Util.log(f" [VDI {vdi_uuid}: failed to load VDI info]: {e}") 

3512 info = vhdutil.VHDInfo(vdi_uuid) 

3513 info.error = 1 

3514 return info 

3515 

3516 for vdi_uuid, volume_info in all_volume_info.items(): 

3517 try: 

3518 volume_metadata = volumes_metadata[vdi_uuid] 

3519 if not volume_info.name and not list(volume_metadata.items()): 

3520 continue # Ignore it, probably deleted. 

3521 

3522 if vdi_uuid.startswith('DELETED_'): 

3523 # Assume it's really a RAW volume of a failed snap without VHD header/footer. 

3524 # We must remove this VDI now without adding it in the VDI list. 

3525 # Otherwise `Relinking` calls and other actions can be launched on it. 

3526 # We don't want that... 

3527 Util.log('Deleting bad VDI {}'.format(vdi_uuid)) 

3528 

3529 self.lock() 

3530 try: 

3531 self._linstor.destroy_volume(vdi_uuid) 

3532 try: 

3533 self.forgetVDI(vdi_uuid) 

3534 except: 

3535 pass 

3536 except Exception as e: 

3537 Util.log('Cannot delete bad VDI: {}'.format(e)) 

3538 finally: 

3539 self.unlock() 

3540 continue 

3541 

3542 vdi_type = volume_metadata.get(VDI_TYPE_TAG) 

3543 if vdi_type == vhdutil.VDI_TYPE_VHD: 

3544 pending_vdi_uuids.append(vdi_uuid) 

3545 else: 

3546 all_vdi_info[vdi_uuid] = None 

3547 except Exception as e: 

3548 all_vdi_info[vdi_uuid] = handle_fail(vdi_uuid, e) 

3549 

3550 multi_vhdutil = MultiLinstorVhdUtil(self._linstor.uri, self._linstor.group_name) 

3551 

3552 def load_info(vdi_uuid, vhdutil_instance): 

3553 try: 

3554 return vhdutil_instance.get_vhd_info(vdi_uuid) 

3555 except Exception as e: 

3556 return handle_fail(vdi_uuid, e) 

3557 

3558 try: 

3559 for vdiInfo in multi_vhdutil.run(load_info, pending_vdi_uuids): 

3560 all_vdi_info[vdiInfo.uuid] = vdiInfo 

3561 finally: 

3562 del multi_vhdutil 

3563 

3564 return all_vdi_info 

3565 

3566 @override 

3567 def _prepareCoalesceLeaf(self, vdi) -> None: 

3568 vdi._activateChain() 

3569 vdi.deflate() 

3570 vdi._inflateParentForCoalesce() 

3571 

3572 @override 

3573 def _finishCoalesceLeaf(self, parent) -> None: 

3574 if not parent.isSnapshot() or parent.isAttachedRW(): 

3575 parent.inflateFully() 

3576 else: 

3577 parent.deflate() 

3578 

3579 @override 

3580 def _calcExtraSpaceNeeded(self, child, parent) -> int: 

3581 return LinstorVhdUtil.compute_volume_size(parent.sizeVirt, parent.vdi_type) - parent.getDrbdSize() 

3582 

3583 def _hasValidDevicePath(self, uuid): 

3584 try: 

3585 self._linstor.get_device_path(uuid) 

3586 except Exception: 

3587 # TODO: Maybe log exception. 

3588 return False 

3589 return True 

3590 

3591 @override 

3592 def _liveLeafCoalesce(self, vdi) -> bool: 

3593 self.lock() 

3594 try: 

3595 self._linstor.ensure_volume_is_not_locked( 

3596 vdi.uuid, timeout=LinstorVDI.VOLUME_LOCK_TIMEOUT 

3597 ) 

3598 return super(LinstorSR, self)._liveLeafCoalesce(vdi) 

3599 finally: 

3600 self.unlock() 

3601 

    @override
    def _handleInterruptedCoalesceLeaf(self) -> None:
        """Recover from a leaf-coalesce that was interrupted.

        For every leaf-coalesce journal entry: if the parent (or the
        temporarily renamed child) still has a valid device path, the
        operation is undone; otherwise it is finished.  The journal entry is
        removed afterwards and the VDI is unpaused.
        """
        entries = self.journaler.get_all(VDI.JRN_LEAF)
        for uuid, parentUuid in entries.items():
            if self._hasValidDevicePath(parentUuid) or \
                    self._hasValidDevicePath(self.TMP_RENAME_PREFIX + uuid):
                self._undoInterruptedCoalesceLeaf(uuid, parentUuid)
            else:
                self._finishInterruptedCoalesceLeaf(uuid, parentUuid)
            self.journaler.remove(VDI.JRN_LEAF, uuid)
            vdi = self.getVDI(uuid)
            if vdi:
                vdi.ensureUnpaused()

3615 

    def _undoInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Roll back an interrupted leaf-coalesce.

        Renames the parent/child volumes back to their original UUIDs where
        needed, restores the child's parent/type config, and re-establishes
        the hidden flags before notifying the slaves.
        """
        Util.log('*** UNDO LEAF-COALESCE')
        parent = self.getVDI(parentUuid)
        if not parent:
            # The parent may still carry the child's UUID if the crash
            # happened mid-rename.
            parent = self.getVDI(childUuid)
            if not parent:
                raise util.SMException(
                    'Neither {} nor {} found'.format(parentUuid, childUuid)
                )
            Util.log(
                'Renaming parent back: {} -> {}'.format(childUuid, parentUuid)
            )
            parent.rename(parentUuid)

        child = self.getVDI(childUuid)
        if not child:
            # The child may have been moved aside under a temporary name.
            child = self.getVDI(self.TMP_RENAME_PREFIX + childUuid)
            if not child:
                raise util.SMException(
                    'Neither {} nor {} found'.format(
                        childUuid, self.TMP_RENAME_PREFIX + childUuid
                    )
                )
            Util.log('Renaming child back to {}'.format(childUuid))
            child.rename(childUuid)
            Util.log('Updating the VDI record')
            child.setConfig(VDI.DB_VHD_PARENT, parentUuid)
            child.setConfig(VDI.DB_VDI_TYPE, vhdutil.VDI_TYPE_VHD)

            # TODO: Maybe deflate here.

        # Restore visibility: the child must be visible, the parent hidden.
        if child.isHidden():
            child._setHidden(False)
        if not parent.isHidden():
            parent._setHidden(True)
        self._updateSlavesOnUndoLeafCoalesce(parent, child)
        Util.log('*** leaf-coalesce undo successful')

3653 

    def _finishInterruptedCoalesceLeaf(self, childUuid, parentUuid):
        """Complete a leaf-coalesce that was interrupted near the end."""
        Util.log('*** FINISH LEAF-COALESCE')
        vdi = self.getVDI(childUuid)
        if not vdi:
            raise util.SMException('VDI {} not found'.format(childUuid))
        # TODO: Maybe inflate.
        try:
            self.forgetVDI(parentUuid)
        except XenAPI.Failure:
            # The parent record may already be gone; best-effort here.
            pass
        self._updateSlavesOnResize(vdi)
        Util.log('*** finished leaf-coalesce successfully')

3666 

    def _checkSlaves(self, vdi):
        """Raise SMException if the volume is opened by anything but tapdisk.

        A volume that no longer exists in LINSTOR is tolerated; any other
        LinstorVolumeManagerError is re-raised.
        """
        try:
            all_openers = self._linstor.get_volume_openers(vdi.uuid)
            for openers in all_openers.values():
                for opener in openers.values():
                    if opener['process-name'] != 'tapdisk':
                        raise util.SMException(
                            'VDI {} is in use: {}'.format(vdi.uuid, all_openers)
                        )
        except LinstorVolumeManagerError as e:
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS:
                raise

3679 

3680 

3681################################################################################ 

3682# 

3683# Helpers 

3684# 

def daemonize():
    """Double-fork into a background daemon process.

    Returns False in the original (parent) process and True in the final
    daemonized child.  The child detaches from the controlling terminal,
    closes fds 0-2 and rebinds them to /dev/null, and clears inherited lock
    objects.
    """
    pid = os.fork()
    if pid:
        # First parent: reap the intermediate child and keep running.
        os.waitpid(pid, 0)
        Util.log("New PID [%d]" % pid)
        return False
    os.chdir("/")
    os.setsid()
    pid = os.fork()
    if pid:
        # Intermediate child: exit so the grandchild is orphaned to init
        # and can never reacquire a controlling terminal.
        Util.log("Will finish as PID [%d]" % pid)
        os._exit(0)
    for fd in [0, 1, 2]:
        try:
            os.close(fd)
        except OSError:
            pass
    # we need to fill those special fd numbers or pread won't work
    sys.stdin = open("/dev/null", 'r')
    sys.stderr = open("/dev/null", 'w')
    sys.stdout = open("/dev/null", 'w')
    # As we're a new process we need to clear the lock objects
    lock.Lock.clearAll()
    return True

3709 

3710 

def normalizeType(type):
    """Map an SR driver type string onto one of the generic SR.TYPES.

    Raises util.SMException if the resulting type is not supported.
    """
    lvm_aliases = ["lvm", "lvmoiscsi", "lvmohba", "lvmofcoe"]
    file_aliases = [
        "ext", "nfs", "ocfsoiscsi", "ocfsohba", "smb", "cephfs", "glusterfs",
        "moosefs", "xfs", "zfs", "largeblock"
    ]
    if type in LVHDSR.SUBTYPES:
        type = SR.TYPE_LVHD
    if type in lvm_aliases:
        # temporary while LVHD is symlinked as LVM
        type = SR.TYPE_LVHD
    if type in file_aliases:
        type = SR.TYPE_FILE
    if type in ["linstor"]:
        type = SR.TYPE_LINSTOR
    if type not in SR.TYPES:
        raise util.SMException("Unsupported SR type: %s" % type)
    return type

3727 

# Default GC quiet-period, in seconds (see _gcLoopPause).
GCPAUSE_DEFAULT_SLEEP = 5 * 60

3729 

3730 

def _gc_init_file(sr_uuid):
    """Return the path of the per-SR 'gc_init' marker file."""
    sr_dir = os.path.join(NON_PERSISTENT_DIR, str(sr_uuid))
    return os.path.join(sr_dir, 'gc_init')

3733 

3734 

def _create_init_file(sr_uuid):
    """Create the per-SR 'gc_init' marker file, creating its directory first.

    Fix: reuse _gc_init_file() instead of duplicating the path construction,
    so the marker location is defined in exactly one place.
    """
    util.makedirs(os.path.join(NON_PERSISTENT_DIR, str(sr_uuid)))
    with open(_gc_init_file(sr_uuid), 'w+') as f:
        f.write('1')

3740 

3741 

def _gcLoopPause(sr, dryRun=False, immediate=False):
    """Optionally pause before the GC starts doing work.

    Skipped entirely when immediate is set.  If the GCPAUSE fistpoint is
    active it is triggered with a no-op callback; otherwise, if the SR's
    gc_init file exists, sleep GCPAUSE_DEFAULT_SLEEP seconds in an abortable
    fashion.
    """
    if immediate:
        return

    # Check to see if the GCPAUSE_FISTPOINT is present. If so the fist
    # point will just return. Otherwise, fall back on an abortable sleep.

    if util.fistpoint.is_active(util.GCPAUSE_FISTPOINT):

        util.fistpoint.activate_custom_fn(util.GCPAUSE_FISTPOINT,
                                          lambda *args: None)
    elif os.path.exists(_gc_init_file(sr.uuid)):
        def abortTest():
            # Abort the quiet period when an abort IPC flag is raised.
            return IPCFlag(sr.uuid).test(FLAG_TYPE_ABORT)

        # If time.sleep hangs we are in deep trouble, however for
        # completeness we set the timeout of the abort thread to
        # 110% of GCPAUSE_DEFAULT_SLEEP.
        Util.log("GC active, about to go quiet")
        Util.runAbortable(lambda: time.sleep(GCPAUSE_DEFAULT_SLEEP),
                          None, sr.uuid, abortTest, VDI.POLL_INTERVAL,
                          GCPAUSE_DEFAULT_SLEEP * 1.1)
        Util.log("GC active, quiet period ended")

3765 

3766 

def _gcLoop(sr, dryRun=False, immediate=False):
    """Main GC loop: garbage-collect and coalesce until no work remains.

    Holds lockGCActive for the whole run and lockGCRunning around each
    iteration of the work loop; progress is reported through a XAPI task.
    """
    if not lockGCActive.acquireNoblock():
        Util.log("Another GC instance already active, exiting")
        return

    # Check we're still attached after acquiring locks
    if not sr.xapi.isPluggedHere():
        Util.log("SR no longer attached, exiting")
        return

    # Clean up Intellicache files
    sr.cleanupCache()

    # Track how many we do
    coalesced = 0
    task_status = "success"
    try:
        # Check if any work needs to be done
        if not sr.xapi.isPluggedHere():
            Util.log("SR no longer attached, exiting")
            return
        sr.scanLocked()
        if not sr.hasWork():
            Util.log("No work, exiting")
            return
        sr.xapi.create_task(
            "Garbage Collection",
            "Garbage collection for SR %s" % sr.uuid)
        _gcLoopPause(sr, dryRun, immediate=immediate)
        while True:
            if SIGTERM:
                Util.log("Term requested")
                return

            if not sr.xapi.isPluggedHere():
                Util.log("SR no longer attached, exiting")
                break
            sr.scanLocked()
            if not sr.hasWork():
                Util.log("No work, exiting")
                break

            if not lockGCRunning.acquireNoblock():
                Util.log("Unable to acquire GC running lock.")
                return
            try:
                if not sr.gcEnabled():
                    break

                sr.xapi.update_task_progress("done", coalesced)

                sr.cleanupCoalesceJournals()
                # Create the init file here in case startup is waiting on it
                _create_init_file(sr.uuid)
                sr.scanLocked()
                sr.updateBlockInfo()

                # First pass: remove any garbage VDIs under the SR lock.
                howmany = len(sr.findGarbage())
                if howmany > 0:
                    Util.log("Found %d orphaned vdis" % howmany)
                    sr.lock()
                    try:
                        sr.garbageCollect(dryRun)
                    finally:
                        sr.unlock()
                    sr.xapi.srUpdate()

                # Then coalesce one non-leaf pair per iteration.
                candidate = sr.findCoalesceable()
                if candidate:
                    util.fistpoint.activate(
                        "LVHDRT_finding_a_suitable_pair", sr.uuid)
                    sr.coalesce(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

                # Finally try a leaf-coalesce.
                candidate = sr.findLeafCoalesceable()
                if candidate:
                    sr.coalesceLeaf(candidate, dryRun)
                    sr.xapi.srUpdate()
                    coalesced += 1
                    continue

            finally:
                lockGCRunning.release()
    except:
        task_status = "failure"
        raise
    finally:
        sr.xapi.set_task_status(task_status)
        Util.log("GC process exiting, no work left")
        _create_init_file(sr.uuid)
        lockGCActive.release()

3860 

3861 

def _gc(session, srUuid, dryRun=False, immediate=False):
    """Run GC on one SR, performing end-of-run cleanup unconditionally."""
    init(srUuid)
    sr = SR.getInstance(srUuid, session)
    if not sr.gcEnabled(False):
        return

    try:
        _gcLoop(sr, dryRun, immediate=immediate)
    finally:
        sr.check_no_space_candidates()
        sr.cleanup()
        sr.logFilter.logState()
        # Drop the XAPI wrapper explicitly so its session is released.
        del sr.xapi

3875 

3876 

def _abort(srUuid, soft=False):
    """Aborts an GC/coalesce.

    srUuid: the UUID of the SR whose GC/coalesce must be aborted
    soft: If set to True and there is a pending abort signal, the function
    doesn't do anything. If set to False, a new abort signal is issued.

    returns: If soft is set to False, we return True holding lockGCActive. If
    soft is set to True and an abort signal is pending, we return False
    without holding lockGCActive. An exception is raised in case of error."""
    Util.log("=== SR %s: abort ===" % (srUuid))
    init(srUuid)
    if not lockGCActive.acquireNoblock():
        gotLock = False
        Util.log("Aborting currently-running instance (SR %s)" % srUuid)
        abortFlag = IPCFlag(srUuid)
        # set() returning False means a soft abort found a flag already
        # pending, so someone else is taking care of it.
        if not abortFlag.set(FLAG_TYPE_ABORT, soft):
            return False
        # Wait for the running instance to notice the flag and release.
        for i in range(SR.LOCK_RETRY_ATTEMPTS):
            gotLock = lockGCActive.acquireNoblock()
            if gotLock:
                break
            time.sleep(SR.LOCK_RETRY_INTERVAL)
        abortFlag.clear(FLAG_TYPE_ABORT)
        if not gotLock:
            raise util.CommandException(code=errno.ETIMEDOUT,
                    reason="SR %s: error aborting existing process" % srUuid)
    return True

3905 

3906 

def init(srUuid):
    """Lazily create the module-level GC locks for this SR (idempotent)."""
    global lockGCRunning
    if not lockGCRunning:
        lockGCRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, srUuid)
    global lockGCActive
    if not lockGCActive:
        lockGCActive = LockActive(srUuid)

3914 

3915 

class LockActive:
    """
    Wraps the use of LOCK_TYPE_GC_ACTIVE such that the lock cannot be acquired
    if another process holds the SR lock.
    """
    def __init__(self, srUuid):
        self._lock = lock.Lock(LOCK_TYPE_GC_ACTIVE, srUuid)
        self._srLock = lock.Lock(vhdutil.LOCK_TYPE_SR, srUuid)

    def acquireNoblock(self):
        """Try to take the GC-active lock; return False if unavailable.

        The SR lock is held only for the duration of the attempt, which
        blocks the acquisition while another process owns the SR.
        """
        self._srLock.acquire()

        try:
            return self._lock.acquireNoblock()
        finally:
            self._srLock.release()

    def release(self):
        """Release the GC-active lock (the SR lock is not involved here)."""
        self._lock.release()

3935 

3936 

def usage():
    """Print CLI usage to stdout, log the invalid invocation, and exit(1)."""
    output = """Garbage collect and/or coalesce VHDs in a VHD-based SR

Parameters:
    -u --uuid UUID SR UUID
 and one of:
    -g --gc garbage collect, coalesce, and repeat while there is work
    -G --gc_force garbage collect once, aborting any current operations
    -c --cache-clean <max_age> clean up IntelliCache cache files older than
 max_age hours
    -a --abort abort any currently running operation (GC or coalesce)
    -q --query query the current state (GC'ing, coalescing or not running)
    -x --disable disable GC/coalesce (will be in effect until you exit)
    -t --debug see Debug below

Options:
    -b --background run in background (return immediately) (valid for -g only)
    -f --force continue in the presence of VHDs with errors (when doing
 GC, this might cause removal of any such VHDs) (only valid
 for -G) (DANGEROUS)

Debug:
    The --debug parameter enables manipulation of LVHD VDIs for debugging
    purposes. ** NEVER USE IT ON A LIVE VM **
    The following parameters are required:
    -t --debug <cmd> <cmd> is one of "activate", "deactivate", "inflate",
 "deflate".
    -v --vdi_uuid VDI UUID
    """
    #-d --dry-run don't actually perform any SR-modifying operations
    print(output)
    Util.log("(Invalid usage)")
    sys.exit(1)

3970 

3971 

3972############################################################################## 

3973# 

3974# API 

3975# 

def abort(srUuid, soft=False):
    """Abort GC/coalesce if we are currently GC'ing or coalescing a VDI pair.
    """
    if _abort(srUuid, soft):
        # _abort returned True holding lockGCActive: stop the service,
        # then drop the lock ourselves.
        stop_gc_service(srUuid)
        Util.log("abort: releasing the process lock")
        lockGCActive.release()
        return True
    else:
        return False

3986 

3987 

def run_gc(session, srUuid, dryRun, immediate=False):
    """Run the GC loop for one SR and map the outcome to an exit code.

    Returns 0 on success, 2 when the run was aborted, 1 on any other error.
    """
    try:
        _gc(session, srUuid, dryRun, immediate=immediate)
    except AbortException:
        Util.log("Aborted")
        return 2
    except Exception:
        Util.logException("gc")
        Util.log("* * * * * SR %s: ERROR\n" % srUuid)
        return 1
    return 0

3999 

4000 

def gc(session, srUuid, inBackground, dryRun=False):
    """Garbage collect all deleted VDIs in SR "srUuid". Fork & return
    immediately if inBackground=True.

    The following algorithm is used:
    1. If we are already GC'ing in this SR, return
    2. If we are already coalescing a VDI pair:
        a. Scan the SR and determine if the VDI pair is GC'able
        b. If the pair is not GC'able, return
        c. If the pair is GC'able, abort coalesce
    3. Scan the SR
    4. If there is nothing to collect, nor to coalesce, return
    5. If there is something to collect, GC all, then goto 3
    6. If there is something to coalesce, coalesce one pair, then goto 3
    """
    Util.log("=== SR %s: gc ===" % srUuid)

    signal.signal(signal.SIGTERM, receiveSignal)

    if inBackground:
        if daemonize():
            # we are now running in the background. Catch & log any errors
            # because there is no other way to propagate them back at this
            # point

            run_gc(None, srUuid, dryRun)
            os._exit(0)
    else:
        # Foreground: run synchronously and exit with the GC's return code.
        os._exit(run_gc(session, srUuid, dryRun, immediate=True))

4030 

4031 

def start_gc(session, sr_uuid):
    """
    This function is used to try to start a backgrounded GC session by forking
    the current process. If using the systemd version, call start_gc_service() instead.
    """
    # don't bother if an instance already running (this is just an
    # optimization to reduce the overhead of forking a new process if we
    # don't have to, but the process will check the lock anyways)
    lockRunning = lock.Lock(lock.LOCK_TYPE_GC_RUNNING, sr_uuid)
    if not lockRunning.acquireNoblock():
        if should_preempt(session, sr_uuid):
            # The running coalesce targets a garbage VDI: abort it softly.
            util.SMlog("Aborting currently-running coalesce of garbage VDI")
            try:
                if not abort(sr_uuid, soft=True):
                    util.SMlog("The GC has already been scheduled to re-start")
            except util.CommandException as e:
                if e.code != errno.ETIMEDOUT:
                    raise
                util.SMlog('failed to abort the GC')
        else:
            util.SMlog("A GC instance already running, not kicking")
            return
    else:
        lockRunning.release()

    util.SMlog(f"Starting GC file is {__file__}")
    # Re-invoke this script in background mode for the SR.
    subprocess.run([__file__, '-b', '-u', sr_uuid, '-g'],
                   stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)

4060 

def _gc_service_cmd(sr_uuid, action, extra_args=None):
    """
    Build and run the systemctl command for the GC service using util.doexec.
    """
    # systemd escapes '-' in template instance names as \x2d.
    unit = f"SMGC@{sr_uuid.replace('-', chr(92) + 'x2d')}"
    cmd = ["/usr/bin/systemctl", "--quiet"]
    cmd.extend(extra_args or [])
    cmd.append(action)
    cmd.append(unit)
    return util.doexec(cmd)

4071 

4072 

def start_gc_service(sr_uuid, wait=False):
    """
    This starts the templated systemd service which runs GC on the given SR UUID.
    If the service was already started, this is a no-op.

    Because the service is a one-shot with RemainAfterExit=no, when called with
    wait=True this will run the service synchronously and will not return until the
    run has finished. This is used to force a run of the GC instead of just kicking it
    in the background.
    """
    util.SMlog(f"Kicking SMGC@{sr_uuid}...")
    # --no-block makes systemctl return without waiting for the unit to run.
    _gc_service_cmd(sr_uuid, "start", extra_args=None if wait else ["--no-block"])

4085 

4086 

def stop_gc_service(sr_uuid):
    """
    Stops the templated systemd service which runs GC on the given SR UUID.
    """
    util.SMlog(f"Stopping SMGC@{sr_uuid}...")
    (rc, _stdout, stderr) = _gc_service_cmd(sr_uuid, "stop")
    if rc != 0:
        # Best-effort: log the failure instead of propagating it.
        util.SMlog(f"Failed to stop gc service `SMGC@{sr_uuid}`: `{stderr}`")

4095 

4096 

def wait_for_completion(sr_uuid):
    """Block until the GC service for this SR is no longer active.

    Polls get_state() every five seconds.
    """
    while True:
        if not get_state(sr_uuid):
            return
        time.sleep(5)

4100 

4101 

def gc_force(session, srUuid, force=False, dryRun=False, lockSR=False):
    """Garbage collect all deleted VDIs in SR "srUuid". The caller must ensure
    the SR lock is held.
    The following algorithm is used:
    1. If we are already GC'ing or coalescing a VDI pair, abort GC/coalesce
    2. Scan the SR
    3. GC
    4. return
    """
    Util.log("=== SR %s: gc_force ===" % srUuid)
    init(srUuid)
    sr = SR.getInstance(srUuid, session, lockSR, True)
    if not lockGCActive.acquireNoblock():
        # Someone else is active: abort them first (this acquires the lock).
        abort(srUuid)
    else:
        Util.log("Nothing was running, clear to proceed")

    if force:
        Util.log("FORCED: will continue even if there are VHD errors")
    sr.scanLocked(force)
    sr.cleanupCoalesceJournals()

    try:
        sr.cleanupCache()
        sr.garbageCollect(dryRun)
    finally:
        sr.cleanup()
        sr.logFilter.logState()
        lockGCActive.release()

4131 

4132 

def get_state(srUuid):
    """Return whether GC/coalesce is currently running or not. This asks systemd for
    the state of the templated SMGC service and will return True if it is "activating"
    or "running" (for completeness, as in practice it will never achieve the latter state)
    """
    unit = "SMGC@%s" % srUuid.replace("-", "\\x2d")
    proc = subprocess.run(
        ["/usr/bin/systemctl", "is-active", unit],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    state = proc.stdout.decode('utf-8').rstrip()
    return state in ("activating", "running")

4145 

4146 

def should_preempt(session, srUuid):
    """Return True if the currently journaled coalesce targets a garbage VDI.

    Raises SMException if more than one coalesce journal entry exists.
    """
    sr = SR.getInstance(srUuid, session)
    entries = sr.journaler.getAll(VDI.JRN_COALESCE)
    if len(entries) == 0:
        return False
    elif len(entries) > 1:
        raise util.SMException("More than one coalesce entry: " + str(entries))
    sr.scanLocked()
    coalescedUuid = entries.popitem()[0]
    garbage = sr.findGarbage()
    for vdi in garbage:
        if vdi.uuid == coalescedUuid:
            return True
    return False

4161 

4162 

def get_coalesceable_leaves(session, srUuid, vdiUuids):
    """Return the subset of the given VDI UUIDs that are leaf-coalesceable.

    Raises SMException if any UUID is not found on the SR.
    """
    sr = SR.getInstance(srUuid, session)
    sr.scanLocked()
    coalesceable = []
    for uuid in vdiUuids:
        candidate = sr.getVDI(uuid)
        if not candidate:
            raise util.SMException("VDI %s not found" % uuid)
        if candidate.isLeafCoalesceable():
            coalesceable.append(uuid)
    return coalesceable

4174 

4175 

def cache_cleanup(session, srUuid, maxAge):
    """Clean IntelliCache files older than maxAge on the given SR."""
    return SR.getInstance(srUuid, session).cleanupCache(maxAge)

4179 

4180 

def debug(sr_uuid, cmd, vdi_uuid):
    """Run a debug command against one VDI of an LVHD SR.

    cmd is one of "activate", "deactivate", "inflate", "deflate".
    Prints errors and results to stdout; non-LVHD SRs are rejected.
    """
    Util.log("Debug command: %s" % cmd)
    sr = SR.getInstance(sr_uuid, None)
    if not isinstance(sr, LVHDSR):
        print("Error: not an LVHD SR")
        return
    sr.scanLocked()
    vdi = sr.getVDI(vdi_uuid)
    if not vdi:
        # Bug fix: the original printed a literal "%s" because the format
        # argument was missing.
        print("Error: VDI %s not found" % vdi_uuid)
        return
    print("Running %s on SR %s" % (cmd, sr))
    print("VDI before: %s" % vdi)
    if cmd == "activate":
        vdi._activate()
        print("VDI file: %s" % vdi.path)
    if cmd == "deactivate":
        ns = lvhdutil.NS_PREFIX_LVM + sr.uuid
        sr.lvmCache.deactivate(ns, vdi.uuid, vdi.fileName, False)
    if cmd == "inflate":
        vdi.inflateFully()
        sr.cleanup()
    if cmd == "deflate":
        vdi.deflate()
        sr.cleanup()
    sr.scanLocked()
    print("VDI after: %s" % vdi)

4208 

4209 

def abort_optional_reenable(uuid):
    """Disable GC/coalesce for an SR until the operator presses enter."""
    print("Disabling GC/coalesce for %s" % uuid)
    ret = _abort(uuid)
    input("Press enter to re-enable...")
    print("GC/coalesce re-enabled")
    # NOTE(review): lockGCRunning is released here although _abort only
    # acquires lockGCActive — confirm this release is intentional.
    lockGCRunning.release()
    if ret:
        lockGCActive.release()

4218 

4219 

4220############################################################################## 

4221# 

4222# CLI 

4223# 

def main():
    """CLI entry point: parse command-line options and dispatch the action."""
    action = ""
    maxAge = 0
    uuid = ""
    background = False
    force = False
    dryRun = False
    debug_cmd = ""
    vdi_uuid = ""
    shortArgs = "gGc:aqxu:bfdt:v:"
    longArgs = ["gc", "gc_force", "clean_cache", "abort", "query", "disable",
            "uuid=", "background", "force", "dry-run", "debug=", "vdi_uuid="]

    try:
        opts, args = getopt.getopt(sys.argv[1:], shortArgs, longArgs)
    except getopt.GetoptError:
        usage()
    for o, a in opts:
        if o in ("-g", "--gc"):
            action = "gc"
        if o in ("-G", "--gc_force"):
            action = "gc_force"
        if o in ("-c", "--clean_cache"):
            action = "clean_cache"
            maxAge = int(a)
        if o in ("-a", "--abort"):
            action = "abort"
        if o in ("-q", "--query"):
            action = "query"
        if o in ("-x", "--disable"):
            action = "disable"
        if o in ("-u", "--uuid"):
            uuid = a
        if o in ("-b", "--background"):
            background = True
        if o in ("-f", "--force"):
            force = True
        if o in ("-d", "--dry-run"):
            Util.log("Dry run mode")
            dryRun = True
        if o in ("-t", "--debug"):
            action = "debug"
            debug_cmd = a
        if o in ("-v", "--vdi_uuid"):
            vdi_uuid = a

    if not action or not uuid:
        usage()
    # --debug requires both a command and a VDI UUID; no other action
    # accepts either of them.
    if action == "debug" and not (debug_cmd and vdi_uuid) or \
            action != "debug" and (debug_cmd or vdi_uuid):
        usage()

    if action != "query" and action != "debug":
        print("All output goes to log")

    if action == "gc":
        gc(None, uuid, background, dryRun)
    elif action == "gc_force":
        gc_force(None, uuid, force, dryRun, True)
    elif action == "clean_cache":
        cache_cleanup(None, uuid, maxAge)
    elif action == "abort":
        abort(uuid)
    elif action == "query":
        print("Currently running: %s" % get_state(uuid))
    elif action == "disable":
        abort_optional_reenable(uuid)
    elif action == "debug":
        debug(uuid, debug_cmd, vdi_uuid)

4294 

# Entry point when this script is executed directly (see start_gc, which
# re-invokes __file__ with CLI arguments).
if __name__ == '__main__':
    main()