Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python3 

2# 

3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <https://www.gnu.org/licenses/>. 

16# 

17 

18from sm_typing import ( 

19 Any, 

20 Dict, 

21 List, 

22 override, 

23) 

24 

25import json 

26import linstor 

27import os.path 

28import re 

29import shutil 

30import socket 

31import stat 

32import time 

33import util 

34import uuid 

35 

# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# TCP port of the LINSTOR satellite service.
LINSTOR_SATELLITE_PORT = 3366

# Matches a "<node> role:Primary" line in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the address part (before the port) of a drbdsetup line.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Base directory of the DRBD by-resource device symlinks.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to execute LINSTOR helpers on other hosts.
PLUGIN = 'linstor-manager'

53 

54 

55# ============================================================================== 

56 

def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD openers (processes that hold the device open) of a
    volume on the local node from debugfs.
    :param str resource_name: The DRBD resource name.
    :param volume: The volume number inside the resource.
    :return: JSON string mapping each PID to its process name and
        open duration.
    :rtype: str
    :raises Exception: If arguments are missing or an openers line
        cannot be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        # Don't use `assert` here: asserts are stripped under `-O` and
        # this parses external (kernel-provided) data.
        if not match:
            raise Exception(
                'Cannot parse DRBD openers line: `{}`'.format(line)
            )

        (process_name, pid, open_duration_ms) = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)

86 

def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume from every live host of the
    pool through the `linstor-manager` plugin. Hosts that cannot be
    queried are logged and skipped.
    :param str resource_name: The DRBD resource name.
    :param volume: The volume number inside the resource.
    :return: Dict mapping each hostname to its decoded openers data.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    plugin_args = {
        'resourceName': resource_name,
        'volume': str(volume)
    }

    session = util.get_localAPI_session()

    all_openers = {}
    for ref, record in session.xenapi.host.get_all_records().items():
        hostname = record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(record['metrics'])
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            raw = session.xenapi.host.call_plugin(
                ref, PLUGIN, PLUGIN_CMD, plugin_args
            )
            all_openers[hostname] = json.loads(raw)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, hostname, e
            ))

    return all_openers

117 

118 

119# ============================================================================== 

120 

def round_up(value, divisor):
    """
    Round `value` up to the nearest multiple of `divisor`.
    :param value: The value to round (coerced to int).
    :param divisor: The step to align on, must be non-zero.
    :return: The smallest multiple of `divisor` that is >= `value`.
    :rtype: int
    """
    assert divisor
    step = int(divisor)
    amount = int(value)
    return (amount + step - 1) // step * step

125 

126 

def round_down(value, divisor):
    """
    Round `value` down to the nearest multiple of `divisor`.
    :param value: The value to round (coerced to int).
    :param divisor: The step to align on, must be non-zero.
    :return: The largest multiple of `divisor` that is <= `value`.
    :rtype: int
    """
    assert divisor
    amount = int(value)
    return amount - amount % int(divisor)

131 

132 

133# ============================================================================== 

134 

def _get_controller_addresses() -> List[str]:
    """
    List the local addresses of established connections on the LINSTOR
    satellite port, using `ss`. Any failure is logged and yields an
    empty list.
    :return: Local addresses, one per established connection.
    :rtype: List[str]
    """
    command = [
        "/usr/sbin/ss", "-tnpH", "state", "established",
        f"( sport = :{LINSTOR_SATELLITE_PORT} )"
    ]
    try:
        (ret, stdout, stderr) = util.doexec(command)
        if ret == 0:
            addresses = []
            for line in stdout.splitlines():
                # Column 3 is "address:port"; strip the port part.
                addresses.append(line.split()[3].rsplit(":", 1)[0])
            return addresses
        util.SMlog(f"Unexpected code {ret}: {stderr}")
    except Exception as e:
        util.SMlog(f"Unable to get controller addresses: {e}")
    return []

149 

def _get_controller_uri() -> str:
    """
    Build a LINSTOR controller URI from the first discovered address.
    :return: A "linstor://" URI, or an empty string if no address found.
    :rtype: str
    """
    # TODO: Check that an IP address from the current pool is returned.
    addresses = _get_controller_addresses()
    if not addresses:
        return ""
    return "linstor://" + addresses[0]

154 

def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying up to 30 times with a one
    second pause between attempts.
    :return: The controller URI, or None if none was found in time.
    """
    for attempt in range(30):
        if attempt:
            # Pause only between attempts, not before the first one.
            time.sleep(1)

        uri = _get_controller_uri()
        if uri:
            return uri

166 

167 

def get_controller_node_name():
    """
    Find the node that hosts the LINSTOR controller.
    First inspect `drbdadm status` of the database volume: if a Primary
    is visible there, use it. Otherwise query every live host through
    the `linstor-manager` plugin.
    :return: 'localhost' if the local node is Primary, otherwise the
        Primary/controller node name, or None if none is found.
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # Local node is Primary on the database volume.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        # Otherwise look for a remote Primary in the status output.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Fallback: ask each live host whether it runs the controller.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            # Skip offline hosts: the plugin call would fail anyway.
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))

202 

def demote_drbd_resource(node_name, resource_name):
    """
    Ask the host `node_name` to demote the DRBD resource
    `resource_name` (to Secondary) through the `linstor-manager` plugin.
    :param str node_name: Hostname of the node where to demote.
    :param str resource_name: The DRBD resource to demote.
    :raises Exception: If the plugin call fails on the matching node.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            raise Exception(
                'Can\'t demote resource `{}`, unable to find node `{}`'
                .format(resource_name, node_name)
            )
    # NOTE(review): if no host matches `node_name` the function returns
    # silently — confirm whether an exception is expected in that case.

224 

225# ============================================================================== 

226 

class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying an error code so
    callers can react to specific failure kinds.
    """

    # Note: no trailing commas here — a trailing comma would turn each
    # constant into a one-element tuple instead of a plain int (the
    # original definitions had this defect for codes 0 through 4).
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4
    ERR_VOLUME_IN_USE = 5

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human-readable error description.
        :param int code: One of the ERR_* constants.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # The ERR_* code this error was raised with.
        return self._code

242 

243 

244# ============================================================================== 

245 

246# Note: 

247# If a storage pool is not accessible after a network change: 

248# linstor node interface modify <NODE> default --ip <IP> 

249 

250 

class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    __slots__ = (
        '_linstor', '_uri', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty',
        '_resources_info_cache',
    )

    # Root directory of the DRBD by-resource device symlinks.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15

313 

    @staticmethod
    def default_logger(*args):
        # Fallback logger used when the constructor receives no logger:
        # dump the raw argument tuple to stdout.
        print(args)

317 

318 # -------------------------------------------------------------------------- 

319 # API. 

320 # -------------------------------------------------------------------------- 

321 

    class VolumeInfo(object):
        """
        Lightweight record describing the size and placement state of
        one LINSTOR volume.
        """

        __slots__ = (
            'name',
            'allocated_size',  # Allocated size, place count is not used.
            'virtual_size',    # Total virtual available size of this volume
                               # (i.e. the user size at creation).
            'diskful'          # Array of nodes that have a diskful volume.
        )

        def __init__(self, name):
            # LINSTOR resource name of the volume.
            self.name = name
            self.allocated_size = 0
            self.virtual_size = 0
            self.diskful = []

        @override
        def __repr__(self) -> str:
            return 'VolumeInfo("{}", {}, {}, {})'.format(
                self.name, self.allocated_size, self.virtual_size,
                self.diskful
            )

343 

344 # -------------------------------------------------------------------------- 

345 

    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If nodes run mismatched LINSTOR
        versions or if the resource group cannot be found.
        """

        self._uri = uri
        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )

        # Refuse to operate if some nodes run a different LINSTOR version.
        mismatched_nodes = [
            node for node in self._linstor.node_list().pop().nodes if node.connection_status == "VERSION_MISMATCH"
        ]

        if mismatched_nodes:
            raise LinstorVolumeManagerError(
                "Some linstor nodes are not using the same version. " +
                f"Incriminated nodes are: {','.join([node.name for node in mismatched_nodes])}"
            )

        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._resources_info_cache = None
        self._build_volumes(repair=repair)

403 

    @property
    def uri(self) -> str:
        """
        Give the URI of the LINSTOR controller in use.
        :return: The controller URI.
        :rtype: str
        """
        return self._uri

    @property
    def group_name(self):
        """
        Give the used group name.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name

    @property
    def redundancy(self):
        """
        Give the used redundancy.
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy

    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes

434 

435 @property 

436 def max_volume_size_allowed(self): 

437 """ 

438 Give the max volume size currently available in B. 

439 :return: The current size. 

440 :rtype: int 

441 """ 

442 

443 candidates = self._find_best_size_candidates() 

444 if not candidates: 

445 raise LinstorVolumeManagerError( 

446 'Failed to get max volume size allowed' 

447 ) 

448 

449 size = candidates[0].max_volume_size 

450 if size < 0: 

451 raise LinstorVolumeManagerError( 

452 'Invalid max volume size allowed given: {}'.format(size) 

453 ) 

454 return self.round_down_volume_size(size * 1024) 

455 

    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        return self._compute_size('total_capacity')

    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        return self._compute_size('free_capacity')

473 

474 @property 

475 def allocated_volume_size(self): 

476 """ 

477 Give the allocated size for all volumes. The place count is not 

478 used here. When thick lvm is used, the size for one volume should 

479 be equal to the virtual volume size. With thin lvm, the size is equal 

480 or lower to the volume size. 

481 :return: The allocated size of all volumes. 

482 :rtype: int 

483 """ 

484 

485 # Paths: /res_name/vol_number/size 

486 sizes = {} 

487 

488 for resource in self._get_resource_cache().resources: 

489 if resource.name not in sizes: 

490 current = sizes[resource.name] = {} 

491 else: 

492 current = sizes[resource.name] 

493 

494 for volume in resource.volumes: 

495 # We ignore diskless pools of the form "DfltDisklessStorPool". 

496 if volume.storage_pool_name != self._group_name: 

497 continue 

498 

499 allocated_size = max(volume.allocated_size, 0) 

500 current_allocated_size = current.get(volume.number) or -1 

501 if allocated_size > current_allocated_size: 

502 current[volume.number] = allocated_size 

503 

504 total_size = 0 

505 for volumes in sizes.values(): 

506 for size in volumes.values(): 

507 total_size += size 

508 

509 return total_size * 1024 

510 

511 def get_min_physical_size(self): 

512 """ 

513 Give the minimum physical size of the SR. 

514 I.e. the size of the smallest disk + the number of pools. 

515 :return: The physical min size. 

516 :rtype: tuple(int, int) 

517 """ 

518 size = None 

519 pool_count = 0 

520 for pool in self._get_storage_pools(force=True): 

521 space = pool.free_space 

522 if space: 

523 pool_count += 1 

524 current_size = space.total_capacity 

525 if current_size < 0: 

526 raise LinstorVolumeManagerError( 

527 'Failed to get pool total_capacity attr of `{}`' 

528 .format(pool.node_name) 

529 ) 

530 if size is None or current_size < size: 

531 size = current_size 

532 return (pool_count, (size or 0) * 1024) 

533 

534 @property 

535 def metadata(self): 

536 """ 

537 Get the metadata of the SR. 

538 :return: Dictionary that contains metadata. 

539 :rtype: dict(str, dict) 

540 """ 

541 

542 sr_properties = self._get_sr_properties() 

543 metadata = sr_properties.get(self.PROP_METADATA) 

544 if metadata is not None: 

545 metadata = json.loads(metadata) 

546 if isinstance(metadata, dict): 

547 return metadata 

548 raise LinstorVolumeManagerError( 

549 'Expected dictionary in SR metadata: {}'.format( 

550 self._group_name 

551 ) 

552 ) 

553 

554 return {} 

555 

    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        # The property store only holds strings, so serialize to JSON.
        sr_properties = self._get_sr_properties()
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)

566 

567 @property 

568 def disconnected_hosts(self): 

569 """ 

570 Get the list of disconnected hosts. 

571 :return: Set that contains disconnected hosts. 

572 :rtype: set(str) 

573 """ 

574 

575 disconnected_hosts = set() 

576 for pool in self._get_storage_pools(): 

577 for report in pool.reports: 

578 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ 

579 linstor.consts.WARN_NOT_CONNECTED: 

580 disconnected_hosts.add(pool.node_name) 

581 break 

582 return disconnected_hosts 

583 

    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        # Only consults the in-memory set built at construction time.
        return volume_uuid in self._volumes

591 

592 def create_volume( 

593 self, 

594 volume_uuid, 

595 size, 

596 persistent=True, 

597 volume_name=None, 

598 high_availability=False 

599 ): 

600 """ 

601 Create a new volume on the SR. 

602 :param str volume_uuid: The volume uuid to use. 

603 :param int size: volume size in B. 

604 :param bool persistent: If false the volume will be unavailable 

605 on the next constructor call LinstorSR(...). 

606 :param str volume_name: If set, this name is used in the LINSTOR 

607 database instead of a generated name. 

608 :param bool high_availability: If set, the volume is created in 

609 the HA group. 

610 :return: The current device path of the volume. 

611 :rtype: str 

612 """ 

613 

614 self._logger('Creating LINSTOR volume {}...'.format(volume_uuid)) 

615 if not volume_name: 

616 volume_name = self.build_volume_name(util.gen_uuid()) 

617 volume_properties = self._create_volume_with_properties( 

618 volume_uuid, 

619 volume_name, 

620 size, 

621 True, # place_resources 

622 high_availability 

623 ) 

624 

625 # Volume created! Now try to find the device path. 

626 try: 

627 self._logger( 

628 'Find device path of LINSTOR volume {}...'.format(volume_uuid) 

629 ) 

630 device_path = self._find_device_path(volume_uuid, volume_name) 

631 if persistent: 

632 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

633 self._volumes.add(volume_uuid) 

634 self._logger( 

635 'LINSTOR volume {} created!'.format(volume_uuid) 

636 ) 

637 return device_path 

638 except Exception: 

639 # There is an issue to find the path. 

640 # At this point the volume has just been created, so force flag can be used. 

641 self._destroy_volume(volume_uuid, force=True) 

642 raise 

643 

644 def mark_volume_as_persistent(self, volume_uuid): 

645 """ 

646 Mark volume as persistent if created with persistent=False. 

647 :param str volume_uuid: The volume uuid to mark. 

648 """ 

649 

650 self._ensure_volume_exists(volume_uuid) 

651 

652 # Mark volume as persistent. 

653 volume_properties = self._get_volume_properties(volume_uuid) 

654 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

655 

656 def destroy_volume(self, volume_uuid): 

657 """ 

658 Destroy a volume. 

659 :param str volume_uuid: The volume uuid to destroy. 

660 """ 

661 

662 self._ensure_volume_exists(volume_uuid) 

663 self.ensure_volume_is_not_locked(volume_uuid) 

664 

665 is_volume_in_use = any(node["in-use"] for node in self.get_resource_info(volume_uuid)["nodes"].values()) 

666 if is_volume_in_use: 

667 raise LinstorVolumeManagerError( 

668 f"Could not destroy volume `{volume_uuid}` as it is currently in use", 

669 LinstorVolumeManagerError.ERR_VOLUME_IN_USE 

670 ) 

671 

672 # Mark volume as destroyed. 

673 volume_properties = self._get_volume_properties(volume_uuid) 

674 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS 

675 

676 try: 

677 self._volumes.remove(volume_uuid) 

678 self._destroy_volume(volume_uuid) 

679 except Exception as e: 

680 raise LinstorVolumeManagerError( 

681 str(e), 

682 LinstorVolumeManagerError.ERR_VOLUME_DESTROY 

683 ) 

684 

685 def lock_volume(self, volume_uuid, locked=True): 

686 """ 

687 Prevent modifications of the volume properties during 

688 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked 

689 when used. This method is useful to attach/detach correctly a volume on 

690 a slave. Without it the GC can rename a volume, in this case the old 

691 volume path can be used by a slave... 

692 :param str volume_uuid: The volume uuid to protect/unprotect. 

693 :param bool locked: Lock/unlock the volume. 

694 """ 

695 

696 self._ensure_volume_exists(volume_uuid) 

697 

698 self._logger( 

699 '{} volume {} as locked'.format( 

700 'Mark' if locked else 'Unmark', 

701 volume_uuid 

702 ) 

703 ) 

704 

705 volume_properties = self._get_volume_properties(volume_uuid) 

706 if locked: 

707 volume_properties[ 

708 self.PROP_IS_READONLY_TIMESTAMP 

709 ] = str(time.time()) 

710 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: 

711 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) 

712 

    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Ensure none of the given volumes is locked. Expired locks are
        removed on the fly; otherwise wait (bounded by `timeout` if
        given) until every lock is released.
        :param volume_uuids: The volume uuids to check.
        :param int timeout: Optional limit in seconds before raising.
        :raises LinstorVolumeManagerError: If a volume is still locked
        when `timeout` is exceeded.
        """
        # Only consider uuids that are actually known volumes.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore): drop it from the watch set.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # The lock expired: remove it instead of waiting.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # At least one volume is still locked: stop scanning
                # and go wait.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')

783 

784 def remove_volume_if_diskless(self, volume_uuid): 

785 """ 

786 Remove disless path from local node. 

787 :param str volume_uuid: The volume uuid to remove. 

788 """ 

789 

790 self._ensure_volume_exists(volume_uuid) 

791 

792 volume_properties = self._get_volume_properties(volume_uuid) 

793 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

794 

795 node_name = socket.gethostname() 

796 

797 for resource in self._get_resource_cache().resources: 

798 if resource.name == volume_name and resource.node_name == node_name: 

799 if linstor.consts.FLAG_TIE_BREAKER in resource.flags: 

800 return 

801 break 

802 

803 result = self._linstor.resource_delete_if_diskless( 

804 node_name=node_name, rsc_name=volume_name 

805 ) 

806 if not linstor.Linstor.all_api_responses_no_error(result): 

807 raise LinstorVolumeManagerError( 

808 'Unable to delete diskless path of `{}` on node `{}`: {}' 

809 .format(volume_name, node_name, ', '.join( 

810 [str(x) for x in result])) 

811 ) 

812 

    def introduce_volume(self, volume_uuid):
        """
        (Not implemented) Introduce an existing volume into the SR.
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.

815 

816 def resize_volume(self, volume_uuid, new_size): 

817 """ 

818 Resize a volume. 

819 :param str volume_uuid: The volume uuid to resize. 

820 :param int new_size: New size in B. 

821 """ 

822 

823 volume_name = self.get_volume_name(volume_uuid) 

824 self.ensure_volume_is_not_locked(volume_uuid) 

825 new_size = self.round_up_volume_size(new_size) // 1024 

826 

827 # We can't resize anything until DRBD is up to date. 

828 # We wait here for 5min max and raise an easy to understand error for the user. 

829 # 5min is an arbitrary time, it's impossible to get a fit all situation value 

830 # and it's currently impossible to know how much time we have to wait 

831 # This is mostly an issue for thick provisioning, thin isn't affected. 

832 start_time = time.monotonic() 

833 try: 

834 self._linstor.resource_dfn_wait_synced(volume_name, wait_interval=1.0, timeout=60*5) 

835 except linstor.LinstorTimeoutError: 

836 raise LinstorVolumeManagerError( 

837 f"Volume resizing of `{volume_uuid}` from SR `{self._group_name}` is incomplete: timeout reached but it continues in background." 

838 ) 

839 util.SMlog(f"DRBD is up to date, syncing took {time.monotonic() - start_time}s") 

840 

841 result = self._linstor.volume_dfn_modify( 

842 rsc_name=volume_name, 

843 volume_nr=0, 

844 size=new_size 

845 ) 

846 

847 self._mark_resource_cache_as_dirty() 

848 

849 error_str = self._get_error_str(result) 

850 if error_str: 

851 raise LinstorVolumeManagerError( 

852 f"Could not resize volume `{volume_uuid}` from SR `{self._group_name}`: {error_str}" 

853 ) 

854 

855 def get_volume_name(self, volume_uuid): 

856 """ 

857 Get the name of a particular volume. 

858 :param str volume_uuid: The volume uuid of the name to get. 

859 :return: The volume name. 

860 :rtype: str 

861 """ 

862 

863 self._ensure_volume_exists(volume_uuid) 

864 volume_properties = self._get_volume_properties(volume_uuid) 

865 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

866 if volume_name: 

867 return volume_name 

868 raise LinstorVolumeManagerError( 

869 'Failed to get volume name of {}'.format(volume_uuid) 

870 ) 

871 

872 def get_volume_size(self, volume_uuid): 

873 """ 

874 Get the size of a particular volume. 

875 :param str volume_uuid: The volume uuid of the size to get. 

876 :return: The volume size. 

877 :rtype: int 

878 """ 

879 

880 volume_name = self.get_volume_name(volume_uuid) 

881 dfns = self._linstor.resource_dfn_list_raise( 

882 query_volume_definitions=True, 

883 filter_by_resource_definitions=[volume_name] 

884 ).resource_definitions 

885 

886 size = dfns[0].volume_definitions[0].size 

887 if size < 0: 

888 raise LinstorVolumeManagerError( 

889 'Failed to get volume size of: {}'.format(volume_uuid) 

890 ) 

891 return size * 1024 

892 

893 def set_auto_promote_timeout(self, volume_uuid, timeout): 

894 """ 

895 Define the blocking time of open calls when a DRBD 

896 is already open on another host. 

897 :param str volume_uuid: The volume uuid to modify. 

898 """ 

899 

900 volume_name = self.get_volume_name(volume_uuid) 

901 result = self._linstor.resource_dfn_modify(volume_name, { 

902 'DrbdOptions/Resource/auto-promote-timeout': timeout 

903 }) 

904 error_str = self._get_error_str(result) 

905 if error_str: 

906 raise LinstorVolumeManagerError( 

907 'Could not change the auto promote timeout of `{}`: {}' 

908 .format(volume_uuid, error_str) 

909 ) 

910 

911 def set_drbd_ha_properties(self, volume_name, enabled=True): 

912 """ 

913 Set or not HA DRBD properties required by drbd-reactor and 

914 by specific volumes. 

915 :param str volume_name: The volume to modify. 

916 :param bool enabled: Enable or disable HA properties. 

917 """ 

918 

919 properties = { 

920 'DrbdOptions/auto-quorum': 'disabled', 

921 'DrbdOptions/Resource/auto-promote': 'no', 

922 'DrbdOptions/Resource/on-no-data-accessible': 'io-error', 

923 'DrbdOptions/Resource/on-no-quorum': 'io-error', 

924 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary', 

925 'DrbdOptions/Resource/quorum': 'majority' 

926 } 

927 if enabled: 

928 result = self._linstor.resource_dfn_modify(volume_name, properties) 

929 else: 

930 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys())) 

931 

932 error_str = self._get_error_str(result) 

933 if error_str: 

934 raise LinstorVolumeManagerError( 

935 'Could not modify HA DRBD properties on volume `{}`: {}' 

936 .format(volume_name, error_str) 

937 ) 

938 

    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._get_volumes_info()[volume_name]

    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)

960 

961 def get_volume_uuid_from_device_path(self, device_path): 

962 """ 

963 Get the volume uuid of a device_path. 

964 :param str device_path: The dev path to find the volume uuid. 

965 :return: The volume uuid of the local device path. 

966 :rtype: str 

967 """ 

968 

969 expected_volume_name = \ 

970 self.get_volume_name_from_device_path(device_path) 

971 

972 volume_names = self.get_volumes_with_name() 

973 for volume_uuid, volume_name in volume_names.items(): 

974 if volume_name == expected_volume_name: 

975 return volume_uuid 

976 

977 raise LinstorVolumeManagerError( 

978 'Unable to find volume uuid from dev path `{}`'.format(device_path) 

979 ) 

980 

def get_volume_name_from_device_path(self, device_path):
    """
    Get the volume name of a device_path.
    :param str device_path: The dev path to find the volume name.
    :return: The volume name of the device path.
    :rtype: str
    """
    # Two supported path layouts:
    # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
    # - "../xcp-volume-<UUID>/0"
    if device_path.startswith(DRBD_BY_RES_PATH):
        start = len(DRBD_BY_RES_PATH)
    else:
        assert device_path.startswith('../')
        start = 3

    # The resource name is the path component before the volume number.
    separator = device_path.find('/', start)
    assert separator != -1
    return device_path[start:separator]

1001 

def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
    """
    Change the uuid of a volume.

    The KV store is updated in a crash-safe order: the new namespace is
    first marked with PROP_UPDATING_UUID_SRC so a half-done copy can be
    detected and rolled back later.

    :param str volume_uuid: The volume to modify.
    :param str new_volume_uuid: The new volume uuid to use.
    :param bool force: If true we doesn't check if volume_uuid is in the
    volume list. I.e. the volume can be marked as deleted but the volume
    can still be in the LINSTOR KV store if the deletion has failed.
    In specific cases like "undo" after a failed clone we must rename a bad
    deleted VDI.
    """

    self._logger(
        'Trying to update volume UUID {} to {}...'
        .format(volume_uuid, new_volume_uuid)
    )
    assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

    # NOTE(review): indentation reconstructed from a coverage dump —
    # only the existence check is gated by `force` (matches the
    # docstring); the lock check always runs. TODO confirm upstream.
    if not force:
        self._ensure_volume_exists(volume_uuid)
    self.ensure_volume_is_not_locked(volume_uuid)

    if new_volume_uuid in self._volumes:
        raise LinstorVolumeManagerError(
            'Volume `{}` already exists'.format(new_volume_uuid),
            LinstorVolumeManagerError.ERR_VOLUME_EXISTS
        )

    # Refuse to start if a previous rename left the source marked.
    volume_properties = self._get_volume_properties(volume_uuid)
    if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
        raise LinstorVolumeManagerError(
            'Cannot update volume uuid {}: invalid state'
            .format(volume_uuid)
        )

    # 1. Copy in temp variables metadata and volume_name.
    metadata = volume_properties.get(self.PROP_METADATA)
    volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

    # 2. Switch to new volume namespace.
    volume_properties.namespace = self._build_volume_namespace(
        new_volume_uuid
    )

    # The target namespace must be empty before we write into it.
    if list(volume_properties.items()):
        raise LinstorVolumeManagerError(
            'Cannot update volume uuid {} to {}: '
            .format(volume_uuid, new_volume_uuid) +
            'this last one is not empty'
        )

    try:
        # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
        # If we crash after that, the new properties can be removed
        # properly.
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
        volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

        # 4. Copy the properties.
        # Note: On new volumes, during clone for example, the metadata
        # may be missing. So we must test it to avoid this error:
        # "None has to be a str/unicode, but is <type 'NoneType'>"
        if metadata:
            volume_properties[self.PROP_METADATA] = metadata
        volume_properties[self.PROP_VOLUME_NAME] = volume_name

        # 5. Ok!
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    except Exception as err:
        try:
            # Clear the new volume properties in case of failure.
            assert volume_properties.namespace == \
                self._build_volume_namespace(new_volume_uuid)
            volume_properties.clear()
        except Exception as e:
            # Best effort only: the original error below matters more.
            self._logger(
                'Failed to clear new volume properties: {} (ignoring...)'
                .format(e)
            )
        raise LinstorVolumeManagerError(
            'Failed to copy volume properties: {}'.format(err)
        )

    try:
        # 6. After this point, it's ok we can remove the
        # PROP_UPDATING_UUID_SRC property and clear the src properties
        # without problems.

        # 7. Switch to old volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            volume_uuid
        )
        volume_properties.clear()

        # 8. Switch a last time to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )
        volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Failed to clear volume properties '
            'after volume uuid update: {}'.format(e)
        )

    # Keep the in-memory volume set in sync with the KV store.
    try:
        self._volumes.remove(volume_uuid)
    except KeyError:
        # Can be missing if we are building the volume set attr AND
        # we are processing a deleted resource.
        assert force

    self._volumes.add(new_volume_uuid)

    self._logger(
        'UUID update succeeded of {} to {}! (properties={})'
        .format(
            volume_uuid, new_volume_uuid,
            self._get_filtered_properties(volume_properties)
        )
    )

1123 

def update_volume_name(self, volume_uuid, volume_name):
    """
    Change the volume name of a volume.
    :param str volume_uuid: The volume to modify.
    :param str volume_name: The volume_name to use.
    :raise LinstorVolumeManagerError: If the name lacks the required
    prefix or doesn't match an existing LINSTOR resource.
    """

    self._ensure_volume_exists(volume_uuid)
    self.ensure_volume_is_not_locked(volume_uuid)
    if not volume_name.startswith(self.PREFIX_VOLUME):
        # Fixed error message grammar ("must be start with" -> "must start with").
        raise LinstorVolumeManagerError(
            'Volume name `{}` must start with `{}`'
            .format(volume_name, self.PREFIX_VOLUME)
        )

    # The new name must refer to a real LINSTOR resource.
    if volume_name not in self._fetch_resource_names():
        raise LinstorVolumeManagerError(
            'Volume `{}` doesn\'t exist'.format(volume_name)
        )

    volume_properties = self._get_volume_properties(volume_uuid)
    volume_properties[self.PROP_VOLUME_NAME] = volume_name

1146 

def get_usage_states(self, volume_uuid):
    """
    Check if a volume is currently used.
    :param str volume_uuid: The volume uuid to check.
    :return: A dictionary that contains states.
    :rtype: dict(str, bool or None)
    """
    volume_name = self.get_volume_name(volume_uuid)
    resource_states = self._linstor.resource_list_raise(
        filter_by_resources=[volume_name]
    ).resource_states

    # Map each node to its in-use flag (None when LINSTOR can't tell).
    return {
        state.node_name: state.in_use for state in resource_states
    }

1164 

def get_volume_openers(self, volume_uuid):
    """
    Get openers of a volume.
    :param str volume_uuid: The volume uuid to monitor.
    :return: A dictionary that contains openers.
    :rtype: dict(str, obj)
    """
    resource_name = self.get_volume_name(volume_uuid)
    # Volume number is always '0' here — presumably one DRBD volume
    # per resource in this driver; confirm against resource creation.
    return get_all_volume_openers(resource_name, '0')

1173 

def get_volumes_with_name(self):
    """
    Give a volume dictionary that contains names actually owned.
    :return: A volume/name dict.
    :rtype: dict(str, str)
    """
    name_property = self.REG_VOLUME_NAME
    return self._get_volumes_by_property(name_property)

1181 

def get_volumes_with_info(self):
    """
    Give a volume dictionary that contains VolumeInfos.
    :return: A volume/VolumeInfo dict.
    :rtype: dict(str, VolumeInfo)
    """
    volume_names = self.get_volumes_with_name()
    all_info = self._get_volumes_info(volume_names)

    result = {}
    for uuid_, name in volume_names.items():
        info = all_info.get(name) if name else None
        # Missing info means LINSTOR has been used directly without
        # going through this API: fall back to an empty VolumeInfo.
        result[uuid_] = info if info else self.VolumeInfo('')

    return result

1205 

def get_volumes_with_metadata(self):
    """
    Give a volume dictionary that contains metadata.
    :return: A volume/metadata dict.
    :rtype: dict(str, dict)
    """
    result = {}

    raw_metadata = self._get_volumes_by_property(self.REG_METADATA)
    for uuid_, serialized in raw_metadata.items():
        if not serialized:
            # No stored metadata: expose an empty dict.
            result[uuid_] = {}
            continue

        parsed = json.loads(serialized)
        if not isinstance(parsed, dict):
            raise LinstorVolumeManagerError(
                'Expected dictionary in volume metadata: {}'
                .format(uuid_)
            )
        result[uuid_] = parsed

    return result

1230 

def get_volume_metadata(self, volume_uuid):
    """
    Get the metadata of a volume.
    :return: Dictionary that contains metadata.
    :rtype: dict
    """
    self._ensure_volume_exists(volume_uuid)
    props = self._get_volume_properties(volume_uuid)

    serialized = props.get(self.PROP_METADATA)
    if not serialized:
        # No metadata stored yet.
        return {}

    parsed = json.loads(serialized)
    if not isinstance(parsed, dict):
        raise LinstorVolumeManagerError(
            'Expected dictionary in volume metadata: {}'
            .format(volume_uuid)
        )
    return parsed

1250 

def set_volume_metadata(self, volume_uuid, metadata):
    """
    Set the metadata of a volume (replaces any previous content).
    :param dict metadata: Dictionary that contains metadata.
    """
    self._ensure_volume_exists(volume_uuid)
    self.ensure_volume_is_not_locked(volume_uuid)

    assert isinstance(metadata, dict)
    props = self._get_volume_properties(volume_uuid)
    props[self.PROP_METADATA] = json.dumps(metadata)

1263 

def update_volume_metadata(self, volume_uuid, metadata):
    """
    Update the metadata of a volume. It modify only the given keys.
    It doesn't remove unreferenced key instead of set_volume_metadata.
    :param dict metadata: Dictionary that contains metadata.
    :raise LinstorVolumeManagerError: If the stored metadata is not a
    JSON dictionary.
    """

    self._ensure_volume_exists(volume_uuid)
    self.ensure_volume_is_not_locked(volume_uuid)

    assert isinstance(metadata, dict)
    volume_properties = self._get_volume_properties(volume_uuid)

    current_metadata = json.loads(
        volume_properties.get(self.PROP_METADATA, '{}')
    )
    # Fix: validate the *stored* metadata here. The previous code
    # re-checked the `metadata` argument, which was already asserted
    # to be a dict above, so corrupt stored values went undetected.
    if not isinstance(current_metadata, dict):
        raise LinstorVolumeManagerError(
            'Expected dictionary in volume metadata: {}'
            .format(volume_uuid)
        )

    # Merge the given keys over the stored ones.
    current_metadata.update(metadata)
    volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)

1289 

def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
    """
    Clone a volume. Do not copy the data, this method creates a new volume
    with the same size.
    :param str volume_uuid: The volume to clone.
    :param str clone_uuid: The cloned volume.
    :param bool persistent: If false the volume will be unavailable
    on the next constructor call LinstorSR(...).
    :return: The current device path of the cloned volume.
    :rtype: str
    """
    volume_name = self.get_volume_name(volume_uuid)
    self.ensure_volume_is_not_locked(volume_uuid)

    # Fetch the size of the source volume...
    _, size = self._get_volume_node_names_and_size(volume_name)
    if size <= 0:
        raise LinstorVolumeManagerError(
            'Invalid size of {} for volume `{}`'.format(size, volume_name)
        )

    # ...and create an empty volume of that size.
    return self.create_volume(clone_uuid, size, persistent)

1316 

def remove_resourceless_volumes(self):
    """
    Remove all volumes without valid or non-empty name
    (i.e. without LINSTOR resource). It's different than
    LinstorVolumeManager constructor that takes a `repair` param that
    removes volumes with `PROP_NOT_EXISTS` to 1.
    """
    known_resources = self._fetch_resource_names()
    for uuid_, name in self.get_volumes_with_name().items():
        has_resource = bool(name) and name in known_resources
        if not has_resource:
            # Don't force, we can be sure of what's happening.
            self.destroy_volume(uuid_)

1330 

def destroy(self):
    """
    Destroy this SR. Object should not be used after that.
    :raise LinstorVolumeManagerError: If volumes still exist in the SR.
    """

    # 1. Ensure volume list is empty. No cost.
    if self._volumes:
        raise LinstorVolumeManagerError(
            'Cannot destroy LINSTOR volume manager: '
            'It exists remaining volumes'
        )

    # 2. Fetch ALL resource names.
    # This list may therefore contain volumes created outside
    # the scope of the driver.
    resource_names = self._fetch_resource_names(ignore_deleted=False)
    try:
        resource_names.remove(DATABASE_VOLUME_NAME)
    except KeyError:
        # Really strange to reach that point.
        # Normally we always have the database volume in the list.
        pass

    # 3. Ensure the resource name list is entirely empty...
    if resource_names:
        raise LinstorVolumeManagerError(
            'Cannot destroy LINSTOR volume manager: '
            'It exists remaining volumes (created externally or being deleted)'
        )

    # 4. Destroying...
    # Remember the initial controller state so it can be restored on
    # failure below.
    controller_is_running = self._controller_is_running()
    uri = 'linstor://localhost'
    try:
        if controller_is_running:
            self._start_controller(start=False)

        # 4.1. Umount LINSTOR database.
        self._mount_database_volume(
            self.build_device_path(DATABASE_VOLUME_NAME),
            mount=False,
            force=True
        )

        # 4.2. Refresh instance.
        self._start_controller(start=True)
        self._linstor = self._create_linstor_instance(
            uri, keep_uri_unmodified=True
        )

        # 4.3. Destroy database volume.
        self._destroy_resource(DATABASE_VOLUME_NAME)

        # 4.4. Refresh linstor connection.
        # Without we get this error:
        # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
        # Because the deletion of the databse was not seen by Linstor for some reason.
        # It seems a simple refresh of the Linstor connection make it aware of the deletion.
        self._linstor.disconnect()
        self._linstor.connect()

        # 4.5. Destroy remaining drbd nodes on hosts.
        # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
        # It needs to be done locally by each host so we go through the linstor-manager plugin.
        # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
        # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
        session = util.timeout_call(5, util.get_localAPI_session)
        for host_ref in session.xenapi.host.get_all():
            try:
                # Best effort: a failure on one host is logged but must
                # not abort the whole SR destruction.
                response = session.xenapi.host.call_plugin(
                    host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                )
            except Exception as e:
                util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

        # 4.6. Destroy group and storage pools.
        self._destroy_resource_group(self._linstor, self._group_name)
        self._destroy_resource_group(self._linstor, self._ha_group_name)
        for pool in self._get_storage_pools(force=True):
            self._destroy_storage_pool(
                self._linstor, pool.name, pool.node_name
            )
    except Exception as e:
        # Put the controller back in its initial state before
        # propagating the error.
        self._start_controller(start=controller_is_running)
        raise e

    # Final cleanup of the local database directory; failures here are
    # harmless since the SR is already gone.
    try:
        self._start_controller(start=False)
        for file in os.listdir(DATABASE_PATH):
            if file != 'lost+found':
                os.remove(DATABASE_PATH + '/' + file)
    except Exception as e:
        util.SMlog(
            'Ignoring failure after LINSTOR SR destruction: {}'
            .format(e)
        )

1428 

def find_up_to_date_diskful_nodes(self, volume_uuid):
    """
    Find all nodes that contain a specific volume using diskful disks.
    Only disks in the 'UpToDate' state are kept.
    :param str volume_uuid: The volume to use.
    :return: The available nodes.
    :rtype: tuple(set(str), str)
    """
    volume_name = self.get_volume_name(volume_uuid)

    owner = None
    diskful_nodes = set()

    for state in self._get_resource_cache().resource_states:
        if state.name != volume_name:
            continue
        if state.volume_states[0].disk_state == 'UpToDate':
            diskful_nodes.add(state.node_name)
        if state.in_use:
            owner = state.node_name

    return (diskful_nodes, owner)

1456 

def invalidate_resource_cache(self):
    """
    Force the next resource query to contact LINSTOR again. Must be
    called when external commands (vhdutil, ...) may have modified
    resources behind this manager's back.
    """
    self._mark_resource_cache_as_dirty()

1464 

def has_node(self, node_name):
    """
    Check if a node exists in the LINSTOR database.
    :rtype: bool
    """
    reply = self._linstor.node_list()

    failure = self._get_error_str(reply)
    if failure:
        raise LinstorVolumeManagerError(
            'Failed to list nodes using `{}`: {}'
            .format(node_name, failure)
        )

    node = reply[0].node(node_name)
    return bool(node)

1478 

def create_node(self, node_name, ip):
    """
    Create a new node in the LINSTOR database.
    :param str node_name: Node name to use.
    :param str ip: Host IP to communicate.
    """
    reply = self._linstor.node_create(
        node_name, linstor.consts.VAL_NODE_TYPE_CMBD, ip
    )

    failures = self._filter_errors(reply)
    if not failures:
        return
    raise LinstorVolumeManagerError(
        'Failed to create node `{}`: {}'.format(
            node_name, self._get_error_str(failures)
        )
    )

1496 

def destroy_node(self, node_name):
    """
    Destroy a node in the LINSTOR database.
    :param str node_name: Node name to remove.
    """
    reply = self._linstor.node_delete(node_name)

    failures = self._filter_errors(reply)
    if not failures:
        return
    raise LinstorVolumeManagerError(
        'Failed to destroy node `{}`: {}'.format(
            node_name, self._get_error_str(failures)
        )
    )

1509 

def create_node_interface(self, node_name, name, ip):
    """
    Create a new node interface in the LINSTOR database.
    :param str node_name: Node name of the interface to use.
    :param str name: Interface to create.
    :param str ip: IP of the interface.
    """
    reply = self._linstor.netinterface_create(node_name, name, ip)

    failures = self._filter_errors(reply)
    if not failures:
        return
    raise LinstorVolumeManagerError(
        'Failed to create node interface on `{}`: {}'.format(
            node_name, self._get_error_str(failures)
        )
    )

1524 

def destroy_node_interface(self, node_name, name):
    """
    Destroy a node interface in the LINSTOR database.
    :param str node_name: Node name of the interface to remove.
    :param str name: Interface to remove.
    """
    # The default interface is required by LINSTOR for communication.
    if name == 'default':
        raise LinstorVolumeManagerError(
            'Unable to delete the default interface of a node!'
        )

    reply = self._linstor.netinterface_delete(node_name, name)

    failures = self._filter_errors(reply)
    if not failures:
        return
    raise LinstorVolumeManagerError(
        'Failed to destroy node interface on `{}`: {}'.format(
            node_name, self._get_error_str(failures)
        )
    )

1544 

def modify_node_interface(self, node_name, name, ip):
    """
    Modify a node interface in the LINSTOR database. Create it if necessary.
    :param str node_name: Node name of the interface to use.
    :param str name: Interface to modify or create.
    :param str ip: IP of the interface.
    """
    # Try a plain creation first...
    reply = self._linstor.netinterface_create(node_name, name, ip)
    failures = self._filter_errors(reply)
    if not failures:
        return

    # ...and fall back to a modification when it already exists.
    if self._check_errors(failures, [linstor.consts.FAIL_EXISTS_NET_IF]):
        reply = self._linstor.netinterface_modify(node_name, name, ip)
        failures = self._filter_errors(reply)
        if not failures:
            return

    raise LinstorVolumeManagerError(
        'Unable to modify interface on `{}`: {}'.format(
            node_name, self._get_error_str(failures)
        )
    )

1567 

def list_node_interfaces(self, node_name):
    """
    List all node interfaces.
    :param str node_name: Node name to use to list interfaces.
    :return: Interface name mapped to its address and active flag.
    :rtype: dict(str, dict)
    """
    result = self._linstor.net_interface_list(node_name)
    if not result:
        raise LinstorVolumeManagerError(
            'Unable to list interfaces on `{}`: no list received'.format(node_name)
        )

    interfaces = {}
    for entry in result:
        # Extract the raw REST payload of each interface.
        data = entry._rest_data
        interfaces[data['name']] = {
            'address': data['address'],
            'active': data['is_active']
        }
    return interfaces

1589 

def get_node_preferred_interface(self, node_name):
    """
    Get the preferred interface used by a node.
    :param str node_name: Node name of the interface to get.
    :rtype: str
    :raise LinstorVolumeManagerError: If the node list cannot be fetched
    or the node is unknown.
    """
    try:
        nodes = self._linstor.node_list_raise([node_name]).nodes
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Failed to get preferred interface: `{}`'.format(e)
        )

    # Fix: the previous code returned the empty node list here,
    # violating the documented str return type when the node is
    # unknown. Raise an explicit error instead.
    if not nodes:
        raise LinstorVolumeManagerError(
            'Failed to get preferred interface: node `{}` not found'
            .format(node_name)
        )

    properties = nodes[0].props
    return properties.get('PrefNic', 'default')

1606 

def set_node_preferred_interface(self, node_name, name):
    """
    Set the preferred interface to use on a node.
    :param str node_name: Node name of the interface.
    :param str name: Preferred interface to use.
    """
    reply = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})

    failures = self._filter_errors(reply)
    if not failures:
        return
    raise LinstorVolumeManagerError(
        'Failed to set preferred node interface on `{}`: {}'.format(
            node_name, self._get_error_str(failures)
        )
    )

1620 

def get_nodes_info(self):
    """
    Get all nodes + statuses, used or not by the pool.
    :rtype: dict(str, dict)
    """
    try:
        return {
            node.name: node.connection_status
            for node in self._linstor.node_list_raise().nodes
        }
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Failed to get all nodes: `{}`'.format(e)
        )

1635 

def get_storage_pools_info(self):
    """
    Give all storage pools of current group name.
    :rtype: dict(str, list)
    """
    pools_by_node = {}
    for pool in self._get_storage_pools(force=True):
        # -1 means "unknown" for both values.
        free_size = -1
        capacity = -1

        space = pool.free_space
        if space:
            # LINSTOR reports KiB; convert to bytes.
            free = space.free_capacity
            free_size = -1 if free < 0 else free * 1024
            total = space.total_capacity
            capacity = -1 if total <= 0 else total * 1024

        entry = {
            'name': pool.name,
            'linstor-uuid': pool.uuid,
            'free-size': free_size,
            'capacity': capacity
        }
        pools_by_node.setdefault(pool.node_name, []).append(entry)

    return pools_by_node

1670 

def get_resources_info(self):
    """
    Give all resources of current group name.
    :rtype: dict(str, list)
    """
    # Serve the cached snapshot while the resource cache is clean.
    if self._resources_info_cache and not self._resource_cache_dirty:
        return self._resources_info_cache

    resources = {}
    resource_list = self._get_resource_cache()
    volume_names = self.get_volumes_with_name()

    # Pass 1: build the per-resource / per-node structure with the
    # volume sizes and flags.
    for resource in resource_list.resources:
        if resource.name not in resources:
            resources[resource.name] = { 'nodes': {}, 'uuid': '' }
        resource_nodes = resources[resource.name]['nodes']

        resource_nodes[resource.node_name] = {
            'volumes': [],
            'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
            'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
        }
        resource_volumes = resource_nodes[resource.node_name]['volumes']

        for volume in resource.volumes:
            # We ignore diskless pools of the form "DfltDisklessStorPool".
            if volume.storage_pool_name != self._group_name:
                continue

            # Sizes are reported in KiB by LINSTOR; convert to bytes,
            # with -1 meaning "unknown".
            usable_size = volume.usable_size
            if usable_size < 0:
                usable_size = -1
            else:
                usable_size *= 1024

            allocated_size = volume.allocated_size
            if allocated_size < 0:
                allocated_size = -1
            else:
                allocated_size *= 1024

            resource_volumes.append({
                'storage-pool-name': volume.storage_pool_name,
                'linstor-uuid': volume.uuid,
                'number': volume.number,
                'device-path': volume.device_path,
                'usable-size': usable_size,
                'allocated-size': allocated_size
            })

    # Pass 2: merge the runtime states (in-use, per-volume disk state).
    # NOTE(review): assumes every state's (rsc_name, node_name) was seen
    # in pass 1, otherwise this raises KeyError — presumably guaranteed
    # by the LINSTOR resource list; confirm.
    for resource_state in resource_list.resource_states:
        resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
        resource['in-use'] = resource_state.in_use

        volumes = resource['volumes']
        for volume_state in resource_state.volume_states:
            volume = next((x for x in volumes if x['number'] == volume_state.number), None)
            if volume:
                volume['disk-state'] = volume_state.disk_state

    # Pass 3: attach the SM volume uuid to each known resource.
    for volume_uuid, volume_name in volume_names.items():
        resource = resources.get(volume_name)
        if resource:
            resource['uuid'] = volume_uuid

    self._resources_info_cache = resources
    return self._resources_info_cache

1737 

def get_resource_info(self, volume_uuid: str) -> Dict[str, Any]:
    """
    Give a resource info based on its UUID.
    :param volume_uuid str: volume uuid to search for
    :rtype: dict(str, any)
    """
    matching = (
        info for info in self.get_resources_info().values()
        if info["uuid"] == volume_uuid
    )
    for info in matching:
        return info

    raise LinstorVolumeManagerError(
        f"Could not find info about volume `{volume_uuid}`",
        LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
    )

1752 

def get_database_path(self):
    """
    Get the database path.
    :return: The current database path.
    :rtype: str
    """
    # Activate the device if needed before returning its path.
    return self._request_database_path(self._linstor, activate=True)

1760 

@classmethod
def get_all_group_names(cls, base_name):
    """
    Get all group names. I.e. list of current group + HA.
    :param str base_name: The SR group_name to use.
    :return: List of group names.
    :rtype: list
    """
    names = [cls._build_group_name(base_name)]
    names.append(cls._build_ha_group_name(base_name))
    return names

1770 

@classmethod
def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
    """
    Create a new SR on the given nodes.
    :param str group_name: The SR group_name to use.
    :param dict ips: Node name -> IP map (iterated by `_create_sr`).
    :param int redundancy: How many copy of volumes should we store?
    :param bool thin_provisioning: Use thin or thick provisioning.
    :param function logger: Function to log messages.
    :return: A new LinstorSr instance.
    :rtype: LinstorSr
    """

    try:
        # The controller must run during creation; `_create_sr` does
        # the actual work.
        cls._start_controller(start=True)
        sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
    finally:
        # Controller must be stopped and volume unmounted because
        # it is the role of the drbd-reactor daemon to do the right
        # actions.
        cls._start_controller(start=False)
        cls._mount_volume(
            cls.build_device_path(DATABASE_VOLUME_NAME),
            DATABASE_PATH,
            mount=False
        )
    return sr

1798 

1799 @classmethod 

1800 def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__): 

1801 # 1. Check if SR already exists. 

1802 uri = 'linstor://localhost' 

1803 

1804 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1805 

1806 node_names = list(ips.keys()) 

1807 for node_name, ip in ips.items(): 

1808 while True: 

1809 # Try to create node. 

1810 result = lin.node_create( 

1811 node_name, 

1812 linstor.consts.VAL_NODE_TYPE_CMBD, 

1813 ip 

1814 ) 

1815 

1816 errors = cls._filter_errors(result) 

1817 if cls._check_errors( 

1818 errors, [linstor.consts.FAIL_EXISTS_NODE] 

1819 ): 

1820 # If it already exists, remove, then recreate. 

1821 result = lin.node_delete(node_name) 

1822 error_str = cls._get_error_str(result) 

1823 if error_str: 

1824 raise LinstorVolumeManagerError( 

1825 'Failed to remove old node `{}`: {}' 

1826 .format(node_name, error_str) 

1827 ) 

1828 elif not errors: 

1829 break # Created! 

1830 else: 

1831 raise LinstorVolumeManagerError( 

1832 'Failed to create node `{}` with ip `{}`: {}'.format( 

1833 node_name, ip, cls._get_error_str(errors) 

1834 ) 

1835 ) 

1836 

1837 driver_pool_name = group_name 

1838 base_group_name = group_name 

1839 group_name = cls._build_group_name(group_name) 

1840 storage_pool_name = group_name 

1841 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools 

1842 if pools: 

1843 existing_node_names = [pool.node_name for pool in pools] 

1844 raise LinstorVolumeManagerError( 

1845 'Unable to create SR `{}`. It already exists on node(s): {}' 

1846 .format(group_name, existing_node_names) 

1847 ) 

1848 

1849 if lin.resource_group_list_raise( 

1850 cls.get_all_group_names(base_group_name) 

1851 ).resource_groups: 

1852 if not lin.resource_dfn_list_raise().resource_definitions: 

1853 backup_path = cls._create_database_backup_path() 

1854 logger( 

1855 'Group name already exists `{}` without LVs. ' 

1856 'Ignoring and moving the config files in {}'.format(group_name, backup_path) 

1857 ) 

1858 cls._move_files(DATABASE_PATH, backup_path) 

1859 else: 

1860 raise LinstorVolumeManagerError( 

1861 'Unable to create SR `{}`: The group name already exists' 

1862 .format(group_name) 

1863 ) 

1864 

1865 if thin_provisioning: 

1866 driver_pool_parts = driver_pool_name.split('/') 

1867 if not len(driver_pool_parts) == 2: 

1868 raise LinstorVolumeManagerError( 

1869 'Invalid group name using thin provisioning. ' 

1870 'Expected format: \'VG/LV`\'' 

1871 ) 

1872 

1873 # 2. Create storage pool on each node + resource group. 

1874 reg_volume_group_not_found = re.compile( 

1875 ".*Volume group '.*' not found$" 

1876 ) 

1877 

1878 i = 0 

1879 try: 

1880 # 2.a. Create storage pools. 

1881 storage_pool_count = 0 

1882 while i < len(node_names): 

1883 node_name = node_names[i] 

1884 

1885 result = lin.storage_pool_create( 

1886 node_name=node_name, 

1887 storage_pool_name=storage_pool_name, 

1888 storage_driver='LVM_THIN' if thin_provisioning else 'LVM', 

1889 driver_pool_name=driver_pool_name 

1890 ) 

1891 

1892 errors = linstor.Linstor.filter_api_call_response_errors( 

1893 result 

1894 ) 

1895 if errors: 

1896 if len(errors) == 1 and errors[0].is_error( 

1897 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR 

1898 ) and reg_volume_group_not_found.match(errors[0].message): 

1899 logger( 

1900 'Volume group `{}` not found on `{}`. Ignoring...' 

1901 .format(group_name, node_name) 

1902 ) 

1903 cls._destroy_storage_pool(lin, storage_pool_name, node_name) 

1904 else: 

1905 error_str = cls._get_error_str(result) 

1906 raise LinstorVolumeManagerError( 

1907 'Could not create SP `{}` on node `{}`: {}' 

1908 .format(group_name, node_name, error_str) 

1909 ) 

1910 else: 

1911 storage_pool_count += 1 

1912 i += 1 

1913 

1914 if not storage_pool_count: 

1915 raise LinstorVolumeManagerError( 

1916 'Unable to create SR `{}`: No VG group found'.format( 

1917 group_name, 

1918 ) 

1919 ) 

1920 

1921 # 2.b. Create resource groups. 

1922 ha_group_name = cls._build_ha_group_name(base_group_name) 

1923 cls._create_resource_group( 

1924 lin, 

1925 group_name, 

1926 storage_pool_name, 

1927 redundancy, 

1928 True 

1929 ) 

1930 cls._create_resource_group( 

1931 lin, 

1932 ha_group_name, 

1933 storage_pool_name, 

1934 3, 

1935 True 

1936 ) 

1937 

1938 # 3. Create the LINSTOR database volume and mount it. 

1939 try: 

1940 logger('Creating database volume...') 

1941 volume_path = cls._create_database_volume( 

1942 lin, ha_group_name, storage_pool_name, node_names, redundancy 

1943 ) 

1944 except LinstorVolumeManagerError as e: 

1945 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

1946 logger('Destroying database volume after creation fail...') 

1947 cls._force_destroy_database_volume(lin, group_name) 

1948 raise 

1949 

1950 try: 

1951 logger('Mounting database volume...') 

1952 

1953 # First we must disable the controller to move safely the 

1954 # LINSTOR config. 

1955 cls._start_controller(start=False) 

1956 

1957 cls._mount_database_volume(volume_path) 

1958 except Exception as e: 

1959 # Ensure we are connected because controller has been 

1960 # restarted during mount call. 

1961 logger('Destroying database volume after mount fail...') 

1962 

1963 try: 

1964 cls._start_controller(start=True) 

1965 except Exception: 

1966 pass 

1967 

1968 lin = cls._create_linstor_instance( 

1969 uri, keep_uri_unmodified=True 

1970 ) 

1971 cls._force_destroy_database_volume(lin, group_name) 

1972 raise e 

1973 

1974 cls._start_controller(start=True) 

1975 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1976 

1977 # 4. Remove storage pools/resource/volume group in the case of errors. 

1978 except Exception as e: 

1979 logger('Destroying resource group and storage pools after fail...') 

1980 try: 

1981 cls._destroy_resource_group(lin, group_name) 

1982 cls._destroy_resource_group(lin, ha_group_name) 

1983 except Exception as e2: 

1984 logger('Failed to destroy resource group: {}'.format(e2)) 

1985 pass 

1986 j = 0 

1987 i = min(i, len(node_names) - 1) 

1988 while j <= i: 

1989 try: 

1990 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) 

1991 except Exception as e2: 

1992 logger('Failed to destroy resource group: {}'.format(e2)) 

1993 pass 

1994 j += 1 

1995 raise e 

1996 

1997 # 5. Return new instance. 

1998 instance = cls.__new__(cls) 

1999 instance._linstor = lin 

2000 instance._logger = logger 

2001 instance._redundancy = redundancy 

2002 instance._base_group_name = base_group_name 

2003 instance._group_name = group_name 

2004 instance._volumes = set() 

2005 instance._storage_pools_time = 0 

2006 instance._kv_cache = instance._create_kv_cache() 

2007 instance._resource_cache = None 

2008 instance._resource_cache_dirty = True 

2009 instance._volume_info_cache = None 

2010 instance._volume_info_cache_dirty = True 

2011 return instance 

2012 

@classmethod
def build_device_path(cls, volume_name):
    """
    Compute the DRBD by-res device path of a volume.
    :param str volume_name: The volume name to use.
    :return: A device path (which may or may not exist on disk).
    :rtype: str
    """
    device_path = '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
    return device_path

2023 

@classmethod
def build_volume_name(cls, base_name):
    """
    Compute the LINSTOR volume name for a base name (i.e. a UUID).
    :param str base_name: The base name to prefix.
    :return: The prefixed volume name.
    :rtype: str
    """
    volume_name = '{}{}'.format(cls.PREFIX_VOLUME, base_name)
    return volume_name

2033 

@classmethod
def round_up_volume_size(cls, volume_size):
    """
    Round a volume size up to the next multiple of BLOCK_SIZE.
    :param int volume_size: The volume size to align.
    :return: An aligned volume size.
    :rtype: int
    """
    aligned_size = round_up(volume_size, cls.BLOCK_SIZE)
    return aligned_size

2043 

@classmethod
def round_down_volume_size(cls, volume_size):
    """
    Round a volume size down to the previous multiple of BLOCK_SIZE.
    :param int volume_size: The volume size to align.
    :return: An aligned volume size.
    :rtype: int
    """
    aligned_size = round_down(volume_size, cls.BLOCK_SIZE)
    return aligned_size

2053 

2054 # -------------------------------------------------------------------------- 

2055 # Private helpers. 

2056 # -------------------------------------------------------------------------- 

2057 

def _create_kv_cache(self):
    """Rebuild the KV store cache from the root namespace and mark it clean."""
    kv_store = self._create_linstor_kv('/')
    self._kv_cache = kv_store
    self._kv_cache_dirty = False
    return kv_store

2062 

def _get_kv_cache(self):
    """Return the KV store cache, rebuilding it first if marked dirty."""
    if not self._kv_cache_dirty:
        return self._kv_cache
    # `_create_kv_cache` assigns `self._kv_cache` and clears the dirty flag.
    return self._create_kv_cache()

2067 

def _create_resource_cache(self):
    """Refresh the resource list cache from LINSTOR and mark it clean."""
    resources = self._linstor.resource_list_raise()
    self._resource_cache = resources
    self._resource_cache_dirty = False
    return resources

2072 

def _get_resource_cache(self):
    """Return the resource list cache, refreshing it first if marked dirty."""
    if not self._resource_cache_dirty:
        return self._resource_cache
    # `_create_resource_cache` assigns `self._resource_cache` and clears
    # the dirty flag.
    return self._create_resource_cache()

2077 

def _mark_resource_cache_as_dirty(self):
    """Invalidate the resource cache and the volume info derived from it."""
    # The volume info cache is computed from the resource cache, so both
    # must be invalidated together.
    self._volume_info_cache_dirty = True
    self._resource_cache_dirty = True

2081 

2082 # -------------------------------------------------------------------------- 

2083 

def _ensure_volume_exists(self, volume_uuid):
    """
    Assert that a volume UUID is known to this manager.
    :raise LinstorVolumeManagerError: With ERR_VOLUME_NOT_EXISTS if absent.
    """
    if volume_uuid in self._volumes:
        return
    raise LinstorVolumeManagerError(
        'volume `{}` doesn\'t exist'.format(volume_uuid),
        LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
    )

2090 

def _find_best_size_candidates(self):
    """
    Query LINSTOR for the max volume size candidates of this group.
    :raise LinstorVolumeManagerError: If the query reports errors.
    """
    reply = self._linstor.resource_group_qmvs(self._group_name)
    failure = self._get_error_str(reply)
    if failure:
        raise LinstorVolumeManagerError(
            'Failed to get max volume size allowed of SR `{}`: {}'.format(
                self._group_name,
                failure
            )
        )
    return reply[0].candidates

2102 

def _fetch_resource_names(self, ignore_deleted=True):
    """
    Collect the names of resource definitions that belong to this group.
    :param bool ignore_deleted: When True, include definitions flagged
        for deletion; when False, skip them.
    :return: The matching resource names.
    :rtype: set
    """
    group_names = self.get_all_group_names(self._base_group_name)
    dfns = self._linstor.resource_dfn_list_raise().resource_definitions
    return {
        dfn.name
        for dfn in dfns
        if dfn.resource_group_name in group_names and (
            ignore_deleted or
            linstor.consts.FLAG_DELETE not in dfn.flags
        )
    }

2113 

def _get_volumes_info(self, volume_names=None):
    """
    Build a VolumeInfo map (name -> info) for the given resource names.

    Results are cached; the cache is returned directly when it is not
    dirty. Sizes are converted from KiB (LINSTOR unit) to bytes.

    :param volume_names: Names of all volumes registered in the KV store.
        May be provided by the caller to avoid double fetching; when
        omitted, they are fetched via `get_volumes_with_name`.
    :return: Map of volume name to VolumeInfo.
    :raise LinstorVolumeManagerError: If a size cannot be determined.
    """
    all_volume_info = {}

    if not self._volume_info_cache_dirty:
        return self._volume_info_cache

    # `volume_names` MUST contain all volumes registered in the KV store.
    # It can be provided to the function to avoid double fetching.
    if not volume_names:
        volume_names = self.get_volumes_with_name()
        volume_names = set(volume_names.values())

    def process_resource(resource):
        # Accumulate this resource replica into the VolumeInfo entry.
        if resource.name not in all_volume_info:
            current = all_volume_info[resource.name] = self.VolumeInfo(
                resource.name
            )
        else:
            current = all_volume_info[resource.name]

        if linstor.consts.FLAG_DISKLESS not in resource.flags:
            current.diskful.append(resource.node_name)

        for volume in resource.volumes:
            # We ignore diskless pools of the form "DfltDisklessStorPool".
            if volume.storage_pool_name != self._group_name:
                continue
            # Only fetch first volume.
            if volume.number != 0:
                continue

            # Allocated size: keep the largest value across replicas.
            allocated_size = volume.allocated_size
            if allocated_size > current.allocated_size:
                current.allocated_size = allocated_size

            # Virtual size: keep the smallest positive usable size.
            usable_size = volume.usable_size
            if usable_size > 0 and (
                usable_size < current.virtual_size or
                not current.virtual_size
            ):
                current.virtual_size = usable_size

    try:
        for resource in self._get_resource_cache().resources:
            if resource.name in volume_names:
                process_resource(resource)

        for volume in all_volume_info.values():
            if volume.allocated_size <= 0:
                # FIX: report the volume being validated, not the leaked
                # `resource` loop variable from the loop above.
                raise LinstorVolumeManagerError(
                    'Failed to get allocated size of `{}`'.format(volume.name)
                )

            if volume.virtual_size <= 0:
                raise LinstorVolumeManagerError(
                    'Failed to get usable size of `{}`'.format(volume.name)
                )

            # LINSTOR reports sizes in KiB; expose bytes.
            volume.allocated_size *= 1024
            volume.virtual_size *= 1024
    except LinstorVolumeManagerError:
        self._mark_resource_cache_as_dirty()
        raise

    self._volume_info_cache_dirty = False
    self._volume_info_cache = all_volume_info

    return all_volume_info

2177 

def _get_volume_node_names_and_size(self, volume_name):
    """
    Find the nodes hosting a volume and its usable size in bytes.

    The size is the minimum usable size across all replicas (LINSTOR
    reports KiB; the result is converted to bytes).

    :param str volume_name: The resource name to inspect.
    :return: Tuple of (set of node names, size in bytes).
    :rtype: tuple
    :raise LinstorVolumeManagerError: If no positive usable size is found.
    """
    node_names = set()
    size = -1
    for resource in self._linstor.resource_list_raise(
        filter_by_resources=[volume_name]
    ).resources:
        for volume in resource.volumes:
            # We ignore diskless pools of the form "DfltDisklessStorPool".
            if volume.storage_pool_name != self._group_name:
                continue

            node_names.add(resource.node_name)

            usable_size = volume.usable_size
            if usable_size <= 0:
                continue

            if size < 0:
                size = usable_size
            else:
                size = min(size, usable_size)

    if size <= 0:
        # FIX: use `volume_name` — the old code referenced the leaked loop
        # variable `resource`, which is unbound (NameError) when the
        # resource list is empty.
        raise LinstorVolumeManagerError(
            'Failed to get usable size of `{}`'.format(volume_name)
        )

    return (node_names, size * 1024)

2204 

def _compute_size(self, attr):
    """
    Sum a free-space attribute over all storage pools of the group.
    :param str attr: Attribute name read from each pool's `free_space`.
    :return: The total, converted from KiB to bytes.
    :rtype: int
    :raise LinstorVolumeManagerError: If a pool reports a negative value.
    """
    total = 0
    for pool in self._get_storage_pools(force=True):
        free_space = pool.free_space
        if not free_space:
            continue
        value = getattr(free_space, attr)
        if value < 0:
            raise LinstorVolumeManagerError(
                'Failed to get pool {} attr of `{}`'
                .format(attr, pool.node_name)
            )
        total += value
    return total * 1024

2218 

def _get_node_names(self):
    """Return the set of node names that host a storage pool of this group."""
    return {pool.node_name for pool in self._get_storage_pools()}

2224 

def _get_storage_pools(self, force=False):
    """
    Return the storage pools of this group, using a time-based cache.
    :param bool force: When True, bypass the cache and refetch.
    """
    elapsed_time = time.time() - self._storage_pools_time
    if force or elapsed_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
        self._storage_pools = self._linstor.storage_pool_list_raise(
            filter_by_stor_pools=[self._group_name]
        ).storage_pools
        self._storage_pools_time = time.time()
    return self._storage_pools

2236 

def _create_volume(
    self,
    volume_uuid,
    volume_name,
    size,
    place_resources,
    high_availability
):
    """
    Create a LINSTOR resource definition (and optionally place replicas)
    for a new volume.

    :param str volume_uuid: UUID used for error reporting and cleanup.
    :param str volume_name: LINSTOR resource name to create.
    :param int size: Requested size in bytes (rounded up to BLOCK_SIZE).
    :param bool place_resources: When True, auto-place replicas using the
        manager's redundancy.
    :param bool high_availability: When True, spawn the definition in the
        HA resource group instead of the default group.
    :raise LinstorVolumeManagerError: On creation failure; on failure other
        than ERR_VOLUME_EXISTS the partially created volume is destroyed.
    """
    size = self.round_up_volume_size(size)
    # Any creation attempt invalidates the cached resource list.
    self._mark_resource_cache_as_dirty()

    group_name = self._ha_group_name if high_availability else self._group_name
    def create_definition():
        # Spawn the definition only (no data placement yet). If the HA
        # group is missing on the first attempt, create it and retry once.
        first_attempt = True
        while True:
            try:
                self._check_volume_creation_errors(
                    self._linstor.resource_group_spawn(
                        rsc_grp_name=group_name,
                        rsc_dfn_name=volume_name,
                        vlm_sizes=['{}B'.format(size)],
                        definitions_only=True
                    ),
                    volume_uuid,
                    self._group_name
                )
                break
            except LinstorVolumeManagerError as e:
                if (
                    not first_attempt or
                    not high_availability or
                    e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                ):
                    raise

                # Lazily create the missing HA resource group, then retry.
                first_attempt = False
                self._create_resource_group(
                    self._linstor,
                    group_name,
                    self._group_name,
                    3,
                    True
                )

        self._configure_volume_peer_slots(self._linstor, volume_name)

    def clean():
        # Best-effort cleanup: never raise from here, only log.
        try:
            self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
        except Exception as e:
            self._logger(
                'Unable to destroy volume {} after creation fail: {}'
                .format(volume_uuid, e)
            )

    def create():
        try:
            create_definition()
            if place_resources:
                # Basic case when we use the default redundancy of the group.
                self._check_volume_creation_errors(
                    self._linstor.resource_auto_place(
                        rsc_name=volume_name,
                        place_count=self._redundancy,
                        diskless_on_remaining=False
                    ),
                    volume_uuid,
                    self._group_name
                )
        except LinstorVolumeManagerError as e:
            # ERR_VOLUME_EXISTS means another host won the race: do not
            # destroy the resource, just propagate.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                clean()
            raise
        except Exception:
            clean()
            raise

    util.retry(create, maxretry=5)

2315 

def _create_volume_with_properties(
    self,
    volume_uuid,
    volume_name,
    size,
    place_resources,
    high_availability
):
    """
    Create a volume and register it in the KV store.

    The KV entry is written with STATE_CREATING before the LINSTOR
    resource is created, so a crash leaves a detectable half-created
    volume that `_build_volumes` can repair.

    :param str volume_uuid: UUID to register; must not already exist.
    :param str volume_name: LINSTOR resource name; must not already exist.
    :param int size: Requested size in bytes.
    :param bool place_resources: Forwarded to `_create_volume`.
    :param bool high_availability: Forwarded to `_create_volume`.
    :return: The KV properties object scoped to the volume namespace.
    :raise LinstorVolumeManagerError: If the volume/resource/properties
        already exist, or if creation fails.
    """
    if self.check_volume_exists(volume_uuid):
        raise LinstorVolumeManagerError(
            'Could not create volume `{}` from SR `{}`, it already exists'
            .format(volume_uuid, self._group_name) + ' in properties',
            LinstorVolumeManagerError.ERR_VOLUME_EXISTS
        )

    if volume_name in self._fetch_resource_names():
        raise LinstorVolumeManagerError(
            'Could not create volume `{}` from SR `{}`, '.format(
                volume_uuid, self._group_name
            ) + 'resource of the same name already exists in LINSTOR'
        )

    # I am paranoid.
    volume_properties = self._get_volume_properties(volume_uuid)
    if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
        raise LinstorVolumeManagerError(
            'Could not create volume `{}`, '.format(volume_uuid) +
            'properties already exist'
        )

    try:
        # Mark as "creating" BEFORE creating the resource so a crash is
        # detectable later.
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
        volume_properties[self.PROP_VOLUME_NAME] = volume_name

        self._create_volume(
            volume_uuid,
            volume_name,
            size,
            place_resources,
            high_availability
        )

        assert volume_properties.namespace == \
            self._build_volume_namespace(volume_uuid)
        return volume_properties
    except LinstorVolumeManagerError as e:
        # Do not destroy existing resource!
        # In theory we can't get this error because we check this event
        # before the `self._create_volume` case.
        # It can only happen if the same volume uuid is used in the same
        # call in another host.
        if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
            self._destroy_volume(volume_uuid, force=True)
        raise

2370 

def _find_device_path(self, volume_uuid, volume_name):
    """
    Resolve and validate the local device path of a volume, activating
    a diskless path if necessary.
    :return: The /dev/drbd/by-res/... path of the volume.
    :raise LinstorVolumeManagerError: If the resolved path does not match
        the path reported by LINSTOR.
    """
    current_device_path = self._request_device_path(
        volume_uuid, volume_name, activate=True
    )

    # We use realpath here to get the /dev/drbd<id> path instead of
    # /dev/drbd/by-res/<resource_name>.
    expected_device_path = self.build_device_path(volume_name)
    util.wait_for_path(expected_device_path, 5)

    device_realpath = os.path.realpath(expected_device_path)
    if current_device_path == device_realpath:
        return expected_device_path

    raise LinstorVolumeManagerError(
        'Invalid path, current={}, expected={} (realpath={})'
        .format(
            current_device_path,
            expected_device_path,
            device_realpath
        )
    )

2392 

def _request_device_path(self, volume_uuid, volume_name, activate=False):
    """
    Ask LINSTOR for the local device path of a volume.
    :param bool activate: When True and no local resource exists, create
        a diskless resource on this node, then retry once.
    :return: A path of the /dev/drbd<id> form.
    :raise LinstorVolumeManagerError: If no resource or path is available.
    """
    local_node = socket.gethostname()

    found = None
    for candidate in self._get_resource_cache().resources:
        if candidate.node_name == local_node and candidate.name == volume_name:
            found = candidate
            break

    if found is None:
        if not activate:
            raise LinstorVolumeManagerError(
                'Unable to get dev path for `{}`, no resource found but definition "seems" to exist'
                .format(volume_uuid)
            )
        self._mark_resource_cache_as_dirty()
        self._activate_device_path(
            self._linstor, local_node, volume_name
        )
        # Retry without activation to avoid looping forever.
        return self._request_device_path(volume_uuid, volume_name)

    # Contains a path of the /dev/drbd<id> form.
    device_path = found.volumes[0].device_path
    if not device_path:
        raise LinstorVolumeManagerError('Empty dev path for `{}`!'.format(volume_uuid))
    return device_path

2419 

def _destroy_resource(self, resource_name, force=False):
    """
    Delete a LINSTOR resource definition.

    :param str resource_name: The resource definition to delete.
    :param bool force: When True and the first delete fails, verify there
        are no openers, demote any in-use replica, then retry once.
    :raise LinstorVolumeManagerError: If deletion fails (and cannot be
        forced), or if the resource still has openers.
    """
    result = self._linstor.resource_dfn_delete(resource_name)
    error_str = self._get_error_str(result)
    if not error_str:
        self._mark_resource_cache_as_dirty()
        return

    if not force:
        self._mark_resource_cache_as_dirty()
        raise LinstorVolumeManagerError(
            'Could not destroy resource `{}` from SR `{}`: {}'
            .format(resource_name, self._group_name, error_str)
        )

    # If force is used, ensure there is no opener.
    all_openers = get_all_volume_openers(resource_name, '0')
    for openers in all_openers.values():
        if openers:
            self._mark_resource_cache_as_dirty()
            raise LinstorVolumeManagerError(
                'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
                .format(resource_name, self._group_name, error_str, all_openers)
            )

    # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
    resource_states = filter(
        lambda resource_state: resource_state.name == resource_name,
        self._get_resource_cache().resource_states
    )

    # Mark only after computation of states.
    # NOTE(review): `resource_states` is a lazy filter over the cache;
    # marking dirty here does not invalidate the iterator already built.
    self._mark_resource_cache_as_dirty()

    for resource_state in resource_states:
        volume_state = resource_state.volume_states[0]
        if resource_state.in_use:
            # Demote the primary replica so the delete can proceed.
            demote_drbd_resource(resource_state.node_name, resource_name)
            break
    # Retry without force: raise if the delete still fails.
    self._destroy_resource(resource_name)

2459 

def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
    """
    Destroy the LINSTOR resource of a volume and clear its KV properties.

    :param str volume_uuid: The volume to destroy.
    :param bool force: Forwarded to `_destroy_resource`.
    :param bool preserve_properties: When True, keep the KV entry
        (used while retrying creation).
    :raise LinstorVolumeManagerError: Wrapping any underlying failure.
    """
    volume_properties = self._get_volume_properties(volume_uuid)
    try:
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
        # Only delete the resource if it actually exists in LINSTOR;
        # the KV entry may be present without a backing resource.
        if volume_name in self._fetch_resource_names():
            self._destroy_resource(volume_name, force)

        # Assume this call is atomic.
        if not preserve_properties:
            volume_properties.clear()
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
        )

2474 

def _build_volumes(self, repair):
    """
    Rebuild `self._volumes` from the KV store, optionally repairing
    inconsistent entries.

    Three categories are handled:
      1. Volumes mid-UUID-update (PROP_UPDATING_UUID_SRC set): a pending
         rename transaction that is rolled forward at the end.
      2. Existing/creating volumes: added to `self._volumes`.
      3. Bad volumes (creation crashed): destroyed when `repair` is True,
         otherwise ignored.

    :param bool repair: True on the master to clean up bad entries;
        False on slaves (read-only behavior).
    :raise LinstorVolumeManagerError: If rename transactions exist and
        repair is disabled.
    """
    properties = self._kv_cache
    resource_names = self._fetch_resource_names()

    self._volumes = set()

    updating_uuid_volumes = self._get_volumes_by_property(
        self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
    )
    if updating_uuid_volumes and not repair:
        raise LinstorVolumeManagerError(
            'Cannot build LINSTOR volume list: '
            'It exists invalid "updating uuid volumes", repair is required'
        )

    existing_volumes = self._get_volumes_by_property(
        self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
    )
    for volume_uuid, not_exists in existing_volumes.items():
        properties.namespace = self._build_volume_namespace(volume_uuid)

        # Volumes mid-rename are resolved in the loop at the end.
        src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
        if src_uuid:
            self._logger(
                'Ignoring volume during manager initialization with prop '
                ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                .format(
                    volume_uuid,
                    self._get_filtered_properties(properties)
                )
            )
            continue

        # Insert volume in list if the volume exists. Or if the volume
        # is being created and a slave wants to use it (repair = False).
        #
        # If we are on the master and if repair is True and state is
        # Creating, it's probably a bug or crash: the creation process has
        # been stopped.
        if not_exists == self.STATE_EXISTS or (
            not repair and not_exists == self.STATE_CREATING
        ):
            self._volumes.add(volume_uuid)
            continue

        if not repair:
            self._logger(
                'Ignoring bad volume during manager initialization: {} '
                '(properties={})'.format(
                    volume_uuid,
                    self._get_filtered_properties(properties)
                )
            )
            continue

        # Remove bad volume.
        try:
            self._logger(
                'Removing bad volume during manager initialization: {} '
                '(properties={})'.format(
                    volume_uuid,
                    self._get_filtered_properties(properties)
                )
            )
            volume_name = properties.get(self.PROP_VOLUME_NAME)

            # Little optimization, don't call `self._destroy_volume`,
            # we already have resource name list.
            if volume_name in resource_names:
                self._destroy_resource(volume_name, force=True)

            # Assume this call is atomic.
            properties.clear()
        except Exception as e:
            # Do not raise, we don't want to block user action.
            self._logger(
                'Cannot clean volume {}: {}'.format(volume_uuid, e)
            )

            # The volume can't be removed, maybe it's still in use,
            # in this case rename it with the "DELETED_" prefix.
            # This prefix is mandatory if it exists a snap transaction to
            # rollback because the original VDI UUID can try to be renamed
            # with the UUID we are trying to delete...
            if not volume_uuid.startswith('DELETED_'):
                self.update_volume_uuid(
                    volume_uuid, 'DELETED_' + volume_uuid, force=True
                )

    # Roll forward pending UUID-rename transactions.
    for dest_uuid, src_uuid in updating_uuid_volumes.items():
        dest_namespace = self._build_volume_namespace(dest_uuid)

        # If the destination was never fully created, drop it and keep
        # the source entry.
        properties.namespace = dest_namespace
        if int(properties.get(self.PROP_NOT_EXISTS)):
            properties.clear()
            continue

        # Otherwise the rename succeeded: remove the source entry and
        # the transaction marker on the destination.
        properties.namespace = self._build_volume_namespace(src_uuid)
        properties.clear()

        properties.namespace = dest_namespace
        properties.pop(self.PROP_UPDATING_UUID_SRC)

        if src_uuid in self._volumes:
            self._volumes.remove(src_uuid)
        self._volumes.add(dest_uuid)

2581 

def _get_sr_properties(self):
    """Return a fresh KV store scoped to the SR namespace."""
    sr_namespace = self._build_sr_namespace()
    return self._create_linstor_kv(sr_namespace)

2584 

def _get_volumes_by_property(
    self, reg_prop, ignore_inexisting_volumes=True
):
    """
    Map volume UUIDs to the value of a property matched by regex.
    :param reg_prop: Compiled regex whose first group captures the UUID
        from a KV key.
    :param bool ignore_inexisting_volumes: When True, only return UUIDs
        already present in `self._volumes`.
    :return: Map of volume UUID to property value ('' when unset).
    :rtype: dict
    """
    base_properties = self._get_kv_cache()
    base_properties.namespace = self._build_volume_namespace()

    # Seed every known volume with an empty value.
    volume_properties = {uuid: '' for uuid in self._volumes}

    for key, value in base_properties.items():
        match = reg_prop.match(key)
        if not match:
            continue
        volume_uuid = match.groups()[0]
        if ignore_inexisting_volumes and volume_uuid not in self._volumes:
            continue
        volume_properties[volume_uuid] = value

    return volume_properties

2604 

def _create_linstor_kv(self, namespace):
    """Open a LINSTOR KV store for this group rooted at `namespace`."""
    controller_uri = self._linstor.controller_host()
    return linstor.KV(
        self._group_name,
        uri=controller_uri,
        namespace=namespace
    )

2611 

def _get_volume_properties(self, volume_uuid):
    """Return the cached KV store scoped to the given volume's namespace."""
    kv_store = self._get_kv_cache()
    kv_store.namespace = self._build_volume_namespace(volume_uuid)
    return kv_store

2616 

@classmethod
def _build_sr_namespace(cls):
    """Return the KV namespace holding SR-global properties."""
    namespace = '/{}/'.format(cls.NAMESPACE_SR)
    return namespace

2620 

@classmethod
def _build_volume_namespace(cls, volume_uuid=None):
    """
    Return the KV namespace of one volume, or of all volumes when
    `volume_uuid` is not given.
    """
    return '/{}/'.format(cls.NAMESPACE_VOLUME) if volume_uuid is None \
        else '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)

2627 

@classmethod
def _get_error_str(cls, result):
    """Join the messages of all error responses in `result` ('' if none)."""
    return ', '.join(
        err.message for err in cls._filter_errors(result)
    )

2633 

@classmethod
def _create_linstor_instance(
    cls, uri, keep_uri_unmodified=False, attempt_count=30
):
    """
    Open a connected LINSTOR client, retrying on network failures.

    A first connection is attempted with the given URI; on failure the
    URI is rediscovered (unless `keep_uri_unmodified`) and the connection
    is retried up to `attempt_count` times with a 1s period.

    :param str uri: Controller URI; falsy values trigger discovery via
        `get_controller_uri`.
    :param bool keep_uri_unmodified: When True, never replace the given
        URI during retries.
    :param int attempt_count: Max retry attempts.
    :return: A connected `linstor.Linstor` instance.
    :raise LinstorVolumeManagerError: If no controller URI can be found.
    """
    # FIX: removed dead local `retry = False` that was never read.

    def connect(uri):
        if not uri:
            uri = get_controller_uri()
            if not uri:
                raise LinstorVolumeManagerError(
                    'Unable to find controller uri...'
                )
        instance = linstor.Linstor(uri, keep_alive=True)
        instance.connect()
        return instance

    try:
        return connect(uri)
    except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
        pass

    if not keep_uri_unmodified:
        # Force rediscovery on each retry: the controller may have moved.
        uri = None

    return util.retry(
        lambda: connect(uri),
        maxretry=attempt_count,
        period=1,
        exceptions=[
            linstor.errors.LinstorNetworkError,
            LinstorVolumeManagerError
        ]
    )

2668 

@classmethod
def _configure_volume_peer_slots(cls, lin, volume_name):
    """
    Set the peer slot count of a resource definition to 3.
    :raise LinstorVolumeManagerError: If the modification fails.
    """
    result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
    error_str = cls._get_error_str(result)
    if not error_str:
        return
    raise LinstorVolumeManagerError(
        'Could not configure volume peer slots of {}: {}'
        .format(volume_name, error_str)
    )

2678 

@classmethod
def _activate_device_path(cls, lin, node_name, volume_name):
    """
    Make a resource available (diskless) on a node so its device path
    exists there. A resource that already exists is not an error.
    :raise LinstorVolumeManagerError: On any other failure.
    """
    result = lin.resource_make_available(node_name, volume_name, diskful=False)
    if linstor.Linstor.all_api_responses_no_error(result):
        return

    errors = linstor.Linstor.filter_api_call_response_errors(result)
    already_exists = len(errors) == 1 and errors[0].is_error(
        linstor.consts.FAIL_EXISTS_RSC
    )
    if already_exists:
        return

    raise LinstorVolumeManagerError(
        'Unable to activate device path of `{}` on node `{}`: {}'
        .format(volume_name, node_name, ', '.join(
            str(x) for x in result))
    )

2695 

@classmethod
def _request_database_path(cls, lin, activate=False):
    """
    Return the local device path of the LINSTOR database volume.

    :param lin: A connected `linstor.Linstor` instance.
    :param bool activate: When True and no local resource exists, create
        a diskless resource on this node, then retry once.
    :return: A path of the /dev/drbd<id> form.
    :raise LinstorVolumeManagerError: If the resource list cannot be
        fetched or no resource is available.
    """
    node_name = socket.gethostname()

    try:
        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == DATABASE_VOLUME_NAME,
            lin.resource_list_raise().resources
        ), None)
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Unable to fetch database resource: {}'
            .format(e)
        )

    if not resource:
        if activate:
            cls._activate_device_path(
                lin, node_name, DATABASE_VOLUME_NAME
            )
            # FIX: the old code called
            # `cls._request_database_path(DATABASE_VOLUME_NAME,
            # DATABASE_VOLUME_NAME)`, passing a string as the client and
            # as `activate` — it could never work. Retry with the client,
            # without activation, to avoid looping forever.
            return cls._request_database_path(lin)
        raise LinstorVolumeManagerError(
            'Empty dev path for `{}`, but definition "seems" to exist'
            .format(DATABASE_PATH)
        )
    # Contains a path of the /dev/drbd<id> form.
    return resource.volumes[0].device_path

2726 

@classmethod
def _create_database_volume(
    cls, lin, group_name, storage_pool_name, node_names, redundancy
):
    """
    Create, place and format the LINSTOR database volume.

    Steps: verify the SR is empty, spawn the definition, create diskful
    replicas on up to `redundancy` nodes holding the storage pool and
    diskless resources everywhere else, validate the device path, then
    run mkfs on it.

    :param lin: A connected `linstor.Linstor` instance.
    :param str group_name: Resource group used to spawn the definition.
    :param str storage_pool_name: Storage pool for diskful replicas.
    :param node_names: All node names of the pool.
    :param int redundancy: Number of diskful replicas to create.
    :return: The database device path.
    :raise LinstorVolumeManagerError: On any failure.
    """
    try:
        dfns = lin.resource_dfn_list_raise().resource_definitions
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Unable to get definitions during database creation: {}'
            .format(e)
        )

    # The database must be the very first volume of the SR.
    if dfns:
        raise LinstorVolumeManagerError(
            'Could not create volume `{}` from SR `{}`, '.format(
                DATABASE_VOLUME_NAME, group_name
            ) + 'LINSTOR volume list must be empty.'
        )

    # Workaround to use thin lvm. Without this line an error is returned:
    # "Not enough available nodes"
    # I don't understand why but this command protect against this bug.
    try:
        pools = lin.storage_pool_list_raise(
            filter_by_stor_pools=[storage_pool_name]
        )
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Failed to get storage pool list before database creation: {}'
            .format(e)
        )

    # Ensure we have a correct list of storage pools.
    assert pools.storage_pools  # We must have at least one storage pool!
    nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
    for node_name in nodes_with_pool:
        assert node_name in node_names
    util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

    # Create the database definition.
    size = cls.round_up_volume_size(DATABASE_SIZE)
    cls._check_volume_creation_errors(lin.resource_group_spawn(
        rsc_grp_name=group_name,
        rsc_dfn_name=DATABASE_VOLUME_NAME,
        vlm_sizes=['{}B'.format(size)],
        definitions_only=True
    ), DATABASE_VOLUME_NAME, group_name)
    cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

    # Create real resources on the first nodes.
    resources = []

    # Partition nodes: only nodes holding the storage pool can be diskful.
    diskful_nodes = []
    diskless_nodes = []
    for node_name in node_names:
        if node_name in nodes_with_pool:
            diskful_nodes.append(node_name)
        else:
            diskless_nodes.append(node_name)

    assert diskful_nodes
    for node_name in diskful_nodes[:redundancy]:
        util.SMlog('Create database diskful on {}'.format(node_name))
        resources.append(linstor.ResourceData(
            node_name=node_name,
            rsc_name=DATABASE_VOLUME_NAME,
            storage_pool=storage_pool_name
        ))
    # Create diskless resources on the remaining set.
    for node_name in diskful_nodes[redundancy:] + diskless_nodes:
        util.SMlog('Create database diskless on {}'.format(node_name))
        resources.append(linstor.ResourceData(
            node_name=node_name,
            rsc_name=DATABASE_VOLUME_NAME,
            diskless=True
        ))

    result = lin.resource_create(resources)
    error_str = cls._get_error_str(result)
    if error_str:
        raise LinstorVolumeManagerError(
            'Could not create database volume from SR `{}`: {}'.format(
                group_name, error_str
            )
        )

    # Create database and ensure path exists locally and
    # on replicated devices.
    current_device_path = cls._request_database_path(lin, activate=True)

    # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
    # plugged.
    for node_name in node_names:
        cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

    # We use realpath here to get the /dev/drbd<id> path instead of
    # /dev/drbd/by-res/<resource_name>.
    expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
    util.wait_for_path(expected_device_path, 5)

    device_realpath = os.path.realpath(expected_device_path)
    if current_device_path != device_realpath:
        raise LinstorVolumeManagerError(
            'Invalid path, current={}, expected={} (realpath={})'
            .format(
                current_device_path,
                expected_device_path,
                device_realpath
            )
        )

    # Format the new device; retried because DRBD may briefly hold it.
    try:
        util.retry(
            lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
            maxretry=5
        )
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Failed to execute {} on database volume: {}'
            .format(DATABASE_MKFS, e)
        )

    return expected_device_path

2850 

2851 @classmethod 

2852 def _destroy_database_volume(cls, lin, group_name): 

2853 error_str = cls._get_error_str( 

2854 lin.resource_dfn_delete(DATABASE_VOLUME_NAME) 

2855 ) 

2856 if error_str: 

2857 raise LinstorVolumeManagerError( 

2858 'Could not destroy resource `{}` from SR `{}`: {}' 

2859 .format(DATABASE_VOLUME_NAME, group_name, error_str) 

2860 ) 

2861 

    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        # Mount (or unmount when `mount` is False) the database volume on
        # DATABASE_PATH while preserving the directory content: the current
        # content is parked in a temporary backup dir, the (u)mount is done,
        # then the content is restored. On failure everything is rolled back
        # (best effort) before re-raising the original exception.
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # Rollback. Every step is attempted even if a previous one
            # failed: force_exec swallows all errors. NOTE(review): if the
            # failure happened before `backup_path` was assigned, the
            # lambdas below raise NameError internally — also swallowed by
            # force_exec, so the original exception is still the one raised.
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # The (u)mount succeeded: park the files again and undo it.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            # Mount state is back to the initial one: restore the files.
            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e

2908 

2909 @classmethod 

2910 def _force_destroy_database_volume(cls, lin, group_name): 

2911 try: 

2912 cls._destroy_database_volume(lin, group_name) 

2913 except Exception: 

2914 pass 

2915 

2916 @classmethod 

2917 def _destroy_storage_pool(cls, lin, group_name, node_name): 

2918 def destroy(): 

2919 result = lin.storage_pool_delete(node_name, group_name) 

2920 errors = cls._filter_errors(result) 

2921 if cls._check_errors(errors, [ 

2922 linstor.consts.FAIL_NOT_FOUND_STOR_POOL, 

2923 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN 

2924 ]): 

2925 return 

2926 

2927 if errors: 

2928 raise LinstorVolumeManagerError( 

2929 'Failed to destroy SP `{}` on node `{}`: {}'.format( 

2930 group_name, 

2931 node_name, 

2932 cls._get_error_str(errors) 

2933 ) 

2934 ) 

2935 

2936 # We must retry to avoid errors like: 

2937 # "can not be deleted as volumes / snapshot-volumes are still using it" 

2938 # after LINSTOR database volume destruction. 

2939 return util.retry(destroy, maxretry=10) 

2940 

    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        # Create the LINSTOR resource group and its volume group.
        #
        # If the RG already exists and `destroy_old_group` is set, one
        # destroy-and-retry cycle is attempted before giving up.
        #
        # :raises LinstorVolumeManagerError: If RG or VG creation fails.
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        # Old RG removed: retry the creation once.
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # The RG exists now; add the volume group to it.
        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )

2989 

2990 @classmethod 

2991 def _destroy_resource_group(cls, lin, group_name): 

2992 def destroy(): 

2993 result = lin.resource_group_delete(group_name) 

2994 errors = cls._filter_errors(result) 

2995 if cls._check_errors(errors, [ 

2996 linstor.consts.FAIL_NOT_FOUND_RSC_GRP 

2997 ]): 

2998 return 

2999 

3000 if errors: 

3001 raise LinstorVolumeManagerError( 

3002 'Failed to destroy RG `{}`: {}' 

3003 .format(group_name, cls._get_error_str(errors)) 

3004 ) 

3005 

3006 return util.retry(destroy, maxretry=10) 

3007 

3008 @classmethod 

3009 def _build_group_name(cls, base_name): 

3010 # If thin provisioning is used we have a path like this: 

3011 # `VG/LV`. "/" is not accepted by LINSTOR. 

3012 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) 

3013 

3014 # Used to store important data in a HA context, 

3015 # i.e. a replication count of 3. 

3016 @classmethod 

3017 def _build_ha_group_name(cls, base_name): 

3018 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) 

3019 

3020 @classmethod 

3021 def _check_volume_creation_errors(cls, result, volume_uuid, group_name): 

3022 errors = cls._filter_errors(result) 

3023 if cls._check_errors(errors, [ 

3024 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN 

3025 ]): 

3026 raise LinstorVolumeManagerError( 

3027 'Failed to create volume `{}` from SR `{}`, it already exists' 

3028 .format(volume_uuid, group_name), 

3029 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

3030 ) 

3031 

3032 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): 

3033 raise LinstorVolumeManagerError( 

3034 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' 

3035 .format(volume_uuid, group_name), 

3036 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS 

3037 ) 

3038 

3039 if errors: 

3040 raise LinstorVolumeManagerError( 

3041 'Failed to create volume `{}` from SR `{}`: {}'.format( 

3042 volume_uuid, 

3043 group_name, 

3044 cls._get_error_str(errors) 

3045 ) 

3046 ) 

3047 

    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        # Move the content of `src_dir` into `dest_dir`.
        #
        # Unless `force` is set, the destination must not already contain
        # any entry; on a partial failure, already-moved files are moved
        # back (best effort) before raising.
        #
        # `lost+found` entries (ext4 artifacts) are never moved.
        def listdir(dir):
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    # Never clobber an existing destination file unless forced.
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            if not force:
                # Rollback: restore whatever was already moved (best effort,
                # forced so pre-existing files can't block the restore).
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )

3097 

3098 @staticmethod 

3099 def _create_database_backup_path(): 

3100 path = DATABASE_PATH + '-' + str(uuid.uuid4()) 

3101 try: 

3102 os.mkdir(path) 

3103 return path 

3104 except Exception as e: 

3105 raise LinstorVolumeManagerError( 

3106 'Failed to create backup path {} of LINSTOR config: {}' 

3107 .format(path, e) 

3108 ) 

3109 

3110 @staticmethod 

3111 def _get_filtered_properties(properties): 

3112 return dict(properties.items()) 

3113 

3114 @staticmethod 

3115 def _filter_errors(result): 

3116 return [ 

3117 err for err in result 

3118 if hasattr(err, 'is_error') and err.is_error() 

3119 ] 

3120 

3121 @staticmethod 

3122 def _check_errors(result, codes): 

3123 for err in result: 

3124 for code in codes: 

3125 if err.is_error(code): 

3126 return True 

3127 return False 

3128 

3129 @classmethod 

3130 def _controller_is_running(cls): 

3131 return cls._service_is_running('linstor-controller') 

3132 

3133 @classmethod 

3134 def _start_controller(cls, start=True): 

3135 return cls._start_service('linstor-controller', start) 

3136 

3137 @staticmethod 

3138 def _start_service(name, start=True): 

3139 action = 'start' if start else 'stop' 

3140 (ret, out, err) = util.doexec([ 

3141 'systemctl', action, name 

3142 ]) 

3143 if ret != 0: 

3144 raise LinstorVolumeManagerError( 

3145 'Failed to {} {}: {} {}' 

3146 .format(action, name, out, err) 

3147 ) 

3148 

3149 @staticmethod 

3150 def _service_is_running(name): 

3151 (ret, out, err) = util.doexec([ 

3152 'systemctl', 'is-active', '--quiet', name 

3153 ]) 

3154 return not ret 

3155 

3156 @staticmethod 

3157 def _is_mounted(mountpoint): 

3158 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) 

3159 return ret == 0 

3160 

3161 @classmethod 

3162 def _mount_volume(cls, volume_path, mountpoint, mount=True): 

3163 if mount: 

3164 try: 

3165 util.pread(['mount', volume_path, mountpoint]) 

3166 except Exception as e: 

3167 raise LinstorVolumeManagerError( 

3168 'Failed to mount volume {} on {}: {}' 

3169 .format(volume_path, mountpoint, e) 

3170 ) 

3171 else: 

3172 try: 

3173 if cls._is_mounted(mountpoint): 

3174 util.pread(['umount', mountpoint]) 

3175 except Exception as e: 

3176 raise LinstorVolumeManagerError( 

3177 'Failed to umount volume {} on {}: {}' 

3178 .format(volume_path, mountpoint, e) 

3179 ) 

3180 

3181 

3182# ============================================================================== 

3183 

# Check if a path is a DRBD resource and log the process name/pid
# that opened it.
def log_drbd_openers(path):
    """Log which host(s)/process(es) hold the DRBD device behind `path` open."""
    # Only DRBD by-res symlinks are handled.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Extract `<res_name>` and `<volume>` from the by-res path.
    prefix_len = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', prefix_len)
    if res_name_end == -1:
        return
    res_name = path[prefix_len:res_name_end]

    volume_end = path.rfind('/')
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # The real device must be a block device with the DRBD major (147).
        drbd_path = os.path.realpath(path)
        stats = os.stat(drbd_path)
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Ask DRBD where the resource is in use.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        if stdout.startswith('{} role:Primary'.format(res_name)):
            # Primary on this host: only the local openers matter.
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
        else:
            # Otherwise ask every host for its openers.
            util.SMlog(
                'DRBD resource `{}` is open on hosts: {}'
                .format(path, get_all_volume_openers(res_name, volume))
            )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )