Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python3 

2# 

3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr 

4# 

5# This program is free software: you can redistribute it and/or modify 

6# it under the terms of the GNU General Public License as published by 

7# the Free Software Foundation, either version 3 of the License, or 

8# (at your option) any later version. 

9# This program is distributed in the hope that it will be useful, 

10# but WITHOUT ANY WARRANTY; without even the implied warranty of 

11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

12# GNU General Public License for more details. 

13# 

14# You should have received a copy of the GNU General Public License 

15# along with this program. If not, see <https://www.gnu.org/licenses/>. 

16# 

17 

18from sm_typing import ( 

19 Any, 

20 Dict, 

21 List, 

22 override, 

23) 

24 

25import json 

26import linstor 

27import os.path 

28import re 

29import shutil 

30import socket 

31import stat 

32import time 

33import util 

34import uuid 

35 

# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory.
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'

# TCP port used by LINSTOR satellites; established connections on this
# port are inspected to discover the controller address.
LINSTOR_SATELLITE_PORT = 3366

# Matches the node holding the Primary role in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts an address from `drbdsetup` output.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Directory where DRBD exposes device paths indexed by resource name.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to run LINSTOR-related commands on hosts.
PLUGIN = 'linstor-manager'


# ==============================================================================

56 

def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs to list the local processes that currently
    have a given DRBD volume open.
    :param str resource_name: Name of the DRBD resource.
    :param volume: Volume number inside the resource.
    :return: JSON object mapping each opener PID to its process name and
        open duration.
    :rtype: str
    :raises Exception: If arguments are missing or an openers line cannot
        be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        if not match:
            # Fix: this was a bare `assert`, which is stripped under `-O`
            # and then produced an obscure AttributeError on `match.groups()`.
            raise Exception('Cannot parse DRBD openers line: `{}`'.format(line))

        process_name, pid, open_duration_ms = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)

86 

def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool
    through the `linstor-manager` XAPI plugin. Hosts that fail to answer
    are logged and omitted from the result.
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    openers = {}
    volume = str(volume)

    session = util.get_localAPI_session()

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            # Ensure we call plugin on online hosts only.
            if not metrics['live']:
                continue

            response = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                }
            )
            openers[node_name] = json.loads(response)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers

117 

118 

119# ============================================================================== 

120 

def round_up(value, divisor):
    """Round `value` up to the nearest multiple of `divisor`."""
    assert divisor
    divisor = int(divisor)
    # Ceiling division written as a negated floor division.
    return -(-int(value) // divisor) * divisor

125 

126 

def round_down(value, divisor):
    """Round `value` down to the nearest multiple of `divisor`."""
    assert divisor
    divisor = int(divisor)
    return int(value) // divisor * divisor

131 

132 

133# ============================================================================== 

134 

def _get_controller_addresses() -> List[str]:
    """
    Return the peer addresses of all established connections to the local
    LINSTOR satellite port, i.e. candidate controller addresses.
    Any failure is logged and yields an empty list.
    """
    try:
        (ret, stdout, stderr) = util.doexec([
            "/usr/sbin/ss", "-tnpH", "state", "established", f"( sport = :{LINSTOR_SATELLITE_PORT} )"
        ])
        if ret != 0:
            util.SMlog(f"Unexpected code {ret}: {stderr}")
        else:
            addresses = []
            for line in stdout.splitlines():
                # Take the peer `addr:port` column and strip the port.
                addresses.append(line.split()[3].rsplit(":", 1)[0])
            return addresses
    except Exception as e:
        util.SMlog(f"Unable to get controller addresses: {e}")
    return []

149 

def _get_controller_uri() -> str:
    """
    Build a LINSTOR URI from the first detected controller address,
    or return an empty string when none is found.
    """
    # TODO: Check that an IP address from the current pool is returned.
    addresses = _get_controller_addresses()
    if not addresses:
        return ""
    return "linstor://" + addresses[0]

154 

def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying for up to 30 attempts
    one second apart. Returns None when no controller is found.
    """
    for attempt in range(30):
        uri = _get_controller_uri()
        if uri:
            return uri
        # Sleep between attempts, but not after the last one.
        if attempt < 29:
            time.sleep(1)

167 

def get_controller_node_name():
    """
    Find the node that runs the LINSTOR controller.
    Fast path: inspect the DRBD status of the database volume (the node
    that holds it as Primary is returned). Fallback: ask every live host
    through the `linstor-manager` plugin.
    :return: 'localhost' if the database volume is Primary here, otherwise
        the hostname of the controller node, or None if none was found.
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # Local node is Primary on the database volume.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        # Otherwise look for a remote Primary in the status output.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Fallback: query each live host via the plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            # Skip offline hosts: the plugin call would fail anyway.
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))

201 

202 

def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource (Primary -> Secondary) on a specific node
    using the `linstor-manager` plugin.
    :param str node_name: Hostname of the node to demote the resource on.
    :param str resource_name: The DRBD resource name.
    :raises Exception: If the node cannot be found or the plugin call fails.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
            # Fix: return on success. Previously execution fell through to
            # the "unable to find node" raise even after a successful demote.
            return
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            raise Exception(
                'Can\'t demote resource `{}` on node `{}`'
                .format(resource_name, node_name)
            )
    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )

224 

225# ============================================================================== 

226 

class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager. Carries an integer error code
    so callers can react to specific failures.
    """

    # Error codes.
    # Fix: the previous definitions had trailing commas, which silently
    # turned every code except ERR_VOLUME_IN_USE into a 1-tuple (e.g. `(0,)`).
    # They are now plain ints, consistent with ERR_VOLUME_IN_USE.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4
    ERR_VOLUME_IN_USE = 5

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error description.
        :param int code: One of the ERR_* codes above.
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # Read-only access to the error code.
        return self._code

243 

244# ============================================================================== 

245 

246# Note: 

247# If a storage pool is not accessible after a network change: 

248# linstor node interface modify <NODE> default --ip <IP> 

249 

250 

class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    # Restrict instances to these attributes (and avoid a per-instance dict).
    __slots__ = (
        '_linstor', '_uri', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty',
        '_resources_info_cache',
    )

    # Root path under which DRBD exposes devices by resource name.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties (keys stored in the LINSTOR KV store).
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15

    @staticmethod
    def default_logger(*args):
        # Fallback logger used when the caller does not provide one.
        print(args)

    # --------------------------------------------------------------------------
    # API.
    # --------------------------------------------------------------------------

321 

322 class VolumeInfo(object): 

323 __slots__ = ( 

324 'name', 

325 'allocated_size', # Allocated size, place count is not used. 

326 'virtual_size', # Total virtual available size of this volume 

327 # (i.e. the user size at creation). 

328 'diskful' # Array of nodes that have a diskful volume. 

329 ) 

330 

331 def __init__(self, name): 

332 self.name = name 

333 self.allocated_size = 0 

334 self.virtual_size = 0 

335 self.diskful = [] 

336 

337 @override 

338 def __repr__(self) -> str: 

339 return 'VolumeInfo("{}", {}, {}, {})'.format( 

340 self.name, self.allocated_size, self.virtual_size, 

341 self.diskful 

342 ) 

343 

344 # -------------------------------------------------------------------------- 

345 

    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If node versions are mismatched or
        the resource group cannot be found.
        """

        self._uri = uri
        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )

        # Fail early if some nodes run a different LINSTOR version:
        # mixing versions across the pool is not supported.
        mismatched_nodes = [
            node for node in self._linstor.node_list().pop().nodes if node.connection_status == "VERSION_MISMATCH"
        ]

        if mismatched_nodes:
            raise LinstorVolumeManagerError(
                "Some linstor nodes are not using the same version. " +
                f"Incriminated nodes are: {','.join([node.name for node in mismatched_nodes])}"
            )

        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._resources_info_cache = None
        self._build_volumes(repair=repair)

403 

    @property
    def uri(self) -> str:
        """
        Give the URI used to contact the LINSTOR controller.
        :return: The controller URI.
        :rtype: str
        """
        return self._uri

407 

    @property
    def group_name(self):
        """
        Give the used group name.
        Note: this is the base (unprefixed) name passed to the constructor,
        not the internal prefixed LINSTOR group name.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name

416 

    @property
    def redundancy(self):
        """
        Give the used redundancy.
        The value comes from the resource group's `place_count`.
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy

425 

    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        Note: the internal set is returned, not a copy.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes

434 

435 @property 

436 def max_volume_size_allowed(self): 

437 """ 

438 Give the max volume size currently available in B. 

439 :return: The current size. 

440 :rtype: int 

441 """ 

442 

443 candidates = self._find_best_size_candidates() 

444 if not candidates: 

445 raise LinstorVolumeManagerError( 

446 'Failed to get max volume size allowed' 

447 ) 

448 

449 size = candidates[0].max_volume_size 

450 if size < 0: 

451 raise LinstorVolumeManagerError( 

452 'Invalid max volume size allowed given: {}'.format(size) 

453 ) 

454 return self.round_down_volume_size(size * 1024) 

455 

    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        # Aggregated over all storage pools by `_compute_size`.
        return self._compute_size('total_capacity')

464 

    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        # Aggregated over all storage pools by `_compute_size`.
        return self._compute_size('free_capacity')

473 

474 @property 

475 def allocated_volume_size(self): 

476 """ 

477 Give the allocated size for all volumes. The place count is not 

478 used here. When thick lvm is used, the size for one volume should 

479 be equal to the virtual volume size. With thin lvm, the size is equal 

480 or lower to the volume size. 

481 :return: The allocated size of all volumes. 

482 :rtype: int 

483 """ 

484 

485 # Paths: /res_name/vol_number/size 

486 sizes = {} 

487 

488 for resource in self._get_resource_cache().resources: 

489 if resource.name not in sizes: 

490 current = sizes[resource.name] = {} 

491 else: 

492 current = sizes[resource.name] 

493 

494 for volume in resource.volumes: 

495 # We ignore diskless pools of the form "DfltDisklessStorPool". 

496 if volume.storage_pool_name != self._group_name: 

497 continue 

498 

499 allocated_size = max(volume.allocated_size, 0) 

500 current_allocated_size = current.get(volume.number) or -1 

501 if allocated_size > current_allocated_size: 

502 current[volume.number] = allocated_size 

503 

504 total_size = 0 

505 for volumes in sizes.values(): 

506 for size in volumes.values(): 

507 total_size += size 

508 

509 return total_size * 1024 

510 

    def get_min_physical_size(self):
        """
        Give the minimum physical size of the SR.
        I.e. the size of the smallest disk + the number of pools.
        :return: The physical min size.
        :rtype: tuple(int, int)
        """
        size = None
        pool_count = 0
        for pool in self._get_storage_pools(force=True):
            space = pool.free_space
            if space:
                pool_count += 1
                current_size = space.total_capacity
                if current_size < 0:
                    raise LinstorVolumeManagerError(
                        'Failed to get pool total_capacity attr of `{}`'
                        .format(pool.node_name)
                    )
                # Track the smallest capacity across pools.
                if size is None or current_size < size:
                    size = current_size
        # Convert to bytes (LINSTOR capacities are in KiB).
        return (pool_count, (size or 0) * 1024)

533 

534 @property 

535 def metadata(self): 

536 """ 

537 Get the metadata of the SR. 

538 :return: Dictionary that contains metadata. 

539 :rtype: dict(str, dict) 

540 """ 

541 

542 sr_properties = self._get_sr_properties() 

543 metadata = sr_properties.get(self.PROP_METADATA) 

544 if metadata is not None: 

545 metadata = json.loads(metadata) 

546 if isinstance(metadata, dict): 

547 return metadata 

548 raise LinstorVolumeManagerError( 

549 'Expected dictionary in SR metadata: {}'.format( 

550 self._group_name 

551 ) 

552 ) 

553 

554 return {} 

555 

556 @metadata.setter 

557 def metadata(self, metadata): 

558 """ 

559 Set the metadata of the SR. 

560 :param dict metadata: Dictionary that contains metadata. 

561 """ 

562 

563 assert isinstance(metadata, dict) 

564 sr_properties = self._get_sr_properties() 

565 sr_properties[self.PROP_METADATA] = json.dumps(metadata) 

566 

567 @property 

568 def disconnected_hosts(self): 

569 """ 

570 Get the list of disconnected hosts. 

571 :return: Set that contains disconnected hosts. 

572 :rtype: set(str) 

573 """ 

574 

575 disconnected_hosts = set() 

576 for pool in self._get_storage_pools(): 

577 for report in pool.reports: 

578 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \ 

579 linstor.consts.WARN_NOT_CONNECTED: 

580 disconnected_hosts.add(pool.node_name) 

581 break 

582 return disconnected_hosts 

583 

    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        # Only consults the in-memory set built at construction time.
        return volume_uuid in self._volumes

591 

    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Flip the property so the volume survives a SR reload.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise

643 

644 def mark_volume_as_persistent(self, volume_uuid): 

645 """ 

646 Mark volume as persistent if created with persistent=False. 

647 :param str volume_uuid: The volume uuid to mark. 

648 """ 

649 

650 self._ensure_volume_exists(volume_uuid) 

651 

652 # Mark volume as persistent. 

653 volume_properties = self._get_volume_properties(volume_uuid) 

654 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS 

655 

    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raises LinstorVolumeManagerError: With code ERR_VOLUME_IN_USE when
        the volume is opened on a node, or ERR_VOLUME_DESTROY on failure.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Refuse to destroy a volume currently opened on any node.
        is_volume_in_use = any(node["in-use"] for node in self.get_resource_info(volume_uuid)["nodes"].values())
        if is_volume_in_use:
            raise LinstorVolumeManagerError(
                f"Could not destroy volume `{volume_uuid}` as it is currently in use",
                LinstorVolumeManagerError.ERR_VOLUME_IN_USE
            )

        # Mark volume as destroyed.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )

684 

685 def lock_volume(self, volume_uuid, locked=True): 

686 """ 

687 Prevent modifications of the volume properties during 

688 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked 

689 when used. This method is useful to attach/detach correctly a volume on 

690 a slave. Without it the GC can rename a volume, in this case the old 

691 volume path can be used by a slave... 

692 :param str volume_uuid: The volume uuid to protect/unprotect. 

693 :param bool locked: Lock/unlock the volume. 

694 """ 

695 

696 self._ensure_volume_exists(volume_uuid) 

697 

698 self._logger( 

699 '{} volume {} as locked'.format( 

700 'Mark' if locked else 'Unmark', 

701 volume_uuid 

702 ) 

703 ) 

704 

705 volume_properties = self._get_volume_properties(volume_uuid) 

706 if locked: 

707 volume_properties[ 

708 self.PROP_IS_READONLY_TIMESTAMP 

709 ] = str(time.time()) 

710 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties: 

711 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP) 

712 

    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Simple wrapper around the list variant.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)

721 

    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked anymore.
        Uuids unknown to this SR are ignored, and expired locks are removed
        on the fly.
        :param volume_uuids: Iterable of volume uuids to check.
        :param int timeout: Optional delay in seconds after which an
        exception is raised if a volume is still locked.
        """
        # Only consider volumes that belong to this SR.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore).
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock expired: the owner probably died, clean it up.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # This volume is still locked: stop scanning and wait.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            # Note: `now` is necessarily bound here because reaching this
            # point requires having passed the timestamp checks above.
            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')

783 

    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Keep tie-breaker diskless resources untouched (presumably they
        # are required to maintain DRBD quorum — confirm against LINSTOR docs).
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )

812 

    def introduce_volume(self, volume_uuid):
        """
        Placeholder: reintroduce an existing volume into the SR.
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass  # TODO: Implement me.

815 

    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raises LinstorVolumeManagerError: If DRBD sync times out or the
        LINSTOR modify call fails.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # LINSTOR expects a size in KiB, rounded up to the sector size.
        new_size = self.round_up_volume_size(new_size) // 1024

        # We can't resize anything until DRBD is up to date.
        # We wait here for 5min max and raise an easy to understand error for the user.
        # 5min is an arbitrary time, it's impossible to get a fit all situation value
        # and it's currently impossible to know how much time we have to wait
        # This is mostly an issue for thick provisioning, thin isn't affected.
        start_time = time.monotonic()
        try:
            self._linstor.resource_dfn_wait_synced(volume_name, wait_interval=1.0, timeout=60*5)
        except linstor.LinstorTimeoutError:
            raise LinstorVolumeManagerError(
                f"Volume resizing of `{volume_uuid}` from SR `{self._group_name}` is incomplete: timeout reached but it continues in background."
            )
        util.SMlog(f"DRBD is up to date, syncing took {time.monotonic() - start_time}s")

        result = self._linstor.volume_dfn_modify(
            rsc_name=volume_name,
            volume_nr=0,
            size=new_size
        )

        # The resource state changed: invalidate the resource cache.
        self._mark_resource_cache_as_dirty()

        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                f"Could not resize volume `{volume_uuid}` from SR `{self._group_name}`: {error_str}"
            )

854 

855 def get_volume_name(self, volume_uuid): 

856 """ 

857 Get the name of a particular volume. 

858 :param str volume_uuid: The volume uuid of the name to get. 

859 :return: The volume name. 

860 :rtype: str 

861 """ 

862 

863 self._ensure_volume_exists(volume_uuid) 

864 volume_properties = self._get_volume_properties(volume_uuid) 

865 volume_name = volume_properties.get(self.PROP_VOLUME_NAME) 

866 if volume_name: 

867 return volume_name 

868 raise LinstorVolumeManagerError( 

869 'Failed to get volume name of {}'.format(volume_uuid) 

870 ) 

871 

872 def get_volume_size(self, volume_uuid): 

873 """ 

874 Get the size of a particular volume. 

875 :param str volume_uuid: The volume uuid of the size to get. 

876 :return: The volume size. 

877 :rtype: int 

878 """ 

879 

880 volume_name = self.get_volume_name(volume_uuid) 

881 dfns = self._linstor.resource_dfn_list_raise( 

882 query_volume_definitions=True, 

883 filter_by_resource_definitions=[volume_name] 

884 ).resource_definitions 

885 

886 size = dfns[0].volume_definitions[0].size 

887 if size < 0: 

888 raise LinstorVolumeManagerError( 

889 'Failed to get volume size of: {}'.format(volume_uuid) 

890 ) 

891 return size * 1024 

892 

893 def set_auto_promote_timeout(self, volume_uuid, timeout): 

894 """ 

895 Define the blocking time of open calls when a DRBD 

896 is already open on another host. 

897 :param str volume_uuid: The volume uuid to modify. 

898 """ 

899 

900 volume_name = self.get_volume_name(volume_uuid) 

901 result = self._linstor.resource_dfn_modify(volume_name, { 

902 'DrbdOptions/Resource/auto-promote-timeout': timeout 

903 }) 

904 error_str = self._get_error_str(result) 

905 if error_str: 

906 raise LinstorVolumeManagerError( 

907 'Could not change the auto promote timeout of `{}`: {}' 

908 .format(volume_uuid, error_str) 

909 ) 

910 

911 def set_drbd_ha_properties(self, volume_name, enabled=True): 

912 """ 

913 Set or not HA DRBD properties required by drbd-reactor and 

914 by specific volumes. 

915 :param str volume_name: The volume to modify. 

916 :param bool enabled: Enable or disable HA properties. 

917 """ 

918 

919 properties = { 

920 'DrbdOptions/auto-quorum': 'disabled', 

921 'DrbdOptions/Resource/auto-promote': 'no', 

922 'DrbdOptions/Resource/on-no-data-accessible': 'io-error', 

923 'DrbdOptions/Resource/on-no-quorum': 'io-error', 

924 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary', 

925 'DrbdOptions/Resource/quorum': 'majority' 

926 } 

927 if enabled: 

928 result = self._linstor.resource_dfn_modify(volume_name, properties) 

929 else: 

930 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys())) 

931 

932 error_str = self._get_error_str(result) 

933 if error_str: 

934 raise LinstorVolumeManagerError( 

935 'Could not modify HA DRBD properties on volume `{}`: {}' 

936 .format(volume_name, error_str) 

937 ) 

938 

    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        # Volume info is indexed by LINSTOR volume name, not by uuid.
        return self._get_volumes_info()[volume_name]

949 

    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)

960 

961 def get_volume_uuid_from_device_path(self, device_path): 

962 """ 

963 Get the volume uuid of a device_path. 

964 :param str device_path: The dev path to find the volume uuid. 

965 :return: The volume uuid of the local device path. 

966 :rtype: str 

967 """ 

968 

969 expected_volume_name = \ 

970 self.get_volume_name_from_device_path(device_path) 

971 

972 volume_names = self.get_volumes_with_name() 

973 for volume_uuid, volume_name in volume_names.items(): 

974 if volume_name == expected_volume_name: 

975 return volume_uuid 

976 

977 raise LinstorVolumeManagerError( 

978 'Unable to find volume uuid from dev path `{}`'.format(device_path) 

979 ) 

980 

981 def get_volume_name_from_device_path(self, device_path): 

982 """ 

983 Get the volume name of a device_path. 

984 :param str device_path: The dev path to find the volume name. 

985 :return: The volume name of the device path. 

986 :rtype: str 

987 """ 

988 

989 # Assume that we have a path like this: 

990 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0" 

991 # - "../xcp-volume-<UUID>/0" 

992 if device_path.startswith(DRBD_BY_RES_PATH): 

993 prefix_len = len(DRBD_BY_RES_PATH) 

994 elif device_path.startswith('../'): 

995 prefix_len = 3 

996 else: 

997 raise LinstorVolumeManagerError('Unexpected device path: `{}`'.format(device_path)) 

998 

999 res_name_end = device_path.find('/', prefix_len) 

1000 assert res_name_end != -1 

1001 return device_path[prefix_len:res_name_end] 

1002 

    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.

        The rename is done through the LINSTOR KV store in several ordered
        steps so that a crash at any point leaves the store repairable
        (either the copy is incomplete and marked with
        PROP_UPDATING_UUID_SRC, or the rename is already effective).

        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # The target uuid must be free.
        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        # Refuse to start if a previous rename of this volume was left
        # half-done (PROP_UPDATING_UUID_SRC still present).
        volume_properties = self._get_volume_properties(volume_uuid)
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must contain no properties at all.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                # Best effort only: the half-written destination will be
                # detected later through PROP_UPDATING_UUID_SRC.
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        # Keep the in-memory volume set in sync with the KV store.
        try:
            self._volumes.remove(volume_uuid)
        except KeyError:
            # Can be missing if we are building the volume set attr AND
            # we are processing a deleted resource.
            assert force

        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )

1124 

1125 def update_volume_name(self, volume_uuid, volume_name): 

1126 """ 

1127 Change the volume name of a volume. 

1128 :param str volume_uuid: The volume to modify. 

1129 :param str volume_name: The volume_name to use. 

1130 """ 

1131 

1132 self._ensure_volume_exists(volume_uuid) 

1133 self.ensure_volume_is_not_locked(volume_uuid) 

1134 if not volume_name.startswith(self.PREFIX_VOLUME): 

1135 raise LinstorVolumeManagerError( 

1136 'Volume name `{}` must be start with `{}`' 

1137 .format(volume_name, self.PREFIX_VOLUME) 

1138 ) 

1139 

1140 if volume_name not in self._fetch_resource_names(): 

1141 raise LinstorVolumeManagerError( 

1142 'Volume `{}` doesn\'t exist'.format(volume_name) 

1143 ) 

1144 

1145 volume_properties = self._get_volume_properties(volume_uuid) 

1146 volume_properties[self.PROP_VOLUME_NAME] = volume_name 

1147 

1148 def get_usage_states(self, volume_uuid): 

1149 """ 

1150 Check if a volume is currently used. 

1151 :param str volume_uuid: The volume uuid to check. 

1152 :return: A dictionary that contains states. 

1153 :rtype: dict(str, bool or None) 

1154 """ 

1155 

1156 states = {} 

1157 

1158 volume_name = self.get_volume_name(volume_uuid) 

1159 for resource_state in self._linstor.resource_list_raise( 

1160 filter_by_resources=[volume_name] 

1161 ).resource_states: 

1162 states[resource_state.node_name] = resource_state.in_use 

1163 

1164 return states 

1165 

1166 def get_volume_openers(self, volume_uuid): 

1167 """ 

1168 Get openers of a volume. 

1169 :param str volume_uuid: The volume uuid to monitor. 

1170 :return: A dictionary that contains openers. 

1171 :rtype: dict(str, obj) 

1172 """ 

1173 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0') 

1174 

1175 def get_volumes_with_name(self): 

1176 """ 

1177 Give a volume dictionary that contains names actually owned. 

1178 :return: A volume/name dict. 

1179 :rtype: dict(str, str) 

1180 """ 

1181 return self._get_volumes_by_property(self.REG_VOLUME_NAME) 

1182 

1183 def get_volumes_with_info(self): 

1184 """ 

1185 Give a volume dictionary that contains VolumeInfos. 

1186 :return: A volume/VolumeInfo dict. 

1187 :rtype: dict(str, VolumeInfo) 

1188 """ 

1189 

1190 volumes = {} 

1191 

1192 volume_names = self.get_volumes_with_name() 

1193 all_volume_info = self._get_volumes_info(volume_names) 

1194 for volume_uuid, volume_name in volume_names.items(): 

1195 if volume_name: 

1196 volume_info = all_volume_info.get(volume_name) 

1197 if volume_info: 

1198 volumes[volume_uuid] = volume_info 

1199 continue 

1200 

1201 # Well I suppose if this volume is not available, 

1202 # LINSTOR has been used directly without using this API. 

1203 volumes[volume_uuid] = self.VolumeInfo('') 

1204 

1205 return volumes 

1206 

1207 def get_volumes_with_metadata(self): 

1208 """ 

1209 Give a volume dictionary that contains metadata. 

1210 :return: A volume/metadata dict. 

1211 :rtype: dict(str, dict) 

1212 """ 

1213 

1214 volumes = {} 

1215 

1216 metadata = self._get_volumes_by_property(self.REG_METADATA) 

1217 for volume_uuid, volume_metadata in metadata.items(): 

1218 if volume_metadata: 

1219 volume_metadata = json.loads(volume_metadata) 

1220 if isinstance(volume_metadata, dict): 

1221 volumes[volume_uuid] = volume_metadata 

1222 continue 

1223 raise LinstorVolumeManagerError( 

1224 'Expected dictionary in volume metadata: {}' 

1225 .format(volume_uuid) 

1226 ) 

1227 

1228 volumes[volume_uuid] = {} 

1229 

1230 return volumes 

1231 

1232 def get_volume_metadata(self, volume_uuid): 

1233 """ 

1234 Get the metadata of a volume. 

1235 :return: Dictionary that contains metadata. 

1236 :rtype: dict 

1237 """ 

1238 

1239 self._ensure_volume_exists(volume_uuid) 

1240 volume_properties = self._get_volume_properties(volume_uuid) 

1241 metadata = volume_properties.get(self.PROP_METADATA) 

1242 if metadata: 

1243 metadata = json.loads(metadata) 

1244 if isinstance(metadata, dict): 

1245 return metadata 

1246 raise LinstorVolumeManagerError( 

1247 'Expected dictionary in volume metadata: {}' 

1248 .format(volume_uuid) 

1249 ) 

1250 return {} 

1251 

1252 def set_volume_metadata(self, volume_uuid, metadata): 

1253 """ 

1254 Set the metadata of a volume. 

1255 :param dict metadata: Dictionary that contains metadata. 

1256 """ 

1257 

1258 self._ensure_volume_exists(volume_uuid) 

1259 self.ensure_volume_is_not_locked(volume_uuid) 

1260 

1261 assert isinstance(metadata, dict) 

1262 volume_properties = self._get_volume_properties(volume_uuid) 

1263 volume_properties[self.PROP_METADATA] = json.dumps(metadata) 

1264 

1265 def update_volume_metadata(self, volume_uuid, metadata): 

1266 """ 

1267 Update the metadata of a volume. It modify only the given keys. 

1268 It doesn't remove unreferenced key instead of set_volume_metadata. 

1269 :param dict metadata: Dictionary that contains metadata. 

1270 """ 

1271 

1272 self._ensure_volume_exists(volume_uuid) 

1273 self.ensure_volume_is_not_locked(volume_uuid) 

1274 

1275 assert isinstance(metadata, dict) 

1276 volume_properties = self._get_volume_properties(volume_uuid) 

1277 

1278 current_metadata = json.loads( 

1279 volume_properties.get(self.PROP_METADATA, '{}') 

1280 ) 

1281 if not isinstance(metadata, dict): 

1282 raise LinstorVolumeManagerError( 

1283 'Expected dictionary in volume metadata: {}' 

1284 .format(volume_uuid) 

1285 ) 

1286 

1287 for key, value in metadata.items(): 

1288 current_metadata[key] = value 

1289 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata) 

1290 

1291 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True): 

1292 """ 

1293 Clone a volume. Do not copy the data, this method creates a new volume 

1294 with the same size. 

1295 :param str volume_uuid: The volume to clone. 

1296 :param str clone_uuid: The cloned volume. 

1297 :param bool persistent: If false the volume will be unavailable 

1298 on the next constructor call LinstorSR(...). 

1299 :return: The current device path of the cloned volume. 

1300 :rtype: str 

1301 """ 

1302 

1303 volume_name = self.get_volume_name(volume_uuid) 

1304 self.ensure_volume_is_not_locked(volume_uuid) 

1305 

1306 # 1. Find ideal nodes + size to use. 

1307 ideal_node_names, size = self._get_volume_node_names_and_size( 

1308 volume_name 

1309 ) 

1310 if size <= 0: 

1311 raise LinstorVolumeManagerError( 

1312 'Invalid size of {} for volume `{}`'.format(size, volume_name) 

1313 ) 

1314 

1315 # 2. Create clone! 

1316 return self.create_volume(clone_uuid, size, persistent) 

1317 

1318 def remove_resourceless_volumes(self): 

1319 """ 

1320 Remove all volumes without valid or non-empty name 

1321 (i.e. without LINSTOR resource). It's different than 

1322 LinstorVolumeManager constructor that takes a `repair` param that 

1323 removes volumes with `PROP_NOT_EXISTS` to 1. 

1324 """ 

1325 

1326 resource_names = self._fetch_resource_names() 

1327 for volume_uuid, volume_name in self.get_volumes_with_name().items(): 

1328 if not volume_name or volume_name not in resource_names: 

1329 # Don't force, we can be sure of what's happening. 

1330 self.destroy_volume(volume_uuid) 

1331 

    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.

        The destruction is staged: preconditions (no remaining volumes) are
        verified first, then the controller is stopped, the database volume
        unmounted and destroyed, leftover DRBD devices removed on every
        host, and finally the resource groups and storage pools are
        deleted. On failure, the controller is restored to its previous
        running state before re-raising.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the databse was not seen by Linstor for some reason.
            # It seems a simple refresh of the Linstor connection make it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    # Best effort per host: a failure on one host must not
                    # abort the cleanup on the others.
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before failing.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory; purely best
        # effort, the SR is already destroyed at this point.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )

1429 

1430 def find_up_to_date_diskful_nodes(self, volume_uuid): 

1431 """ 

1432 Find all nodes that contain a specific volume using diskful disks. 

1433 The disk must be up to data to be used. 

1434 :param str volume_uuid: The volume to use. 

1435 :return: The available nodes. 

1436 :rtype: tuple(set(str), str) 

1437 """ 

1438 

1439 volume_name = self.get_volume_name(volume_uuid) 

1440 

1441 in_use_by = None 

1442 node_names = set() 

1443 

1444 resource_states = filter( 

1445 lambda resource_state: resource_state.name == volume_name, 

1446 self._get_resource_cache().resource_states 

1447 ) 

1448 

1449 for resource_state in resource_states: 

1450 volume_state = resource_state.volume_states[0] 

1451 if volume_state.disk_state == 'UpToDate': 

1452 node_names.add(resource_state.node_name) 

1453 if resource_state.in_use: 

1454 in_use_by = resource_state.node_name 

1455 

1456 return (node_names, in_use_by) 

1457 

1458 def invalidate_resource_cache(self): 

1459 """ 

1460 If resources are impacted by external commands like vhdutil, 

1461 it's necessary to call this function to invalidate current resource 

1462 cache. 

1463 """ 

1464 self._mark_resource_cache_as_dirty() 

1465 

1466 def has_node(self, node_name): 

1467 """ 

1468 Check if a node exists in the LINSTOR database. 

1469 :rtype: bool 

1470 """ 

1471 result = self._linstor.node_list() 

1472 error_str = self._get_error_str(result) 

1473 if error_str: 

1474 raise LinstorVolumeManagerError( 

1475 'Failed to list nodes using `{}`: {}' 

1476 .format(node_name, error_str) 

1477 ) 

1478 return bool(result[0].node(node_name)) 

1479 

1480 def create_node(self, node_name, ip): 

1481 """ 

1482 Create a new node in the LINSTOR database. 

1483 :param str node_name: Node name to use. 

1484 :param str ip: Host IP to communicate. 

1485 """ 

1486 result = self._linstor.node_create( 

1487 node_name, 

1488 linstor.consts.VAL_NODE_TYPE_CMBD, 

1489 ip 

1490 ) 

1491 errors = self._filter_errors(result) 

1492 if errors: 

1493 error_str = self._get_error_str(errors) 

1494 raise LinstorVolumeManagerError( 

1495 'Failed to create node `{}`: {}'.format(node_name, error_str) 

1496 ) 

1497 

1498 def destroy_node(self, node_name): 

1499 """ 

1500 Destroy a node in the LINSTOR database. 

1501 :param str node_name: Node name to remove. 

1502 """ 

1503 result = self._linstor.node_delete(node_name) 

1504 errors = self._filter_errors(result) 

1505 if errors: 

1506 error_str = self._get_error_str(errors) 

1507 raise LinstorVolumeManagerError( 

1508 'Failed to destroy node `{}`: {}'.format(node_name, error_str) 

1509 ) 

1510 

1511 def create_node_interface(self, node_name, name, ip): 

1512 """ 

1513 Create a new node interface in the LINSTOR database. 

1514 :param str node_name: Node name of the interface to use. 

1515 :param str name: Interface to create. 

1516 :param str ip: IP of the interface. 

1517 """ 

1518 result = self._linstor.netinterface_create(node_name, name, ip) 

1519 errors = self._filter_errors(result) 

1520 if errors: 

1521 error_str = self._get_error_str(errors) 

1522 raise LinstorVolumeManagerError( 

1523 'Failed to create node interface on `{}`: {}'.format(node_name, error_str) 

1524 ) 

1525 

1526 def destroy_node_interface(self, node_name, name): 

1527 """ 

1528 Destroy a node interface in the LINSTOR database. 

1529 :param str node_name: Node name of the interface to remove. 

1530 :param str name: Interface to remove. 

1531 """ 

1532 

1533 if name == 'default': 

1534 raise LinstorVolumeManagerError( 

1535 'Unable to delete the default interface of a node!' 

1536 ) 

1537 

1538 result = self._linstor.netinterface_delete(node_name, name) 

1539 errors = self._filter_errors(result) 

1540 if errors: 

1541 error_str = self._get_error_str(errors) 

1542 raise LinstorVolumeManagerError( 

1543 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str) 

1544 ) 

1545 

1546 def modify_node_interface(self, node_name, name, ip): 

1547 """ 

1548 Modify a node interface in the LINSTOR database. Create it if necessary. 

1549 :param str node_name: Node name of the interface to use. 

1550 :param str name: Interface to modify or create. 

1551 :param str ip: IP of the interface. 

1552 """ 

1553 result = self._linstor.netinterface_create(node_name, name, ip) 

1554 errors = self._filter_errors(result) 

1555 if not errors: 

1556 return 

1557 

1558 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]): 

1559 result = self._linstor.netinterface_modify(node_name, name, ip) 

1560 errors = self._filter_errors(result) 

1561 if not errors: 

1562 return 

1563 

1564 error_str = self._get_error_str(errors) 

1565 raise LinstorVolumeManagerError( 

1566 'Unable to modify interface on `{}`: {}'.format(node_name, error_str) 

1567 ) 

1568 

1569 def list_node_interfaces(self, node_name): 

1570 """ 

1571 List all node interfaces. 

1572 :param str node_name: Node name to use to list interfaces. 

1573 :rtype: list 

1574 : 

1575 """ 

1576 result = self._linstor.net_interface_list(node_name) 

1577 if not result: 

1578 raise LinstorVolumeManagerError( 

1579 'Unable to list interfaces on `{}`: no list received'.format(node_name) 

1580 ) 

1581 

1582 interfaces = {} 

1583 for interface in result: 

1584 interface = interface._rest_data 

1585 interfaces[interface['name']] = { 

1586 'address': interface['address'], 

1587 'active': interface['is_active'] 

1588 } 

1589 return interfaces 

1590 

1591 def get_node_preferred_interface(self, node_name): 

1592 """ 

1593 Get the preferred interface used by a node. 

1594 :param str node_name: Node name of the interface to get. 

1595 :rtype: str 

1596 """ 

1597 try: 

1598 nodes = self._linstor.node_list_raise([node_name]).nodes 

1599 if nodes: 

1600 properties = nodes[0].props 

1601 return properties.get('PrefNic', 'default') 

1602 return nodes 

1603 except Exception as e: 

1604 raise LinstorVolumeManagerError( 

1605 'Failed to get preferred interface: `{}`'.format(e) 

1606 ) 

1607 

1608 def set_node_preferred_interface(self, node_name, name): 

1609 """ 

1610 Set the preferred interface to use on a node. 

1611 :param str node_name: Node name of the interface. 

1612 :param str name: Preferred interface to use. 

1613 """ 

1614 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name}) 

1615 errors = self._filter_errors(result) 

1616 if errors: 

1617 error_str = self._get_error_str(errors) 

1618 raise LinstorVolumeManagerError( 

1619 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str) 

1620 ) 

1621 

1622 def get_nodes_info(self): 

1623 """ 

1624 Get all nodes + statuses, used or not by the pool. 

1625 :rtype: dict(str, dict) 

1626 """ 

1627 try: 

1628 nodes = {} 

1629 for node in self._linstor.node_list_raise().nodes: 

1630 nodes[node.name] = node.connection_status 

1631 return nodes 

1632 except Exception as e: 

1633 raise LinstorVolumeManagerError( 

1634 'Failed to get all nodes: `{}`'.format(e) 

1635 ) 

1636 

1637 def get_storage_pools_info(self): 

1638 """ 

1639 Give all storage pools of current group name. 

1640 :rtype: dict(str, list) 

1641 """ 

1642 storage_pools = {} 

1643 for pool in self._get_storage_pools(force=True): 

1644 if pool.node_name not in storage_pools: 

1645 storage_pools[pool.node_name] = [] 

1646 

1647 size = -1 

1648 capacity = -1 

1649 

1650 space = pool.free_space 

1651 if space: 

1652 size = space.free_capacity 

1653 if size < 0: 

1654 size = -1 

1655 else: 

1656 size *= 1024 

1657 capacity = space.total_capacity 

1658 if capacity <= 0: 

1659 capacity = -1 

1660 else: 

1661 capacity *= 1024 

1662 

1663 storage_pools[pool.node_name].append({ 

1664 'name': pool.name, 

1665 'linstor-uuid': pool.uuid, 

1666 'free-size': size, 

1667 'capacity': capacity 

1668 }) 

1669 

1670 return storage_pools 

1671 

    def get_resources_info(self):
        """
        Give all resources of current group name.

        The result is cached; the cache is reused until the resource cache
        is marked dirty (see invalidate_resource_cache).

        :return: Resource name -> {'nodes': {node: {...}}, 'uuid': str}.
        :rtype: dict(str, dict)
        """
        if self._resources_info_cache and not self._resource_cache_dirty:
            return self._resources_info_cache

        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()

        # Pass 1: build the per-node layout of every resource.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Sizes are scaled by 1024; negative (unknown) values are
                # normalized to -1.
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: merge runtime state (in-use, per-volume disk state).
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: attach the driver-level volume uuid to each resource.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        self._resources_info_cache = resources
        return self._resources_info_cache

1738 

1739 def get_resource_info(self, volume_uuid: str) -> Dict[str, Any]: 

1740 """ 

1741 Give a resource info based on its UUID. 

1742 :param volume_uuid str: volume uuid to search for 

1743 :rtype: dict(str, any) 

1744 """ 

1745 for volume in self.get_resources_info().values(): 

1746 if volume["uuid"] == volume_uuid: 

1747 return volume 

1748 

1749 raise LinstorVolumeManagerError( 

1750 f"Could not find info about volume `{volume_uuid}`", 

1751 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS 

1752 ) 

1753 

1754 def get_database_path(self): 

1755 """ 

1756 Get the database path. 

1757 :return: The current database path. 

1758 :rtype: str 

1759 """ 

1760 return self._request_database_path(self._linstor, activate=True) 

1761 

1762 @classmethod 

1763 def get_all_group_names(cls, base_name): 

1764 """ 

1765 Get all group names. I.e. list of current group + HA. 

1766 :param str base_name: The SR group_name to use. 

1767 :return: List of group names. 

1768 :rtype: list 

1769 """ 

1770 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)] 

1771 

    @classmethod
    def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param set(str) ips: Node ips.
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        try:
            # The controller must be up for _create_sr to talk to LINSTOR.
            cls._start_controller(start=True)
            sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        return sr

1799 

1800 @classmethod 

1801 def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__): 

1802 # 1. Check if SR already exists. 

1803 uri = 'linstor://localhost' 

1804 

1805 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1806 

1807 node_names = list(ips.keys()) 

1808 for node_name, ip in ips.items(): 

1809 while True: 

1810 # Try to create node. 

1811 result = lin.node_create( 

1812 node_name, 

1813 linstor.consts.VAL_NODE_TYPE_CMBD, 

1814 ip 

1815 ) 

1816 

1817 errors = cls._filter_errors(result) 

1818 if cls._check_errors( 

1819 errors, [linstor.consts.FAIL_EXISTS_NODE] 

1820 ): 

1821 # If it already exists, remove, then recreate. 

1822 result = lin.node_delete(node_name) 

1823 error_str = cls._get_error_str(result) 

1824 if error_str: 

1825 raise LinstorVolumeManagerError( 

1826 'Failed to remove old node `{}`: {}' 

1827 .format(node_name, error_str) 

1828 ) 

1829 elif not errors: 

1830 break # Created! 

1831 else: 

1832 raise LinstorVolumeManagerError( 

1833 'Failed to create node `{}` with ip `{}`: {}'.format( 

1834 node_name, ip, cls._get_error_str(errors) 

1835 ) 

1836 ) 

1837 

1838 driver_pool_name = group_name 

1839 base_group_name = group_name 

1840 group_name = cls._build_group_name(group_name) 

1841 storage_pool_name = group_name 

1842 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools 

1843 if pools: 

1844 existing_node_names = [pool.node_name for pool in pools] 

1845 raise LinstorVolumeManagerError( 

1846 'Unable to create SR `{}`. It already exists on node(s): {}' 

1847 .format(group_name, existing_node_names) 

1848 ) 

1849 

1850 if lin.resource_group_list_raise( 

1851 cls.get_all_group_names(base_group_name) 

1852 ).resource_groups: 

1853 if not lin.resource_dfn_list_raise().resource_definitions: 

1854 backup_path = cls._create_database_backup_path() 

1855 logger( 

1856 'Group name already exists `{}` without LVs. ' 

1857 'Ignoring and moving the config files in {}'.format(group_name, backup_path) 

1858 ) 

1859 cls._move_files(DATABASE_PATH, backup_path) 

1860 else: 

1861 raise LinstorVolumeManagerError( 

1862 'Unable to create SR `{}`: The group name already exists' 

1863 .format(group_name) 

1864 ) 

1865 

1866 if thin_provisioning: 

1867 driver_pool_parts = driver_pool_name.split('/') 

1868 if not len(driver_pool_parts) == 2: 

1869 raise LinstorVolumeManagerError( 

1870 'Invalid group name using thin provisioning. ' 

1871 'Expected format: \'VG/LV`\'' 

1872 ) 

1873 

1874 # 2. Create storage pool on each node + resource group. 

1875 reg_volume_group_not_found = re.compile( 

1876 ".*Volume group '.*' not found$" 

1877 ) 

1878 

1879 i = 0 

1880 try: 

1881 # 2.a. Create storage pools. 

1882 storage_pool_count = 0 

1883 while i < len(node_names): 

1884 node_name = node_names[i] 

1885 

1886 result = lin.storage_pool_create( 

1887 node_name=node_name, 

1888 storage_pool_name=storage_pool_name, 

1889 storage_driver='LVM_THIN' if thin_provisioning else 'LVM', 

1890 driver_pool_name=driver_pool_name 

1891 ) 

1892 

1893 errors = linstor.Linstor.filter_api_call_response_errors( 

1894 result 

1895 ) 

1896 if errors: 

1897 if len(errors) == 1 and errors[0].is_error( 

1898 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR 

1899 ) and reg_volume_group_not_found.match(errors[0].message): 

1900 logger( 

1901 'Volume group `{}` not found on `{}`. Ignoring...' 

1902 .format(group_name, node_name) 

1903 ) 

1904 cls._destroy_storage_pool(lin, storage_pool_name, node_name) 

1905 else: 

1906 error_str = cls._get_error_str(result) 

1907 raise LinstorVolumeManagerError( 

1908 'Could not create SP `{}` on node `{}`: {}' 

1909 .format(group_name, node_name, error_str) 

1910 ) 

1911 else: 

1912 storage_pool_count += 1 

1913 i += 1 

1914 

1915 if not storage_pool_count: 

1916 raise LinstorVolumeManagerError( 

1917 'Unable to create SR `{}`: No VG group found'.format( 

1918 group_name, 

1919 ) 

1920 ) 

1921 

1922 # 2.b. Create resource groups. 

1923 ha_group_name = cls._build_ha_group_name(base_group_name) 

1924 cls._create_resource_group( 

1925 lin, 

1926 group_name, 

1927 storage_pool_name, 

1928 redundancy, 

1929 True 

1930 ) 

1931 cls._create_resource_group( 

1932 lin, 

1933 ha_group_name, 

1934 storage_pool_name, 

1935 3, 

1936 True 

1937 ) 

1938 

1939 # 3. Create the LINSTOR database volume and mount it. 

1940 try: 

1941 logger('Creating database volume...') 

1942 volume_path = cls._create_database_volume( 

1943 lin, ha_group_name, storage_pool_name, node_names, redundancy 

1944 ) 

1945 except LinstorVolumeManagerError as e: 

1946 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS: 

1947 logger('Destroying database volume after creation fail...') 

1948 cls._force_destroy_database_volume(lin, group_name) 

1949 raise 

1950 

1951 try: 

1952 logger('Mounting database volume...') 

1953 

1954 # First we must disable the controller to move safely the 

1955 # LINSTOR config. 

1956 cls._start_controller(start=False) 

1957 

1958 cls._mount_database_volume(volume_path) 

1959 except Exception as e: 

1960 # Ensure we are connected because controller has been 

1961 # restarted during mount call. 

1962 logger('Destroying database volume after mount fail...') 

1963 

1964 try: 

1965 cls._start_controller(start=True) 

1966 except Exception: 

1967 pass 

1968 

1969 lin = cls._create_linstor_instance( 

1970 uri, keep_uri_unmodified=True 

1971 ) 

1972 cls._force_destroy_database_volume(lin, group_name) 

1973 raise e 

1974 

1975 cls._start_controller(start=True) 

1976 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True) 

1977 

1978 # 4. Remove storage pools/resource/volume group in the case of errors. 

1979 except Exception as e: 

1980 logger('Destroying resource group and storage pools after fail...') 

1981 try: 

1982 cls._destroy_resource_group(lin, group_name) 

1983 cls._destroy_resource_group(lin, ha_group_name) 

1984 except Exception as e2: 

1985 logger('Failed to destroy resource group: {}'.format(e2)) 

1986 pass 

1987 j = 0 

1988 i = min(i, len(node_names) - 1) 

1989 while j <= i: 

1990 try: 

1991 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j]) 

1992 except Exception as e2: 

1993 logger('Failed to destroy resource group: {}'.format(e2)) 

1994 pass 

1995 j += 1 

1996 raise e 

1997 

1998 # 5. Return new instance. 

1999 instance = cls.__new__(cls) 

2000 instance._linstor = lin 

2001 instance._logger = logger 

2002 instance._redundancy = redundancy 

2003 instance._base_group_name = base_group_name 

2004 instance._group_name = group_name 

2005 instance._volumes = set() 

2006 instance._storage_pools_time = 0 

2007 instance._kv_cache = instance._create_kv_cache() 

2008 instance._resource_cache = None 

2009 instance._resource_cache_dirty = True 

2010 instance._volume_info_cache = None 

2011 instance._volume_info_cache_dirty = True 

2012 return instance 

2013 

@classmethod
def build_device_path(cls, volume_name):
    """
    Compute the DRBD device path associated with a volume name.
    :param str volume_name: The volume name to use.
    :return: A valid or not device path.
    :rtype: str
    """
    # Volume 0 is the only volume used per resource in this manager.
    return cls.DEV_ROOT_PATH + volume_name + '/0'

2024 

@classmethod
def build_volume_name(cls, base_name):
    """
    Compute the LINSTOR volume name for a base name (i.e. a UUID).
    :param str base_name: The volume name to use.
    :return: A valid or not device path.
    :rtype: str
    """
    return cls.PREFIX_VOLUME + base_name

2034 

@classmethod
def round_up_volume_size(cls, volume_size):
    """
    Align volume size on higher multiple of BLOCK_SIZE.
    :param int volume_size: The volume size to align.
    :return: An aligned volume size.
    :rtype: int
    """
    # Delegates to the module-level `round_up` helper.
    return round_up(volume_size, cls.BLOCK_SIZE)

2044 

@classmethod
def round_down_volume_size(cls, volume_size):
    """
    Align volume size on lower multiple of BLOCK_SIZE.
    :param int volume_size: The volume size to align.
    :return: An aligned volume size.
    :rtype: int
    """
    # Delegates to the module-level `round_down` helper.
    return round_down(volume_size, cls.BLOCK_SIZE)

2054 

2055 # -------------------------------------------------------------------------- 

2056 # Private helpers. 

2057 # -------------------------------------------------------------------------- 

2058 

def _create_kv_cache(self):
    """Fetch a fresh KV store rooted at '/', cache it and return it."""
    cache = self._create_linstor_kv('/')
    self._kv_cache = cache
    self._kv_cache_dirty = False
    return cache

2063 

def _get_kv_cache(self):
    """Return the cached KV store, rebuilding it first when marked dirty."""
    if self._kv_cache_dirty:
        # `_create_kv_cache` refreshes `self._kv_cache` and clears the flag.
        return self._create_kv_cache()
    return self._kv_cache

2068 

def _create_resource_cache(self):
    """Fetch the resource list from LINSTOR, cache it and return it."""
    cache = self._linstor.resource_list_raise()
    self._resource_cache = cache
    self._resource_cache_dirty = False
    return cache

2073 

def _get_resource_cache(self):
    """Return the cached resource list, rebuilding it first when dirty."""
    if self._resource_cache_dirty:
        # `_create_resource_cache` refreshes the cache and clears the flag.
        return self._create_resource_cache()
    return self._resource_cache

2078 

def _mark_resource_cache_as_dirty(self):
    # Invalidate both caches together: the volume info cache is derived
    # from the resource list, so it can never outlive it.
    self._resource_cache_dirty = True
    self._volume_info_cache_dirty = True

2082 

2083 # -------------------------------------------------------------------------- 

2084 

def _ensure_volume_exists(self, volume_uuid):
    """Raise ERR_VOLUME_NOT_EXISTS when `volume_uuid` is not a known volume."""
    if volume_uuid in self._volumes:
        return
    raise LinstorVolumeManagerError(
        'volume `{}` doesn\'t exist'.format(volume_uuid),
        LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
    )

2091 

def _find_best_size_candidates(self):
    """
    Query LINSTOR for the max-volume-size candidates of this resource
    group. Raises LinstorVolumeManagerError on API error.
    """
    result = self._linstor.resource_group_qmvs(self._group_name)
    failure = self._get_error_str(result)
    if failure:
        raise LinstorVolumeManagerError(
            'Failed to get max volume size allowed of SR `{}`: {}'.format(
                self._group_name,
                failure
            )
        )
    return result[0].candidates

2103 

def _fetch_resource_names(self, ignore_deleted=True):
    """
    Collect the names of all resource definitions belonging to this SR's
    resource groups.
    :param bool ignore_deleted: When True the DELETE flag is not checked,
    so definitions pending deletion are included as well.
    :rtype: set
    """
    group_names = self.get_all_group_names(self._base_group_name)
    names = set()
    for dfn in self._linstor.resource_dfn_list_raise().resource_definitions:
        if dfn.resource_group_name not in group_names:
            continue
        if ignore_deleted or linstor.consts.FLAG_DELETE not in dfn.flags:
            names.add(dfn.name)
    return names

2114 

def _get_volumes_info(self, volume_names=None):
    """
    Build (or return from cache) a map of resource name -> VolumeInfo for
    every registered volume, aggregating sizes over all replicas.
    :param set volume_names: Optional set of resource names registered in
    the KV store; fetched when not provided.
    :return: Dict of resource name to VolumeInfo, sizes in bytes.
    :raises LinstorVolumeManagerError: When a size cannot be determined.
    """
    all_volume_info = {}

    if not self._volume_info_cache_dirty:
        return self._volume_info_cache

    # `volume_names` MUST contain all volumes registered in the KV store.
    # It can be provided to the function to avoid double fetching.
    if not volume_names:
        volume_names = self.get_volumes_with_name()
        volume_names = set(volume_names.values())

    def process_resource(resource):
        # Lazily create one VolumeInfo per resource name.
        if resource.name not in all_volume_info:
            current = all_volume_info[resource.name] = self.VolumeInfo(
                resource.name
            )
        else:
            current = all_volume_info[resource.name]

        if linstor.consts.FLAG_DISKLESS not in resource.flags:
            current.diskful.append(resource.node_name)

        for volume in resource.volumes:
            # We ignore diskless pools of the form "DfltDisklessStorPool".
            if volume.storage_pool_name != self._group_name:
                continue
            # Only fetch first volume.
            if volume.number != 0:
                continue

            # Keep the largest allocation among replicas...
            allocated_size = volume.allocated_size
            if allocated_size > current.allocated_size:
                current.allocated_size = allocated_size

            # ...and the smallest strictly-positive usable size.
            usable_size = volume.usable_size
            if usable_size > 0 and (
                usable_size < current.virtual_size or
                not current.virtual_size
            ):
                current.virtual_size = usable_size

    try:
        for resource in self._get_resource_cache().resources:
            if resource.name in volume_names:
                process_resource(resource)

        for volume in all_volume_info.values():
            if volume.allocated_size <= 0:
                # BUGFIX: previously formatted with `resource.name` (the
                # stale outer-loop variable), naming the wrong volume.
                raise LinstorVolumeManagerError(
                    'Failed to get allocated size of `{}`'.format(volume.name)
                )

            if volume.virtual_size <= 0:
                raise LinstorVolumeManagerError(
                    'Failed to get usable size of `{}`'.format(volume.name)
                )

            # LINSTOR reports sizes in KiB; convert to bytes.
            volume.allocated_size *= 1024
            volume.virtual_size *= 1024
    except LinstorVolumeManagerError:
        # Sizes may be missing because the cache is stale; force a refetch
        # on the next call.
        self._mark_resource_cache_as_dirty()
        raise

    self._volume_info_cache_dirty = False
    self._volume_info_cache = all_volume_info

    return all_volume_info

2178 

def _get_volume_node_names_and_size(self, volume_name):
    """
    Find the nodes hosting a resource and its usable size.
    :param str volume_name: The resource name to inspect.
    :return: Tuple of (set of node names, size in bytes). The size is the
    minimum strictly-positive usable size among replicas.
    :raises LinstorVolumeManagerError: When no usable size can be found.
    """
    node_names = set()
    size = -1
    for resource in self._linstor.resource_list_raise(
        filter_by_resources=[volume_name]
    ).resources:
        for volume in resource.volumes:
            # We ignore diskless pools of the form "DfltDisklessStorPool".
            if volume.storage_pool_name != self._group_name:
                continue

            node_names.add(resource.node_name)

            usable_size = volume.usable_size
            if usable_size <= 0:
                continue

            if size < 0:
                size = usable_size
            else:
                size = min(size, usable_size)

    if size <= 0:
        # BUGFIX: this error previously used `resource.name`, which is a
        # NameError when the resource list is empty and otherwise names
        # only the last visited resource; use the requested name.
        raise LinstorVolumeManagerError(
            'Failed to get usable size of `{}`'.format(volume_name)
        )

    # LINSTOR reports sizes in KiB; convert to bytes.
    return (node_names, size * 1024)

2205 

def _compute_size(self, attr):
    """
    Sum a free-space attribute (e.g. 'total_capacity') over all storage
    pools of this SR and return the result in bytes.
    :param str attr: Attribute name read on each pool's `free_space`.
    :rtype: int
    """
    total = 0
    for pool in self._get_storage_pools(force=True):
        space = pool.free_space
        if not space:
            continue
        value = getattr(space, attr)
        if value < 0:
            raise LinstorVolumeManagerError(
                'Failed to get pool {} attr of `{}`'
                .format(attr, pool.node_name)
            )
        total += value
    # LINSTOR reports sizes in KiB; convert to bytes.
    return total * 1024

2219 

def _get_node_names(self):
    """Return the set of node names owning a storage pool of this SR."""
    return {pool.node_name for pool in self._get_storage_pools()}

2225 

def _get_storage_pools(self, force=False):
    """
    Return the storage pools of this SR, refreshed from LINSTOR at most
    once per STORAGE_POOLS_FETCH_INTERVAL unless `force` is set.
    """
    now = time.time()
    expired = now - self._storage_pools_time >= \
        self.STORAGE_POOLS_FETCH_INTERVAL

    if force or expired:
        self._storage_pools = self._linstor.storage_pool_list_raise(
            filter_by_stor_pools=[self._group_name]
        ).storage_pools
        self._storage_pools_time = time.time()

    return self._storage_pools

2237 

def _create_volume(
    self,
    volume_uuid,
    volume_name,
    size,
    place_resources,
    high_availability
):
    """
    Create a LINSTOR resource definition for a volume and, optionally,
    auto-place its replicas. Retried up to 5 times as a whole.
    :param str volume_uuid: UUID used in error reporting/cleanup.
    :param str volume_name: LINSTOR resource definition name.
    :param int size: Requested size in bytes (rounded up to BLOCK_SIZE).
    :param bool place_resources: When True, auto-place replicas using the
    SR redundancy.
    :param bool high_availability: When True, spawn from the HA resource
    group instead of the default one.
    """
    size = self.round_up_volume_size(size)
    # Any creation invalidates the resource/volume-info caches.
    self._mark_resource_cache_as_dirty()

    group_name = self._ha_group_name if high_availability else self._group_name

    def create_definition():
        # Spawn the definition only; placement is done separately below.
        # On the first attempt, a missing HA group is created on the fly
        # and the spawn is retried once.
        first_attempt = True
        while True:
            try:
                self._check_volume_creation_errors(
                    self._linstor.resource_group_spawn(
                        rsc_grp_name=group_name,
                        rsc_dfn_name=volume_name,
                        vlm_sizes=['{}B'.format(size)],
                        definitions_only=True
                    ),
                    volume_uuid,
                    self._group_name
                )
                break
            except LinstorVolumeManagerError as e:
                if (
                    not first_attempt or
                    not high_availability or
                    e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                ):
                    raise

                first_attempt = False
                # Recreate the missing HA group (3 replicas) then retry.
                self._create_resource_group(
                    self._linstor,
                    group_name,
                    self._group_name,
                    3,
                    True
                )

        self._configure_volume_peer_slots(self._linstor, volume_name)

    def clean():
        # Best-effort cleanup after a failed creation; never raises.
        try:
            self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
        except Exception as e:
            self._logger(
                'Unable to destroy volume {} after creation fail: {}'
                .format(volume_uuid, e)
            )

    def create():
        try:
            create_definition()
            if place_resources:
                # Basic case when we use the default redundancy of the group.
                self._check_volume_creation_errors(
                    self._linstor.resource_auto_place(
                        rsc_name=volume_name,
                        place_count=self._redundancy,
                        diskless_on_remaining=False
                    ),
                    volume_uuid,
                    self._group_name
                )
        except LinstorVolumeManagerError as e:
            # Do not clean up when the volume already exists: it is not
            # ours to destroy.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                clean()
            raise
        except Exception:
            clean()
            raise

    util.retry(create, maxretry=5)

2316 

def _create_volume_with_properties(
    self,
    volume_uuid,
    volume_name,
    size,
    place_resources,
    high_availability
):
    """
    Create a volume after guarding against UUID/name collisions, and
    register its KV properties (state=CREATING, name) before creation.
    :return: The volume's KV properties namespace on success.
    :raises LinstorVolumeManagerError: On collision or creation failure.
    """
    if self.check_volume_exists(volume_uuid):
        raise LinstorVolumeManagerError(
            'Could not create volume `{}` from SR `{}`, it already exists'
            .format(volume_uuid, self._group_name) + ' in properties',
            LinstorVolumeManagerError.ERR_VOLUME_EXISTS
        )

    if volume_name in self._fetch_resource_names():
        raise LinstorVolumeManagerError(
            'Could not create volume `{}` from SR `{}`, '.format(
                volume_uuid, self._group_name
            ) + 'resource of the same name already exists in LINSTOR'
        )

    # I am paranoid.
    volume_properties = self._get_volume_properties(volume_uuid)
    if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
        raise LinstorVolumeManagerError(
            'Could not create volume `{}`, '.format(volume_uuid) +
            'properties already exist'
        )

    try:
        # Mark the volume as being created before touching LINSTOR, so a
        # crash leaves a detectable half-created state.
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
        volume_properties[self.PROP_VOLUME_NAME] = volume_name

        self._create_volume(
            volume_uuid,
            volume_name,
            size,
            place_resources,
            high_availability
        )

        assert volume_properties.namespace == \
            self._build_volume_namespace(volume_uuid)
        return volume_properties
    except LinstorVolumeManagerError as e:
        # Do not destroy existing resource!
        # In theory we can't get this error because we check this event
        # before the `self._create_volume` case.
        # It can only happen if the same volume uuid is used in the same
        # call in another host.
        if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
            self._destroy_volume(volume_uuid, force=True)
        raise

2371 

def _find_device_path(self, volume_uuid, volume_name):
    """
    Activate a volume locally and return its /dev/drbd/by-res path after
    verifying it resolves to the device LINSTOR reported.
    :raises LinstorVolumeManagerError: On path mismatch.
    """
    active_path = self._request_device_path(
        volume_uuid, volume_name, activate=True
    )

    # We use realpath here to get the /dev/drbd<id> path instead of
    # /dev/drbd/by-res/<resource_name>.
    expected_path = self.build_device_path(volume_name)
    util.wait_for_path(expected_path, 5)

    real_path = os.path.realpath(expected_path)
    if active_path == real_path:
        return expected_path

    raise LinstorVolumeManagerError(
        'Invalid path, current={}, expected={} (realpath={})'
        .format(
            active_path,
            expected_path,
            real_path
        )
    )

2393 

def _request_device_path(self, volume_uuid, volume_name, activate=False):
    """
    Return the local /dev/drbd<id> device path of a resource.
    :param str volume_uuid: UUID used for error reporting.
    :param str volume_name: LINSTOR resource name to look up.
    :param bool activate: When True and no local resource exists, create a
    diskless one on this host, then retry (without re-activating).
    :raises LinstorVolumeManagerError: If the resource is absent and
    cannot/may not be activated, or if its device path is empty.
    """
    node_name = socket.gethostname()

    # Look for a replica of this resource on the local node.
    resource = next(filter(
        lambda resource: resource.node_name == node_name and
        resource.name == volume_name,
        self._get_resource_cache().resources
    ), None)

    if not resource:
        if activate:
            # The cache no longer reflects reality once we add a resource.
            self._mark_resource_cache_as_dirty()
            self._activate_device_path(
                self._linstor, node_name, volume_name
            )
            # Retry with activate=False to avoid an activation loop.
            return self._request_device_path(volume_uuid, volume_name)
        raise LinstorVolumeManagerError(
            'Unable to get dev path for `{}`, no resource found but definition "seems" to exist'
            .format(volume_uuid)
        )

    # Contains a path of the /dev/drbd<id> form.
    device_path = resource.volumes[0].device_path
    if not device_path:
        raise LinstorVolumeManagerError('Empty dev path for `{}`!'.format(volume_uuid))
    return device_path

2420 

def _destroy_resource(self, resource_name, force=False):
    """
    Delete a LINSTOR resource definition.
    :param str resource_name: Resource definition to delete.
    :param bool force: When True and the delete fails, check openers,
    demote a possibly-stuck primary and retry once (without force).
    :raises LinstorVolumeManagerError: If deletion fails, or (with force)
    if the resource is still opened on some node.
    """
    result = self._linstor.resource_dfn_delete(resource_name)
    error_str = self._get_error_str(result)
    if not error_str:
        self._mark_resource_cache_as_dirty()
        return

    if not force:
        self._mark_resource_cache_as_dirty()
        raise LinstorVolumeManagerError(
            'Could not destroy resource `{}` from SR `{}`: {}'
            .format(resource_name, self._group_name, error_str)
        )

    # If force is used, ensure there is no opener.
    all_openers = get_all_volume_openers(resource_name, '0')
    for openers in all_openers.values():
        if openers:
            self._mark_resource_cache_as_dirty()
            raise LinstorVolumeManagerError(
                'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
                .format(resource_name, self._group_name, error_str, all_openers)
            )

    # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
    resource_states = filter(
        lambda resource_state: resource_state.name == resource_name,
        self._get_resource_cache().resource_states
    )

    # Mark only after computation of states.
    self._mark_resource_cache_as_dirty()

    for resource_state in resource_states:
        volume_state = resource_state.volume_states[0]
        if resource_state.in_use:
            # Demote the stuck primary so deletion can proceed.
            demote_drbd_resource(resource_state.node_name, resource_name)
            break
    # Retry without force: a second failure is fatal this time.
    self._destroy_resource(resource_name)

2460 

def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
    """
    Destroy the LINSTOR resource backing a volume and, unless
    `preserve_properties` is set, wipe its KV properties.
    :param str volume_uuid: Volume to destroy.
    :param bool force: Forwarded to `_destroy_resource`.
    :param bool preserve_properties: Keep KV properties (used when a
    caller still needs them for cleanup).
    :raises LinstorVolumeManagerError: Wrapping any underlying failure.
    """
    volume_properties = self._get_volume_properties(volume_uuid)
    try:
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
        # The resource may never have been created; only delete it if it
        # actually exists in LINSTOR.
        if volume_name in self._fetch_resource_names():
            self._destroy_resource(volume_name, force)

        # Assume this call is atomic.
        if not preserve_properties:
            volume_properties.clear()
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
        )

2475 

def _build_volumes(self, repair):
    """
    (Re)build `self._volumes` from the KV store, optionally repairing
    inconsistent entries.
    :param bool repair: When True (master only), remove half-created
    volumes and finalize pending UUID renames; when False, only list
    valid volumes and fail if a rename is in flight.
    :raises LinstorVolumeManagerError: If repair is required but disabled.
    """
    properties = self._kv_cache
    resource_names = self._fetch_resource_names()

    self._volumes = set()

    # Volumes with PROP_UPDATING_UUID_SRC set are mid-rename.
    updating_uuid_volumes = self._get_volumes_by_property(
        self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
    )
    if updating_uuid_volumes and not repair:
        raise LinstorVolumeManagerError(
            'Cannot build LINSTOR volume list: '
            'It exists invalid "updating uuid volumes", repair is required'
        )

    existing_volumes = self._get_volumes_by_property(
        self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
    )
    for volume_uuid, not_exists in existing_volumes.items():
        properties.namespace = self._build_volume_namespace(volume_uuid)

        # Rename destinations are handled in the second loop below.
        src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
        if src_uuid:
            self._logger(
                'Ignoring volume during manager initialization with prop '
                ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                .format(
                    volume_uuid,
                    self._get_filtered_properties(properties)
                )
            )
            continue

        # Insert volume in list if the volume exists. Or if the volume
        # is being created and a slave wants to use it (repair = False).
        #
        # If we are on the master and if repair is True and state is
        # Creating, it's probably a bug or crash: the creation process has
        # been stopped.
        if not_exists == self.STATE_EXISTS or (
            not repair and not_exists == self.STATE_CREATING
        ):
            self._volumes.add(volume_uuid)
            continue

        if not repair:
            self._logger(
                'Ignoring bad volume during manager initialization: {} '
                '(properties={})'.format(
                    volume_uuid,
                    self._get_filtered_properties(properties)
                )
            )
            continue

        # Remove bad volume.
        try:
            self._logger(
                'Removing bad volume during manager initialization: {} '
                '(properties={})'.format(
                    volume_uuid,
                    self._get_filtered_properties(properties)
                )
            )
            volume_name = properties.get(self.PROP_VOLUME_NAME)

            # Little optimization, don't call `self._destroy_volume`,
            # we already have resource name list.
            if volume_name in resource_names:
                self._destroy_resource(volume_name, force=True)

            # Assume this call is atomic.
            properties.clear()
        except Exception as e:
            # Do not raise, we don't want to block user action.
            self._logger(
                'Cannot clean volume {}: {}'.format(volume_uuid, e)
            )

            # The volume can't be removed, maybe it's still in use,
            # in this case rename it with the "DELETED_" prefix.
            # This prefix is mandatory if it exists a snap transaction to
            # rollback because the original VDI UUID can try to be renamed
            # with the UUID we are trying to delete...
            if not volume_uuid.startswith('DELETED_'):
                self.update_volume_uuid(
                    volume_uuid, 'DELETED_' + volume_uuid, force=True
                )

    # Finalize (or roll back) pending UUID renames.
    for dest_uuid, src_uuid in updating_uuid_volumes.items():
        dest_namespace = self._build_volume_namespace(dest_uuid)

        properties.namespace = dest_namespace
        # Destination was never completed: drop it entirely.
        if int(properties.get(self.PROP_NOT_EXISTS)):
            properties.clear()
            continue

        # Destination is valid: remove the source and the rename marker.
        properties.namespace = self._build_volume_namespace(src_uuid)
        properties.clear()

        properties.namespace = dest_namespace
        properties.pop(self.PROP_UPDATING_UUID_SRC)

        if src_uuid in self._volumes:
            self._volumes.remove(src_uuid)
        self._volumes.add(dest_uuid)

2582 

def _get_sr_properties(self):
    """Return a KV store scoped to the SR-level namespace."""
    return self._create_linstor_kv(self._build_sr_namespace())

2585 

def _get_volumes_by_property(
    self, reg_prop, ignore_inexisting_volumes=True
):
    """
    Map volume UUID -> value for every KV key matching `reg_prop`.
    Every known volume is pre-seeded with '' so absent properties are
    still represented.
    :param reg_prop: Compiled regex whose first group captures the UUID.
    :param bool ignore_inexisting_volumes: Skip matches whose UUID is not
    a known volume.
    :rtype: dict
    """
    base_properties = self._get_kv_cache()
    base_properties.namespace = self._build_volume_namespace()

    matches = {vol_uuid: '' for vol_uuid in self._volumes}

    for key, value in base_properties.items():
        found = reg_prop.match(key)
        if not found:
            continue
        vol_uuid = found.groups()[0]
        if not ignore_inexisting_volumes or vol_uuid in self._volumes:
            matches[vol_uuid] = value

    return matches

2605 

def _create_linstor_kv(self, namespace):
    """Open the LINSTOR KV store of this SR scoped to `namespace`."""
    return linstor.KV(
        self._group_name,
        uri=self._linstor.controller_host(),
        namespace=namespace
    )

2612 

def _get_volume_properties(self, volume_uuid):
    """Return the cached KV store scoped to one volume's namespace."""
    properties = self._get_kv_cache()
    properties.namespace = self._build_volume_namespace(volume_uuid)
    return properties

2617 

@classmethod
def _build_sr_namespace(cls):
    """Return the KV namespace path for SR-level properties."""
    return '/{}/'.format(cls.NAMESPACE_SR)

2621 

@classmethod
def _build_volume_namespace(cls, volume_uuid=None):
    """
    Return the KV namespace path of one volume, or of all volumes when
    `volume_uuid` is omitted.
    """
    if volume_uuid is not None:
        return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
    return '/{}/'.format(cls.NAMESPACE_VOLUME)

2628 

@classmethod
def _get_error_str(cls, result):
    """Join the messages of every error response found in `result`."""
    messages = [err.message for err in cls._filter_errors(result)]
    return ', '.join(messages)

2634 

@classmethod
def _create_linstor_instance(
    cls, uri, keep_uri_unmodified=False, attempt_count=30
):
    """
    Open a connected LINSTOR client.
    :param str uri: Controller URI; when falsy, it is auto-discovered.
    :param bool keep_uri_unmodified: When True, never replace the given
    URI by a rediscovered one on retry.
    :param int attempt_count: Max retries after the first failed attempt.
    :return: A connected `linstor.Linstor` instance.
    :raises LinstorVolumeManagerError: If no controller URI can be found.
    """
    # Note: the previous version declared an unused `retry` local here;
    # removed.

    def connect(uri):
        if not uri:
            uri = get_controller_uri()
            if not uri:
                raise LinstorVolumeManagerError(
                    'Unable to find controller uri...'
                )
        instance = linstor.Linstor(uri, keep_alive=True)
        instance.connect()
        return instance

    try:
        return connect(uri)
    except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
        pass

    # The given URI may be stale (e.g. the controller moved): rediscover
    # on each retry unless the caller pinned it.
    if not keep_uri_unmodified:
        uri = None

    return util.retry(
        lambda: connect(uri),
        maxretry=attempt_count,
        period=1,
        exceptions=[
            linstor.errors.LinstorNetworkError,
            LinstorVolumeManagerError
        ]
    )

2669 

@classmethod
def _configure_volume_peer_slots(cls, lin, volume_name):
    """Force 3 DRBD peer slots on the given resource definition."""
    result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
    failure = cls._get_error_str(result)
    if not failure:
        return
    raise LinstorVolumeManagerError(
        'Could not configure volume peer slots of {}: {}'
        .format(volume_name, failure)
    )

2679 

@classmethod
def _activate_device_path(cls, lin, node_name, volume_name):
    """
    Make the resource available (diskless) on a node so its DRBD device
    path exists there. An already-present resource is not an error.
    """
    result = lin.resource_make_available(node_name, volume_name, diskful=False)
    if linstor.Linstor.all_api_responses_no_error(result):
        return

    errors = linstor.Linstor.filter_api_call_response_errors(result)
    already_exists = len(errors) == 1 and errors[0].is_error(
        linstor.consts.FAIL_EXISTS_RSC
    )
    if already_exists:
        return

    raise LinstorVolumeManagerError(
        'Unable to activate device path of `{}` on node `{}`: {}'
        .format(volume_name, node_name, ', '.join(
            [str(x) for x in result]))
    )

2696 

@classmethod
def _request_database_path(cls, lin, activate=False):
    """
    Return the local /dev/drbd<id> device path of the LINSTOR database
    volume.
    :param lin: Connected `linstor.Linstor` client.
    :param bool activate: When True and no local resource exists, create
    a diskless one on this host, then retry (without re-activating).
    :raises LinstorVolumeManagerError: If the resource list cannot be
    fetched or the resource is absent and may not be activated.
    """
    node_name = socket.gethostname()

    try:
        # Look for a replica of the database resource on the local node.
        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == DATABASE_VOLUME_NAME,
            lin.resource_list_raise().resources
        ), None)
    except Exception as e:
        raise LinstorVolumeManagerError(
            'Unable to fetch database resource: {}'
            .format(e)
        )

    if not resource:
        if activate:
            cls._activate_device_path(
                lin, node_name, DATABASE_VOLUME_NAME
            )
            # BUGFIX: the recursive call previously passed
            # (DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME), i.e. a volume
            # name where the LINSTOR client is expected. Retry with the
            # client and activate=False to avoid an activation loop.
            return cls._request_database_path(lin)
        raise LinstorVolumeManagerError(
            'Empty dev path for `{}`, but definition "seems" to exist'
            .format(DATABASE_PATH)
        )
    # Contains a path of the /dev/drbd<id> form.
    return resource.volumes[0].device_path

2727 

2728 @classmethod 

2729 def _create_database_volume( 

2730 cls, lin, group_name, storage_pool_name, node_names, redundancy 

2731 ): 

2732 try: 

2733 dfns = lin.resource_dfn_list_raise().resource_definitions 

2734 except Exception as e: 

2735 raise LinstorVolumeManagerError( 

2736 'Unable to get definitions during database creation: {}' 

2737 .format(e) 

2738 ) 

2739 

2740 if dfns: 

2741 raise LinstorVolumeManagerError( 

2742 'Could not create volume `{}` from SR `{}`, '.format( 

2743 DATABASE_VOLUME_NAME, group_name 

2744 ) + 'LINSTOR volume list must be empty.' 

2745 ) 

2746 

2747 # Workaround to use thin lvm. Without this line an error is returned: 

2748 # "Not enough available nodes" 

2749 # I don't understand why but this command protect against this bug. 

2750 try: 

2751 pools = lin.storage_pool_list_raise( 

2752 filter_by_stor_pools=[storage_pool_name] 

2753 ) 

2754 except Exception as e: 

2755 raise LinstorVolumeManagerError( 

2756 'Failed to get storage pool list before database creation: {}' 

2757 .format(e) 

2758 ) 

2759 

2760 # Ensure we have a correct list of storage pools. 

2761 assert pools.storage_pools # We must have at least one storage pool! 

2762 nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools)) 

2763 for node_name in nodes_with_pool: 

2764 assert node_name in node_names 

2765 util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool)) 

2766 

2767 # Create the database definition. 

2768 size = cls.round_up_volume_size(DATABASE_SIZE) 

2769 cls._check_volume_creation_errors(lin.resource_group_spawn( 

2770 rsc_grp_name=group_name, 

2771 rsc_dfn_name=DATABASE_VOLUME_NAME, 

2772 vlm_sizes=['{}B'.format(size)], 

2773 definitions_only=True 

2774 ), DATABASE_VOLUME_NAME, group_name) 

2775 cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME) 

2776 

2777 # Create real resources on the first nodes. 

2778 resources = [] 

2779 

2780 diskful_nodes = [] 

2781 diskless_nodes = [] 

2782 for node_name in node_names: 

2783 if node_name in nodes_with_pool: 

2784 diskful_nodes.append(node_name) 

2785 else: 

2786 diskless_nodes.append(node_name) 

2787 

2788 assert diskful_nodes 

2789 for node_name in diskful_nodes[:redundancy]: 

2790 util.SMlog('Create database diskful on {}'.format(node_name)) 

2791 resources.append(linstor.ResourceData( 

2792 node_name=node_name, 

2793 rsc_name=DATABASE_VOLUME_NAME, 

2794 storage_pool=storage_pool_name 

2795 )) 

2796 # Create diskless resources on the remaining set. 

2797 for node_name in diskful_nodes[redundancy:] + diskless_nodes: 

2798 util.SMlog('Create database diskless on {}'.format(node_name)) 

2799 resources.append(linstor.ResourceData( 

2800 node_name=node_name, 

2801 rsc_name=DATABASE_VOLUME_NAME, 

2802 diskless=True 

2803 )) 

2804 

2805 result = lin.resource_create(resources) 

2806 error_str = cls._get_error_str(result) 

2807 if error_str: 

2808 raise LinstorVolumeManagerError( 

2809 'Could not create database volume from SR `{}`: {}'.format( 

2810 group_name, error_str 

2811 ) 

2812 ) 

2813 

2814 # Create database and ensure path exists locally and 

2815 # on replicated devices. 

2816 current_device_path = cls._request_database_path(lin, activate=True) 

2817 

2818 # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be 

2819 # plugged. 

2820 for node_name in node_names: 

2821 cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME) 

2822 

2823 # We use realpath here to get the /dev/drbd<id> path instead of 

2824 # /dev/drbd/by-res/<resource_name>. 

2825 expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME) 

2826 util.wait_for_path(expected_device_path, 5) 

2827 

2828 device_realpath = os.path.realpath(expected_device_path) 

2829 if current_device_path != device_realpath: 

2830 raise LinstorVolumeManagerError( 

2831 'Invalid path, current={}, expected={} (realpath={})' 

2832 .format( 

2833 current_device_path, 

2834 expected_device_path, 

2835 device_realpath 

2836 ) 

2837 ) 

2838 

2839 try: 

2840 util.retry( 

2841 lambda: util.pread2([DATABASE_MKFS, expected_device_path]), 

2842 maxretry=5 

2843 ) 

2844 except Exception as e: 

2845 raise LinstorVolumeManagerError( 

2846 'Failed to execute {} on database volume: {}' 

2847 .format(DATABASE_MKFS, e) 

2848 ) 

2849 

2850 return expected_device_path 

2851 

2852 @classmethod 

2853 def _destroy_database_volume(cls, lin, group_name): 

2854 error_str = cls._get_error_str( 

2855 lin.resource_dfn_delete(DATABASE_VOLUME_NAME) 

2856 ) 

2857 if error_str: 

2858 raise LinstorVolumeManagerError( 

2859 'Could not destroy resource `{}` from SR `{}`: {}' 

2860 .format(DATABASE_VOLUME_NAME, group_name, error_str) 

2861 ) 

2862 

    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """Mount (or unmount) the database volume on DATABASE_PATH.

        Pre-existing content of DATABASE_PATH is preserved across the
        operation: it is first moved to a freshly created backup directory,
        the volume is (un)mounted, then the content is moved back.

        :param volume_path: Block device path of the database volume.
        :param mount: True to mount the volume, False to unmount it.
        :param force: Forwarded to `_move_files` when restoring the saved
            content (allows overwriting files at the destination).
        :raises LinstorVolumeManagerError: On move/mount/cleanup failure.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # Rollback: try to restore the pre-call state. Every step is
            # best-effort so a failure here cannot mask the original error.
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # The (un)mount succeeded before the failure: stash whatever is
            # on DATABASE_PATH again, then undo the (un)mount.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            # Back in the original mount state: restore the saved content.
            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            # NOTE(review): if the failure occurred before `backup_path` was
            # assigned, the lambdas above raise NameError, which force_exec
            # silently swallows — rollback is then a no-op. Confirm this is
            # acceptable.
            raise e

2909 

2910 @classmethod 

2911 def _force_destroy_database_volume(cls, lin, group_name): 

2912 try: 

2913 cls._destroy_database_volume(lin, group_name) 

2914 except Exception: 

2915 pass 

2916 

2917 @classmethod 

2918 def _destroy_storage_pool(cls, lin, group_name, node_name): 

2919 def destroy(): 

2920 result = lin.storage_pool_delete(node_name, group_name) 

2921 errors = cls._filter_errors(result) 

2922 if cls._check_errors(errors, [ 

2923 linstor.consts.FAIL_NOT_FOUND_STOR_POOL, 

2924 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN 

2925 ]): 

2926 return 

2927 

2928 if errors: 

2929 raise LinstorVolumeManagerError( 

2930 'Failed to destroy SP `{}` on node `{}`: {}'.format( 

2931 group_name, 

2932 node_name, 

2933 cls._get_error_str(errors) 

2934 ) 

2935 ) 

2936 

2937 # We must retry to avoid errors like: 

2938 # "can not be deleted as volumes / snapshot-volumes are still using it" 

2939 # after LINSTOR database volume destruction. 

2940 return util.retry(destroy, maxretry=10) 

2941 

2942 @classmethod 

2943 def _create_resource_group( 

2944 cls, 

2945 lin, 

2946 group_name, 

2947 storage_pool_name, 

2948 redundancy, 

2949 destroy_old_group 

2950 ): 

2951 rg_creation_attempt = 0 

2952 while True: 

2953 result = lin.resource_group_create( 

2954 name=group_name, 

2955 place_count=redundancy, 

2956 storage_pool=storage_pool_name, 

2957 diskless_on_remaining=False 

2958 ) 

2959 error_str = cls._get_error_str(result) 

2960 if not error_str: 

2961 break 

2962 

2963 errors = cls._filter_errors(result) 

2964 if destroy_old_group and cls._check_errors(errors, [ 

2965 linstor.consts.FAIL_EXISTS_RSC_GRP 

2966 ]): 

2967 rg_creation_attempt += 1 

2968 if rg_creation_attempt < 2: 

2969 try: 

2970 cls._destroy_resource_group(lin, group_name) 

2971 except Exception as e: 

2972 error_str = 'Failed to destroy old and empty RG: {}'.format(e) 

2973 else: 

2974 continue 

2975 

2976 raise LinstorVolumeManagerError( 

2977 'Could not create RG `{}`: {}'.format( 

2978 group_name, error_str 

2979 ) 

2980 ) 

2981 

2982 result = lin.volume_group_create(group_name) 

2983 error_str = cls._get_error_str(result) 

2984 if error_str: 

2985 raise LinstorVolumeManagerError( 

2986 'Could not create VG `{}`: {}'.format( 

2987 group_name, error_str 

2988 ) 

2989 ) 

2990 

2991 @classmethod 

2992 def _destroy_resource_group(cls, lin, group_name): 

2993 def destroy(): 

2994 result = lin.resource_group_delete(group_name) 

2995 errors = cls._filter_errors(result) 

2996 if cls._check_errors(errors, [ 

2997 linstor.consts.FAIL_NOT_FOUND_RSC_GRP 

2998 ]): 

2999 return 

3000 

3001 if errors: 

3002 raise LinstorVolumeManagerError( 

3003 'Failed to destroy RG `{}`: {}' 

3004 .format(group_name, cls._get_error_str(errors)) 

3005 ) 

3006 

3007 return util.retry(destroy, maxretry=10) 

3008 

3009 @classmethod 

3010 def _build_group_name(cls, base_name): 

3011 # If thin provisioning is used we have a path like this: 

3012 # `VG/LV`. "/" is not accepted by LINSTOR. 

3013 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_')) 

3014 

3015 # Used to store important data in a HA context, 

3016 # i.e. a replication count of 3. 

3017 @classmethod 

3018 def _build_ha_group_name(cls, base_name): 

3019 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_')) 

3020 

3021 @classmethod 

3022 def _check_volume_creation_errors(cls, result, volume_uuid, group_name): 

3023 errors = cls._filter_errors(result) 

3024 if cls._check_errors(errors, [ 

3025 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN 

3026 ]): 

3027 raise LinstorVolumeManagerError( 

3028 'Failed to create volume `{}` from SR `{}`, it already exists' 

3029 .format(volume_uuid, group_name), 

3030 LinstorVolumeManagerError.ERR_VOLUME_EXISTS 

3031 ) 

3032 

3033 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]): 

3034 raise LinstorVolumeManagerError( 

3035 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist' 

3036 .format(volume_uuid, group_name), 

3037 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS 

3038 ) 

3039 

3040 if errors: 

3041 raise LinstorVolumeManagerError( 

3042 'Failed to create volume `{}` from SR `{}`: {}'.format( 

3043 volume_uuid, 

3044 group_name, 

3045 cls._get_error_str(errors) 

3046 ) 

3047 ) 

3048 

3049 @classmethod 

3050 def _move_files(cls, src_dir, dest_dir, force=False): 

3051 def listdir(dir): 

3052 ignored = ['lost+found'] 

3053 return [file for file in os.listdir(dir) if file not in ignored] 

3054 

3055 try: 

3056 if not force: 

3057 files = listdir(dest_dir) 

3058 if files: 

3059 raise LinstorVolumeManagerError( 

3060 'Cannot move files from {} to {} because destination ' 

3061 'contains: {}'.format(src_dir, dest_dir, files) 

3062 ) 

3063 except LinstorVolumeManagerError: 

3064 raise 

3065 except Exception as e: 

3066 raise LinstorVolumeManagerError( 

3067 'Cannot list dir {}: {}'.format(dest_dir, e) 

3068 ) 

3069 

3070 try: 

3071 for file in listdir(src_dir): 

3072 try: 

3073 dest_file = os.path.join(dest_dir, file) 

3074 if not force and os.path.exists(dest_file): 

3075 raise LinstorVolumeManagerError( 

3076 'Cannot move {} because it already exists in the ' 

3077 'destination'.format(file) 

3078 ) 

3079 shutil.move(os.path.join(src_dir, file), dest_file) 

3080 except LinstorVolumeManagerError: 

3081 raise 

3082 except Exception as e: 

3083 raise LinstorVolumeManagerError( 

3084 'Cannot move {}: {}'.format(file, e) 

3085 ) 

3086 except Exception as e: 

3087 if not force: 

3088 try: 

3089 cls._move_files(dest_dir, src_dir, force=True) 

3090 except Exception: 

3091 pass 

3092 

3093 raise LinstorVolumeManagerError( 

3094 'Failed to move files from {} to {}: {}'.format( 

3095 src_dir, dest_dir, e 

3096 ) 

3097 ) 

3098 

3099 @staticmethod 

3100 def _create_database_backup_path(): 

3101 path = DATABASE_PATH + '-' + str(uuid.uuid4()) 

3102 try: 

3103 os.mkdir(path) 

3104 return path 

3105 except Exception as e: 

3106 raise LinstorVolumeManagerError( 

3107 'Failed to create backup path {} of LINSTOR config: {}' 

3108 .format(path, e) 

3109 ) 

3110 

3111 @staticmethod 

3112 def _get_filtered_properties(properties): 

3113 return dict(properties.items()) 

3114 

3115 @staticmethod 

3116 def _filter_errors(result): 

3117 return [ 

3118 err for err in result 

3119 if hasattr(err, 'is_error') and err.is_error() 

3120 ] 

3121 

3122 @staticmethod 

3123 def _check_errors(result, codes): 

3124 for err in result: 

3125 for code in codes: 

3126 if err.is_error(code): 

3127 return True 

3128 return False 

3129 

3130 @classmethod 

3131 def _controller_is_running(cls): 

3132 return cls._service_is_running('linstor-controller') 

3133 

3134 @classmethod 

3135 def _start_controller(cls, start=True): 

3136 return cls._start_service('linstor-controller', start) 

3137 

3138 @staticmethod 

3139 def _start_service(name, start=True): 

3140 action = 'start' if start else 'stop' 

3141 (ret, out, err) = util.doexec([ 

3142 'systemctl', action, name 

3143 ]) 

3144 if ret != 0: 

3145 raise LinstorVolumeManagerError( 

3146 'Failed to {} {}: {} {}' 

3147 .format(action, name, out, err) 

3148 ) 

3149 

3150 @staticmethod 

3151 def _service_is_running(name): 

3152 (ret, out, err) = util.doexec([ 

3153 'systemctl', 'is-active', '--quiet', name 

3154 ]) 

3155 return not ret 

3156 

3157 @staticmethod 

3158 def _is_mounted(mountpoint): 

3159 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint]) 

3160 return ret == 0 

3161 

3162 @classmethod 

3163 def _mount_volume(cls, volume_path, mountpoint, mount=True): 

3164 if mount: 

3165 try: 

3166 util.pread(['mount', volume_path, mountpoint]) 

3167 except Exception as e: 

3168 raise LinstorVolumeManagerError( 

3169 'Failed to mount volume {} on {}: {}' 

3170 .format(volume_path, mountpoint, e) 

3171 ) 

3172 else: 

3173 try: 

3174 if cls._is_mounted(mountpoint): 

3175 util.pread(['umount', mountpoint]) 

3176 except Exception as e: 

3177 raise LinstorVolumeManagerError( 

3178 'Failed to umount volume {} on {}: {}' 

3179 .format(volume_path, mountpoint, e) 

3180 ) 

3181 

3182 

3183# ============================================================================== 

3184 

# Check if a path is a DRBD resource and log the process name/pid
# that opened it.
def log_drbd_openers(path):
    # Only DRBD by-res symlinks are handled; anything else is ignored.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Extract "<res_name>/<volume>" from the path tail.
    prefix_len = len(DRBD_BY_RES_PATH)
    res_name_end = path.find('/', prefix_len)
    if res_name_end == -1:
        return
    res_name = path[prefix_len:res_name_end]

    volume_sep = path.rfind('/')
    if volume_sep == res_name_end:
        return
    volume = path[volume_sep + 1:]

    try:
        # Ensure the target really is a DRBD block device
        # (major 147 is DRBD's registered device major).
        stats = os.stat(os.path.realpath(path))
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Ask DRBD where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        if stdout.startswith('{} role:Primary'.format(res_name)):
            # Local device: log the local openers.
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
        else:
            # Remote device: log openers across all hosts.
            util.SMlog(
                'DRBD resource `{}` is open on hosts: {}'
                .format(path, get_all_volume_openers(res_name, volume))
            )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )