Coverage for drivers/linstorvolumemanager.py : 10%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import (
19 Any,
20 Dict,
21 List,
22 override,
23)
25import json
26import linstor
27import os.path
28import re
29import shutil
30import socket
31import stat
32import time
33import util
34import uuid
# Prefix added to the name of RAW persistent volumes stored in LINSTOR.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Name of the DRBD resource that contains the data of the
# "/var/lib/linstor" directory (the LINSTOR controller database).
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'  # Filesystem used to format the database volume.

# TCP port used by LINSTOR satellites; established connections to this
# port are used below to locate the controller.
LINSTOR_SATELLITE_PORT = 3366

# Matches the node name of the primary in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts the address part of a `drbdsetup` connection line.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Directory where DRBD exposes device nodes by resource name.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the XAPI plugin used to run LINSTOR helpers on other hosts.
PLUGIN = 'linstor-manager'
55# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs "openers" file of a local volume.
    :param str resource_name: DRBD resource name.
    :param volume: Volume number inside the resource.
    :return: JSON string mapping each opener PID to its process name
    and open duration.
    :rtype: str
    :raise Exception: If resource_name or volume is missing.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    # Expected line format: "<process name> <pid> <open duration ms>".
    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')

    result = {}
    with open(path, 'r') as openers:
        for line in openers:
            match = opener_re.match(line)
            assert match

            process_name, pid, open_duration_ms = match.groups()
            result[pid] = {
                'process-name': process_name,
                'open-duration': open_duration_ms
            }

    return json.dumps(result)
def get_all_volume_openers(resource_name, volume):
    """
    Collect DRBD openers of a resource from every live host of the pool.
    :param str resource_name: DRBD resource name.
    :param volume: Volume number (converted to str for the plugin call).
    :return: Mapping of node name to its decoded openers dictionary.
    Hosts that fail or are offline are skipped (failures are logged).
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    volume = str(volume)
    openers = {}

    session = util.get_localAPI_session()

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            reply = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                }
            )
            openers[node_name] = json.loads(reply)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
119# ==============================================================================
def round_up(value, divisor):
    """Round *value* up to the nearest multiple of *divisor*."""
    assert divisor
    value = int(value)
    divisor = int(divisor)
    remainder = value % divisor
    if remainder:
        return value + divisor - remainder
    return value
def round_down(value, divisor):
    """Round *value* down to the nearest multiple of *divisor*."""
    assert divisor
    divisor = int(divisor)
    return (int(value) // divisor) * divisor
133# ==============================================================================
def _get_controller_addresses() -> List[str]:
    """
    Discover candidate controller addresses by listing established
    connections on the local satellite port with `ss`.
    :return: Peer addresses of established connections, or an empty
    list on any failure (failures are logged).
    :rtype: List[str]
    """
    try:
        (ret, stdout, stderr) = util.doexec([
            "/usr/sbin/ss", "-tnpH", "state", "established", f"( sport = :{LINSTOR_SATELLITE_PORT} )"
        ])
        if ret != 0:
            util.SMlog(f"Unexpected code {ret}: {stderr}")
        else:
            addresses = []
            for line in stdout.splitlines():
                # Column 4 holds the remote peer "addr:port"; keep only
                # the address part.
                addresses.append(line.split()[3].rsplit(":", 1)[0])
            return addresses
    except Exception as e:
        util.SMlog(f"Unable to get controller addresses: {e}")
    return []
def _get_controller_uri() -> str:
    """
    Build the LINSTOR controller URI from the first detected address.
    :return: A "linstor://" URI, or an empty string when no address
    could be found.
    :rtype: str
    """
    # TODO: Check that an IP address from the current pool is returned.
    addresses = _get_controller_addresses()
    if not addresses:
        return ""
    return "linstor://" + addresses[0]
def get_controller_uri():
    """
    Retry controller URI detection up to 30 times, one second apart.
    :return: The controller URI, or None (implicitly) when detection
    keeps failing.
    """
    attempt = 0
    while True:
        uri = _get_controller_uri()
        if uri:
            return uri

        attempt += 1
        if attempt >= 30:
            # Give up: callers receive None.
            return None
        time.sleep(1)
def get_controller_node_name():
    """
    Find the node hosting the LINSTOR controller.
    First inspect the DRBD status of the controller database volume
    (its primary hosts the controller), then fall back to asking every
    live host through the XAPI plugin.
    :return: 'localhost', a remote node name, or None when nothing is found.
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # Local node is primary on the database volume.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        # Otherwise look for a remote primary in the status output.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # Fallback: ask each live host whether it runs the controller.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                continue

            answer = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )
            if util.strtobool(answer):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Ask a given node to demote a DRBD resource via the XAPI plugin.
    Failures of the plugin call itself are logged (best-effort).
    :param str node_name: The node on which the resource must be demoted.
    :param str resource_name: The DRBD resource to demote.
    :raise Exception: If no host of the pool matches node_name.
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
        # Fix: the target node has been found and the demotion attempted;
        # previously the code fell through to the unconditional
        # "unable to find node" exception below even on success.
        return

    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )
225# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager, carrying a numeric code that
    callers can compare against the ERR_* constants.
    """

    # Fix: these constants previously ended with a trailing comma, which
    # silently turned each of them (except ERR_VOLUME_IN_USE) into a
    # 1-tuple instead of an int.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4
    ERR_VOLUME_IN_USE = 5

    def __init__(self, message, code=ERR_GENERIC):
        """
        :param str message: Human readable error description.
        :param int code: One of the ERR_* constants (ERR_GENERIC by default).
        """
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # Numeric error code given at construction time.
        return self._code
244# ==============================================================================
246# Note:
247# If a storage pool is not accessible after a network change:
248# linstor node interface modify <NODE> default --ip <IP>
class LinstorVolumeManager(object):
    """
    API to manage LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    __slots__ = (
        '_linstor', '_uri', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty',
        '_resources_info_cache',
    )

    # Directory where DRBD exposes device nodes by resource name.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15
    @staticmethod
    def default_logger(*args):
        # Fallback logger used when no custom logger is given to the
        # constructor: simply prints the argument tuple to stdout.
        print(args)
318 # --------------------------------------------------------------------------
319 # API.
320 # --------------------------------------------------------------------------
322 class VolumeInfo(object):
323 __slots__ = (
324 'name',
325 'allocated_size', # Allocated size, place count is not used.
326 'virtual_size', # Total virtual available size of this volume
327 # (i.e. the user size at creation).
328 'diskful' # Array of nodes that have a diskful volume.
329 )
331 def __init__(self, name):
332 self.name = name
333 self.allocated_size = 0
334 self.virtual_size = 0
335 self.diskful = []
337 @override
338 def __repr__(self) -> str:
339 return 'VolumeInfo("{}", {}, {}, {})'.format(
340 self.name, self.allocated_size, self.virtual_size,
341 self.diskful
342 )
344 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raise LinstorVolumeManagerError: If some nodes report a LINSTOR
        version mismatch, or if the resource group cannot be found.
        """

        self._uri = uri
        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )

        # Refuse to start if some nodes report VERSION_MISMATCH.
        mismatched_nodes = [
            node for node in self._linstor.node_list().pop().nodes if node.connection_status == "VERSION_MISMATCH"
        ]

        if mismatched_nodes:
            raise LinstorVolumeManagerError(
                "Some linstor nodes are not using the same version. " +
                f"Incriminated nodes are: {','.join([node.name for node in mismatched_nodes])}"
            )

        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        # Redundancy is the place count of the LINSTOR resource group.
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._resources_info_cache = None
        self._build_volumes(repair=repair)
    @property
    def uri(self) -> str:
        """
        Give the URI used to reach the LINSTOR controller.
        :return: The controller URI.
        :rtype: str
        """
        return self._uri
    @property
    def group_name(self):
        """
        Give the used group name.
        Note: this is the base name as given to the constructor, not the
        prefixed LINSTOR group name stored in self._group_name.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name
    @property
    def redundancy(self):
        """
        Give the used redundancy (the place count of the resource group).
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy
    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        Note: a live reference to the internal set, not a copy.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes
435 @property
436 def max_volume_size_allowed(self):
437 """
438 Give the max volume size currently available in B.
439 :return: The current size.
440 :rtype: int
441 """
443 candidates = self._find_best_size_candidates()
444 if not candidates:
445 raise LinstorVolumeManagerError(
446 'Failed to get max volume size allowed'
447 )
449 size = candidates[0].max_volume_size
450 if size < 0:
451 raise LinstorVolumeManagerError(
452 'Invalid max volume size allowed given: {}'.format(size)
453 )
454 return self.round_down_volume_size(size * 1024)
    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        Computed from the `total_capacity` attribute of the storage pools.
        :return: The physical size.
        :rtype: int
        """
        return self._compute_size('total_capacity')
    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        Computed from the `free_capacity` attribute of the storage pools.
        :return: The physical free size.
        :rtype: int
        """
        return self._compute_size('free_capacity')
474 @property
475 def allocated_volume_size(self):
476 """
477 Give the allocated size for all volumes. The place count is not
478 used here. When thick lvm is used, the size for one volume should
479 be equal to the virtual volume size. With thin lvm, the size is equal
480 or lower to the volume size.
481 :return: The allocated size of all volumes.
482 :rtype: int
483 """
485 # Paths: /res_name/vol_number/size
486 sizes = {}
488 for resource in self._get_resource_cache().resources:
489 if resource.name not in sizes:
490 current = sizes[resource.name] = {}
491 else:
492 current = sizes[resource.name]
494 for volume in resource.volumes:
495 # We ignore diskless pools of the form "DfltDisklessStorPool".
496 if volume.storage_pool_name != self._group_name:
497 continue
499 allocated_size = max(volume.allocated_size, 0)
500 current_allocated_size = current.get(volume.number) or -1
501 if allocated_size > current_allocated_size:
502 current[volume.number] = allocated_size
504 total_size = 0
505 for volumes in sizes.values():
506 for size in volumes.values():
507 total_size += size
509 return total_size * 1024
511 def get_min_physical_size(self):
512 """
513 Give the minimum physical size of the SR.
514 I.e. the size of the smallest disk + the number of pools.
515 :return: The physical min size.
516 :rtype: tuple(int, int)
517 """
518 size = None
519 pool_count = 0
520 for pool in self._get_storage_pools(force=True):
521 space = pool.free_space
522 if space:
523 pool_count += 1
524 current_size = space.total_capacity
525 if current_size < 0:
526 raise LinstorVolumeManagerError(
527 'Failed to get pool total_capacity attr of `{}`'
528 .format(pool.node_name)
529 )
530 if size is None or current_size < size:
531 size = current_size
532 return (pool_count, (size or 0) * 1024)
534 @property
535 def metadata(self):
536 """
537 Get the metadata of the SR.
538 :return: Dictionary that contains metadata.
539 :rtype: dict(str, dict)
540 """
542 sr_properties = self._get_sr_properties()
543 metadata = sr_properties.get(self.PROP_METADATA)
544 if metadata is not None:
545 metadata = json.loads(metadata)
546 if isinstance(metadata, dict):
547 return metadata
548 raise LinstorVolumeManagerError(
549 'Expected dictionary in SR metadata: {}'.format(
550 self._group_name
551 )
552 )
554 return {}
    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        sr_properties = self._get_sr_properties()
        # Serialized to JSON before being stored in the SR properties.
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)
567 @property
568 def disconnected_hosts(self):
569 """
570 Get the list of disconnected hosts.
571 :return: Set that contains disconnected hosts.
572 :rtype: set(str)
573 """
575 disconnected_hosts = set()
576 for pool in self._get_storage_pools():
577 for report in pool.reports:
578 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
579 linstor.consts.WARN_NOT_CONNECTED:
580 disconnected_hosts.add(pool.node_name)
581 break
582 return disconnected_hosts
    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True, # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only flag the volume as existing once its device path
                # is known.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
    def mark_volume_as_persistent(self, volume_uuid):
        """
        Mark volume as persistent if created with persistent=False.
        :param str volume_uuid: The volume uuid to mark.
        """

        self._ensure_volume_exists(volume_uuid)

        # Mark volume as persistent: flip the not-exists flag so the
        # volume survives the next LinstorSR constructor call.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raise LinstorVolumeManagerError: With code ERR_VOLUME_IN_USE if
        a node still uses the volume, or ERR_VOLUME_DESTROY if the
        deletion itself fails.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Refuse to destroy a volume currently opened on any node.
        is_volume_in_use = any(node["in-use"] for node in self.get_resource_info(volume_uuid)["nodes"].values())
        if is_volume_in_use:
            raise LinstorVolumeManagerError(
                f"Could not destroy volume `{volume_uuid}` as it is currently in use",
                LinstorVolumeManagerError.ERR_VOLUME_IN_USE
            )

        # Mark volume as destroyed.
        # Done before the actual deletion so a crash in between leaves
        # the volume flagged as non-existing.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )
685 def lock_volume(self, volume_uuid, locked=True):
686 """
687 Prevent modifications of the volume properties during
688 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
689 when used. This method is useful to attach/detach correctly a volume on
690 a slave. Without it the GC can rename a volume, in this case the old
691 volume path can be used by a slave...
692 :param str volume_uuid: The volume uuid to protect/unprotect.
693 :param bool locked: Lock/unlock the volume.
694 """
696 self._ensure_volume_exists(volume_uuid)
698 self._logger(
699 '{} volume {} as locked'.format(
700 'Mark' if locked else 'Unmark',
701 volume_uuid
702 )
703 )
705 volume_properties = self._get_volume_properties(volume_uuid)
706 if locked:
707 volume_properties[
708 self.PROP_IS_READONLY_TIMESTAMP
709 ] = str(time.time())
710 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
711 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Simple forwarding to the list variant with a single element.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked.
        Stale locks (older than LOCKED_EXPIRATION_DELAY) are removed.
        :param list(str) volume_uuids: The volume uuids to check.
        :param int timeout: If some volumes are still locked after the
        expiration of the timeout, an exception is thrown.
        :raise LinstorVolumeManagerError: When the timeout is reached.
        """
        # Only track uuids that actually belong to this SR.
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore): stop tracking it.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock expired: drop the stale timestamp ourselves.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # Still locked: stop scanning, wait and retry.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        :raise LinstorVolumeManagerError: If the deletion fails.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Keep tie-breaker resources in place (presumably required for
        # quorum — do not remove them).
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        # Per its LINSTOR API name, this call only deletes the resource
        # when it is diskless on this node.
        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )
    def introduce_volume(self, volume_uuid):
        """
        Introduce an existing volume in the SR (not implemented yet).
        :param str volume_uuid: The volume uuid to introduce.
        """
        pass # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raise LinstorVolumeManagerError: If the DRBD sync wait times out
        or the LINSTOR volume definition update fails.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # Convert bytes to KiB after alignment (LINSTOR unit).
        new_size = self.round_up_volume_size(new_size) // 1024

        # We can't resize anything until DRBD is up to date.
        # We wait here for 5min max and raise an easy to understand error for the user.
        # 5min is an arbitrary time, it's impossible to get a fit all situation value
        # and it's currently impossible to know how much time we have to wait
        # This is mostly an issue for thick provisioning, thin isn't affected.
        start_time = time.monotonic()
        try:
            self._linstor.resource_dfn_wait_synced(volume_name, wait_interval=1.0, timeout=60*5)
        except linstor.LinstorTimeoutError:
            raise LinstorVolumeManagerError(
                f"Volume resizing of `{volume_uuid}` from SR `{self._group_name}` is incomplete: timeout reached but it continues in background."
            )
        util.SMlog(f"DRBD is up to date, syncing took {time.monotonic() - start_time}s")

        result = self._linstor.volume_dfn_modify(
            rsc_name=volume_name,
            volume_nr=0,
            size=new_size
        )

        # The resize invalidates cached resource states.
        self._mark_resource_cache_as_dirty()

        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                f"Could not resize volume `{volume_uuid}` from SR `{self._group_name}`: {error_str}"
            )
855 def get_volume_name(self, volume_uuid):
856 """
857 Get the name of a particular volume.
858 :param str volume_uuid: The volume uuid of the name to get.
859 :return: The volume name.
860 :rtype: str
861 """
863 self._ensure_volume_exists(volume_uuid)
864 volume_properties = self._get_volume_properties(volume_uuid)
865 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
866 if volume_name:
867 return volume_name
868 raise LinstorVolumeManagerError(
869 'Failed to get volume name of {}'.format(volume_uuid)
870 )
872 def get_volume_size(self, volume_uuid):
873 """
874 Get the size of a particular volume.
875 :param str volume_uuid: The volume uuid of the size to get.
876 :return: The volume size.
877 :rtype: int
878 """
880 volume_name = self.get_volume_name(volume_uuid)
881 dfns = self._linstor.resource_dfn_list_raise(
882 query_volume_definitions=True,
883 filter_by_resource_definitions=[volume_name]
884 ).resource_definitions
886 size = dfns[0].volume_definitions[0].size
887 if size < 0:
888 raise LinstorVolumeManagerError(
889 'Failed to get volume size of: {}'.format(volume_uuid)
890 )
891 return size * 1024
893 def set_auto_promote_timeout(self, volume_uuid, timeout):
894 """
895 Define the blocking time of open calls when a DRBD
896 is already open on another host.
897 :param str volume_uuid: The volume uuid to modify.
898 """
900 volume_name = self.get_volume_name(volume_uuid)
901 result = self._linstor.resource_dfn_modify(volume_name, {
902 'DrbdOptions/Resource/auto-promote-timeout': timeout
903 })
904 error_str = self._get_error_str(result)
905 if error_str:
906 raise LinstorVolumeManagerError(
907 'Could not change the auto promote timeout of `{}`: {}'
908 .format(volume_uuid, error_str)
909 )
911 def set_drbd_ha_properties(self, volume_name, enabled=True):
912 """
913 Set or not HA DRBD properties required by drbd-reactor and
914 by specific volumes.
915 :param str volume_name: The volume to modify.
916 :param bool enabled: Enable or disable HA properties.
917 """
919 properties = {
920 'DrbdOptions/auto-quorum': 'disabled',
921 'DrbdOptions/Resource/auto-promote': 'no',
922 'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
923 'DrbdOptions/Resource/on-no-quorum': 'io-error',
924 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
925 'DrbdOptions/Resource/quorum': 'majority'
926 }
927 if enabled:
928 result = self._linstor.resource_dfn_modify(volume_name, properties)
929 else:
930 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))
932 error_str = self._get_error_str(result)
933 if error_str:
934 raise LinstorVolumeManagerError(
935 'Could not modify HA DRBD properties on volume `{}`: {}'
936 .format(volume_name, error_str)
937 )
    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        volume_name = self.get_volume_name(volume_uuid)
        # Volume info is indexed by LINSTOR volume name, not by uuid.
        return self._get_volumes_info()[volume_name]
    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)
961 def get_volume_uuid_from_device_path(self, device_path):
962 """
963 Get the volume uuid of a device_path.
964 :param str device_path: The dev path to find the volume uuid.
965 :return: The volume uuid of the local device path.
966 :rtype: str
967 """
969 expected_volume_name = \
970 self.get_volume_name_from_device_path(device_path)
972 volume_names = self.get_volumes_with_name()
973 for volume_uuid, volume_name in volume_names.items():
974 if volume_name == expected_volume_name:
975 return volume_uuid
977 raise LinstorVolumeManagerError(
978 'Unable to find volume uuid from dev path `{}`'.format(device_path)
979 )
981 def get_volume_name_from_device_path(self, device_path):
982 """
983 Get the volume name of a device_path.
984 :param str device_path: The dev path to find the volume name.
985 :return: The volume name of the device path.
986 :rtype: str
987 """
989 # Assume that we have a path like this:
990 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
991 # - "../xcp-volume-<UUID>/0"
992 if device_path.startswith(DRBD_BY_RES_PATH):
993 prefix_len = len(DRBD_BY_RES_PATH)
994 elif device_path.startswith('../'):
995 prefix_len = 3
996 else:
997 raise LinstorVolumeManagerError('Unexpected device path: `{}`'.format(device_path))
999 res_name_end = device_path.find('/', prefix_len)
1000 assert res_name_end != -1
1001 return device_path[prefix_len:res_name_end]
    def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
        """
        Change the uuid of a volume.
        :param str volume_uuid: The volume to modify.
        :param str new_volume_uuid: The new volume uuid to use.
        :param bool force: If true we don't check if volume_uuid is in the
        volume list. I.e. the volume can be marked as deleted but the volume
        can still be in the LINSTOR KV store if the deletion has failed.
        In specific cases like "undo" after a failed clone we must rename a bad
        deleted VDI.
        :raises LinstorVolumeManagerError: If the target UUID already exists,
        if a previous UUID update is still pending, or if the property
        copy/clear sequence fails.
        """

        self._logger(
            'Trying to update volume UUID {} to {}...'
            .format(volume_uuid, new_volume_uuid)
        )
        # Renaming a volume to itself would make the copy/clear sequence
        # below destroy the volume properties.
        assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'

        if not force:
            self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        if new_volume_uuid in self._volumes:
            raise LinstorVolumeManagerError(
                'Volume `{}` already exists'.format(new_volume_uuid),
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        volume_properties = self._get_volume_properties(volume_uuid)
        # A still-present PROP_UPDATING_UUID_SRC means a previous UUID
        # update on this volume never completed: refuse to start another.
        if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {}: invalid state'
                .format(volume_uuid)
            )

        # 1. Copy in temp variables metadata and volume_name.
        metadata = volume_properties.get(self.PROP_METADATA)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        # 2. Switch to new volume namespace.
        volume_properties.namespace = self._build_volume_namespace(
            new_volume_uuid
        )

        # The destination namespace must be empty before we copy anything
        # into it, otherwise we could overwrite another volume's state.
        if list(volume_properties.items()):
            raise LinstorVolumeManagerError(
                'Cannot update volume uuid {} to {}: '
                .format(volume_uuid, new_volume_uuid) +
                'this last one is not empty'
            )

        try:
            # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
            # If we crash after that, the new properties can be removed
            # properly.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
            volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid

            # 4. Copy the properties.
            # Note: On new volumes, during clone for example, the metadata
            # may be missing. So we must test it to avoid this error:
            # "None has to be a str/unicode, but is <type 'NoneType'>"
            if metadata:
                volume_properties[self.PROP_METADATA] = metadata
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            # 5. Ok!
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
        except Exception as err:
            try:
                # Clear the new volume properties in case of failure.
                assert volume_properties.namespace == \
                    self._build_volume_namespace(new_volume_uuid)
                volume_properties.clear()
            except Exception as e:
                self._logger(
                    'Failed to clear new volume properties: {} (ignoring...)'
                    .format(e)
                )
            raise LinstorVolumeManagerError(
                'Failed to copy volume properties: {}'.format(err)
            )

        try:
            # 6. After this point, it's ok we can remove the
            # PROP_UPDATING_UUID_SRC property and clear the src properties
            # without problems.

            # 7. Switch to old volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                volume_uuid
            )
            volume_properties.clear()

            # 8. Switch a last time to new volume namespace.
            volume_properties.namespace = self._build_volume_namespace(
                new_volume_uuid
            )
            volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to clear volume properties '
                'after volume uuid update: {}'.format(e)
            )

        try:
            self._volumes.remove(volume_uuid)
        except KeyError:
            # Can be missing if we are building the volume set attr AND
            # we are processing a deleted resource.
            assert force

        self._volumes.add(new_volume_uuid)

        self._logger(
            'UUID update succeeded of {} to {}! (properties={})'
            .format(
                volume_uuid, new_volume_uuid,
                self._get_filtered_properties(volume_properties)
            )
        )
1125 def update_volume_name(self, volume_uuid, volume_name):
1126 """
1127 Change the volume name of a volume.
1128 :param str volume_uuid: The volume to modify.
1129 :param str volume_name: The volume_name to use.
1130 """
1132 self._ensure_volume_exists(volume_uuid)
1133 self.ensure_volume_is_not_locked(volume_uuid)
1134 if not volume_name.startswith(self.PREFIX_VOLUME):
1135 raise LinstorVolumeManagerError(
1136 'Volume name `{}` must be start with `{}`'
1137 .format(volume_name, self.PREFIX_VOLUME)
1138 )
1140 if volume_name not in self._fetch_resource_names():
1141 raise LinstorVolumeManagerError(
1142 'Volume `{}` doesn\'t exist'.format(volume_name)
1143 )
1145 volume_properties = self._get_volume_properties(volume_uuid)
1146 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1148 def get_usage_states(self, volume_uuid):
1149 """
1150 Check if a volume is currently used.
1151 :param str volume_uuid: The volume uuid to check.
1152 :return: A dictionary that contains states.
1153 :rtype: dict(str, bool or None)
1154 """
1156 states = {}
1158 volume_name = self.get_volume_name(volume_uuid)
1159 for resource_state in self._linstor.resource_list_raise(
1160 filter_by_resources=[volume_name]
1161 ).resource_states:
1162 states[resource_state.node_name] = resource_state.in_use
1164 return states
1166 def get_volume_openers(self, volume_uuid):
1167 """
1168 Get openers of a volume.
1169 :param str volume_uuid: The volume uuid to monitor.
1170 :return: A dictionary that contains openers.
1171 :rtype: dict(str, obj)
1172 """
1173 return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
1175 def get_volumes_with_name(self):
1176 """
1177 Give a volume dictionary that contains names actually owned.
1178 :return: A volume/name dict.
1179 :rtype: dict(str, str)
1180 """
1181 return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1183 def get_volumes_with_info(self):
1184 """
1185 Give a volume dictionary that contains VolumeInfos.
1186 :return: A volume/VolumeInfo dict.
1187 :rtype: dict(str, VolumeInfo)
1188 """
1190 volumes = {}
1192 volume_names = self.get_volumes_with_name()
1193 all_volume_info = self._get_volumes_info(volume_names)
1194 for volume_uuid, volume_name in volume_names.items():
1195 if volume_name:
1196 volume_info = all_volume_info.get(volume_name)
1197 if volume_info:
1198 volumes[volume_uuid] = volume_info
1199 continue
1201 # Well I suppose if this volume is not available,
1202 # LINSTOR has been used directly without using this API.
1203 volumes[volume_uuid] = self.VolumeInfo('')
1205 return volumes
1207 def get_volumes_with_metadata(self):
1208 """
1209 Give a volume dictionary that contains metadata.
1210 :return: A volume/metadata dict.
1211 :rtype: dict(str, dict)
1212 """
1214 volumes = {}
1216 metadata = self._get_volumes_by_property(self.REG_METADATA)
1217 for volume_uuid, volume_metadata in metadata.items():
1218 if volume_metadata:
1219 volume_metadata = json.loads(volume_metadata)
1220 if isinstance(volume_metadata, dict):
1221 volumes[volume_uuid] = volume_metadata
1222 continue
1223 raise LinstorVolumeManagerError(
1224 'Expected dictionary in volume metadata: {}'
1225 .format(volume_uuid)
1226 )
1228 volumes[volume_uuid] = {}
1230 return volumes
1232 def get_volume_metadata(self, volume_uuid):
1233 """
1234 Get the metadata of a volume.
1235 :return: Dictionary that contains metadata.
1236 :rtype: dict
1237 """
1239 self._ensure_volume_exists(volume_uuid)
1240 volume_properties = self._get_volume_properties(volume_uuid)
1241 metadata = volume_properties.get(self.PROP_METADATA)
1242 if metadata:
1243 metadata = json.loads(metadata)
1244 if isinstance(metadata, dict):
1245 return metadata
1246 raise LinstorVolumeManagerError(
1247 'Expected dictionary in volume metadata: {}'
1248 .format(volume_uuid)
1249 )
1250 return {}
1252 def set_volume_metadata(self, volume_uuid, metadata):
1253 """
1254 Set the metadata of a volume.
1255 :param dict metadata: Dictionary that contains metadata.
1256 """
1258 self._ensure_volume_exists(volume_uuid)
1259 self.ensure_volume_is_not_locked(volume_uuid)
1261 assert isinstance(metadata, dict)
1262 volume_properties = self._get_volume_properties(volume_uuid)
1263 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1265 def update_volume_metadata(self, volume_uuid, metadata):
1266 """
1267 Update the metadata of a volume. It modify only the given keys.
1268 It doesn't remove unreferenced key instead of set_volume_metadata.
1269 :param dict metadata: Dictionary that contains metadata.
1270 """
1272 self._ensure_volume_exists(volume_uuid)
1273 self.ensure_volume_is_not_locked(volume_uuid)
1275 assert isinstance(metadata, dict)
1276 volume_properties = self._get_volume_properties(volume_uuid)
1278 current_metadata = json.loads(
1279 volume_properties.get(self.PROP_METADATA, '{}')
1280 )
1281 if not isinstance(metadata, dict):
1282 raise LinstorVolumeManagerError(
1283 'Expected dictionary in volume metadata: {}'
1284 .format(volume_uuid)
1285 )
1287 for key, value in metadata.items():
1288 current_metadata[key] = value
1289 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1291 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1292 """
1293 Clone a volume. Do not copy the data, this method creates a new volume
1294 with the same size.
1295 :param str volume_uuid: The volume to clone.
1296 :param str clone_uuid: The cloned volume.
1297 :param bool persistent: If false the volume will be unavailable
1298 on the next constructor call LinstorSR(...).
1299 :return: The current device path of the cloned volume.
1300 :rtype: str
1301 """
1303 volume_name = self.get_volume_name(volume_uuid)
1304 self.ensure_volume_is_not_locked(volume_uuid)
1306 # 1. Find ideal nodes + size to use.
1307 ideal_node_names, size = self._get_volume_node_names_and_size(
1308 volume_name
1309 )
1310 if size <= 0:
1311 raise LinstorVolumeManagerError(
1312 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1313 )
1315 # 2. Create clone!
1316 return self.create_volume(clone_uuid, size, persistent)
1318 def remove_resourceless_volumes(self):
1319 """
1320 Remove all volumes without valid or non-empty name
1321 (i.e. without LINSTOR resource). It's different than
1322 LinstorVolumeManager constructor that takes a `repair` param that
1323 removes volumes with `PROP_NOT_EXISTS` to 1.
1324 """
1326 resource_names = self._fetch_resource_names()
1327 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1328 if not volume_name or volume_name not in resource_names:
1329 # Don't force, we can be sure of what's happening.
1330 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.
        :raises LinstorVolumeManagerError: If volumes (managed or external)
        still exist in this SR.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names.
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        # Remember the controller state to be able to restore it on error.
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without it we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the database was not seen by LINSTOR for some reason.
            # It seems a simple refresh of the LINSTOR connection makes it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining drbd nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Best effort: put the controller back in its initial state.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory; failures here are
        # only logged because the SR itself is already gone.
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1430 def find_up_to_date_diskful_nodes(self, volume_uuid):
1431 """
1432 Find all nodes that contain a specific volume using diskful disks.
1433 The disk must be up to data to be used.
1434 :param str volume_uuid: The volume to use.
1435 :return: The available nodes.
1436 :rtype: tuple(set(str), str)
1437 """
1439 volume_name = self.get_volume_name(volume_uuid)
1441 in_use_by = None
1442 node_names = set()
1444 resource_states = filter(
1445 lambda resource_state: resource_state.name == volume_name,
1446 self._get_resource_cache().resource_states
1447 )
1449 for resource_state in resource_states:
1450 volume_state = resource_state.volume_states[0]
1451 if volume_state.disk_state == 'UpToDate':
1452 node_names.add(resource_state.node_name)
1453 if resource_state.in_use:
1454 in_use_by = resource_state.node_name
1456 return (node_names, in_use_by)
    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # Marks both the resource cache and the derived volume info cache
        # dirty; they are rebuilt lazily on next access.
        self._mark_resource_cache_as_dirty()
1466 def has_node(self, node_name):
1467 """
1468 Check if a node exists in the LINSTOR database.
1469 :rtype: bool
1470 """
1471 result = self._linstor.node_list()
1472 error_str = self._get_error_str(result)
1473 if error_str:
1474 raise LinstorVolumeManagerError(
1475 'Failed to list nodes using `{}`: {}'
1476 .format(node_name, error_str)
1477 )
1478 return bool(result[0].node(node_name))
1480 def create_node(self, node_name, ip):
1481 """
1482 Create a new node in the LINSTOR database.
1483 :param str node_name: Node name to use.
1484 :param str ip: Host IP to communicate.
1485 """
1486 result = self._linstor.node_create(
1487 node_name,
1488 linstor.consts.VAL_NODE_TYPE_CMBD,
1489 ip
1490 )
1491 errors = self._filter_errors(result)
1492 if errors:
1493 error_str = self._get_error_str(errors)
1494 raise LinstorVolumeManagerError(
1495 'Failed to create node `{}`: {}'.format(node_name, error_str)
1496 )
1498 def destroy_node(self, node_name):
1499 """
1500 Destroy a node in the LINSTOR database.
1501 :param str node_name: Node name to remove.
1502 """
1503 result = self._linstor.node_delete(node_name)
1504 errors = self._filter_errors(result)
1505 if errors:
1506 error_str = self._get_error_str(errors)
1507 raise LinstorVolumeManagerError(
1508 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1509 )
1511 def create_node_interface(self, node_name, name, ip):
1512 """
1513 Create a new node interface in the LINSTOR database.
1514 :param str node_name: Node name of the interface to use.
1515 :param str name: Interface to create.
1516 :param str ip: IP of the interface.
1517 """
1518 result = self._linstor.netinterface_create(node_name, name, ip)
1519 errors = self._filter_errors(result)
1520 if errors:
1521 error_str = self._get_error_str(errors)
1522 raise LinstorVolumeManagerError(
1523 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1524 )
1526 def destroy_node_interface(self, node_name, name):
1527 """
1528 Destroy a node interface in the LINSTOR database.
1529 :param str node_name: Node name of the interface to remove.
1530 :param str name: Interface to remove.
1531 """
1533 if name == 'default':
1534 raise LinstorVolumeManagerError(
1535 'Unable to delete the default interface of a node!'
1536 )
1538 result = self._linstor.netinterface_delete(node_name, name)
1539 errors = self._filter_errors(result)
1540 if errors:
1541 error_str = self._get_error_str(errors)
1542 raise LinstorVolumeManagerError(
1543 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1544 )
1546 def modify_node_interface(self, node_name, name, ip):
1547 """
1548 Modify a node interface in the LINSTOR database. Create it if necessary.
1549 :param str node_name: Node name of the interface to use.
1550 :param str name: Interface to modify or create.
1551 :param str ip: IP of the interface.
1552 """
1553 result = self._linstor.netinterface_create(node_name, name, ip)
1554 errors = self._filter_errors(result)
1555 if not errors:
1556 return
1558 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1559 result = self._linstor.netinterface_modify(node_name, name, ip)
1560 errors = self._filter_errors(result)
1561 if not errors:
1562 return
1564 error_str = self._get_error_str(errors)
1565 raise LinstorVolumeManagerError(
1566 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1567 )
1569 def list_node_interfaces(self, node_name):
1570 """
1571 List all node interfaces.
1572 :param str node_name: Node name to use to list interfaces.
1573 :rtype: list
1574 :
1575 """
1576 result = self._linstor.net_interface_list(node_name)
1577 if not result:
1578 raise LinstorVolumeManagerError(
1579 'Unable to list interfaces on `{}`: no list received'.format(node_name)
1580 )
1582 interfaces = {}
1583 for interface in result:
1584 interface = interface._rest_data
1585 interfaces[interface['name']] = {
1586 'address': interface['address'],
1587 'active': interface['is_active']
1588 }
1589 return interfaces
1591 def get_node_preferred_interface(self, node_name):
1592 """
1593 Get the preferred interface used by a node.
1594 :param str node_name: Node name of the interface to get.
1595 :rtype: str
1596 """
1597 try:
1598 nodes = self._linstor.node_list_raise([node_name]).nodes
1599 if nodes:
1600 properties = nodes[0].props
1601 return properties.get('PrefNic', 'default')
1602 return nodes
1603 except Exception as e:
1604 raise LinstorVolumeManagerError(
1605 'Failed to get preferred interface: `{}`'.format(e)
1606 )
1608 def set_node_preferred_interface(self, node_name, name):
1609 """
1610 Set the preferred interface to use on a node.
1611 :param str node_name: Node name of the interface.
1612 :param str name: Preferred interface to use.
1613 """
1614 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1615 errors = self._filter_errors(result)
1616 if errors:
1617 error_str = self._get_error_str(errors)
1618 raise LinstorVolumeManagerError(
1619 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1620 )
1622 def get_nodes_info(self):
1623 """
1624 Get all nodes + statuses, used or not by the pool.
1625 :rtype: dict(str, dict)
1626 """
1627 try:
1628 nodes = {}
1629 for node in self._linstor.node_list_raise().nodes:
1630 nodes[node.name] = node.connection_status
1631 return nodes
1632 except Exception as e:
1633 raise LinstorVolumeManagerError(
1634 'Failed to get all nodes: `{}`'.format(e)
1635 )
1637 def get_storage_pools_info(self):
1638 """
1639 Give all storage pools of current group name.
1640 :rtype: dict(str, list)
1641 """
1642 storage_pools = {}
1643 for pool in self._get_storage_pools(force=True):
1644 if pool.node_name not in storage_pools:
1645 storage_pools[pool.node_name] = []
1647 size = -1
1648 capacity = -1
1650 space = pool.free_space
1651 if space:
1652 size = space.free_capacity
1653 if size < 0:
1654 size = -1
1655 else:
1656 size *= 1024
1657 capacity = space.total_capacity
1658 if capacity <= 0:
1659 capacity = -1
1660 else:
1661 capacity *= 1024
1663 storage_pools[pool.node_name].append({
1664 'name': pool.name,
1665 'linstor-uuid': pool.uuid,
1666 'free-size': size,
1667 'capacity': capacity
1668 })
1670 return storage_pools
    def get_resources_info(self):
        """
        Give all resources of current group name.
        :rtype: dict(str, list)
        """
        # Serve from cache as long as the underlying resource cache
        # hasn't been invalidated.
        if self._resources_info_cache and not self._resource_cache_dirty:
            return self._resources_info_cache

        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()
        # Pass 1: one entry per resource, one sub-entry per node.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Sizes are reported in KiB; convert to bytes and keep -1
                # as the "unknown" marker.
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: merge the runtime state (in-use flag, disk states) into
        # the entries built above.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: attach the driver UUID to each matching LINSTOR resource.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        self._resources_info_cache = resources
        return self._resources_info_cache
1739 def get_resource_info(self, volume_uuid: str) -> Dict[str, Any]:
1740 """
1741 Give a resource info based on its UUID.
1742 :param volume_uuid str: volume uuid to search for
1743 :rtype: dict(str, any)
1744 """
1745 for volume in self.get_resources_info().values():
1746 if volume["uuid"] == volume_uuid:
1747 return volume
1749 raise LinstorVolumeManagerError(
1750 f"Could not find info about volume `{volume_uuid}`",
1751 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
1752 )
1754 def get_database_path(self):
1755 """
1756 Get the database path.
1757 :return: The current database path.
1758 :rtype: str
1759 """
1760 return self._request_database_path(self._linstor, activate=True)
1762 @classmethod
1763 def get_all_group_names(cls, base_name):
1764 """
1765 Get all group names. I.e. list of current group + HA.
1766 :param str base_name: The SR group_name to use.
1767 :return: List of group names.
1768 :rtype: list
1769 """
1770 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
    @classmethod
    def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Create a new SR on the given nodes.
        :param str group_name: The SR group_name to use.
        :param set(str) ips: Node ips.
        :param int redundancy: How many copy of volumes should we store?
        :param bool thin_provisioning: Use thin or thick provisioning.
        :param function logger: Function to log messages.
        :return: A new LinstorSr instance.
        :rtype: LinstorSr
        """

        try:
            # The controller must run during the whole creation process.
            cls._start_controller(start=True)
            sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
        finally:
            # Controller must be stopped and volume unmounted because
            # it is the role of the drbd-reactor daemon to do the right
            # actions.
            cls._start_controller(start=False)
            cls._mount_volume(
                cls.build_device_path(DATABASE_VOLUME_NAME),
                DATABASE_PATH,
                mount=False
            )
        return sr
    @classmethod
    def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
        """
        Worker of create_sr: registers the nodes, creates the storage pools,
        resource groups and the LINSTOR database volume, and rolls back
        everything on failure. The controller is expected to be running.
        """
        # 1. Check if SR already exists.
        uri = 'linstor://localhost'

        lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        node_names = list(ips.keys())
        for node_name, ip in ips.items():
            while True:
                # Try to create node.
                result = lin.node_create(
                    node_name,
                    linstor.consts.VAL_NODE_TYPE_CMBD,
                    ip
                )

                errors = cls._filter_errors(result)
                if cls._check_errors(
                    errors, [linstor.consts.FAIL_EXISTS_NODE]
                ):
                    # If it already exists, remove, then recreate.
                    result = lin.node_delete(node_name)
                    error_str = cls._get_error_str(result)
                    if error_str:
                        raise LinstorVolumeManagerError(
                            'Failed to remove old node `{}`: {}'
                            .format(node_name, error_str)
                        )
                elif not errors:
                    break  # Created!
                else:
                    raise LinstorVolumeManagerError(
                        'Failed to create node `{}` with ip `{}`: {}'.format(
                            node_name, ip, cls._get_error_str(errors)
                        )
                    )

        # The storage pool shares the group name; keep the original value
        # around as the LVM driver pool name (VG or VG/LV).
        driver_pool_name = group_name
        base_group_name = group_name
        group_name = cls._build_group_name(group_name)
        storage_pool_name = group_name
        pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
        if pools:
            existing_node_names = [pool.node_name for pool in pools]
            raise LinstorVolumeManagerError(
                'Unable to create SR `{}`. It already exists on node(s): {}'
                .format(group_name, existing_node_names)
            )

        # A leftover resource group without any resource definition is
        # treated as stale: back up the config files and continue.
        if lin.resource_group_list_raise(
            cls.get_all_group_names(base_group_name)
        ).resource_groups:
            if not lin.resource_dfn_list_raise().resource_definitions:
                backup_path = cls._create_database_backup_path()
                logger(
                    'Group name already exists `{}` without LVs. '
                    'Ignoring and moving the config files in {}'.format(group_name, backup_path)
                )
                cls._move_files(DATABASE_PATH, backup_path)
            else:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: The group name already exists'
                    .format(group_name)
                )

        # Thin provisioning requires an LVM thin pool given as "VG/LV".
        if thin_provisioning:
            driver_pool_parts = driver_pool_name.split('/')
            if not len(driver_pool_parts) == 2:
                raise LinstorVolumeManagerError(
                    'Invalid group name using thin provisioning. '
                    'Expected format: \'VG/LV`\''
                )

        # 2. Create storage pool on each node + resource group.
        reg_volume_group_not_found = re.compile(
            ".*Volume group '.*' not found$"
        )

        # `i` tracks how many nodes have been processed so the rollback
        # below knows which storage pools may need cleanup.
        i = 0
        try:
            # 2.a. Create storage pools.
            storage_pool_count = 0
            while i < len(node_names):
                node_name = node_names[i]

                result = lin.storage_pool_create(
                    node_name=node_name,
                    storage_pool_name=storage_pool_name,
                    storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
                    driver_pool_name=driver_pool_name
                )

                errors = linstor.Linstor.filter_api_call_response_errors(
                    result
                )
                if errors:
                    # A missing volume group on one node is tolerated: the
                    # SR is simply not backed by that node.
                    if len(errors) == 1 and errors[0].is_error(
                        linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
                    ) and reg_volume_group_not_found.match(errors[0].message):
                        logger(
                            'Volume group `{}` not found on `{}`. Ignoring...'
                            .format(group_name, node_name)
                        )
                        cls._destroy_storage_pool(lin, storage_pool_name, node_name)
                    else:
                        error_str = cls._get_error_str(result)
                        raise LinstorVolumeManagerError(
                            'Could not create SP `{}` on node `{}`: {}'
                            .format(group_name, node_name, error_str)
                        )
                else:
                    storage_pool_count += 1
                i += 1

            if not storage_pool_count:
                raise LinstorVolumeManagerError(
                    'Unable to create SR `{}`: No VG group found'.format(
                        group_name,
                    )
                )

            # 2.b. Create resource groups.
            ha_group_name = cls._build_ha_group_name(base_group_name)
            cls._create_resource_group(
                lin,
                group_name,
                storage_pool_name,
                redundancy,
                True
            )
            cls._create_resource_group(
                lin,
                ha_group_name,
                storage_pool_name,
                3,
                True
            )

            # 3. Create the LINSTOR database volume and mount it.
            try:
                logger('Creating database volume...')
                volume_path = cls._create_database_volume(
                    lin, ha_group_name, storage_pool_name, node_names, redundancy
                )
            except LinstorVolumeManagerError as e:
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    logger('Destroying database volume after creation fail...')
                    cls._force_destroy_database_volume(lin, group_name)
                raise

            try:
                logger('Mounting database volume...')

                # First we must disable the controller to move safely the
                # LINSTOR config.
                cls._start_controller(start=False)

                cls._mount_database_volume(volume_path)
            except Exception as e:
                # Ensure we are connected because controller has been
                # restarted during mount call.
                logger('Destroying database volume after mount fail...')

                try:
                    cls._start_controller(start=True)
                except Exception:
                    pass

                lin = cls._create_linstor_instance(
                    uri, keep_uri_unmodified=True
                )
                cls._force_destroy_database_volume(lin, group_name)
                raise e

            cls._start_controller(start=True)
            lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)

        # 4. Remove storage pools/resource/volume group in the case of errors.
        except Exception as e:
            logger('Destroying resource group and storage pools after fail...')
            try:
                cls._destroy_resource_group(lin, group_name)
                cls._destroy_resource_group(lin, ha_group_name)
            except Exception as e2:
                logger('Failed to destroy resource group: {}'.format(e2))
                pass
            # Clean up every storage pool created before the failure.
            j = 0
            i = min(i, len(node_names) - 1)
            while j <= i:
                try:
                    cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
                except Exception as e2:
                    logger('Failed to destroy resource group: {}'.format(e2))
                    pass
                j += 1
            raise e

        # 5. Return new instance.
        # __new__ is used on purpose: the regular constructor would try to
        # reconnect and reload state we already have at hand.
        instance = cls.__new__(cls)
        instance._linstor = lin
        instance._logger = logger
        instance._redundancy = redundancy
        instance._base_group_name = base_group_name
        instance._group_name = group_name
        instance._volumes = set()
        instance._storage_pools_time = 0
        instance._kv_cache = instance._create_kv_cache()
        instance._resource_cache = None
        instance._resource_cache_dirty = True
        instance._volume_info_cache = None
        instance._volume_info_cache_dirty = True
        return instance
2014 @classmethod
2015 def build_device_path(cls, volume_name):
2016 """
2017 Build a device path given a volume name.
2018 :param str volume_name: The volume name to use.
2019 :return: A valid or not device path.
2020 :rtype: str
2021 """
2023 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2025 @classmethod
2026 def build_volume_name(cls, base_name):
2027 """
2028 Build a volume name given a base name (i.e. a UUID).
2029 :param str base_name: The volume name to use.
2030 :return: A valid or not device path.
2031 :rtype: str
2032 """
2033 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
2035 @classmethod
2036 def round_up_volume_size(cls, volume_size):
2037 """
2038 Align volume size on higher multiple of BLOCK_SIZE.
2039 :param int volume_size: The volume size to align.
2040 :return: An aligned volume size.
2041 :rtype: int
2042 """
2043 return round_up(volume_size, cls.BLOCK_SIZE)
2045 @classmethod
2046 def round_down_volume_size(cls, volume_size):
2047 """
2048 Align volume size on lower multiple of BLOCK_SIZE.
2049 :param int volume_size: The volume size to align.
2050 :return: An aligned volume size.
2051 :rtype: int
2052 """
2053 return round_down(volume_size, cls.BLOCK_SIZE)
2055 # --------------------------------------------------------------------------
2056 # Private helpers.
2057 # --------------------------------------------------------------------------
2059 def _create_kv_cache(self):
2060 self._kv_cache = self._create_linstor_kv('/')
2061 self._kv_cache_dirty = False
2062 return self._kv_cache
2064 def _get_kv_cache(self):
2065 if self._kv_cache_dirty:
2066 self._kv_cache = self._create_kv_cache()
2067 return self._kv_cache
2069 def _create_resource_cache(self):
2070 self._resource_cache = self._linstor.resource_list_raise()
2071 self._resource_cache_dirty = False
2072 return self._resource_cache
2074 def _get_resource_cache(self):
2075 if self._resource_cache_dirty:
2076 self._resource_cache = self._create_resource_cache()
2077 return self._resource_cache
2079 def _mark_resource_cache_as_dirty(self):
2080 self._resource_cache_dirty = True
2081 self._volume_info_cache_dirty = True
2083 # --------------------------------------------------------------------------
2085 def _ensure_volume_exists(self, volume_uuid):
2086 if volume_uuid not in self._volumes:
2087 raise LinstorVolumeManagerError(
2088 'volume `{}` doesn\'t exist'.format(volume_uuid),
2089 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2090 )
2092 def _find_best_size_candidates(self):
2093 result = self._linstor.resource_group_qmvs(self._group_name)
2094 error_str = self._get_error_str(result)
2095 if error_str:
2096 raise LinstorVolumeManagerError(
2097 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2098 self._group_name,
2099 error_str
2100 )
2101 )
2102 return result[0].candidates
2104 def _fetch_resource_names(self, ignore_deleted=True):
2105 resource_names = set()
2106 dfns = self._linstor.resource_dfn_list_raise().resource_definitions
2107 for dfn in dfns:
2108 if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
2109 ignore_deleted or
2110 linstor.consts.FLAG_DELETE not in dfn.flags
2111 ):
2112 resource_names.add(dfn.name)
2113 return resource_names
2115 def _get_volumes_info(self, volume_names=None):
2116 all_volume_info = {}
2118 if not self._volume_info_cache_dirty:
2119 return self._volume_info_cache
2121 # `volume_names` MUST contain all volumes registered in the KV store.
2122 # It can be provided to the function to avoid double fetching.
2123 if not volume_names:
2124 volume_names = self.get_volumes_with_name()
2125 volume_names = set(volume_names.values())
2127 def process_resource(resource):
2128 if resource.name not in all_volume_info:
2129 current = all_volume_info[resource.name] = self.VolumeInfo(
2130 resource.name
2131 )
2132 else:
2133 current = all_volume_info[resource.name]
2135 if linstor.consts.FLAG_DISKLESS not in resource.flags:
2136 current.diskful.append(resource.node_name)
2138 for volume in resource.volumes:
2139 # We ignore diskless pools of the form "DfltDisklessStorPool".
2140 if volume.storage_pool_name != self._group_name:
2141 continue
2142 # Only fetch first volume.
2143 if volume.number != 0:
2144 continue
2146 allocated_size = volume.allocated_size
2147 if allocated_size > current.allocated_size:
2148 current.allocated_size = allocated_size
2150 usable_size = volume.usable_size
2151 if usable_size > 0 and (
2152 usable_size < current.virtual_size or
2153 not current.virtual_size
2154 ):
2155 current.virtual_size = usable_size
2157 try:
2158 for resource in self._get_resource_cache().resources:
2159 if resource.name in volume_names:
2160 process_resource(resource)
2161 for volume in all_volume_info.values():
2162 if volume.allocated_size <= 0:
2163 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name))
2165 if volume.virtual_size <= 0:
2166 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name))
2168 volume.allocated_size *= 1024
2169 volume.virtual_size *= 1024
2170 except LinstorVolumeManagerError:
2171 self._mark_resource_cache_as_dirty()
2172 raise
2174 self._volume_info_cache_dirty = False
2175 self._volume_info_cache = all_volume_info
2177 return all_volume_info
2179 def _get_volume_node_names_and_size(self, volume_name):
2180 node_names = set()
2181 size = -1
2182 for resource in self._linstor.resource_list_raise(
2183 filter_by_resources=[volume_name]
2184 ).resources:
2185 for volume in resource.volumes:
2186 # We ignore diskless pools of the form "DfltDisklessStorPool".
2187 if volume.storage_pool_name != self._group_name:
2188 continue
2190 node_names.add(resource.node_name)
2192 usable_size = volume.usable_size
2193 if usable_size <= 0:
2194 continue
2196 if size < 0:
2197 size = usable_size
2198 else:
2199 size = min(size, usable_size)
2201 if size <= 0:
2202 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name))
2204 return (node_names, size * 1024)
2206 def _compute_size(self, attr):
2207 capacity = 0
2208 for pool in self._get_storage_pools(force=True):
2209 space = pool.free_space
2210 if space:
2211 size = getattr(space, attr)
2212 if size < 0:
2213 raise LinstorVolumeManagerError(
2214 'Failed to get pool {} attr of `{}`'
2215 .format(attr, pool.node_name)
2216 )
2217 capacity += size
2218 return capacity * 1024
2220 def _get_node_names(self):
2221 node_names = set()
2222 for pool in self._get_storage_pools():
2223 node_names.add(pool.node_name)
2224 return node_names
2226 def _get_storage_pools(self, force=False):
2227 cur_time = time.time()
2228 elsaped_time = cur_time - self._storage_pools_time
2230 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2231 self._storage_pools = self._linstor.storage_pool_list_raise(
2232 filter_by_stor_pools=[self._group_name]
2233 ).storage_pools
2234 self._storage_pools_time = time.time()
2236 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Low-level volume creation: spawn the resource definition (and,
        optionally, auto-place the resources) with up to 5 retries.

        :param str volume_uuid: UUID used in error reports and cleanup.
        :param str volume_name: LINSTOR resource name to create.
        :param int size: Requested size in bytes (rounded up to BLOCK_SIZE).
        :param bool place_resources: Auto-place resources after creating
            the definition.
        :param bool high_availability: Use the HA resource group instead
            of the default one.
        :raises LinstorVolumeManagerError: On creation failure.
        """
        size = self.round_up_volume_size(size)
        # The resource list will change whatever happens below.
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name

        def create_definition():
            # Spawn the definition only (no resource placement). If the
            # HA group is missing, create it once and retry.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    # Only recover from a missing HA group, and only once.
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    # HA group: storage pool of this SR, redundancy 3,
                    # destroy a pre-existing (empty) group if any.
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback of a partially-created volume; keep
            # the KV properties so the caller can still inspect them.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # Never destroy a volume that already existed before us.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register it in the KV store: properties are
        written in STATE_CREATING first, then the LINSTOR volume is
        created.

        :param str volume_uuid: UUID to register.
        :param str volume_name: LINSTOR resource name to create.
        :param int size: Requested size in bytes.
        :param bool place_resources: Auto-place resources (see
            `_create_volume`).
        :param bool high_availability: Use the HA resource group.
        :return: The KV properties object scoped to the new volume.
        :raises LinstorVolumeManagerError: If the volume (or its
            properties, or a resource of the same name) already exists,
            or if creation fails.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark as "creating" before touching LINSTOR so a crash can
            # be detected and repaired later.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
2372 def _find_device_path(self, volume_uuid, volume_name):
2373 current_device_path = self._request_device_path(
2374 volume_uuid, volume_name, activate=True
2375 )
2377 # We use realpath here to get the /dev/drbd<id> path instead of
2378 # /dev/drbd/by-res/<resource_name>.
2379 expected_device_path = self.build_device_path(volume_name)
2380 util.wait_for_path(expected_device_path, 5)
2382 device_realpath = os.path.realpath(expected_device_path)
2383 if current_device_path != device_realpath:
2384 raise LinstorVolumeManagerError(
2385 'Invalid path, current={}, expected={} (realpath={})'
2386 .format(
2387 current_device_path,
2388 expected_device_path,
2389 device_realpath
2390 )
2391 )
2392 return expected_device_path
2394 def _request_device_path(self, volume_uuid, volume_name, activate=False):
2395 node_name = socket.gethostname()
2397 resource = next(filter(
2398 lambda resource: resource.node_name == node_name and
2399 resource.name == volume_name,
2400 self._get_resource_cache().resources
2401 ), None)
2403 if not resource:
2404 if activate:
2405 self._mark_resource_cache_as_dirty()
2406 self._activate_device_path(
2407 self._linstor, node_name, volume_name
2408 )
2409 return self._request_device_path(volume_uuid, volume_name)
2410 raise LinstorVolumeManagerError(
2411 'Unable to get dev path for `{}`, no resource found but definition "seems" to exist'
2412 .format(volume_uuid)
2413 )
2415 # Contains a path of the /dev/drbd<id> form.
2416 device_path = resource.volumes[0].device_path
2417 if not device_path:
2418 raise LinstorVolumeManagerError('Empty dev path for `{}`!'.format(volume_uuid))
2419 return device_path
2421 def _destroy_resource(self, resource_name, force=False):
2422 result = self._linstor.resource_dfn_delete(resource_name)
2423 error_str = self._get_error_str(result)
2424 if not error_str:
2425 self._mark_resource_cache_as_dirty()
2426 return
2428 if not force:
2429 self._mark_resource_cache_as_dirty()
2430 raise LinstorVolumeManagerError(
2431 'Could not destroy resource `{}` from SR `{}`: {}'
2432 .format(resource_name, self._group_name, error_str)
2433 )
2435 # If force is used, ensure there is no opener.
2436 all_openers = get_all_volume_openers(resource_name, '0')
2437 for openers in all_openers.values():
2438 if openers:
2439 self._mark_resource_cache_as_dirty()
2440 raise LinstorVolumeManagerError(
2441 'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
2442 .format(resource_name, self._group_name, error_str, all_openers)
2443 )
2445 # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
2446 resource_states = filter(
2447 lambda resource_state: resource_state.name == resource_name,
2448 self._get_resource_cache().resource_states
2449 )
2451 # Mark only after computation of states.
2452 self._mark_resource_cache_as_dirty()
2454 for resource_state in resource_states:
2455 volume_state = resource_state.volume_states[0]
2456 if resource_state.in_use:
2457 demote_drbd_resource(resource_state.node_name, resource_name)
2458 break
2459 self._destroy_resource(resource_name)
2461 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
2462 volume_properties = self._get_volume_properties(volume_uuid)
2463 try:
2464 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
2465 if volume_name in self._fetch_resource_names():
2466 self._destroy_resource(volume_name, force)
2468 # Assume this call is atomic.
2469 if not preserve_properties:
2470 volume_properties.clear()
2471 except Exception as e:
2472 raise LinstorVolumeManagerError(
2473 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
2474 )
    def _build_volumes(self, repair):
        """
        (Re)build `self._volumes` from the KV store, optionally cleaning
        up broken entries.

        Three passes:
        1. Refuse to run (unless `repair`) if "updating uuid" markers
           from an interrupted UUID rename are present.
        2. Register each existing/creating volume; when `repair` is set,
           destroy (or rename with a DELETED_ prefix) bad entries.
        3. Finish interrupted UUID renames.

        :param bool repair: Allow destructive cleanup (master only).
        :raises LinstorVolumeManagerError: When repair is required but
            not allowed.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            # The KV object is shared: point it at this volume's namespace.
            properties.namespace = self._build_volume_namespace(volume_uuid)

            # Volumes being renamed are handled by the third pass below.
            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Third pass: complete interrupted UUID renames.
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            # If the destination was never fully created, drop it.
            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                properties.clear()
                continue

            # Destination is valid: drop the source entry entirely...
            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            # ...and remove the rename marker from the destination.
            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
2583 def _get_sr_properties(self):
2584 return self._create_linstor_kv(self._build_sr_namespace())
2586 def _get_volumes_by_property(
2587 self, reg_prop, ignore_inexisting_volumes=True
2588 ):
2589 base_properties = self._get_kv_cache()
2590 base_properties.namespace = self._build_volume_namespace()
2592 volume_properties = {}
2593 for volume_uuid in self._volumes:
2594 volume_properties[volume_uuid] = ''
2596 for key, value in base_properties.items():
2597 res = reg_prop.match(key)
2598 if res:
2599 volume_uuid = res.groups()[0]
2600 if not ignore_inexisting_volumes or \
2601 volume_uuid in self._volumes:
2602 volume_properties[volume_uuid] = value
2604 return volume_properties
2606 def _create_linstor_kv(self, namespace):
2607 return linstor.KV(
2608 self._group_name,
2609 uri=self._linstor.controller_host(),
2610 namespace=namespace
2611 )
2613 def _get_volume_properties(self, volume_uuid):
2614 properties = self._get_kv_cache()
2615 properties.namespace = self._build_volume_namespace(volume_uuid)
2616 return properties
2618 @classmethod
2619 def _build_sr_namespace(cls):
2620 return '/{}/'.format(cls.NAMESPACE_SR)
2622 @classmethod
2623 def _build_volume_namespace(cls, volume_uuid=None):
2624 # Return a path to all volumes if `volume_uuid` is not given.
2625 if volume_uuid is None:
2626 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2627 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2629 @classmethod
2630 def _get_error_str(cls, result):
2631 return ', '.join([
2632 err.message for err in cls._filter_errors(result)
2633 ])
2635 @classmethod
2636 def _create_linstor_instance(
2637 cls, uri, keep_uri_unmodified=False, attempt_count=30
2638 ):
2639 retry = False
2641 def connect(uri):
2642 if not uri:
2643 uri = get_controller_uri()
2644 if not uri:
2645 raise LinstorVolumeManagerError(
2646 'Unable to find controller uri...'
2647 )
2648 instance = linstor.Linstor(uri, keep_alive=True)
2649 instance.connect()
2650 return instance
2652 try:
2653 return connect(uri)
2654 except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
2655 pass
2657 if not keep_uri_unmodified:
2658 uri = None
2660 return util.retry(
2661 lambda: connect(uri),
2662 maxretry=attempt_count,
2663 period=1,
2664 exceptions=[
2665 linstor.errors.LinstorNetworkError,
2666 LinstorVolumeManagerError
2667 ]
2668 )
2670 @classmethod
2671 def _configure_volume_peer_slots(cls, lin, volume_name):
2672 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2673 error_str = cls._get_error_str(result)
2674 if error_str:
2675 raise LinstorVolumeManagerError(
2676 'Could not configure volume peer slots of {}: {}'
2677 .format(volume_name, error_str)
2678 )
2680 @classmethod
2681 def _activate_device_path(cls, lin, node_name, volume_name):
2682 result = lin.resource_make_available(node_name, volume_name, diskful=False)
2683 if linstor.Linstor.all_api_responses_no_error(result):
2684 return
2685 errors = linstor.Linstor.filter_api_call_response_errors(result)
2686 if len(errors) == 1 and errors[0].is_error(
2687 linstor.consts.FAIL_EXISTS_RSC
2688 ):
2689 return
2691 raise LinstorVolumeManagerError(
2692 'Unable to activate device path of `{}` on node `{}`: {}'
2693 .format(volume_name, node_name, ', '.join(
2694 [str(x) for x in result]))
2695 )
2697 @classmethod
2698 def _request_database_path(cls, lin, activate=False):
2699 node_name = socket.gethostname()
2701 try:
2702 resource = next(filter(
2703 lambda resource: resource.node_name == node_name and
2704 resource.name == DATABASE_VOLUME_NAME,
2705 lin.resource_list_raise().resources
2706 ), None)
2707 except Exception as e:
2708 raise LinstorVolumeManagerError(
2709 'Unable to fetch database resource: {}'
2710 .format(e)
2711 )
2713 if not resource:
2714 if activate:
2715 cls._activate_device_path(
2716 lin, node_name, DATABASE_VOLUME_NAME
2717 )
2718 return cls._request_database_path(
2719 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2720 )
2721 raise LinstorVolumeManagerError(
2722 'Empty dev path for `{}`, but definition "seems" to exist'
2723 .format(DATABASE_PATH)
2724 )
2725 # Contains a path of the /dev/drbd<id> form.
2726 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create, place, activate and format the LINSTOR database volume.

        Must be called on an SR with an empty resource definition list.
        Diskful resources are created on the first `redundancy` nodes
        that own a storage pool; every other node gets a diskless one.

        :param linstor.Linstor lin: The LINSTOR client to use.
        :param str group_name: Resource group to spawn the volume in.
        :param str storage_pool_name: Storage pool for diskful resources.
        :param node_names: All node names of the pool.
        :param int redundancy: Number of diskful replicas.
        :return: The local device path of the formatted volume.
        :rtype: str
        :raises LinstorVolumeManagerError: On any creation/format error.
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        # Split the nodes: those with a storage pool get diskful
        # candidates, the rest can only be diskless.
        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new volume (mkfs.ext4); retried because the DRBD
        # device may not accept writes immediately.
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2852 @classmethod
2853 def _destroy_database_volume(cls, lin, group_name):
2854 error_str = cls._get_error_str(
2855 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2856 )
2857 if error_str:
2858 raise LinstorVolumeManagerError(
2859 'Could not destroy resource `{}` from SR `{}`: {}'
2860 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2861 )
    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or umount) the database volume on DATABASE_PATH, moving
        any files already present through a temporary backup directory
        so they survive the operation. On failure, a best-effort
        rollback restores the previous mount state and files.

        :param str volume_path: Device path of the database volume.
        :param bool mount: True to mount, False to umount.
        :param bool force: Passed to `_move_files` when restoring files
            (allows overwriting existing destinations).
        :raises: Re-raises the original failure after rollback.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # Rollback helper: run `fn`, ignoring any error it raises.
            # NOTE(review): `backup_path` may be unbound here if
            # os.listdir or the backup-path creation failed; the
            # resulting NameError inside the lambdas is swallowed by
            # force_exec — confirm this is intended.
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # Undo the (un)mount if it went through, restoring files first.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            # Put the backed-up files back in place.
            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e
2910 @classmethod
2911 def _force_destroy_database_volume(cls, lin, group_name):
2912 try:
2913 cls._destroy_database_volume(lin, group_name)
2914 except Exception:
2915 pass
2917 @classmethod
2918 def _destroy_storage_pool(cls, lin, group_name, node_name):
2919 def destroy():
2920 result = lin.storage_pool_delete(node_name, group_name)
2921 errors = cls._filter_errors(result)
2922 if cls._check_errors(errors, [
2923 linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
2924 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
2925 ]):
2926 return
2928 if errors:
2929 raise LinstorVolumeManagerError(
2930 'Failed to destroy SP `{}` on node `{}`: {}'.format(
2931 group_name,
2932 node_name,
2933 cls._get_error_str(errors)
2934 )
2935 )
2937 # We must retry to avoid errors like:
2938 # "can not be deleted as volumes / snapshot-volumes are still using it"
2939 # after LINSTOR database volume destruction.
2940 return util.retry(destroy, maxretry=10)
    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create a resource group plus its volume group.

        If a group of the same name already exists and
        `destroy_old_group` is set, a single attempt is made to destroy
        the old (empty) group and recreate it.

        :param linstor.Linstor lin: The LINSTOR client to use.
        :param str group_name: Name of the resource group to create.
        :param str storage_pool_name: Storage pool backing the group.
        :param int redundancy: Auto-place count of the group.
        :param bool destroy_old_group: Replace a pre-existing group.
        :raises LinstorVolumeManagerError: On RG or VG creation failure.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                # Only one destroy-and-retry cycle is allowed.
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        # Keep going to the raise below with this reason.
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # The RG exists: add its volume group.
        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )
2991 @classmethod
2992 def _destroy_resource_group(cls, lin, group_name):
2993 def destroy():
2994 result = lin.resource_group_delete(group_name)
2995 errors = cls._filter_errors(result)
2996 if cls._check_errors(errors, [
2997 linstor.consts.FAIL_NOT_FOUND_RSC_GRP
2998 ]):
2999 return
3001 if errors:
3002 raise LinstorVolumeManagerError(
3003 'Failed to destroy RG `{}`: {}'
3004 .format(group_name, cls._get_error_str(errors))
3005 )
3007 return util.retry(destroy, maxretry=10)
3009 @classmethod
3010 def _build_group_name(cls, base_name):
3011 # If thin provisioning is used we have a path like this:
3012 # `VG/LV`. "/" is not accepted by LINSTOR.
3013 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
3015 # Used to store important data in a HA context,
3016 # i.e. a replication count of 3.
3017 @classmethod
3018 def _build_ha_group_name(cls, base_name):
3019 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
3021 @classmethod
3022 def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
3023 errors = cls._filter_errors(result)
3024 if cls._check_errors(errors, [
3025 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
3026 ]):
3027 raise LinstorVolumeManagerError(
3028 'Failed to create volume `{}` from SR `{}`, it already exists'
3029 .format(volume_uuid, group_name),
3030 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
3031 )
3033 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
3034 raise LinstorVolumeManagerError(
3035 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
3036 .format(volume_uuid, group_name),
3037 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
3038 )
3040 if errors:
3041 raise LinstorVolumeManagerError(
3042 'Failed to create volume `{}` from SR `{}`: {}'.format(
3043 volume_uuid,
3044 group_name,
3045 cls._get_error_str(errors)
3046 )
3047 )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move every file of `src_dir` into `dest_dir`.

        Unless `force` is set, the destination must be empty and no file
        may be overwritten. On failure, already-moved files are moved
        back (best effort) before re-raising.

        :param str src_dir: Source directory.
        :param str dest_dir: Destination directory.
        :param bool force: Allow overwriting destination files.
        :raises LinstorVolumeManagerError: On any listing or move error.
        """
        def listdir(dir):
            # "lost+found" (ext4 artifact) must never be moved.
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            # Best-effort rollback: move already-transferred files back
            # (with force=True to avoid a second rollback recursion).
            if not force:
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3099 @staticmethod
3100 def _create_database_backup_path():
3101 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3102 try:
3103 os.mkdir(path)
3104 return path
3105 except Exception as e:
3106 raise LinstorVolumeManagerError(
3107 'Failed to create backup path {} of LINSTOR config: {}'
3108 .format(path, e)
3109 )
3111 @staticmethod
3112 def _get_filtered_properties(properties):
3113 return dict(properties.items())
3115 @staticmethod
3116 def _filter_errors(result):
3117 return [
3118 err for err in result
3119 if hasattr(err, 'is_error') and err.is_error()
3120 ]
3122 @staticmethod
3123 def _check_errors(result, codes):
3124 for err in result:
3125 for code in codes:
3126 if err.is_error(code):
3127 return True
3128 return False
3130 @classmethod
3131 def _controller_is_running(cls):
3132 return cls._service_is_running('linstor-controller')
3134 @classmethod
3135 def _start_controller(cls, start=True):
3136 return cls._start_service('linstor-controller', start)
3138 @staticmethod
3139 def _start_service(name, start=True):
3140 action = 'start' if start else 'stop'
3141 (ret, out, err) = util.doexec([
3142 'systemctl', action, name
3143 ])
3144 if ret != 0:
3145 raise LinstorVolumeManagerError(
3146 'Failed to {} {}: {} {}'
3147 .format(action, name, out, err)
3148 )
3150 @staticmethod
3151 def _service_is_running(name):
3152 (ret, out, err) = util.doexec([
3153 'systemctl', 'is-active', '--quiet', name
3154 ])
3155 return not ret
3157 @staticmethod
3158 def _is_mounted(mountpoint):
3159 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3160 return ret == 0
3162 @classmethod
3163 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3164 if mount:
3165 try:
3166 util.pread(['mount', volume_path, mountpoint])
3167 except Exception as e:
3168 raise LinstorVolumeManagerError(
3169 'Failed to mount volume {} on {}: {}'
3170 .format(volume_path, mountpoint, e)
3171 )
3172 else:
3173 try:
3174 if cls._is_mounted(mountpoint):
3175 util.pread(['umount', mountpoint])
3176 except Exception as e:
3177 raise LinstorVolumeManagerError(
3178 'Failed to umount volume {} on {}: {}'
3179 .format(volume_path, mountpoint, e)
3180 )
3183# ==============================================================================
3185# Check if a path is a DRBD resource and log the process name/pid
3186# that opened it.
def log_drbd_openers(path):
    """Log the process names/PIDs keeping the DRBD resource at `path` open.

    Silently returns when `path` is not under the DRBD by-res directory,
    is malformed, or does not resolve to a DRBD block device. Any other
    failure is logged, never raised.

    :param str path: A `/dev/drbd/by-res/<resource>/<volume>` path.
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Extract "<resource>" and "<volume>" from the by-res path.
    suffix = path[len(DRBD_BY_RES_PATH):]
    (res_name, sep, remainder) = suffix.partition('/')
    if not sep or '/' not in remainder:
        # No volume component after the resource name.
        return
    volume = remainder.rsplit('/', 1)[-1]

    try:
        # Ensure path is a DRBD (block device with DRBD's major number 147).
        real_path = os.path.realpath(path)
        st = os.stat(real_path)
        if not stat.S_ISBLK(st.st_mode) or os.major(st.st_rdev) != 147:
            return

        # Find where the device is open.
        (code, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if code != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        if stdout.startswith('{} role:Primary'.format(res_name)):
            # Local device: ask the local debugfs openers file.
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
        else:
            # Remote device: query every host.
            util.SMlog(
                'DRBD resource `{}` is open on hosts: {}'
                .format(path, get_all_volume_openers(res_name, volume))
            )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )