Coverage for drivers/linstorvolumemanager.py : 10%
Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python3
2#
3# Copyright (C) 2020 Vates SAS - ronan.abhamon@vates.fr
4#
5# This program is free software: you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation, either version 3 of the License, or
8# (at your option) any later version.
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program. If not, see <https://www.gnu.org/licenses/>.
16#
18from sm_typing import (
19 Any,
20 Dict,
21 List,
22 override,
23)
25import json
26import linstor
27import os.path
28import re
29import shutil
30import socket
31import stat
32import time
33import util
34import uuid
# Persistent prefix to add to RAW persistent volumes.
PERSISTENT_PREFIX = 'xcp-persistent-'

# Contains the data of the "/var/lib/linstor" directory
# (the LINSTOR controller database volume).
DATABASE_VOLUME_NAME = PERSISTENT_PREFIX + 'database'
DATABASE_SIZE = 1 << 30  # 1GB.
DATABASE_PATH = '/var/lib/linstor'
DATABASE_MKFS = 'mkfs.ext4'  # Filesystem used to format the database volume.

# TCP port used by LINSTOR satellites.
LINSTOR_SATELLITE_PORT = 3366

# Matches the node that holds the Primary role in `drbdadm status` output.
REG_DRBDADM_PRIMARY = re.compile("([^\\s]+)\\s+role:Primary")
# Extracts an address field from `drbdsetup` output.
REG_DRBDSETUP_IP = re.compile('[^\\s]+\\s+(.*):.*$')

# Directory of DRBD device symlinks indexed by resource name.
DRBD_BY_RES_PATH = '/dev/drbd/by-res/'

# Name of the SMAPI plugin used for cross-host calls.
PLUGIN = 'linstor-manager'
55# ==============================================================================
def get_local_volume_openers(resource_name, volume):
    """
    Read the DRBD debugfs "openers" file of a volume on the local host.
    :param str resource_name: Name of the DRBD resource.
    :param int volume: Volume number inside the resource.
    :return: JSON string mapping PID -> process name and open duration (ms).
    :rtype: str
    :raises Exception: If arguments are missing or a line cannot be parsed.
    """
    if not resource_name or volume is None:
        raise Exception('Cannot get DRBD openers without resource name and/or volume.')

    path = '/sys/kernel/debug/drbd/resources/{}/volumes/{}/openers'.format(
        resource_name, volume
    )

    with open(path, 'r') as openers:
        # Not a big cost, so read all lines directly.
        lines = openers.readlines()

    result = {}

    # Each line is: "<process name> <pid> <open duration in ms>".
    opener_re = re.compile('(.*)\\s+([0-9]+)\\s+([0-9]+)')
    for line in lines:
        match = opener_re.match(line)
        if not match:
            # Fixed: was a bare `assert`, which is stripped under `python -O`
            # and gives an opaque AssertionError otherwise.
            raise Exception('Unable to parse DRBD openers line: {}'.format(line))

        process_name, pid, open_duration_ms = match.groups()
        result[pid] = {
            'process-name': process_name,
            'open-duration': open_duration_ms
        }

    return json.dumps(result)
def get_all_volume_openers(resource_name, volume):
    """
    Collect the DRBD openers of a volume on every live host of the pool.
    :param str resource_name: Name of the DRBD resource.
    :param int volume: Volume number inside the resource.
    :return: Dict of node name -> parsed openers of that node.
    :rtype: dict
    """
    PLUGIN_CMD = 'getDrbdOpeners'

    openers = {}
    volume = str(volume)

    session = util.get_localAPI_session()

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            metrics = session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )
            if not metrics['live']:
                # Ensure we call plugin on online hosts only.
                continue

            response = session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {
                    'resourceName': resource_name,
                    'volume': volume
                }
            )
            openers[node_name] = json.loads(response)
        except Exception as e:
            util.SMlog('Failed to get openers of `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))

    return openers
119# ==============================================================================
def round_up(value, divisor):
    """
    Round `value` up to the nearest multiple of `divisor`.
    :param int value: Value to round.
    :param int divisor: Non-zero rounding step.
    :return: The smallest multiple of divisor that is >= value.
    :rtype: int
    """
    assert divisor
    value = int(value)
    divisor = int(divisor)
    remainder = value % divisor
    if not remainder:
        return value
    return value + divisor - remainder
def round_down(value, divisor):
    """
    Round `value` down to the nearest multiple of `divisor`.
    :param int value: Value to round.
    :param int divisor: Non-zero rounding step.
    :return: The largest multiple of divisor that is <= value.
    :rtype: int
    """
    assert divisor
    return (int(value) // int(divisor)) * int(divisor)
133# ==============================================================================
def _get_controller_addresses() -> List[str]:
    """
    List the peer addresses of established connections to the local
    LINSTOR satellite port. Returns an empty list on any failure.
    """
    command = [
        "/usr/sbin/ss", "-tnpH", "state", "established",
        f"( sport = :{LINSTOR_SATELLITE_PORT} )"
    ]
    try:
        (ret, stdout, stderr) = util.doexec(command)
        if ret == 0:
            addresses = []
            for line in stdout.splitlines():
                # Field 4 is "<peer-address>:<port>"; strip the port part.
                addresses.append(line.split()[3].rsplit(":", 1)[0])
            return addresses
        util.SMlog(f"Unexpected code {ret}: {stderr}")
    except Exception as e:
        util.SMlog(f"Unable to get controller addresses: {e}")
    return []
def _get_controller_uri() -> str:
    """
    Build a "linstor://" URI from the first detected controller address.
    Returns an empty string if no address is found.
    """
    # TODO: Check that an IP address from the current pool is returned.
    addresses = _get_controller_addresses()
    if not addresses:
        return ""
    return "linstor://" + addresses[0]
def get_controller_uri():
    """
    Find the LINSTOR controller URI, retrying for up to 30 attempts
    with a one second pause between attempts.
    :return: The controller URI, or None if not found after all attempts.
    """
    MAX_ATTEMPTS = 30
    for attempt in range(MAX_ATTEMPTS):
        uri = _get_controller_uri()
        if uri:
            return uri
        if attempt < MAX_ATTEMPTS - 1:
            time.sleep(1)
    return None
def get_controller_node_name():
    """
    Find the name of the node running the LINSTOR controller.
    First inspects local DRBD state: the Primary of the database volume
    hosts the controller. Falls back to asking every live host through
    the `linstor-manager` plugin.
    :return: 'localhost', a node name, or None if no controller is found.
    """
    PLUGIN_CMD = 'hasControllerRunning'

    (ret, stdout, stderr) = util.doexec([
        'drbdadm', 'status', DATABASE_VOLUME_NAME
    ])

    if ret == 0:
        # First status line concerns the local node: Primary here means
        # the controller database is mounted locally.
        if stdout.startswith('{} role:Primary'.format(DATABASE_VOLUME_NAME)):
            return 'localhost'

        # Otherwise look for a remote Primary in the status output.
        res = REG_DRBDADM_PRIMARY.search(stdout)
        if res:
            return res.groups()[0]

    # DRBD gave no answer: poll each live host via the plugin.
    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        node_name = host_record['hostname']
        try:
            # Only query hosts that are marked live.
            if not session.xenapi.host_metrics.get_record(
                host_record['metrics']
            )['live']:
                continue

            if util.strtobool(session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {}
            )):
                return node_name
        except Exception as e:
            util.SMlog('Failed to call plugin to get controller on `{}`: {}'.format(
                node_name, e
            ))
def demote_drbd_resource(node_name, resource_name):
    """
    Demote a DRBD resource (Primary -> Secondary) on a specific node
    using the `linstor-manager` plugin.
    :param str node_name: Host on which the resource must be demoted.
    :param str resource_name: Name of the DRBD resource.
    :raises Exception: If the node cannot be found, or if the plugin
    call fails (original exception is re-raised after logging).
    """
    PLUGIN_CMD = 'demoteDrbdResource'

    session = util.timeout_call(5, util.get_localAPI_session)

    for host_ref, host_record in session.xenapi.host.get_all_records().items():
        if host_record['hostname'] != node_name:
            continue

        try:
            session.xenapi.host.call_plugin(
                host_ref, PLUGIN, PLUGIN_CMD, {'resource_name': resource_name}
            )
            # Fixed: return on success. Without this, execution fell
            # through to the "unable to find node" exception below even
            # when the demotion succeeded.
            return
        except Exception as e:
            util.SMlog('Failed to demote resource `{}` on `{}`: {}'.format(
                resource_name, node_name, e
            ))
            raise

    raise Exception(
        'Can\'t demote resource `{}`, unable to find node `{}`'
        .format(resource_name, node_name)
    )
225# ==============================================================================
class LinstorVolumeManagerError(Exception):
    """
    Error raised by LinstorVolumeManager. Carries a numeric code that
    callers can compare against the ERR_* constants.
    """

    # Fixed: these constants previously had trailing commas, turning them
    # into 1-element tuples (e.g. `(1,)`) instead of plain integers.
    ERR_GENERIC = 0
    ERR_VOLUME_EXISTS = 1
    ERR_VOLUME_NOT_EXISTS = 2
    ERR_VOLUME_DESTROY = 3
    ERR_GROUP_NOT_EXISTS = 4
    ERR_VOLUME_IN_USE = 5

    def __init__(self, message, code=ERR_GENERIC):
        super(LinstorVolumeManagerError, self).__init__(message)
        self._code = code

    @property
    def code(self):
        # Numeric error code; one of the ERR_* constants.
        return self._code
244# ==============================================================================
246# Note:
247# If a storage pool is not accessible after a network change:
248# linstor node interface modify <NODE> default --ip <IP>
class LinstorVolumeManager(object):
    """
    API to manager LINSTOR volumes in XCP-ng.
    A volume in this context is a physical part of the storage layer.
    """

    __slots__ = (
        '_linstor', '_uri', '_logger', '_redundancy',
        '_base_group_name', '_group_name', '_ha_group_name',
        '_volumes', '_storage_pools', '_storage_pools_time',
        '_kv_cache', '_resource_cache', '_volume_info_cache',
        '_kv_cache_dirty', '_resource_cache_dirty', '_volume_info_cache_dirty',
        '_resources_info_cache',
    )

    # Root directory of the DRBD by-resource device symlinks.
    DEV_ROOT_PATH = DRBD_BY_RES_PATH

    # Default sector size.
    BLOCK_SIZE = 512

    # List of volume properties.
    PROP_METADATA = 'metadata'
    PROP_NOT_EXISTS = 'not-exists'
    PROP_VOLUME_NAME = 'volume-name'
    PROP_IS_READONLY_TIMESTAMP = 'readonly-timestamp'

    # A volume can only be locked for a limited duration.
    # The goal is to give enough time to slaves to execute some actions on
    # a device before an UUID update or a coalesce for example.
    # Expiration is expressed in seconds.
    LOCKED_EXPIRATION_DELAY = 1 * 60

    # Used when volume uuid is being updated.
    PROP_UPDATING_UUID_SRC = 'updating-uuid-src'

    # States of property PROP_NOT_EXISTS.
    STATE_EXISTS = '0'
    STATE_NOT_EXISTS = '1'
    STATE_CREATING = '2'

    # Property namespaces.
    NAMESPACE_SR = 'xcp/sr'
    NAMESPACE_VOLUME = 'xcp/volume'

    # Regex to match properties. The capture group is the volume namespace.
    REG_PROP = '^([^/]+)/{}$'

    REG_METADATA = re.compile(REG_PROP.format(PROP_METADATA))
    REG_NOT_EXISTS = re.compile(REG_PROP.format(PROP_NOT_EXISTS))
    REG_VOLUME_NAME = re.compile(REG_PROP.format(PROP_VOLUME_NAME))
    REG_UPDATING_UUID_SRC = re.compile(REG_PROP.format(PROP_UPDATING_UUID_SRC))

    # Prefixes of SR/VOLUME in the LINSTOR DB.
    # A LINSTOR (resource, group, ...) name cannot start with a number.
    # So we add a prefix behind our SR/VOLUME uuids.
    PREFIX_SR = 'xcp-sr-'
    PREFIX_HA = 'xcp-ha-'
    PREFIX_VOLUME = 'xcp-volume-'

    # Limit request number when storage pool info is asked, we fetch
    # the current pool status after N elapsed seconds.
    STORAGE_POOLS_FETCH_INTERVAL = 15
    @staticmethod
    def default_logger(*args):
        # Fallback logger used when the caller doesn't provide one:
        # dumps the raw argument tuple to stdout.
        print(args)
318 # --------------------------------------------------------------------------
319 # API.
320 # --------------------------------------------------------------------------
322 class VolumeInfo(object):
323 __slots__ = (
324 'name',
325 'allocated_size', # Allocated size, place count is not used.
326 'virtual_size', # Total virtual available size of this volume
327 # (i.e. the user size at creation).
328 'diskful' # Array of nodes that have a diskful volume.
329 )
331 def __init__(self, name):
332 self.name = name
333 self.allocated_size = 0
334 self.virtual_size = 0
335 self.diskful = []
337 @override
338 def __repr__(self) -> str:
339 return 'VolumeInfo("{}", {}, {}, {})'.format(
340 self.name, self.allocated_size, self.virtual_size,
341 self.diskful
342 )
344 # --------------------------------------------------------------------------
    def __init__(
        self, uri, group_name, repair=False, logger=default_logger.__func__,
        attempt_count=30
    ):
        """
        Create a new LinstorVolumeManager object.
        :param str uri: URI to communicate with the LINSTOR controller.
        :param str group_name: The SR group name to use.
        :param bool repair: If true we try to remove bad volumes due to a crash
        or unexpected behavior.
        :param function logger: Function to log messages.
        :param int attempt_count: Number of attempts to join the controller.
        :raises LinstorVolumeManagerError: If node versions mismatch or the
        resource group cannot be found.
        """

        self._uri = uri
        self._linstor = self._create_linstor_instance(
            uri, attempt_count=attempt_count
        )

        # Refuse to work on a cluster where nodes report a LINSTOR
        # version mismatch.
        mismatched_nodes = [
            node for node in self._linstor.node_list().pop().nodes if node.connection_status == "VERSION_MISMATCH"
        ]

        if mismatched_nodes:
            raise LinstorVolumeManagerError(
                "Some linstor nodes are not using the same version. " +
                f"Incriminated nodes are: {','.join([node.name for node in mismatched_nodes])}"
            )

        self._base_group_name = group_name

        # Ensure group exists.
        group_name = self._build_group_name(group_name)
        groups = self._linstor.resource_group_list_raise([group_name]).resource_groups
        if not groups:
            raise LinstorVolumeManagerError(
                'Unable to find `{}` Linstor SR'.format(group_name)
            )

        # Ok. ;)
        self._logger = logger
        self._redundancy = groups[0].select_filter.place_count
        self._group_name = group_name
        self._ha_group_name = self._build_ha_group_name(self._base_group_name)
        self._volumes = set()
        self._storage_pools_time = 0

        # To increase performance and limit request count to LINSTOR services,
        # we use caches.
        self._kv_cache = self._create_kv_cache()
        self._resource_cache = None
        self._resource_cache_dirty = True
        self._volume_info_cache = None
        self._volume_info_cache_dirty = True
        self._resources_info_cache = None
        self._build_volumes(repair=repair)
    @property
    def uri(self) -> str:
        """
        Give the URI used to contact the LINSTOR controller.
        :return: The controller URI.
        :rtype: str
        """
        return self._uri
    @property
    def group_name(self):
        """
        Give the used group name.
        Note: this is the base (user-facing) name, without the internal
        LINSTOR prefix.
        :return: The group name.
        :rtype: str
        """
        return self._base_group_name
    @property
    def redundancy(self):
        """
        Give the used redundancy (the place count of the resource group).
        :return: The redundancy.
        :rtype: int
        """
        return self._redundancy
    @property
    def volumes(self):
        """
        Give the volumes uuid set.
        :return: The volumes uuid set.
        :rtype: set(str)
        """
        return self._volumes
435 @property
436 def max_volume_size_allowed(self):
437 """
438 Give the max volume size currently available in B.
439 :return: The current size.
440 :rtype: int
441 """
443 candidates = self._find_best_size_candidates()
444 if not candidates:
445 raise LinstorVolumeManagerError(
446 'Failed to get max volume size allowed'
447 )
449 size = candidates[0].max_volume_size
450 if size < 0:
451 raise LinstorVolumeManagerError(
452 'Invalid max volume size allowed given: {}'.format(size)
453 )
454 return self.round_down_volume_size(size * 1024)
    @property
    def physical_size(self):
        """
        Give the total physical size of the SR.
        :return: The physical size.
        :rtype: int
        """
        # Aggregated over the storage pools of the group.
        return self._compute_size('total_capacity')
    @property
    def physical_free_size(self):
        """
        Give the total free physical size of the SR.
        :return: The physical free size.
        :rtype: int
        """
        # Aggregated over the storage pools of the group.
        return self._compute_size('free_capacity')
474 @property
475 def allocated_volume_size(self):
476 """
477 Give the allocated size for all volumes. The place count is not
478 used here. When thick lvm is used, the size for one volume should
479 be equal to the virtual volume size. With thin lvm, the size is equal
480 or lower to the volume size.
481 :return: The allocated size of all volumes.
482 :rtype: int
483 """
485 # Paths: /res_name/vol_number/size
486 sizes = {}
488 for resource in self._get_resource_cache().resources:
489 if resource.name not in sizes:
490 current = sizes[resource.name] = {}
491 else:
492 current = sizes[resource.name]
494 for volume in resource.volumes:
495 # We ignore diskless pools of the form "DfltDisklessStorPool".
496 if volume.storage_pool_name != self._group_name:
497 continue
499 allocated_size = max(volume.allocated_size, 0)
500 current_allocated_size = current.get(volume.number) or -1
501 if allocated_size > current_allocated_size:
502 current[volume.number] = allocated_size
504 total_size = 0
505 for volumes in sizes.values():
506 for size in volumes.values():
507 total_size += size
509 return total_size * 1024
511 def get_min_physical_size(self):
512 """
513 Give the minimum physical size of the SR.
514 I.e. the size of the smallest disk + the number of pools.
515 :return: The physical min size.
516 :rtype: tuple(int, int)
517 """
518 size = None
519 pool_count = 0
520 for pool in self._get_storage_pools(force=True):
521 space = pool.free_space
522 if space:
523 pool_count += 1
524 current_size = space.total_capacity
525 if current_size < 0:
526 raise LinstorVolumeManagerError(
527 'Failed to get pool total_capacity attr of `{}`'
528 .format(pool.node_name)
529 )
530 if size is None or current_size < size:
531 size = current_size
532 return (pool_count, (size or 0) * 1024)
534 @property
535 def metadata(self):
536 """
537 Get the metadata of the SR.
538 :return: Dictionary that contains metadata.
539 :rtype: dict(str, dict)
540 """
542 sr_properties = self._get_sr_properties()
543 metadata = sr_properties.get(self.PROP_METADATA)
544 if metadata is not None:
545 metadata = json.loads(metadata)
546 if isinstance(metadata, dict):
547 return metadata
548 raise LinstorVolumeManagerError(
549 'Expected dictionary in SR metadata: {}'.format(
550 self._group_name
551 )
552 )
554 return {}
    @metadata.setter
    def metadata(self, metadata):
        """
        Set the metadata of the SR.
        :param dict metadata: Dictionary that contains metadata.
        """

        assert isinstance(metadata, dict)
        # Serialized as JSON in the SR property namespace.
        sr_properties = self._get_sr_properties()
        sr_properties[self.PROP_METADATA] = json.dumps(metadata)
567 @property
568 def disconnected_hosts(self):
569 """
570 Get the list of disconnected hosts.
571 :return: Set that contains disconnected hosts.
572 :rtype: set(str)
573 """
575 disconnected_hosts = set()
576 for pool in self._get_storage_pools():
577 for report in pool.reports:
578 if report.ret_code & linstor.consts.WARN_NOT_CONNECTED == \
579 linstor.consts.WARN_NOT_CONNECTED:
580 disconnected_hosts.add(pool.node_name)
581 break
582 return disconnected_hosts
    def check_volume_exists(self, volume_uuid):
        """
        Check if a volume exists in the SR.
        :param str volume_uuid: The volume uuid to check.
        :return: True if volume exists.
        :rtype: bool
        """
        return volume_uuid in self._volumes
    def create_volume(
        self,
        volume_uuid,
        size,
        persistent=True,
        volume_name=None,
        high_availability=False
    ):
        """
        Create a new volume on the SR.
        :param str volume_uuid: The volume uuid to use.
        :param int size: volume size in B.
        :param bool persistent: If false the volume will be unavailable
        on the next constructor call LinstorSR(...).
        :param str volume_name: If set, this name is used in the LINSTOR
        database instead of a generated name.
        :param bool high_availability: If set, the volume is created in
        the HA group.
        :return: The current device path of the volume.
        :rtype: str
        """

        self._logger('Creating LINSTOR volume {}...'.format(volume_uuid))
        if not volume_name:
            volume_name = self.build_volume_name(util.gen_uuid())
        volume_properties = self._create_volume_with_properties(
            volume_uuid,
            volume_name,
            size,
            True,  # place_resources
            high_availability
        )

        # Volume created! Now try to find the device path.
        try:
            self._logger(
                'Find device path of LINSTOR volume {}...'.format(volume_uuid)
            )
            device_path = self._find_device_path(volume_uuid, volume_name)
            if persistent:
                # Only flag the volume as existing once the device path is
                # known: before this point a crash leaves it marked for
                # cleanup by the repair pass.
                volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
            self._volumes.add(volume_uuid)
            self._logger(
                'LINSTOR volume {} created!'.format(volume_uuid)
            )
            return device_path
        except Exception:
            # There is an issue to find the path.
            # At this point the volume has just been created, so force flag can be used.
            self._destroy_volume(volume_uuid, force=True)
            raise
    def mark_volume_as_persistent(self, volume_uuid):
        """
        Mark volume as persistent if created with persistent=False.
        :param str volume_uuid: The volume uuid to mark.
        """

        self._ensure_volume_exists(volume_uuid)

        # Mark volume as persistent.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
    def destroy_volume(self, volume_uuid):
        """
        Destroy a volume.
        :param str volume_uuid: The volume uuid to destroy.
        :raises LinstorVolumeManagerError: ERR_VOLUME_IN_USE if the volume is
        opened on a node, ERR_VOLUME_DESTROY if the LINSTOR deletion fails.
        """

        self._ensure_volume_exists(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)

        # Refuse to destroy a volume that is currently opened somewhere.
        is_volume_in_use = any(node["in-use"] for node in self.get_resource_info(volume_uuid)["nodes"].values())
        if is_volume_in_use:
            raise LinstorVolumeManagerError(
                f"Could not destroy volume `{volume_uuid}` as it is currently in use",
                LinstorVolumeManagerError.ERR_VOLUME_IN_USE
            )

        # Mark volume as destroyed first: if the deletion below fails, the
        # volume can still be collected later by a repair pass.
        volume_properties = self._get_volume_properties(volume_uuid)
        volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS

        try:
            self._volumes.remove(volume_uuid)
            self._destroy_volume(volume_uuid)
        except Exception as e:
            raise LinstorVolumeManagerError(
                str(e),
                LinstorVolumeManagerError.ERR_VOLUME_DESTROY
            )
685 def lock_volume(self, volume_uuid, locked=True):
686 """
687 Prevent modifications of the volume properties during
688 "self.LOCKED_EXPIRATION_DELAY" seconds. The SR must be locked
689 when used. This method is useful to attach/detach correctly a volume on
690 a slave. Without it the GC can rename a volume, in this case the old
691 volume path can be used by a slave...
692 :param str volume_uuid: The volume uuid to protect/unprotect.
693 :param bool locked: Lock/unlock the volume.
694 """
696 self._ensure_volume_exists(volume_uuid)
698 self._logger(
699 '{} volume {} as locked'.format(
700 'Mark' if locked else 'Unmark',
701 volume_uuid
702 )
703 )
705 volume_properties = self._get_volume_properties(volume_uuid)
706 if locked:
707 volume_properties[
708 self.PROP_IS_READONLY_TIMESTAMP
709 ] = str(time.time())
710 elif self.PROP_IS_READONLY_TIMESTAMP in volume_properties:
711 volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
    def ensure_volume_is_not_locked(self, volume_uuid, timeout=None):
        """
        Ensure a volume is not locked. Wait if necessary.
        :param str volume_uuid: The volume uuid to check.
        :param int timeout: If the volume is always locked after the expiration
        of the timeout, an exception is thrown.
        """
        # Simple wrapper around the list variant.
        return self.ensure_volume_list_is_not_locked([volume_uuid], timeout)
    def ensure_volume_list_is_not_locked(self, volume_uuids, timeout=None):
        """
        Wait until none of the given volumes is locked. Stale locks (older
        than LOCKED_EXPIRATION_DELAY) are removed on the fly. Unknown uuids
        are ignored.
        :param volume_uuids: The volume uuids to check.
        :param int timeout: If set, raise once a volume is still locked
        after `timeout` seconds.
        :raises LinstorVolumeManagerError: On timeout.
        """
        checked = set()
        for volume_uuid in volume_uuids:
            if volume_uuid in self._volumes:
                checked.add(volume_uuid)

        if not checked:
            return

        waiting = False

        volume_properties = self._get_kv_cache()

        start = time.time()
        while True:
            # Can't delete in for loop, use a copy of the list.
            remaining = checked.copy()
            for volume_uuid in checked:
                volume_properties.namespace = \
                    self._build_volume_namespace(volume_uuid)
                timestamp = volume_properties.get(
                    self.PROP_IS_READONLY_TIMESTAMP
                )
                if timestamp is None:
                    # Not locked (anymore): stop tracking this volume.
                    remaining.remove(volume_uuid)
                    continue

                now = time.time()
                if now - float(timestamp) > self.LOCKED_EXPIRATION_DELAY:
                    # Lock is stale (owner probably crashed): clean it up.
                    self._logger(
                        'Remove readonly timestamp on {}'.format(volume_uuid)
                    )
                    volume_properties.pop(self.PROP_IS_READONLY_TIMESTAMP)
                    remaining.remove(volume_uuid)
                    continue

                # At least one volume is still locked: stop scanning,
                # sleep below and retry.
                if not waiting:
                    self._logger(
                        'Volume {} is locked, waiting...'.format(volume_uuid)
                    )
                    waiting = True
                break

            if not remaining:
                break
            checked = remaining

            if timeout is not None and now - start > timeout:
                raise LinstorVolumeManagerError(
                    'volume `{}` is locked and timeout has been reached'
                    .format(volume_uuid),
                    LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
                )

            # We must wait to use the volume. After that we can modify it
            # ONLY if the SR is locked to avoid bad reads on the slaves.
            time.sleep(1)
            volume_properties = self._create_kv_cache()

        if waiting:
            self._logger('No volume locked now!')
    def remove_volume_if_diskless(self, volume_uuid):
        """
        Remove diskless path from local node.
        :param str volume_uuid: The volume uuid to remove.
        """

        self._ensure_volume_exists(volume_uuid)

        volume_properties = self._get_volume_properties(volume_uuid)
        volume_name = volume_properties.get(self.PROP_VOLUME_NAME)

        node_name = socket.gethostname()

        # Never delete a tie-breaker diskless resource: DRBD keeps it
        # around for quorum.
        for resource in self._get_resource_cache().resources:
            if resource.name == volume_name and resource.node_name == node_name:
                if linstor.consts.FLAG_TIE_BREAKER in resource.flags:
                    return
                break

        result = self._linstor.resource_delete_if_diskless(
            node_name=node_name, rsc_name=volume_name
        )
        if not linstor.Linstor.all_api_responses_no_error(result):
            raise LinstorVolumeManagerError(
                'Unable to delete diskless path of `{}` on node `{}`: {}'
                .format(volume_name, node_name, ', '.join(
                    [str(x) for x in result]))
            )
    def introduce_volume(self, volume_uuid):
        # Placeholder: importing an existing LINSTOR volume into this SR
        # is not supported yet.
        pass  # TODO: Implement me.
    def resize_volume(self, volume_uuid, new_size):
        """
        Resize a volume.
        :param str volume_uuid: The volume uuid to resize.
        :param int new_size: New size in B.
        :raises LinstorVolumeManagerError: If the DRBD sync doesn't complete
        in time or if the LINSTOR resize call fails.
        """

        volume_name = self.get_volume_name(volume_uuid)
        self.ensure_volume_is_not_locked(volume_uuid)
        # LINSTOR expects KiB.
        new_size = self.round_up_volume_size(new_size) // 1024

        # We can't resize anything until DRBD is up to date.
        # We wait here for 5min max and raise an easy to understand error for the user.
        # 5min is an arbitrary time, it's impossible to get a fit all situation value
        # and it's currently impossible to know how much time we have to wait
        # This is mostly an issue for thick provisioning, thin isn't affected.
        start_time = time.monotonic()
        try:
            self._linstor.resource_dfn_wait_synced(volume_name, wait_interval=1.0, timeout=60*5)
        except linstor.LinstorTimeoutError:
            raise LinstorVolumeManagerError(
                f"Volume resizing of `{volume_uuid}` from SR `{self._group_name}` is incomplete: timeout reached but it continues in background."
            )
        util.SMlog(f"DRBD is up to date, syncing took {time.monotonic() - start_time}s")

        result = self._linstor.volume_dfn_modify(
            rsc_name=volume_name,
            volume_nr=0,
            size=new_size
        )

        # Resource sizes changed: invalidate the cached resource list.
        self._mark_resource_cache_as_dirty()

        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                f"Could not resize volume `{volume_uuid}` from SR `{self._group_name}`: {error_str}"
            )
855 def get_volume_name(self, volume_uuid):
856 """
857 Get the name of a particular volume.
858 :param str volume_uuid: The volume uuid of the name to get.
859 :return: The volume name.
860 :rtype: str
861 """
863 self._ensure_volume_exists(volume_uuid)
864 volume_properties = self._get_volume_properties(volume_uuid)
865 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
866 if volume_name:
867 return volume_name
868 raise LinstorVolumeManagerError(
869 'Failed to get volume name of {}'.format(volume_uuid)
870 )
872 def get_volume_size(self, volume_uuid):
873 """
874 Get the size of a particular volume.
875 :param str volume_uuid: The volume uuid of the size to get.
876 :return: The volume size.
877 :rtype: int
878 """
880 volume_name = self.get_volume_name(volume_uuid)
881 dfns = self._linstor.resource_dfn_list_raise(
882 query_volume_definitions=True,
883 filter_by_resource_definitions=[volume_name]
884 ).resource_definitions
886 size = dfns[0].volume_definitions[0].size
887 if size < 0:
888 raise LinstorVolumeManagerError(
889 'Failed to get volume size of: {}'.format(volume_uuid)
890 )
891 return size * 1024
    def set_auto_promote_timeout(self, volume_uuid, timeout):
        """
        Define the blocking time of open calls when a DRBD
        is already open on another host.
        :param str volume_uuid: The volume uuid to modify.
        :param timeout: Auto-promote timeout value to set on the
        resource definition.
        """

        volume_name = self.get_volume_name(volume_uuid)
        result = self._linstor.resource_dfn_modify(volume_name, {
            'DrbdOptions/Resource/auto-promote-timeout': timeout
        })
        error_str = self._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not change the auto promote timeout of `{}`: {}'
                .format(volume_uuid, error_str)
            )
911 def set_drbd_ha_properties(self, volume_name, enabled=True):
912 """
913 Set or not HA DRBD properties required by drbd-reactor and
914 by specific volumes.
915 :param str volume_name: The volume to modify.
916 :param bool enabled: Enable or disable HA properties.
917 """
919 properties = {
920 'DrbdOptions/auto-quorum': 'disabled',
921 'DrbdOptions/Resource/auto-promote': 'no',
922 'DrbdOptions/Resource/on-no-data-accessible': 'io-error',
923 'DrbdOptions/Resource/on-no-quorum': 'io-error',
924 'DrbdOptions/Resource/on-suspended-primary-outdated': 'force-secondary',
925 'DrbdOptions/Resource/quorum': 'majority'
926 }
927 if enabled:
928 result = self._linstor.resource_dfn_modify(volume_name, properties)
929 else:
930 result = self._linstor.resource_dfn_modify(volume_name, {}, delete_props=list(properties.keys()))
932 error_str = self._get_error_str(result)
933 if error_str:
934 raise LinstorVolumeManagerError(
935 'Could not modify HA DRBD properties on volume `{}`: {}'
936 .format(volume_name, error_str)
937 )
    def get_volume_info(self, volume_uuid):
        """
        Get the volume info of a particular volume.
        :param str volume_uuid: The volume uuid of the volume info to get.
        :return: The volume info.
        :rtype: VolumeInfo
        """

        # The info map is keyed by LINSTOR volume name, not uuid.
        volume_name = self.get_volume_name(volume_uuid)
        return self._get_volumes_info()[volume_name]
    def get_device_path(self, volume_uuid):
        """
        Get the dev path of a volume, create a diskless if necessary.
        :param str volume_uuid: The volume uuid to get the dev path.
        :return: The current device path of the volume.
        :rtype: str
        """

        volume_name = self.get_volume_name(volume_uuid)
        return self._find_device_path(volume_uuid, volume_name)
961 def get_volume_uuid_from_device_path(self, device_path):
962 """
963 Get the volume uuid of a device_path.
964 :param str device_path: The dev path to find the volume uuid.
965 :return: The volume uuid of the local device path.
966 :rtype: str
967 """
969 expected_volume_name = \
970 self.get_volume_name_from_device_path(device_path)
972 volume_names = self.get_volumes_with_name()
973 for volume_uuid, volume_name in volume_names.items():
974 if volume_name == expected_volume_name:
975 return volume_uuid
977 raise LinstorVolumeManagerError(
978 'Unable to find volume uuid from dev path `{}`'.format(device_path)
979 )
981 def get_volume_name_from_device_path(self, device_path):
982 """
983 Get the volume name of a device_path.
984 :param str device_path: The dev path to find the volume name.
985 :return: The volume name of the device path.
986 :rtype: str
987 """
989 # Assume that we have a path like this:
990 # - "/dev/drbd/by-res/xcp-volume-<UUID>/0"
991 # - "../xcp-volume-<UUID>/0"
992 if device_path.startswith(DRBD_BY_RES_PATH):
993 prefix_len = len(DRBD_BY_RES_PATH)
994 else:
995 assert device_path.startswith('../')
996 prefix_len = 3
998 res_name_end = device_path.find('/', prefix_len)
999 assert res_name_end != -1
1000 return device_path[prefix_len:res_name_end]
1002 def update_volume_uuid(self, volume_uuid, new_volume_uuid, force=False):
1003 """
1004 Change the uuid of a volume.
1005 :param str volume_uuid: The volume to modify.
1006 :param str new_volume_uuid: The new volume uuid to use.
1007 :param bool force: If true we doesn't check if volume_uuid is in the
1008 volume list. I.e. the volume can be marked as deleted but the volume
1009 can still be in the LINSTOR KV store if the deletion has failed.
1010 In specific cases like "undo" after a failed clone we must rename a bad
1011 deleted VDI.
1012 """
1014 self._logger(
1015 'Trying to update volume UUID {} to {}...'
1016 .format(volume_uuid, new_volume_uuid)
1017 )
1018 assert volume_uuid != new_volume_uuid, 'can\'t update volume UUID, same value'
1020 if not force:
1021 self._ensure_volume_exists(volume_uuid)
1022 self.ensure_volume_is_not_locked(volume_uuid)
1024 if new_volume_uuid in self._volumes:
1025 raise LinstorVolumeManagerError(
1026 'Volume `{}` already exists'.format(new_volume_uuid),
1027 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
1028 )
1030 volume_properties = self._get_volume_properties(volume_uuid)
1031 if volume_properties.get(self.PROP_UPDATING_UUID_SRC):
1032 raise LinstorVolumeManagerError(
1033 'Cannot update volume uuid {}: invalid state'
1034 .format(volume_uuid)
1035 )
1037 # 1. Copy in temp variables metadata and volume_name.
1038 metadata = volume_properties.get(self.PROP_METADATA)
1039 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
1041 # 2. Switch to new volume namespace.
1042 volume_properties.namespace = self._build_volume_namespace(
1043 new_volume_uuid
1044 )
1046 if list(volume_properties.items()):
1047 raise LinstorVolumeManagerError(
1048 'Cannot update volume uuid {} to {}: '
1049 .format(volume_uuid, new_volume_uuid) +
1050 'this last one is not empty'
1051 )
1053 try:
1054 # 3. Mark new volume properties with PROP_UPDATING_UUID_SRC.
1055 # If we crash after that, the new properties can be removed
1056 # properly.
1057 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_NOT_EXISTS
1058 volume_properties[self.PROP_UPDATING_UUID_SRC] = volume_uuid
1060 # 4. Copy the properties.
1061 # Note: On new volumes, during clone for example, the metadata
1062 # may be missing. So we must test it to avoid this error:
1063 # "None has to be a str/unicode, but is <type 'NoneType'>"
1064 if metadata:
1065 volume_properties[self.PROP_METADATA] = metadata
1066 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1068 # 5. Ok!
1069 volume_properties[self.PROP_NOT_EXISTS] = self.STATE_EXISTS
1070 except Exception as err:
1071 try:
1072 # Clear the new volume properties in case of failure.
1073 assert volume_properties.namespace == \
1074 self._build_volume_namespace(new_volume_uuid)
1075 volume_properties.clear()
1076 except Exception as e:
1077 self._logger(
1078 'Failed to clear new volume properties: {} (ignoring...)'
1079 .format(e)
1080 )
1081 raise LinstorVolumeManagerError(
1082 'Failed to copy volume properties: {}'.format(err)
1083 )
1085 try:
1086 # 6. After this point, it's ok we can remove the
1087 # PROP_UPDATING_UUID_SRC property and clear the src properties
1088 # without problems.
1090 # 7. Switch to old volume namespace.
1091 volume_properties.namespace = self._build_volume_namespace(
1092 volume_uuid
1093 )
1094 volume_properties.clear()
1096 # 8. Switch a last time to new volume namespace.
1097 volume_properties.namespace = self._build_volume_namespace(
1098 new_volume_uuid
1099 )
1100 volume_properties.pop(self.PROP_UPDATING_UUID_SRC)
1101 except Exception as e:
1102 raise LinstorVolumeManagerError(
1103 'Failed to clear volume properties '
1104 'after volume uuid update: {}'.format(e)
1105 )
1107 try:
1108 self._volumes.remove(volume_uuid)
1109 except KeyError:
1110 # Can be missing if we are building the volume set attr AND
1111 # we are processing a deleted resource.
1112 assert force
1114 self._volumes.add(new_volume_uuid)
1116 self._logger(
1117 'UUID update succeeded of {} to {}! (properties={})'
1118 .format(
1119 volume_uuid, new_volume_uuid,
1120 self._get_filtered_properties(volume_properties)
1121 )
1122 )
1124 def update_volume_name(self, volume_uuid, volume_name):
1125 """
1126 Change the volume name of a volume.
1127 :param str volume_uuid: The volume to modify.
1128 :param str volume_name: The volume_name to use.
1129 """
1131 self._ensure_volume_exists(volume_uuid)
1132 self.ensure_volume_is_not_locked(volume_uuid)
1133 if not volume_name.startswith(self.PREFIX_VOLUME):
1134 raise LinstorVolumeManagerError(
1135 'Volume name `{}` must be start with `{}`'
1136 .format(volume_name, self.PREFIX_VOLUME)
1137 )
1139 if volume_name not in self._fetch_resource_names():
1140 raise LinstorVolumeManagerError(
1141 'Volume `{}` doesn\'t exist'.format(volume_name)
1142 )
1144 volume_properties = self._get_volume_properties(volume_uuid)
1145 volume_properties[self.PROP_VOLUME_NAME] = volume_name
1147 def get_usage_states(self, volume_uuid):
1148 """
1149 Check if a volume is currently used.
1150 :param str volume_uuid: The volume uuid to check.
1151 :return: A dictionary that contains states.
1152 :rtype: dict(str, bool or None)
1153 """
1155 states = {}
1157 volume_name = self.get_volume_name(volume_uuid)
1158 for resource_state in self._linstor.resource_list_raise(
1159 filter_by_resources=[volume_name]
1160 ).resource_states:
1161 states[resource_state.node_name] = resource_state.in_use
1163 return states
    def get_volume_openers(self, volume_uuid):
        """
        Get openers of a volume.
        :param str volume_uuid: The volume uuid to monitor.
        :return: A dictionary that contains openers.
        :rtype: dict(str, obj)
        """
        # Volume index '0' is hard-coded — presumably one volume per
        # resource in this driver; confirm against create path.
        return get_all_volume_openers(self.get_volume_name(volume_uuid), '0')
    def get_volumes_with_name(self):
        """
        Give a volume dictionary that contains names actually owned.
        :return: A volume/name dict.
        :rtype: dict(str, str)
        """
        # Volume names live in the KV store; REG_VOLUME_NAME selects them.
        return self._get_volumes_by_property(self.REG_VOLUME_NAME)
1182 def get_volumes_with_info(self):
1183 """
1184 Give a volume dictionary that contains VolumeInfos.
1185 :return: A volume/VolumeInfo dict.
1186 :rtype: dict(str, VolumeInfo)
1187 """
1189 volumes = {}
1191 volume_names = self.get_volumes_with_name()
1192 all_volume_info = self._get_volumes_info(volume_names)
1193 for volume_uuid, volume_name in volume_names.items():
1194 if volume_name:
1195 volume_info = all_volume_info.get(volume_name)
1196 if volume_info:
1197 volumes[volume_uuid] = volume_info
1198 continue
1200 # Well I suppose if this volume is not available,
1201 # LINSTOR has been used directly without using this API.
1202 volumes[volume_uuid] = self.VolumeInfo('')
1204 return volumes
1206 def get_volumes_with_metadata(self):
1207 """
1208 Give a volume dictionary that contains metadata.
1209 :return: A volume/metadata dict.
1210 :rtype: dict(str, dict)
1211 """
1213 volumes = {}
1215 metadata = self._get_volumes_by_property(self.REG_METADATA)
1216 for volume_uuid, volume_metadata in metadata.items():
1217 if volume_metadata:
1218 volume_metadata = json.loads(volume_metadata)
1219 if isinstance(volume_metadata, dict):
1220 volumes[volume_uuid] = volume_metadata
1221 continue
1222 raise LinstorVolumeManagerError(
1223 'Expected dictionary in volume metadata: {}'
1224 .format(volume_uuid)
1225 )
1227 volumes[volume_uuid] = {}
1229 return volumes
1231 def get_volume_metadata(self, volume_uuid):
1232 """
1233 Get the metadata of a volume.
1234 :return: Dictionary that contains metadata.
1235 :rtype: dict
1236 """
1238 self._ensure_volume_exists(volume_uuid)
1239 volume_properties = self._get_volume_properties(volume_uuid)
1240 metadata = volume_properties.get(self.PROP_METADATA)
1241 if metadata:
1242 metadata = json.loads(metadata)
1243 if isinstance(metadata, dict):
1244 return metadata
1245 raise LinstorVolumeManagerError(
1246 'Expected dictionary in volume metadata: {}'
1247 .format(volume_uuid)
1248 )
1249 return {}
1251 def set_volume_metadata(self, volume_uuid, metadata):
1252 """
1253 Set the metadata of a volume.
1254 :param dict metadata: Dictionary that contains metadata.
1255 """
1257 self._ensure_volume_exists(volume_uuid)
1258 self.ensure_volume_is_not_locked(volume_uuid)
1260 assert isinstance(metadata, dict)
1261 volume_properties = self._get_volume_properties(volume_uuid)
1262 volume_properties[self.PROP_METADATA] = json.dumps(metadata)
1264 def update_volume_metadata(self, volume_uuid, metadata):
1265 """
1266 Update the metadata of a volume. It modify only the given keys.
1267 It doesn't remove unreferenced key instead of set_volume_metadata.
1268 :param dict metadata: Dictionary that contains metadata.
1269 """
1271 self._ensure_volume_exists(volume_uuid)
1272 self.ensure_volume_is_not_locked(volume_uuid)
1274 assert isinstance(metadata, dict)
1275 volume_properties = self._get_volume_properties(volume_uuid)
1277 current_metadata = json.loads(
1278 volume_properties.get(self.PROP_METADATA, '{}')
1279 )
1280 if not isinstance(metadata, dict):
1281 raise LinstorVolumeManagerError(
1282 'Expected dictionary in volume metadata: {}'
1283 .format(volume_uuid)
1284 )
1286 for key, value in metadata.items():
1287 current_metadata[key] = value
1288 volume_properties[self.PROP_METADATA] = json.dumps(current_metadata)
1290 def shallow_clone_volume(self, volume_uuid, clone_uuid, persistent=True):
1291 """
1292 Clone a volume. Do not copy the data, this method creates a new volume
1293 with the same size.
1294 :param str volume_uuid: The volume to clone.
1295 :param str clone_uuid: The cloned volume.
1296 :param bool persistent: If false the volume will be unavailable
1297 on the next constructor call LinstorSR(...).
1298 :return: The current device path of the cloned volume.
1299 :rtype: str
1300 """
1302 volume_name = self.get_volume_name(volume_uuid)
1303 self.ensure_volume_is_not_locked(volume_uuid)
1305 # 1. Find ideal nodes + size to use.
1306 ideal_node_names, size = self._get_volume_node_names_and_size(
1307 volume_name
1308 )
1309 if size <= 0:
1310 raise LinstorVolumeManagerError(
1311 'Invalid size of {} for volume `{}`'.format(size, volume_name)
1312 )
1314 # 2. Create clone!
1315 return self.create_volume(clone_uuid, size, persistent)
1317 def remove_resourceless_volumes(self):
1318 """
1319 Remove all volumes without valid or non-empty name
1320 (i.e. without LINSTOR resource). It's different than
1321 LinstorVolumeManager constructor that takes a `repair` param that
1322 removes volumes with `PROP_NOT_EXISTS` to 1.
1323 """
1325 resource_names = self._fetch_resource_names()
1326 for volume_uuid, volume_name in self.get_volumes_with_name().items():
1327 if not volume_name or volume_name not in resource_names:
1328 # Don't force, we can be sure of what's happening.
1329 self.destroy_volume(volume_uuid)
    def destroy(self):
        """
        Destroy this SR. Object should not be used after that.
        :raises LinstorVolumeManagerError: If volumes (managed or external)
        still exist in the pool.
        """

        # 1. Ensure volume list is empty. No cost.
        if self._volumes:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes'
            )

        # 2. Fetch ALL resource names (including deleted-flagged ones).
        # This list may therefore contain volumes created outside
        # the scope of the driver.
        resource_names = self._fetch_resource_names(ignore_deleted=False)
        try:
            resource_names.remove(DATABASE_VOLUME_NAME)
        except KeyError:
            # Really strange to reach that point.
            # Normally we always have the database volume in the list.
            pass

        # 3. Ensure the resource name list is entirely empty...
        if resource_names:
            raise LinstorVolumeManagerError(
                'Cannot destroy LINSTOR volume manager: '
                'It exists remaining volumes (created externally or being deleted)'
            )

        # 4. Destroying...
        controller_is_running = self._controller_is_running()
        uri = 'linstor://localhost'
        try:
            # Stop the controller first so the database volume can be
            # safely unmounted below.
            if controller_is_running:
                self._start_controller(start=False)

            # 4.1. Umount LINSTOR database.
            self._mount_database_volume(
                self.build_device_path(DATABASE_VOLUME_NAME),
                mount=False,
                force=True
            )

            # 4.2. Refresh instance: restart the controller and reconnect
            # so the database resource can be destroyed through it.
            self._start_controller(start=True)
            self._linstor = self._create_linstor_instance(
                uri, keep_uri_unmodified=True
            )

            # 4.3. Destroy database volume.
            self._destroy_resource(DATABASE_VOLUME_NAME)

            # 4.4. Refresh linstor connection.
            # Without it we get this error:
            # "Cannot delete resource group 'xcp-sr-linstor_group_thin_device' because it has existing resource definitions.."
            # Because the deletion of the database was not seen by LINSTOR
            # for some reason. It seems a simple refresh of the LINSTOR
            # connection makes it aware of the deletion.
            self._linstor.disconnect()
            self._linstor.connect()

            # 4.5. Destroy remaining DRBD nodes on hosts.
            # We check if there is a DRBD node on hosts that could mean blocking when destroying resource groups.
            # It needs to be done locally by each host so we go through the linstor-manager plugin.
            # If we don't do this sometimes, the destroy will fail when trying to destroy the resource groups with:
            # "linstor-manager:destroy error: Failed to destroy SP `xcp-sr-linstor_group_thin_device` on node `r620-s2`: The specified storage pool 'xcp-sr-linstor_group_thin_device' on node 'r620-s2' can not be deleted as volumes / snapshot-volumes are still using it."
            session = util.timeout_call(5, util.get_localAPI_session)
            for host_ref in session.xenapi.host.get_all():
                try:
                    response = session.xenapi.host.call_plugin(
                        host_ref, 'linstor-manager', 'destroyDrbdVolumes', {'volume_group': self._group_name}
                    )
                except Exception as e:
                    # Best effort: log and continue with the other hosts.
                    util.SMlog('Calling destroyDrbdVolumes on host {} failed with error {}'.format(host_ref, e))

            # 4.6. Destroy group and storage pools.
            self._destroy_resource_group(self._linstor, self._group_name)
            self._destroy_resource_group(self._linstor, self._ha_group_name)
            for pool in self._get_storage_pools(force=True):
                self._destroy_storage_pool(
                    self._linstor, pool.name, pool.node_name
                )
        except Exception as e:
            # Restore the controller to its initial state before failing.
            self._start_controller(start=controller_is_running)
            raise e

        # Final cleanup of the local database directory; failures here are
        # logged but deliberately ignored (the SR is already destroyed).
        try:
            self._start_controller(start=False)
            for file in os.listdir(DATABASE_PATH):
                if file != 'lost+found':
                    os.remove(DATABASE_PATH + '/' + file)
        except Exception as e:
            util.SMlog(
                'Ignoring failure after LINSTOR SR destruction: {}'
                .format(e)
            )
1429 def find_up_to_date_diskful_nodes(self, volume_uuid):
1430 """
1431 Find all nodes that contain a specific volume using diskful disks.
1432 The disk must be up to data to be used.
1433 :param str volume_uuid: The volume to use.
1434 :return: The available nodes.
1435 :rtype: tuple(set(str), str)
1436 """
1438 volume_name = self.get_volume_name(volume_uuid)
1440 in_use_by = None
1441 node_names = set()
1443 resource_states = filter(
1444 lambda resource_state: resource_state.name == volume_name,
1445 self._get_resource_cache().resource_states
1446 )
1448 for resource_state in resource_states:
1449 volume_state = resource_state.volume_states[0]
1450 if volume_state.disk_state == 'UpToDate':
1451 node_names.add(resource_state.node_name)
1452 if resource_state.in_use:
1453 in_use_by = resource_state.node_name
1455 return (node_names, in_use_by)
    def invalidate_resource_cache(self):
        """
        If resources are impacted by external commands like vhdutil,
        it's necessary to call this function to invalidate current resource
        cache.
        """
        # Marks both the resource cache and the volume info cache dirty;
        # they are rebuilt lazily on next access.
        self._mark_resource_cache_as_dirty()
1465 def has_node(self, node_name):
1466 """
1467 Check if a node exists in the LINSTOR database.
1468 :rtype: bool
1469 """
1470 result = self._linstor.node_list()
1471 error_str = self._get_error_str(result)
1472 if error_str:
1473 raise LinstorVolumeManagerError(
1474 'Failed to list nodes using `{}`: {}'
1475 .format(node_name, error_str)
1476 )
1477 return bool(result[0].node(node_name))
1479 def create_node(self, node_name, ip):
1480 """
1481 Create a new node in the LINSTOR database.
1482 :param str node_name: Node name to use.
1483 :param str ip: Host IP to communicate.
1484 """
1485 result = self._linstor.node_create(
1486 node_name,
1487 linstor.consts.VAL_NODE_TYPE_CMBD,
1488 ip
1489 )
1490 errors = self._filter_errors(result)
1491 if errors:
1492 error_str = self._get_error_str(errors)
1493 raise LinstorVolumeManagerError(
1494 'Failed to create node `{}`: {}'.format(node_name, error_str)
1495 )
1497 def destroy_node(self, node_name):
1498 """
1499 Destroy a node in the LINSTOR database.
1500 :param str node_name: Node name to remove.
1501 """
1502 result = self._linstor.node_delete(node_name)
1503 errors = self._filter_errors(result)
1504 if errors:
1505 error_str = self._get_error_str(errors)
1506 raise LinstorVolumeManagerError(
1507 'Failed to destroy node `{}`: {}'.format(node_name, error_str)
1508 )
1510 def create_node_interface(self, node_name, name, ip):
1511 """
1512 Create a new node interface in the LINSTOR database.
1513 :param str node_name: Node name of the interface to use.
1514 :param str name: Interface to create.
1515 :param str ip: IP of the interface.
1516 """
1517 result = self._linstor.netinterface_create(node_name, name, ip)
1518 errors = self._filter_errors(result)
1519 if errors:
1520 error_str = self._get_error_str(errors)
1521 raise LinstorVolumeManagerError(
1522 'Failed to create node interface on `{}`: {}'.format(node_name, error_str)
1523 )
1525 def destroy_node_interface(self, node_name, name):
1526 """
1527 Destroy a node interface in the LINSTOR database.
1528 :param str node_name: Node name of the interface to remove.
1529 :param str name: Interface to remove.
1530 """
1532 if name == 'default':
1533 raise LinstorVolumeManagerError(
1534 'Unable to delete the default interface of a node!'
1535 )
1537 result = self._linstor.netinterface_delete(node_name, name)
1538 errors = self._filter_errors(result)
1539 if errors:
1540 error_str = self._get_error_str(errors)
1541 raise LinstorVolumeManagerError(
1542 'Failed to destroy node interface on `{}`: {}'.format(node_name, error_str)
1543 )
1545 def modify_node_interface(self, node_name, name, ip):
1546 """
1547 Modify a node interface in the LINSTOR database. Create it if necessary.
1548 :param str node_name: Node name of the interface to use.
1549 :param str name: Interface to modify or create.
1550 :param str ip: IP of the interface.
1551 """
1552 result = self._linstor.netinterface_create(node_name, name, ip)
1553 errors = self._filter_errors(result)
1554 if not errors:
1555 return
1557 if self._check_errors(errors, [linstor.consts.FAIL_EXISTS_NET_IF]):
1558 result = self._linstor.netinterface_modify(node_name, name, ip)
1559 errors = self._filter_errors(result)
1560 if not errors:
1561 return
1563 error_str = self._get_error_str(errors)
1564 raise LinstorVolumeManagerError(
1565 'Unable to modify interface on `{}`: {}'.format(node_name, error_str)
1566 )
1568 def list_node_interfaces(self, node_name):
1569 """
1570 List all node interfaces.
1571 :param str node_name: Node name to use to list interfaces.
1572 :rtype: list
1573 :
1574 """
1575 result = self._linstor.net_interface_list(node_name)
1576 if not result:
1577 raise LinstorVolumeManagerError(
1578 'Unable to list interfaces on `{}`: no list received'.format(node_name)
1579 )
1581 interfaces = {}
1582 for interface in result:
1583 interface = interface._rest_data
1584 interfaces[interface['name']] = {
1585 'address': interface['address'],
1586 'active': interface['is_active']
1587 }
1588 return interfaces
1590 def get_node_preferred_interface(self, node_name):
1591 """
1592 Get the preferred interface used by a node.
1593 :param str node_name: Node name of the interface to get.
1594 :rtype: str
1595 """
1596 try:
1597 nodes = self._linstor.node_list_raise([node_name]).nodes
1598 if nodes:
1599 properties = nodes[0].props
1600 return properties.get('PrefNic', 'default')
1601 return nodes
1602 except Exception as e:
1603 raise LinstorVolumeManagerError(
1604 'Failed to get preferred interface: `{}`'.format(e)
1605 )
1607 def set_node_preferred_interface(self, node_name, name):
1608 """
1609 Set the preferred interface to use on a node.
1610 :param str node_name: Node name of the interface.
1611 :param str name: Preferred interface to use.
1612 """
1613 result = self._linstor.node_modify(node_name, property_dict={'PrefNic': name})
1614 errors = self._filter_errors(result)
1615 if errors:
1616 error_str = self._get_error_str(errors)
1617 raise LinstorVolumeManagerError(
1618 'Failed to set preferred node interface on `{}`: {}'.format(node_name, error_str)
1619 )
1621 def get_nodes_info(self):
1622 """
1623 Get all nodes + statuses, used or not by the pool.
1624 :rtype: dict(str, dict)
1625 """
1626 try:
1627 nodes = {}
1628 for node in self._linstor.node_list_raise().nodes:
1629 nodes[node.name] = node.connection_status
1630 return nodes
1631 except Exception as e:
1632 raise LinstorVolumeManagerError(
1633 'Failed to get all nodes: `{}`'.format(e)
1634 )
1636 def get_storage_pools_info(self):
1637 """
1638 Give all storage pools of current group name.
1639 :rtype: dict(str, list)
1640 """
1641 storage_pools = {}
1642 for pool in self._get_storage_pools(force=True):
1643 if pool.node_name not in storage_pools:
1644 storage_pools[pool.node_name] = []
1646 size = -1
1647 capacity = -1
1649 space = pool.free_space
1650 if space:
1651 size = space.free_capacity
1652 if size < 0:
1653 size = -1
1654 else:
1655 size *= 1024
1656 capacity = space.total_capacity
1657 if capacity <= 0:
1658 capacity = -1
1659 else:
1660 capacity *= 1024
1662 storage_pools[pool.node_name].append({
1663 'name': pool.name,
1664 'linstor-uuid': pool.uuid,
1665 'free-size': size,
1666 'capacity': capacity
1667 })
1669 return storage_pools
    def get_resources_info(self):
        """
        Give all resources of current group name.
        :return: Resource name -> {'nodes': {...}, 'uuid': str} mapping.
        :rtype: dict(str, dict)
        """
        # Served from cache while the resource cache is still valid.
        if self._resources_info_cache and not self._resource_cache_dirty:
            return self._resources_info_cache

        resources = {}
        resource_list = self._get_resource_cache()
        volume_names = self.get_volumes_with_name()
        # Pass 1: static resource/volume layout per node.
        for resource in resource_list.resources:
            if resource.name not in resources:
                resources[resource.name] = { 'nodes': {}, 'uuid': '' }
            resource_nodes = resources[resource.name]['nodes']

            resource_nodes[resource.node_name] = {
                'volumes': [],
                'diskful': linstor.consts.FLAG_DISKLESS not in resource.flags,
                'tie-breaker': linstor.consts.FLAG_TIE_BREAKER in resource.flags
            }
            resource_volumes = resource_nodes[resource.node_name]['volumes']

            for volume in resource.volumes:
                # We ignore diskless pools of the form "DfltDisklessStorPool".
                if volume.storage_pool_name != self._group_name:
                    continue

                # Negative sizes mean "unknown"; otherwise scaled by 1024
                # (presumably KiB -> bytes — confirm against LINSTOR API).
                usable_size = volume.usable_size
                if usable_size < 0:
                    usable_size = -1
                else:
                    usable_size *= 1024

                allocated_size = volume.allocated_size
                if allocated_size < 0:
                    allocated_size = -1
                else:
                    allocated_size *= 1024

                resource_volumes.append({
                    'storage-pool-name': volume.storage_pool_name,
                    'linstor-uuid': volume.uuid,
                    'number': volume.number,
                    'device-path': volume.device_path,
                    'usable-size': usable_size,
                    'allocated-size': allocated_size
                })

        # Pass 2: merge the dynamic states (in-use, disk-state) into the
        # structure built above.
        for resource_state in resource_list.resource_states:
            resource = resources[resource_state.rsc_name]['nodes'][resource_state.node_name]
            resource['in-use'] = resource_state.in_use

            volumes = resource['volumes']
            for volume_state in resource_state.volume_states:
                volume = next((x for x in volumes if x['number'] == volume_state.number), None)
                if volume:
                    volume['disk-state'] = volume_state.disk_state

        # Pass 3: attach the driver-side volume UUIDs to their resources.
        for volume_uuid, volume_name in volume_names.items():
            resource = resources.get(volume_name)
            if resource:
                resource['uuid'] = volume_uuid

        self._resources_info_cache = resources
        return self._resources_info_cache
1738 def get_resource_info(self, volume_uuid: str) -> Dict[str, Any]:
1739 """
1740 Give a resource info based on its UUID.
1741 :param volume_uuid str: volume uuid to search for
1742 :rtype: dict(str, any)
1743 """
1744 for volume in self.get_resources_info().values():
1745 if volume["uuid"] == volume_uuid:
1746 return volume
1748 raise LinstorVolumeManagerError(
1749 f"Could not find info about volume `{volume_uuid}`",
1750 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
1751 )
    def get_database_path(self):
        """
        Get the database path.
        :return: The current database path.
        :rtype: str
        """
        # Activates the database volume if needed before returning its path.
        return self._request_database_path(self._linstor, activate=True)
1761 @classmethod
1762 def get_all_group_names(cls, base_name):
1763 """
1764 Get all group names. I.e. list of current group + HA.
1765 :param str base_name: The SR group_name to use.
1766 :return: List of group names.
1767 :rtype: list
1768 """
1769 return [cls._build_group_name(base_name), cls._build_ha_group_name(base_name)]
1771 @classmethod
1772 def create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
1773 """
1774 Create a new SR on the given nodes.
1775 :param str group_name: The SR group_name to use.
1776 :param set(str) ips: Node ips.
1777 :param int redundancy: How many copy of volumes should we store?
1778 :param bool thin_provisioning: Use thin or thick provisioning.
1779 :param function logger: Function to log messages.
1780 :return: A new LinstorSr instance.
1781 :rtype: LinstorSr
1782 """
1784 try:
1785 cls._start_controller(start=True)
1786 sr = cls._create_sr(group_name, ips, redundancy, thin_provisioning, logger)
1787 finally:
1788 # Controller must be stopped and volume unmounted because
1789 # it is the role of the drbd-reactor daemon to do the right
1790 # actions.
1791 cls._start_controller(start=False)
1792 cls._mount_volume(
1793 cls.build_device_path(DATABASE_VOLUME_NAME),
1794 DATABASE_PATH,
1795 mount=False
1796 )
1797 return sr
1799 @classmethod
1800 def _create_sr(cls, group_name, ips, redundancy, thin_provisioning, logger=default_logger.__func__):
1801 # 1. Check if SR already exists.
1802 uri = 'linstor://localhost'
1804 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)
1806 node_names = list(ips.keys())
1807 for node_name, ip in ips.items():
1808 while True:
1809 # Try to create node.
1810 result = lin.node_create(
1811 node_name,
1812 linstor.consts.VAL_NODE_TYPE_CMBD,
1813 ip
1814 )
1816 errors = cls._filter_errors(result)
1817 if cls._check_errors(
1818 errors, [linstor.consts.FAIL_EXISTS_NODE]
1819 ):
1820 # If it already exists, remove, then recreate.
1821 result = lin.node_delete(node_name)
1822 error_str = cls._get_error_str(result)
1823 if error_str:
1824 raise LinstorVolumeManagerError(
1825 'Failed to remove old node `{}`: {}'
1826 .format(node_name, error_str)
1827 )
1828 elif not errors:
1829 break # Created!
1830 else:
1831 raise LinstorVolumeManagerError(
1832 'Failed to create node `{}` with ip `{}`: {}'.format(
1833 node_name, ip, cls._get_error_str(errors)
1834 )
1835 )
1837 driver_pool_name = group_name
1838 base_group_name = group_name
1839 group_name = cls._build_group_name(group_name)
1840 storage_pool_name = group_name
1841 pools = lin.storage_pool_list_raise(filter_by_stor_pools=[storage_pool_name]).storage_pools
1842 if pools:
1843 existing_node_names = [pool.node_name for pool in pools]
1844 raise LinstorVolumeManagerError(
1845 'Unable to create SR `{}`. It already exists on node(s): {}'
1846 .format(group_name, existing_node_names)
1847 )
1849 if lin.resource_group_list_raise(
1850 cls.get_all_group_names(base_group_name)
1851 ).resource_groups:
1852 if not lin.resource_dfn_list_raise().resource_definitions:
1853 backup_path = cls._create_database_backup_path()
1854 logger(
1855 'Group name already exists `{}` without LVs. '
1856 'Ignoring and moving the config files in {}'.format(group_name, backup_path)
1857 )
1858 cls._move_files(DATABASE_PATH, backup_path)
1859 else:
1860 raise LinstorVolumeManagerError(
1861 'Unable to create SR `{}`: The group name already exists'
1862 .format(group_name)
1863 )
1865 if thin_provisioning:
1866 driver_pool_parts = driver_pool_name.split('/')
1867 if not len(driver_pool_parts) == 2:
1868 raise LinstorVolumeManagerError(
1869 'Invalid group name using thin provisioning. '
1870 'Expected format: \'VG/LV`\''
1871 )
1873 # 2. Create storage pool on each node + resource group.
1874 reg_volume_group_not_found = re.compile(
1875 ".*Volume group '.*' not found$"
1876 )
1878 i = 0
1879 try:
1880 # 2.a. Create storage pools.
1881 storage_pool_count = 0
1882 while i < len(node_names):
1883 node_name = node_names[i]
1885 result = lin.storage_pool_create(
1886 node_name=node_name,
1887 storage_pool_name=storage_pool_name,
1888 storage_driver='LVM_THIN' if thin_provisioning else 'LVM',
1889 driver_pool_name=driver_pool_name
1890 )
1892 errors = linstor.Linstor.filter_api_call_response_errors(
1893 result
1894 )
1895 if errors:
1896 if len(errors) == 1 and errors[0].is_error(
1897 linstor.consts.FAIL_STOR_POOL_CONFIGURATION_ERROR
1898 ) and reg_volume_group_not_found.match(errors[0].message):
1899 logger(
1900 'Volume group `{}` not found on `{}`. Ignoring...'
1901 .format(group_name, node_name)
1902 )
1903 cls._destroy_storage_pool(lin, storage_pool_name, node_name)
1904 else:
1905 error_str = cls._get_error_str(result)
1906 raise LinstorVolumeManagerError(
1907 'Could not create SP `{}` on node `{}`: {}'
1908 .format(group_name, node_name, error_str)
1909 )
1910 else:
1911 storage_pool_count += 1
1912 i += 1
1914 if not storage_pool_count:
1915 raise LinstorVolumeManagerError(
1916 'Unable to create SR `{}`: No VG group found'.format(
1917 group_name,
1918 )
1919 )
1921 # 2.b. Create resource groups.
1922 ha_group_name = cls._build_ha_group_name(base_group_name)
1923 cls._create_resource_group(
1924 lin,
1925 group_name,
1926 storage_pool_name,
1927 redundancy,
1928 True
1929 )
1930 cls._create_resource_group(
1931 lin,
1932 ha_group_name,
1933 storage_pool_name,
1934 3,
1935 True
1936 )
1938 # 3. Create the LINSTOR database volume and mount it.
1939 try:
1940 logger('Creating database volume...')
1941 volume_path = cls._create_database_volume(
1942 lin, ha_group_name, storage_pool_name, node_names, redundancy
1943 )
1944 except LinstorVolumeManagerError as e:
1945 if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
1946 logger('Destroying database volume after creation fail...')
1947 cls._force_destroy_database_volume(lin, group_name)
1948 raise
1950 try:
1951 logger('Mounting database volume...')
1953 # First we must disable the controller to move safely the
1954 # LINSTOR config.
1955 cls._start_controller(start=False)
1957 cls._mount_database_volume(volume_path)
1958 except Exception as e:
1959 # Ensure we are connected because controller has been
1960 # restarted during mount call.
1961 logger('Destroying database volume after mount fail...')
1963 try:
1964 cls._start_controller(start=True)
1965 except Exception:
1966 pass
1968 lin = cls._create_linstor_instance(
1969 uri, keep_uri_unmodified=True
1970 )
1971 cls._force_destroy_database_volume(lin, group_name)
1972 raise e
1974 cls._start_controller(start=True)
1975 lin = cls._create_linstor_instance(uri, keep_uri_unmodified=True)
1977 # 4. Remove storage pools/resource/volume group in the case of errors.
1978 except Exception as e:
1979 logger('Destroying resource group and storage pools after fail...')
1980 try:
1981 cls._destroy_resource_group(lin, group_name)
1982 cls._destroy_resource_group(lin, ha_group_name)
1983 except Exception as e2:
1984 logger('Failed to destroy resource group: {}'.format(e2))
1985 pass
1986 j = 0
1987 i = min(i, len(node_names) - 1)
1988 while j <= i:
1989 try:
1990 cls._destroy_storage_pool(lin, storage_pool_name, node_names[j])
1991 except Exception as e2:
1992 logger('Failed to destroy resource group: {}'.format(e2))
1993 pass
1994 j += 1
1995 raise e
1997 # 5. Return new instance.
1998 instance = cls.__new__(cls)
1999 instance._linstor = lin
2000 instance._logger = logger
2001 instance._redundancy = redundancy
2002 instance._base_group_name = base_group_name
2003 instance._group_name = group_name
2004 instance._volumes = set()
2005 instance._storage_pools_time = 0
2006 instance._kv_cache = instance._create_kv_cache()
2007 instance._resource_cache = None
2008 instance._resource_cache_dirty = True
2009 instance._volume_info_cache = None
2010 instance._volume_info_cache_dirty = True
2011 return instance
2013 @classmethod
2014 def build_device_path(cls, volume_name):
2015 """
2016 Build a device path given a volume name.
2017 :param str volume_name: The volume name to use.
2018 :return: A valid or not device path.
2019 :rtype: str
2020 """
2022 return '{}{}/0'.format(cls.DEV_ROOT_PATH, volume_name)
2024 @classmethod
2025 def build_volume_name(cls, base_name):
2026 """
2027 Build a volume name given a base name (i.e. a UUID).
2028 :param str base_name: The volume name to use.
2029 :return: A valid or not device path.
2030 :rtype: str
2031 """
2032 return '{}{}'.format(cls.PREFIX_VOLUME, base_name)
    @classmethod
    def round_up_volume_size(cls, volume_size):
        """
        Align volume size on higher multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align.
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_up helper.
        return round_up(volume_size, cls.BLOCK_SIZE)
    @classmethod
    def round_down_volume_size(cls, volume_size):
        """
        Align volume size on lower multiple of BLOCK_SIZE.
        :param int volume_size: The volume size to align.
        :return: An aligned volume size.
        :rtype: int
        """
        # Delegates to the module-level round_down helper.
        return round_down(volume_size, cls.BLOCK_SIZE)
2054 # --------------------------------------------------------------------------
2055 # Private helpers.
2056 # --------------------------------------------------------------------------
    def _create_kv_cache(self):
        # Rebuild the KV store view from scratch (root namespace '/') and
        # mark the cache as clean. Also stores the result in `_kv_cache`.
        self._kv_cache = self._create_linstor_kv('/')
        self._kv_cache_dirty = False
        return self._kv_cache
2063 def _get_kv_cache(self):
2064 if self._kv_cache_dirty:
2065 self._kv_cache = self._create_kv_cache()
2066 return self._kv_cache
    def _create_resource_cache(self):
        # Refetch the full resource list from LINSTOR and mark the cache
        # as clean. Also stores the result in `_resource_cache`.
        self._resource_cache = self._linstor.resource_list_raise()
        self._resource_cache_dirty = False
        return self._resource_cache
2073 def _get_resource_cache(self):
2074 if self._resource_cache_dirty:
2075 self._resource_cache = self._create_resource_cache()
2076 return self._resource_cache
    def _mark_resource_cache_as_dirty(self):
        # Invalidate the resource cache; the volume info cache is derived
        # from the resource list, so it must be invalidated as well.
        self._resource_cache_dirty = True
        self._volume_info_cache_dirty = True
2082 # --------------------------------------------------------------------------
2084 def _ensure_volume_exists(self, volume_uuid):
2085 if volume_uuid not in self._volumes:
2086 raise LinstorVolumeManagerError(
2087 'volume `{}` doesn\'t exist'.format(volume_uuid),
2088 LinstorVolumeManagerError.ERR_VOLUME_NOT_EXISTS
2089 )
2091 def _find_best_size_candidates(self):
2092 result = self._linstor.resource_group_qmvs(self._group_name)
2093 error_str = self._get_error_str(result)
2094 if error_str:
2095 raise LinstorVolumeManagerError(
2096 'Failed to get max volume size allowed of SR `{}`: {}'.format(
2097 self._group_name,
2098 error_str
2099 )
2100 )
2101 return result[0].candidates
    def _fetch_resource_names(self, ignore_deleted=True):
        """
        Fetch the resource definition names attached to this SR's groups.
        :param bool ignore_deleted: When True, definitions flagged DELETE
            are still included; when False they are filtered out.
            NOTE(review): the parameter name suggests the opposite
            behavior — confirm the intended semantics with callers.
        :return: The matching resource definition names.
        :rtype: set
        """
        resource_names = set()
        dfns = self._linstor.resource_dfn_list_raise().resource_definitions
        for dfn in dfns:
            # Keep definitions belonging to any group derived from our base
            # group name (default + HA variants).
            if dfn.resource_group_name in self.get_all_group_names(self._base_group_name) and (
                ignore_deleted or
                linstor.consts.FLAG_DELETE not in dfn.flags
            ):
                resource_names.add(dfn.name)
        return resource_names
2114 def _get_volumes_info(self, volume_names=None):
2115 all_volume_info = {}
2117 if not self._volume_info_cache_dirty:
2118 return self._volume_info_cache
2120 # `volume_names` MUST contain all volumes registered in the KV store.
2121 # It can be provided to the function to avoid double fetching.
2122 if not volume_names:
2123 volume_names = self.get_volumes_with_name()
2124 volume_names = set(volume_names.values())
2126 def process_resource(resource):
2127 if resource.name not in all_volume_info:
2128 current = all_volume_info[resource.name] = self.VolumeInfo(
2129 resource.name
2130 )
2131 else:
2132 current = all_volume_info[resource.name]
2134 if linstor.consts.FLAG_DISKLESS not in resource.flags:
2135 current.diskful.append(resource.node_name)
2137 for volume in resource.volumes:
2138 # We ignore diskless pools of the form "DfltDisklessStorPool".
2139 if volume.storage_pool_name != self._group_name:
2140 continue
2141 # Only fetch first volume.
2142 if volume.number != 0:
2143 continue
2145 allocated_size = volume.allocated_size
2146 if allocated_size > current.allocated_size:
2147 current.allocated_size = allocated_size
2149 usable_size = volume.usable_size
2150 if usable_size > 0 and (
2151 usable_size < current.virtual_size or
2152 not current.virtual_size
2153 ):
2154 current.virtual_size = usable_size
2156 try:
2157 for resource in self._get_resource_cache().resources:
2158 if resource.name in volume_names:
2159 process_resource(resource)
2160 for volume in all_volume_info.values():
2161 if volume.allocated_size <= 0:
2162 raise LinstorVolumeManagerError('Failed to get allocated size of `{}`'.format(resource.name))
2164 if volume.virtual_size <= 0:
2165 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(volume.name))
2167 volume.allocated_size *= 1024
2168 volume.virtual_size *= 1024
2169 except LinstorVolumeManagerError:
2170 self._mark_resource_cache_as_dirty()
2171 raise
2173 self._volume_info_cache_dirty = False
2174 self._volume_info_cache = all_volume_info
2176 return all_volume_info
2178 def _get_volume_node_names_and_size(self, volume_name):
2179 node_names = set()
2180 size = -1
2181 for resource in self._linstor.resource_list_raise(
2182 filter_by_resources=[volume_name]
2183 ).resources:
2184 for volume in resource.volumes:
2185 # We ignore diskless pools of the form "DfltDisklessStorPool".
2186 if volume.storage_pool_name != self._group_name:
2187 continue
2189 node_names.add(resource.node_name)
2191 usable_size = volume.usable_size
2192 if usable_size <= 0:
2193 continue
2195 if size < 0:
2196 size = usable_size
2197 else:
2198 size = min(size, usable_size)
2200 if size <= 0:
2201 raise LinstorVolumeManagerError('Failed to get usable size of `{}`'.format(resource.name))
2203 return (node_names, size * 1024)
2205 def _compute_size(self, attr):
2206 capacity = 0
2207 for pool in self._get_storage_pools(force=True):
2208 space = pool.free_space
2209 if space:
2210 size = getattr(space, attr)
2211 if size < 0:
2212 raise LinstorVolumeManagerError(
2213 'Failed to get pool {} attr of `{}`'
2214 .format(attr, pool.node_name)
2215 )
2216 capacity += size
2217 return capacity * 1024
2219 def _get_node_names(self):
2220 node_names = set()
2221 for pool in self._get_storage_pools():
2222 node_names.add(pool.node_name)
2223 return node_names
2225 def _get_storage_pools(self, force=False):
2226 cur_time = time.time()
2227 elsaped_time = cur_time - self._storage_pools_time
2229 if force or elsaped_time >= self.STORAGE_POOLS_FETCH_INTERVAL:
2230 self._storage_pools = self._linstor.storage_pool_list_raise(
2231 filter_by_stor_pools=[self._group_name]
2232 ).storage_pools
2233 self._storage_pools_time = time.time()
2235 return self._storage_pools
    def _create_volume(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create the LINSTOR resource definition of a volume and optionally
        auto-place its resources. Retried up to 5 times; on failure the
        partially-created volume is destroyed (best effort).
        :param str volume_uuid: Volume UUID (used for errors/cleanup).
        :param str volume_name: LINSTOR resource name to create.
        :param int size: Requested size in bytes (rounded up to BLOCK_SIZE).
        :param bool place_resources: Auto-place after definition creation.
        :param bool high_availability: Use the HA group instead of the
            default group.
        """
        size = self.round_up_volume_size(size)
        self._mark_resource_cache_as_dirty()

        group_name = self._ha_group_name if high_availability else self._group_name
        def create_definition():
            # Create only the definition (no placement). If the HA group
            # doesn't exist yet, create it once and retry.
            first_attempt = True
            while True:
                try:
                    self._check_volume_creation_errors(
                        self._linstor.resource_group_spawn(
                            rsc_grp_name=group_name,
                            rsc_dfn_name=volume_name,
                            vlm_sizes=['{}B'.format(size)],
                            definitions_only=True
                        ),
                        volume_uuid,
                        self._group_name
                    )
                    break
                except LinstorVolumeManagerError as e:
                    # Only a missing HA group on the first attempt is
                    # recoverable here.
                    if (
                        not first_attempt or
                        not high_availability or
                        e.code != LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
                    ):
                        raise

                    first_attempt = False
                    self._create_resource_group(
                        self._linstor,
                        group_name,
                        self._group_name,
                        3,
                        True
                    )

            self._configure_volume_peer_slots(self._linstor, volume_name)

        def clean():
            # Best-effort rollback: failures are only logged.
            try:
                self._destroy_volume(volume_uuid, force=True, preserve_properties=True)
            except Exception as e:
                self._logger(
                    'Unable to destroy volume {} after creation fail: {}'
                    .format(volume_uuid, e)
                )

        def create():
            try:
                create_definition()
                if place_resources:
                    # Basic case when we use the default redundancy of the group.
                    self._check_volume_creation_errors(
                        self._linstor.resource_auto_place(
                            rsc_name=volume_name,
                            place_count=self._redundancy,
                            diskless_on_remaining=False
                        ),
                        volume_uuid,
                        self._group_name
                    )
            except LinstorVolumeManagerError as e:
                # An already-existing volume must NOT be destroyed here.
                if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                    clean()
                raise
            except Exception:
                clean()
                raise

        util.retry(create, maxretry=5)
    def _create_volume_with_properties(
        self,
        volume_uuid,
        volume_name,
        size,
        place_resources,
        high_availability
    ):
        """
        Create a volume and register its KV properties, guarding against
        duplicates (UUID, resource name, leftover properties).
        :return: The KV properties bound to the new volume's namespace.
        :raises LinstorVolumeManagerError: On duplicates or creation
            failure.
        """
        if self.check_volume_exists(volume_uuid):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, it already exists'
                .format(volume_uuid, self._group_name) + ' in properties',
                LinstorVolumeManagerError.ERR_VOLUME_EXISTS
            )

        if volume_name in self._fetch_resource_names():
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    volume_uuid, self._group_name
                ) + 'resource of the same name already exists in LINSTOR'
            )

        # I am paranoid.
        volume_properties = self._get_volume_properties(volume_uuid)
        if (volume_properties.get(self.PROP_NOT_EXISTS) is not None):
            raise LinstorVolumeManagerError(
                'Could not create volume `{}`, '.format(volume_uuid) +
                'properties already exist'
            )

        try:
            # Mark as "being created" before the actual creation so that
            # a crash leaves a detectable state.
            volume_properties[self.PROP_NOT_EXISTS] = self.STATE_CREATING
            volume_properties[self.PROP_VOLUME_NAME] = volume_name

            self._create_volume(
                volume_uuid,
                volume_name,
                size,
                place_resources,
                high_availability
            )

            assert volume_properties.namespace == \
                self._build_volume_namespace(volume_uuid)
            return volume_properties
        except LinstorVolumeManagerError as e:
            # Do not destroy existing resource!
            # In theory we can't get this error because we check this event
            # before the `self._create_volume` case.
            # It can only happen if the same volume uuid is used in the same
            # call in another host.
            if e.code != LinstorVolumeManagerError.ERR_VOLUME_EXISTS:
                self._destroy_volume(volume_uuid, force=True)
            raise
2371 def _find_device_path(self, volume_uuid, volume_name):
2372 current_device_path = self._request_device_path(
2373 volume_uuid, volume_name, activate=True
2374 )
2376 # We use realpath here to get the /dev/drbd<id> path instead of
2377 # /dev/drbd/by-res/<resource_name>.
2378 expected_device_path = self.build_device_path(volume_name)
2379 util.wait_for_path(expected_device_path, 5)
2381 device_realpath = os.path.realpath(expected_device_path)
2382 if current_device_path != device_realpath:
2383 raise LinstorVolumeManagerError(
2384 'Invalid path, current={}, expected={} (realpath={})'
2385 .format(
2386 current_device_path,
2387 expected_device_path,
2388 device_realpath
2389 )
2390 )
2391 return expected_device_path
    def _request_device_path(self, volume_uuid, volume_name, activate=False):
        """
        Return the local /dev/drbd<id> path of a volume.
        :param bool activate: When the resource is missing locally, create
            a diskless resource on this node and retry once.
        :raises LinstorVolumeManagerError: If no resource or path exists.
        """
        node_name = socket.gethostname()

        resource = next(filter(
            lambda resource: resource.node_name == node_name and
            resource.name == volume_name,
            self._get_resource_cache().resources
        ), None)

        if not resource:
            if activate:
                self._mark_resource_cache_as_dirty()
                self._activate_device_path(
                    self._linstor, node_name, volume_name
                )
                # Retry WITHOUT activate to avoid an infinite loop.
                return self._request_device_path(volume_uuid, volume_name)
            raise LinstorVolumeManagerError(
                'Unable to get dev path for `{}`, no resource found but definition "seems" to exist'
                .format(volume_uuid)
            )

        # Contains a path of the /dev/drbd<id> form.
        device_path = resource.volumes[0].device_path
        if not device_path:
            raise LinstorVolumeManagerError('Empty dev path for `{}`!'.format(volume_uuid))
        return device_path
    def _destroy_resource(self, resource_name, force=False):
        """
        Delete a resource definition. With `force`, ensure there is no
        opener, demote a possibly stuck primary, then retry once without
        force.
        :raises LinstorVolumeManagerError: If the deletion fails or if
            openers are still present.
        """
        result = self._linstor.resource_dfn_delete(resource_name)
        error_str = self._get_error_str(result)
        if not error_str:
            self._mark_resource_cache_as_dirty()
            return

        if not force:
            self._mark_resource_cache_as_dirty()
            raise LinstorVolumeManagerError(
                'Could not destroy resource `{}` from SR `{}`: {}'
                .format(resource_name, self._group_name, error_str)
            )

        # If force is used, ensure there is no opener.
        all_openers = get_all_volume_openers(resource_name, '0')
        for openers in all_openers.values():
            if openers:
                self._mark_resource_cache_as_dirty()
                raise LinstorVolumeManagerError(
                    'Could not force destroy resource `{}` from SR `{}`: {} (openers=`{}`)'
                    .format(resource_name, self._group_name, error_str, all_openers)
                )

        # Maybe the resource is blocked in primary mode. DRBD/LINSTOR issue?
        resource_states = filter(
            lambda resource_state: resource_state.name == resource_name,
            self._get_resource_cache().resource_states
        )

        # Mark only after computation of states.
        self._mark_resource_cache_as_dirty()

        for resource_state in resource_states:
            # NOTE(review): `volume_state` is never used below; the index
            # access may only act as a sanity check — confirm before
            # removing it.
            volume_state = resource_state.volume_states[0]
            if resource_state.in_use:
                demote_drbd_resource(resource_state.node_name, resource_name)
                break
        # Retry once without force after the demote attempt.
        self._destroy_resource(resource_name)
2460 def _destroy_volume(self, volume_uuid, force=False, preserve_properties=False):
2461 volume_properties = self._get_volume_properties(volume_uuid)
2462 try:
2463 volume_name = volume_properties.get(self.PROP_VOLUME_NAME)
2464 if volume_name in self._fetch_resource_names():
2465 self._destroy_resource(volume_name, force)
2467 # Assume this call is atomic.
2468 if not preserve_properties:
2469 volume_properties.clear()
2470 except Exception as e:
2471 raise LinstorVolumeManagerError(
2472 'Cannot destroy volume `{}`: {}'.format(volume_uuid, e)
2473 )
    def _build_volumes(self, repair):
        """
        Rebuild `self._volumes` from the KV store, optionally cleaning up
        inconsistent entries (partially created volumes and pending UUID
        renames).
        :param bool repair: Allow destructive cleanup (master only). When
            False, pending UUID renames abort the build.
        :raises LinstorVolumeManagerError: If invalid "updating uuid"
            entries exist and `repair` is False.
        """
        properties = self._kv_cache
        resource_names = self._fetch_resource_names()

        self._volumes = set()

        updating_uuid_volumes = self._get_volumes_by_property(
            self.REG_UPDATING_UUID_SRC, ignore_inexisting_volumes=False
        )
        if updating_uuid_volumes and not repair:
            raise LinstorVolumeManagerError(
                'Cannot build LINSTOR volume list: '
                'It exists invalid "updating uuid volumes", repair is required'
            )

        existing_volumes = self._get_volumes_by_property(
            self.REG_NOT_EXISTS, ignore_inexisting_volumes=False
        )
        for volume_uuid, not_exists in existing_volumes.items():
            properties.namespace = self._build_volume_namespace(volume_uuid)

            # Volumes that are the target of a pending rename are handled
            # by the second loop below.
            src_uuid = properties.get(self.PROP_UPDATING_UUID_SRC)
            if src_uuid:
                self._logger(
                    'Ignoring volume during manager initialization with prop '
                    ' PROP_UPDATING_UUID_SRC: {} (properties={})'
                    .format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Insert volume in list if the volume exists. Or if the volume
            # is being created and a slave wants to use it (repair = False).
            #
            # If we are on the master and if repair is True and state is
            # Creating, it's probably a bug or crash: the creation process has
            # been stopped.
            if not_exists == self.STATE_EXISTS or (
                not repair and not_exists == self.STATE_CREATING
            ):
                self._volumes.add(volume_uuid)
                continue

            if not repair:
                self._logger(
                    'Ignoring bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                continue

            # Remove bad volume.
            try:
                self._logger(
                    'Removing bad volume during manager initialization: {} '
                    '(properties={})'.format(
                        volume_uuid,
                        self._get_filtered_properties(properties)
                    )
                )
                volume_name = properties.get(self.PROP_VOLUME_NAME)

                # Little optimization, don't call `self._destroy_volume`,
                # we already have resource name list.
                if volume_name in resource_names:
                    self._destroy_resource(volume_name, force=True)

                # Assume this call is atomic.
                properties.clear()
            except Exception as e:
                # Do not raise, we don't want to block user action.
                self._logger(
                    'Cannot clean volume {}: {}'.format(volume_uuid, e)
                )

                # The volume can't be removed, maybe it's still in use,
                # in this case rename it with the "DELETED_" prefix.
                # This prefix is mandatory if it exists a snap transaction to
                # rollback because the original VDI UUID can try to be renamed
                # with the UUID we are trying to delete...
                if not volume_uuid.startswith('DELETED_'):
                    self.update_volume_uuid(
                        volume_uuid, 'DELETED_' + volume_uuid, force=True
                    )

        # Finish interrupted UUID renames (dest keeps the data, src is
        # cleared).
        for dest_uuid, src_uuid in updating_uuid_volumes.items():
            dest_namespace = self._build_volume_namespace(dest_uuid)

            properties.namespace = dest_namespace
            if int(properties.get(self.PROP_NOT_EXISTS)):
                # Destination was never fully created: drop its entry.
                properties.clear()
                continue

            properties.namespace = self._build_volume_namespace(src_uuid)
            properties.clear()

            properties.namespace = dest_namespace
            properties.pop(self.PROP_UPDATING_UUID_SRC)

            if src_uuid in self._volumes:
                self._volumes.remove(src_uuid)
            self._volumes.add(dest_uuid)
    def _get_sr_properties(self):
        # Fresh KV view rooted at the SR namespace (not the volume one).
        return self._create_linstor_kv(self._build_sr_namespace())
2585 def _get_volumes_by_property(
2586 self, reg_prop, ignore_inexisting_volumes=True
2587 ):
2588 base_properties = self._get_kv_cache()
2589 base_properties.namespace = self._build_volume_namespace()
2591 volume_properties = {}
2592 for volume_uuid in self._volumes:
2593 volume_properties[volume_uuid] = ''
2595 for key, value in base_properties.items():
2596 res = reg_prop.match(key)
2597 if res:
2598 volume_uuid = res.groups()[0]
2599 if not ignore_inexisting_volumes or \
2600 volume_uuid in self._volumes:
2601 volume_properties[volume_uuid] = value
2603 return volume_properties
    def _create_linstor_kv(self, namespace):
        """
        Build a new LINSTOR KV store view for this group.
        :param str namespace: Root namespace of the view.
        :rtype: linstor.KV
        """
        return linstor.KV(
            self._group_name,
            uri=self._linstor.controller_host(),
            namespace=namespace
        )
2612 def _get_volume_properties(self, volume_uuid):
2613 properties = self._get_kv_cache()
2614 properties.namespace = self._build_volume_namespace(volume_uuid)
2615 return properties
2617 @classmethod
2618 def _build_sr_namespace(cls):
2619 return '/{}/'.format(cls.NAMESPACE_SR)
2621 @classmethod
2622 def _build_volume_namespace(cls, volume_uuid=None):
2623 # Return a path to all volumes if `volume_uuid` is not given.
2624 if volume_uuid is None:
2625 return '/{}/'.format(cls.NAMESPACE_VOLUME)
2626 return '/{}/{}/'.format(cls.NAMESPACE_VOLUME, volume_uuid)
2628 @classmethod
2629 def _get_error_str(cls, result):
2630 return ', '.join([
2631 err.message for err in cls._filter_errors(result)
2632 ])
    @classmethod
    def _create_linstor_instance(
        cls, uri, keep_uri_unmodified=False, attempt_count=30
    ):
        """
        Open a connected LINSTOR API instance.
        :param str uri: Controller URI; when falsy, it is discovered via
            `get_controller_uri`.
        :param bool keep_uri_unmodified: During retries, keep the given
            URI instead of rediscovering the controller each time.
        :param int attempt_count: Max attempts of the retry phase.
        :rtype: linstor.Linstor
        """
        # NOTE(review): `retry` is unused — kept as-is.
        retry = False

        def connect(uri):
            # Rebinds the local `uri` only; discovery happens on every
            # call when the outer `uri` is None.
            if not uri:
                uri = get_controller_uri()
                if not uri:
                    raise LinstorVolumeManagerError(
                        'Unable to find controller uri...'
                    )
            instance = linstor.Linstor(uri, keep_alive=True)
            instance.connect()
            return instance

        try:
            # Fast path: single attempt with the provided URI.
            return connect(uri)
        except (linstor.errors.LinstorNetworkError, LinstorVolumeManagerError):
            pass

        if not keep_uri_unmodified:
            # The controller may have moved: force rediscovery on retry.
            uri = None

        return util.retry(
            lambda: connect(uri),
            maxretry=attempt_count,
            period=1,
            exceptions=[
                linstor.errors.LinstorNetworkError,
                LinstorVolumeManagerError
            ]
        )
2669 @classmethod
2670 def _configure_volume_peer_slots(cls, lin, volume_name):
2671 result = lin.resource_dfn_modify(volume_name, {}, peer_slots=3)
2672 error_str = cls._get_error_str(result)
2673 if error_str:
2674 raise LinstorVolumeManagerError(
2675 'Could not configure volume peer slots of {}: {}'
2676 .format(volume_name, error_str)
2677 )
2679 @classmethod
2680 def _activate_device_path(cls, lin, node_name, volume_name):
2681 result = lin.resource_make_available(node_name, volume_name, diskful=False)
2682 if linstor.Linstor.all_api_responses_no_error(result):
2683 return
2684 errors = linstor.Linstor.filter_api_call_response_errors(result)
2685 if len(errors) == 1 and errors[0].is_error(
2686 linstor.consts.FAIL_EXISTS_RSC
2687 ):
2688 return
2690 raise LinstorVolumeManagerError(
2691 'Unable to activate device path of `{}` on node `{}`: {}'
2692 .format(volume_name, node_name, ', '.join(
2693 [str(x) for x in result]))
2694 )
2696 @classmethod
2697 def _request_database_path(cls, lin, activate=False):
2698 node_name = socket.gethostname()
2700 try:
2701 resource = next(filter(
2702 lambda resource: resource.node_name == node_name and
2703 resource.name == DATABASE_VOLUME_NAME,
2704 lin.resource_list_raise().resources
2705 ), None)
2706 except Exception as e:
2707 raise LinstorVolumeManagerError(
2708 'Unable to fetch database resource: {}'
2709 .format(e)
2710 )
2712 if not resource:
2713 if activate:
2714 cls._activate_device_path(
2715 lin, node_name, DATABASE_VOLUME_NAME
2716 )
2717 return cls._request_database_path(
2718 DATABASE_VOLUME_NAME, DATABASE_VOLUME_NAME
2719 )
2720 raise LinstorVolumeManagerError(
2721 'Empty dev path for `{}`, but definition "seems" to exist'
2722 .format(DATABASE_PATH)
2723 )
2724 # Contains a path of the /dev/drbd<id> form.
2725 return resource.volumes[0].device_path
    @classmethod
    def _create_database_volume(
        cls, lin, group_name, storage_pool_name, node_names, redundancy
    ):
        """
        Create, place and format the LINSTOR database volume that backs
        DATABASE_PATH. Must be called on an empty LINSTOR setup.
        :param lin: LINSTOR API instance.
        :param str group_name: Resource group to spawn the definition in.
        :param str storage_pool_name: Storage pool for diskful replicas.
        :param node_names: All node names of the pool.
        :param int redundancy: Number of diskful replicas to create.
        :return: The expected device path of the database volume.
        :rtype: str
        """
        try:
            dfns = lin.resource_dfn_list_raise().resource_definitions
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Unable to get definitions during database creation: {}'
                .format(e)
            )

        if dfns:
            raise LinstorVolumeManagerError(
                'Could not create volume `{}` from SR `{}`, '.format(
                    DATABASE_VOLUME_NAME, group_name
                ) + 'LINSTOR volume list must be empty.'
            )

        # Workaround to use thin lvm. Without this line an error is returned:
        # "Not enough available nodes"
        # I don't understand why but this command protect against this bug.
        try:
            pools = lin.storage_pool_list_raise(
                filter_by_stor_pools=[storage_pool_name]
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to get storage pool list before database creation: {}'
                .format(e)
            )

        # Ensure we have a correct list of storage pools.
        assert pools.storage_pools  # We must have at least one storage pool!
        nodes_with_pool = list(map(lambda pool: pool.node_name, pools.storage_pools))
        for node_name in nodes_with_pool:
            assert node_name in node_names
        util.SMlog('Nodes with storage pool: {}'.format(nodes_with_pool))

        # Create the database definition.
        size = cls.round_up_volume_size(DATABASE_SIZE)
        cls._check_volume_creation_errors(lin.resource_group_spawn(
            rsc_grp_name=group_name,
            rsc_dfn_name=DATABASE_VOLUME_NAME,
            vlm_sizes=['{}B'.format(size)],
            definitions_only=True
        ), DATABASE_VOLUME_NAME, group_name)
        cls._configure_volume_peer_slots(lin, DATABASE_VOLUME_NAME)

        # Create real resources on the first nodes.
        resources = []

        diskful_nodes = []
        diskless_nodes = []
        for node_name in node_names:
            if node_name in nodes_with_pool:
                diskful_nodes.append(node_name)
            else:
                diskless_nodes.append(node_name)

        assert diskful_nodes
        for node_name in diskful_nodes[:redundancy]:
            util.SMlog('Create database diskful on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                storage_pool=storage_pool_name
            ))
        # Create diskless resources on the remaining set.
        for node_name in diskful_nodes[redundancy:] + diskless_nodes:
            util.SMlog('Create database diskless on {}'.format(node_name))
            resources.append(linstor.ResourceData(
                node_name=node_name,
                rsc_name=DATABASE_VOLUME_NAME,
                diskless=True
            ))

        result = lin.resource_create(resources)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create database volume from SR `{}`: {}'.format(
                    group_name, error_str
                )
            )

        # Create database and ensure path exists locally and
        # on replicated devices.
        current_device_path = cls._request_database_path(lin, activate=True)

        # Ensure diskless paths exist on other hosts. Otherwise PBDs can't be
        # plugged.
        for node_name in node_names:
            cls._activate_device_path(lin, node_name, DATABASE_VOLUME_NAME)

        # We use realpath here to get the /dev/drbd<id> path instead of
        # /dev/drbd/by-res/<resource_name>.
        expected_device_path = cls.build_device_path(DATABASE_VOLUME_NAME)
        util.wait_for_path(expected_device_path, 5)

        device_realpath = os.path.realpath(expected_device_path)
        if current_device_path != device_realpath:
            raise LinstorVolumeManagerError(
                'Invalid path, current={}, expected={} (realpath={})'
                .format(
                    current_device_path,
                    expected_device_path,
                    device_realpath
                )
            )

        # Format the new device with the database filesystem.
        try:
            util.retry(
                lambda: util.pread2([DATABASE_MKFS, expected_device_path]),
                maxretry=5
            )
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Failed to execute {} on database volume: {}'
                .format(DATABASE_MKFS, e)
            )

        return expected_device_path
2851 @classmethod
2852 def _destroy_database_volume(cls, lin, group_name):
2853 error_str = cls._get_error_str(
2854 lin.resource_dfn_delete(DATABASE_VOLUME_NAME)
2855 )
2856 if error_str:
2857 raise LinstorVolumeManagerError(
2858 'Could not destroy resource `{}` from SR `{}`: {}'
2859 .format(DATABASE_VOLUME_NAME, group_name, error_str)
2860 )
    @classmethod
    def _mount_database_volume(cls, volume_path, mount=True, force=False):
        """
        Mount (or unmount) the database volume on DATABASE_PATH, moving
        the existing config through a temporary backup directory.
        :param str volume_path: Device path of the database volume.
        :param bool mount: Mount when True, unmount when False.
        :param bool force: Allow overwriting files when moving them back.
        """
        try:
            # 1. Create a backup config folder.
            database_not_empty = bool(os.listdir(DATABASE_PATH))
            backup_path = cls._create_database_backup_path()

            # 2. Move the config in the mounted volume.
            if database_not_empty:
                cls._move_files(DATABASE_PATH, backup_path)

            cls._mount_volume(volume_path, DATABASE_PATH, mount)

            if database_not_empty:
                cls._move_files(backup_path, DATABASE_PATH, force)

            # 3. Remove useless backup directory.
            try:
                os.rmdir(backup_path)
            except Exception as e:
                raise LinstorVolumeManagerError(
                    'Failed to remove backup path {} of LINSTOR config: {}'
                    .format(backup_path, e)
                )
        except Exception as e:
            # Best-effort rollback; individual failures are ignored.
            # NOTE(review): if `os.listdir` itself raises, `backup_path`
            # is unbound here and the rollback raises NameError — confirm.
            def force_exec(fn):
                try:
                    fn()
                except Exception:
                    pass

            # Undo the (un)mount only if it actually happened.
            if mount == cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    DATABASE_PATH, backup_path
                ))
                force_exec(lambda: cls._mount_volume(
                    volume_path, DATABASE_PATH, not mount
                ))

            if mount != cls._is_mounted(DATABASE_PATH):
                force_exec(lambda: cls._move_files(
                    backup_path, DATABASE_PATH
                ))

            force_exec(lambda: os.rmdir(backup_path))
            raise e
    @classmethod
    def _force_destroy_database_volume(cls, lin, group_name):
        # Best-effort destruction: all errors are deliberately swallowed.
        try:
            cls._destroy_database_volume(lin, group_name)
        except Exception:
            pass
2916 @classmethod
2917 def _destroy_storage_pool(cls, lin, group_name, node_name):
2918 def destroy():
2919 result = lin.storage_pool_delete(node_name, group_name)
2920 errors = cls._filter_errors(result)
2921 if cls._check_errors(errors, [
2922 linstor.consts.FAIL_NOT_FOUND_STOR_POOL,
2923 linstor.consts.FAIL_NOT_FOUND_STOR_POOL_DFN
2924 ]):
2925 return
2927 if errors:
2928 raise LinstorVolumeManagerError(
2929 'Failed to destroy SP `{}` on node `{}`: {}'.format(
2930 group_name,
2931 node_name,
2932 cls._get_error_str(errors)
2933 )
2934 )
2936 # We must retry to avoid errors like:
2937 # "can not be deleted as volumes / snapshot-volumes are still using it"
2938 # after LINSTOR database volume destruction.
2939 return util.retry(destroy, maxretry=10)
    @classmethod
    def _create_resource_group(
        cls,
        lin,
        group_name,
        storage_pool_name,
        redundancy,
        destroy_old_group
    ):
        """
        Create a LINSTOR resource group and its volume group.
        :param bool destroy_old_group: If a group of the same name already
            exists, destroy it once and retry the creation.
        :raises LinstorVolumeManagerError: If RG or VG creation fails.
        """
        rg_creation_attempt = 0
        while True:
            result = lin.resource_group_create(
                name=group_name,
                place_count=redundancy,
                storage_pool=storage_pool_name,
                diskless_on_remaining=False
            )
            error_str = cls._get_error_str(result)
            if not error_str:
                break

            # Only "group already exists" is recoverable, and only once.
            errors = cls._filter_errors(result)
            if destroy_old_group and cls._check_errors(errors, [
                linstor.consts.FAIL_EXISTS_RSC_GRP
            ]):
                rg_creation_attempt += 1
                if rg_creation_attempt < 2:
                    try:
                        cls._destroy_resource_group(lin, group_name)
                    except Exception as e:
                        error_str = 'Failed to destroy old and empty RG: {}'.format(e)
                    else:
                        # Destruction succeeded: retry the creation.
                        continue

            raise LinstorVolumeManagerError(
                'Could not create RG `{}`: {}'.format(
                    group_name, error_str
                )
            )

        result = lin.volume_group_create(group_name)
        error_str = cls._get_error_str(result)
        if error_str:
            raise LinstorVolumeManagerError(
                'Could not create VG `{}`: {}'.format(
                    group_name, error_str
                )
            )
2990 @classmethod
2991 def _destroy_resource_group(cls, lin, group_name):
2992 def destroy():
2993 result = lin.resource_group_delete(group_name)
2994 errors = cls._filter_errors(result)
2995 if cls._check_errors(errors, [
2996 linstor.consts.FAIL_NOT_FOUND_RSC_GRP
2997 ]):
2998 return
3000 if errors:
3001 raise LinstorVolumeManagerError(
3002 'Failed to destroy RG `{}`: {}'
3003 .format(group_name, cls._get_error_str(errors))
3004 )
3006 return util.retry(destroy, maxretry=10)
3008 @classmethod
3009 def _build_group_name(cls, base_name):
3010 # If thin provisioning is used we have a path like this:
3011 # `VG/LV`. "/" is not accepted by LINSTOR.
3012 return '{}{}'.format(cls.PREFIX_SR, base_name.replace('/', '_'))
3014 # Used to store important data in a HA context,
3015 # i.e. a replication count of 3.
3016 @classmethod
3017 def _build_ha_group_name(cls, base_name):
3018 return '{}{}'.format(cls.PREFIX_HA, base_name.replace('/', '_'))
3020 @classmethod
3021 def _check_volume_creation_errors(cls, result, volume_uuid, group_name):
3022 errors = cls._filter_errors(result)
3023 if cls._check_errors(errors, [
3024 linstor.consts.FAIL_EXISTS_RSC, linstor.consts.FAIL_EXISTS_RSC_DFN
3025 ]):
3026 raise LinstorVolumeManagerError(
3027 'Failed to create volume `{}` from SR `{}`, it already exists'
3028 .format(volume_uuid, group_name),
3029 LinstorVolumeManagerError.ERR_VOLUME_EXISTS
3030 )
3032 if cls._check_errors(errors, [linstor.consts.FAIL_NOT_FOUND_RSC_GRP]):
3033 raise LinstorVolumeManagerError(
3034 'Failed to create volume `{}` from SR `{}`, resource group doesn\'t exist'
3035 .format(volume_uuid, group_name),
3036 LinstorVolumeManagerError.ERR_GROUP_NOT_EXISTS
3037 )
3039 if errors:
3040 raise LinstorVolumeManagerError(
3041 'Failed to create volume `{}` from SR `{}`: {}'.format(
3042 volume_uuid,
3043 group_name,
3044 cls._get_error_str(errors)
3045 )
3046 )
    @classmethod
    def _move_files(cls, src_dir, dest_dir, force=False):
        """
        Move all files of `src_dir` into `dest_dir`.
        :param bool force: Allow overwriting existing destination files.
            Without it, a non-empty destination aborts and a mid-move
            failure rolls already-moved files back (best effort).
        :raises LinstorVolumeManagerError: On any failure.
        """
        def listdir(dir):
            # "lost+found" (ext4 artifact) must never be moved.
            ignored = ['lost+found']
            return [file for file in os.listdir(dir) if file not in ignored]

        try:
            if not force:
                files = listdir(dest_dir)
                if files:
                    raise LinstorVolumeManagerError(
                        'Cannot move files from {} to {} because destination '
                        'contains: {}'.format(src_dir, dest_dir, files)
                    )
        except LinstorVolumeManagerError:
            raise
        except Exception as e:
            raise LinstorVolumeManagerError(
                'Cannot list dir {}: {}'.format(dest_dir, e)
            )

        try:
            for file in listdir(src_dir):
                try:
                    dest_file = os.path.join(dest_dir, file)
                    if not force and os.path.exists(dest_file):
                        raise LinstorVolumeManagerError(
                            'Cannot move {} because it already exists in the '
                            'destination'.format(file)
                        )
                    shutil.move(os.path.join(src_dir, file), dest_file)
                except LinstorVolumeManagerError:
                    raise
                except Exception as e:
                    raise LinstorVolumeManagerError(
                        'Cannot move {}: {}'.format(file, e)
                    )
        except Exception as e:
            if not force:
                # Best-effort rollback of the already-moved files.
                try:
                    cls._move_files(dest_dir, src_dir, force=True)
                except Exception:
                    pass

            raise LinstorVolumeManagerError(
                'Failed to move files from {} to {}: {}'.format(
                    src_dir, dest_dir, e
                )
            )
3098 @staticmethod
3099 def _create_database_backup_path():
3100 path = DATABASE_PATH + '-' + str(uuid.uuid4())
3101 try:
3102 os.mkdir(path)
3103 return path
3104 except Exception as e:
3105 raise LinstorVolumeManagerError(
3106 'Failed to create backup path {} of LINSTOR config: {}'
3107 .format(path, e)
3108 )
3110 @staticmethod
3111 def _get_filtered_properties(properties):
3112 return dict(properties.items())
3114 @staticmethod
3115 def _filter_errors(result):
3116 return [
3117 err for err in result
3118 if hasattr(err, 'is_error') and err.is_error()
3119 ]
3121 @staticmethod
3122 def _check_errors(result, codes):
3123 for err in result:
3124 for code in codes:
3125 if err.is_error(code):
3126 return True
3127 return False
3129 @classmethod
3130 def _controller_is_running(cls):
3131 return cls._service_is_running('linstor-controller')
3133 @classmethod
3134 def _start_controller(cls, start=True):
3135 return cls._start_service('linstor-controller', start)
3137 @staticmethod
3138 def _start_service(name, start=True):
3139 action = 'start' if start else 'stop'
3140 (ret, out, err) = util.doexec([
3141 'systemctl', action, name
3142 ])
3143 if ret != 0:
3144 raise LinstorVolumeManagerError(
3145 'Failed to {} {}: {} {}'
3146 .format(action, name, out, err)
3147 )
3149 @staticmethod
3150 def _service_is_running(name):
3151 (ret, out, err) = util.doexec([
3152 'systemctl', 'is-active', '--quiet', name
3153 ])
3154 return not ret
3156 @staticmethod
3157 def _is_mounted(mountpoint):
3158 (ret, out, err) = util.doexec(['mountpoint', '-q', mountpoint])
3159 return ret == 0
3161 @classmethod
3162 def _mount_volume(cls, volume_path, mountpoint, mount=True):
3163 if mount:
3164 try:
3165 util.pread(['mount', volume_path, mountpoint])
3166 except Exception as e:
3167 raise LinstorVolumeManagerError(
3168 'Failed to mount volume {} on {}: {}'
3169 .format(volume_path, mountpoint, e)
3170 )
3171 else:
3172 try:
3173 if cls._is_mounted(mountpoint):
3174 util.pread(['umount', mountpoint])
3175 except Exception as e:
3176 raise LinstorVolumeManagerError(
3177 'Failed to umount volume {} on {}: {}'
3178 .format(volume_path, mountpoint, e)
3179 )
3182# ==============================================================================
3184# Check if a path is a DRBD resource and log the process name/pid
3185# that opened it.
def log_drbd_openers(path):
    """Log which processes/hosts hold the DRBD resource behind `path` open.

    Best-effort diagnostic helper: `path` is expected to be a
    '/dev/drbd/by-res/<resource>/<volume>' symlink; anything else is
    silently ignored. Failures are logged via util.SMlog, never raised.

    :param str path: Device path to inspect.
    """
    # Ignore if it's not a symlink to DRBD resource.
    if not path.startswith(DRBD_BY_RES_PATH):
        return

    # Compute resource name.
    res_name_end = path.find('/', len(DRBD_BY_RES_PATH))
    if res_name_end == -1:
        return
    res_name = path[len(DRBD_BY_RES_PATH):res_name_end]

    # Compute volume id: the text after the last '/'.
    # NOTE(review): for a canonical two-component path
    # '<by-res>/<resource>/<volume>' the last '/' IS res_name_end, so this
    # guard returns early and nothing gets logged — confirm this is the
    # intended behavior and not an inverted condition.
    volume_end = path.rfind('/')
    if volume_end == res_name_end:
        return
    volume = path[volume_end + 1:]

    try:
        # Ensure path is a DRBD.
        # 147 is the block-device major number assigned to DRBD.
        drbd_path = os.path.realpath(path)
        stats = os.stat(drbd_path)
        if not stat.S_ISBLK(stats.st_mode) or os.major(stats.st_rdev) != 147:
            return

        # Find where the device is open.
        (ret, stdout, stderr) = util.doexec(['drbdadm', 'status', res_name])
        if ret != 0:
            util.SMlog('Failed to execute `drbdadm status` on `{}`: {}'.format(
                res_name, stderr
            ))
            return

        # Is it a local device?
        # The first line of `drbdadm status` describes the local node,
        # so a leading '<res> role:Primary' means it is open locally.
        if stdout.startswith('{} role:Primary'.format(res_name)):
            util.SMlog(
                'DRBD resource `{}` is open on local host: {}'
                .format(path, get_local_volume_openers(res_name, volume))
            )
            return

        # Is it a remote device?
        util.SMlog(
            'DRBD resource `{}` is open on hosts: {}'
            .format(path, get_all_volume_openers(res_name, volume))
        )
    except Exception as e:
        util.SMlog(
            'Got exception while trying to determine where DRBD resource ' +
            '`{}` is open: {}'.format(path, e)
        )