Diffstat (limited to 'lib/metadata/lv_manip.c')
-rw-r--r--  lib/metadata/lv_manip.c  8176
1 file changed, 6691 insertions, 1485 deletions
diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c
index d469fe8..9bec8b5 100644
--- a/lib/metadata/lv_manip.c
+++ b/lib/metadata/lv_manip.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
- * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
@@ -10,23 +10,33 @@
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
- * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "lib.h"
-#include "metadata.h"
-#include "locking.h"
+#include "lib/misc/lib.h"
+#include "lib/metadata/metadata.h"
+#include "lib/locking/locking.h"
#include "pv_map.h"
-#include "lvm-string.h"
-#include "toolcontext.h"
-#include "lv_alloc.h"
-#include "pv_alloc.h"
-#include "display.h"
-#include "segtype.h"
-#include "archiver.h"
-#include "activate.h"
-#include "str_list.h"
-#include "defaults.h"
+#include "lib/misc/lvm-string.h"
+#include "lib/commands/toolcontext.h"
+#include "lib/metadata/lv_alloc.h"
+#include "lib/metadata/pv_alloc.h"
+#include "lib/display/display.h"
+#include "lib/metadata/segtype.h"
+#include "lib/activate/activate.h"
+#include "lib/datastruct/str_list.h"
+#include "lib/config/defaults.h"
+#include "lib/misc/lvm-exec.h"
+#include "lib/mm/memlock.h"
+#include "lib/locking/lvmlockd.h"
+#include "lib/label/label.h"
+#include "lib/misc/lvm-signal.h"
+#include "lib/device/filesystem.h"
+
+#ifdef HAVE_BLKZEROOUT
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#endif
typedef enum {
PREFERRED,
@@ -46,6 +56,10 @@ typedef enum {
#define A_CLING_BY_TAGS 0x08 /* Must match tags against existing segment */
#define A_CAN_SPLIT 0x10
+#define A_AREA_COUNT_MATCHES 0x20 /* Existing lvseg has same number of areas as new segment */
+
+#define A_POSITIONAL_FILL 0x40 /* Slots are positional and filled using PREFERRED */
+#define A_PARTITION_BY_TAGS 0x80 /* No allocated area may share any tag with any other */
/*
* Constant parameters during a single allocation attempt.
@@ -61,10 +75,12 @@ struct alloc_parms {
* Holds varying state of each allocation attempt.
*/
struct alloc_state {
+ const struct alloc_parms *alloc_parms;
struct pv_area_used *areas;
uint32_t areas_size;
uint32_t log_area_count_still_needed; /* Number of areas still needing to be allocated for the log */
uint32_t allocated; /* Total number of extents allocated so far */
+ uint32_t num_positional_areas; /* Number of parallel allocations that must be contiguous/cling */
};
struct lv_names {
@@ -72,6 +88,773 @@ struct lv_names {
const char *new;
};
+enum {
+ LV_TYPE_UNKNOWN,
+ LV_TYPE_NONE,
+ LV_TYPE_PUBLIC,
+ LV_TYPE_PRIVATE,
+ LV_TYPE_HISTORY,
+ LV_TYPE_LINEAR,
+ LV_TYPE_STRIPED,
+ LV_TYPE_MIRROR,
+ LV_TYPE_RAID,
+ LV_TYPE_THIN,
+ LV_TYPE_CACHE,
+ LV_TYPE_SPARSE,
+ LV_TYPE_ORIGIN,
+ LV_TYPE_THINORIGIN,
+ LV_TYPE_MULTITHINORIGIN,
+ LV_TYPE_THICKORIGIN,
+ LV_TYPE_MULTITHICKORIGIN,
+ LV_TYPE_CACHEORIGIN,
+ LV_TYPE_EXTTHINORIGIN,
+ LV_TYPE_MULTIEXTTHINORIGIN,
+ LV_TYPE_SNAPSHOT,
+ LV_TYPE_THINSNAPSHOT,
+ LV_TYPE_THICKSNAPSHOT,
+ LV_TYPE_PVMOVE,
+ LV_TYPE_IMAGE,
+ LV_TYPE_LOG,
+ LV_TYPE_METADATA,
+ LV_TYPE_POOL,
+ LV_TYPE_DATA,
+ LV_TYPE_SPARE,
+ LV_TYPE_VDO,
+ LV_TYPE_VIRTUAL,
+ LV_TYPE_RAID0,
+ LV_TYPE_RAID0_META,
+ LV_TYPE_RAID1,
+ LV_TYPE_RAID10,
+ LV_TYPE_RAID4,
+ LV_TYPE_RAID5,
+ LV_TYPE_RAID5_N,
+ LV_TYPE_RAID5_LA,
+ LV_TYPE_RAID5_RA,
+ LV_TYPE_RAID5_LS,
+ LV_TYPE_RAID5_RS,
+ LV_TYPE_RAID6,
+ LV_TYPE_RAID6_ZR,
+ LV_TYPE_RAID6_NR,
+ LV_TYPE_RAID6_NC,
+ LV_TYPE_LOCKD,
+ LV_TYPE_SANLOCK,
+ LV_TYPE_CACHEVOL,
+ LV_TYPE_WRITECACHE,
+ LV_TYPE_WRITECACHEORIGIN,
+ LV_TYPE_INTEGRITY,
+ LV_TYPE_INTEGRITYORIGIN
+};
+
+static const char *_lv_type_names[] = {
+ [LV_TYPE_UNKNOWN] = "unknown",
+ [LV_TYPE_NONE] = "none",
+ [LV_TYPE_PUBLIC] = "public",
+ [LV_TYPE_PRIVATE] = "private",
+ [LV_TYPE_HISTORY] = "history",
+ [LV_TYPE_LINEAR] = "linear",
+ [LV_TYPE_STRIPED] = "striped",
+ [LV_TYPE_MIRROR] = "mirror",
+ [LV_TYPE_RAID] = "raid",
+ [LV_TYPE_THIN] = "thin",
+ [LV_TYPE_CACHE] = "cache",
+ [LV_TYPE_SPARSE] = "sparse",
+ [LV_TYPE_ORIGIN] = "origin",
+ [LV_TYPE_THINORIGIN] = "thinorigin",
+ [LV_TYPE_MULTITHINORIGIN] = "multithinorigin",
+ [LV_TYPE_THICKORIGIN] = "thickorigin",
+ [LV_TYPE_MULTITHICKORIGIN] = "multithickorigin",
+ [LV_TYPE_CACHEORIGIN] = "cacheorigin",
+ [LV_TYPE_EXTTHINORIGIN] = "extthinorigin",
+ [LV_TYPE_MULTIEXTTHINORIGIN] = "multiextthinorigin",
+ [LV_TYPE_SNAPSHOT] = "snapshot",
+ [LV_TYPE_THINSNAPSHOT] = "thinsnapshot",
+ [LV_TYPE_THICKSNAPSHOT] = "thicksnapshot",
+ [LV_TYPE_PVMOVE] = "pvmove",
+ [LV_TYPE_IMAGE] = "image",
+ [LV_TYPE_LOG] = "log",
+ [LV_TYPE_METADATA] = "metadata",
+ [LV_TYPE_POOL] = "pool",
+ [LV_TYPE_DATA] = "data",
+ [LV_TYPE_SPARE] = "spare",
+ [LV_TYPE_VDO] = "vdo",
+ [LV_TYPE_VIRTUAL] = "virtual",
+ [LV_TYPE_RAID0] = SEG_TYPE_NAME_RAID0,
+ [LV_TYPE_RAID0_META] = SEG_TYPE_NAME_RAID0_META,
+ [LV_TYPE_RAID1] = SEG_TYPE_NAME_RAID1,
+ [LV_TYPE_RAID10] = SEG_TYPE_NAME_RAID10,
+ [LV_TYPE_RAID4] = SEG_TYPE_NAME_RAID4,
+ [LV_TYPE_RAID5] = SEG_TYPE_NAME_RAID5,
+ [LV_TYPE_RAID5_N] = SEG_TYPE_NAME_RAID5_N,
+ [LV_TYPE_RAID5_LA] = SEG_TYPE_NAME_RAID5_LA,
+ [LV_TYPE_RAID5_RA] = SEG_TYPE_NAME_RAID5_RA,
+ [LV_TYPE_RAID5_LS] = SEG_TYPE_NAME_RAID5_LS,
+ [LV_TYPE_RAID5_RS] = SEG_TYPE_NAME_RAID5_RS,
+ [LV_TYPE_RAID6] = SEG_TYPE_NAME_RAID6,
+ [LV_TYPE_RAID6_ZR] = SEG_TYPE_NAME_RAID6_ZR,
+ [LV_TYPE_RAID6_NR] = SEG_TYPE_NAME_RAID6_NR,
+ [LV_TYPE_RAID6_NC] = SEG_TYPE_NAME_RAID6_NC,
+ [LV_TYPE_LOCKD] = "lockd",
+ [LV_TYPE_SANLOCK] = "sanlock",
+ [LV_TYPE_CACHEVOL] = "cachevol",
+ [LV_TYPE_WRITECACHE] = "writecache",
+ [LV_TYPE_WRITECACHEORIGIN] = "writecacheorigin",
+ [LV_TYPE_INTEGRITY] = "integrity",
+ [LV_TYPE_INTEGRITYORIGIN] = "integrityorigin",
+};
+
+static int _lv_layout_and_role_mirror(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ int top_level = 0;
+
+ /* non-top-level LVs */
+ if (lv_is_mirror_image(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
+ goto_bad;
+ } else if (lv_is_mirror_log(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MIRROR]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_LOG]))
+ goto_bad;
+ if (lv_is_mirrored(lv) &&
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
+ goto_bad;
+ } else if (lv_is_pvmove(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
+ goto_bad;
+ } else
+ top_level = 1;
+
+ if (!top_level) {
+ *public_lv = 0;
+ return 1;
+ }
+
+ /* top-level LVs */
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_MIRROR]))
+ goto_bad;
+
+ return 1;
+bad:
+ return 0;
+}
+
+static int _lv_layout_and_role_raid(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ int top_level = 0;
+ const struct segment_type *segtype;
+
+ /* non-top-level LVs */
+ if (lv_is_raid_image(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_IMAGE]))
+ goto_bad;
+ } else if (lv_is_raid_metadata(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_RAID]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
+ goto_bad;
+ } else if (lv_is_pvmove(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_PVMOVE]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
+ goto_bad;
+ } else
+ top_level = 1;
+
+ if (!top_level) {
+ *public_lv = 0;
+ return 1;
+ }
+
+ /* top-level LVs */
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID]))
+ goto_bad;
+
+ segtype = first_seg(lv)->segtype;
+
+ if (segtype_is_raid0(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID0]))
+ goto_bad;
+ } else if (segtype_is_raid1(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID1]))
+ goto_bad;
+ } else if (segtype_is_raid10(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID10]))
+ goto_bad;
+ } else if (segtype_is_raid4(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID4]))
+ goto_bad;
+ } else if (segtype_is_any_raid5(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5]))
+ goto_bad;
+
+ if (segtype_is_raid5_la(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LA]))
+ goto_bad;
+ } else if (segtype_is_raid5_ra(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RA]))
+ goto_bad;
+ } else if (segtype_is_raid5_ls(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_LS]))
+ goto_bad;
+ } else if (segtype_is_raid5_rs(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID5_RS]))
+ goto_bad;
+ }
+ } else if (segtype_is_any_raid6(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6]))
+ goto_bad;
+
+ if (segtype_is_raid6_zr(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_ZR]))
+ goto_bad;
+ } else if (segtype_is_raid6_nr(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NR]))
+ goto_bad;
+ } else if (segtype_is_raid6_nc(segtype)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_RAID6_NC]))
+ goto_bad;
+ }
+ }
+
+ return 1;
+bad:
+ return 0;
+}
+
+static int _lv_layout_and_role_thin(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ int top_level = 0;
+ unsigned snap_count;
+
+ /* non-top-level LVs */
+ if (lv_is_thin_pool_metadata(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
+ goto_bad;
+ } else if (lv_is_thin_pool_data(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
+ goto_bad;
+ } else
+ top_level = 1;
+
+ if (!top_level) {
+ *public_lv = 0;
+ return 1;
+ }
+
+ /* top-level LVs */
+ if (lv_is_thin_volume(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
+ goto_bad;
+ if (lv_is_thin_origin(lv, &snap_count)) {
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINORIGIN]))
+ goto_bad;
+ if (snap_count > 1 &&
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHINORIGIN]))
+ goto_bad;
+ }
+ if (lv_is_thin_snapshot(lv))
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THINSNAPSHOT]))
+ goto_bad;
+ } else if (lv_is_thin_pool(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_THIN]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
+ goto_bad;
+ *public_lv = 0;
+ }
+
+ if (lv_is_external_origin(lv)) {
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_EXTTHINORIGIN]))
+ goto_bad;
+ if (lv->external_count > 1 &&
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTIEXTTHINORIGIN]))
+ goto_bad;
+ }
+
+ return 1;
+bad:
+ return 0;
+}
+
+static int _lv_layout_and_role_cache(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ int top_level = 0;
+
+ /* non-top-level LVs */
+ if (lv_is_cache_pool_metadata(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
+ goto_bad;
+ } else if (lv_is_cache_pool_data(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
+ goto_bad;
+ if (lv_is_cache(lv) &&
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
+ goto_bad;
+ } else if (lv_is_cache_origin(lv)) {
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_CACHE]) ||
+ !str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_CACHEORIGIN]))
+ goto_bad;
+ if (lv_is_cache(lv) &&
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
+ goto_bad;
+ } else if (lv_is_writecache_origin(lv)) {
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_WRITECACHE]) ||
+ !str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_WRITECACHEORIGIN]))
+ goto_bad;
+ if (lv_is_writecache(lv) &&
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]))
+ goto_bad;
+ } else
+ top_level = 1;
+
+ if (!top_level) {
+ *public_lv = 0;
+ return 1;
+ }
+
+ /* top-level LVs */
+ if (lv_is_cache(lv) &&
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]))
+ goto_bad;
+ else if (lv_is_writecache(lv) &&
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]))
+ goto_bad;
+ else if (lv_is_writecache_cachevol(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_WRITECACHE]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHEVOL]))
+ goto_bad;
+ *public_lv = 0;
+ } else if (lv_is_cache_vol(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHEVOL]))
+ goto_bad;
+ *public_lv = 0;
+ } else if (lv_is_cache_pool(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_CACHE]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
+ goto_bad;
+ *public_lv = 0;
+ }
+
+ return 1;
+bad:
+ return 0;
+}
+
+static int _lv_layout_and_role_integrity(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ int top_level = 0;
+
+ /* non-top-level LVs */
+ if (lv_is_integrity_metadata(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITY]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_METADATA]))
+ goto_bad;
+ } else if (lv_is_integrity_origin(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITY]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_INTEGRITYORIGIN]))
+ goto_bad;
+ } else
+ top_level = 1;
+
+ if (!top_level) {
+ *public_lv = 0;
+ return 1;
+ }
+
+ /* top-level LVs */
+ if (lv_is_integrity(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_INTEGRITY]))
+ goto_bad;
+ }
+
+ return 1;
+bad:
+ return 0;
+}
+
+static int _lv_layout_and_role_thick_origin_snapshot(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ if (lv_is_origin(lv)) {
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_ORIGIN]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKORIGIN]))
+ goto_bad;
+ /*
+ * Thin volumes are also marked with virtual flag, but we don't show "virtual"
+ * layout for thin LVs as they have their own keyword for layout - "thin"!
+ * So rule thin LVs out here!
+ */
+ if (lv_is_virtual(lv) && !lv_is_thin_volume(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VIRTUAL]))
+ goto_bad;
+ *public_lv = 0;
+ }
+ if (lv->origin_count > 1 &&
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_MULTITHICKORIGIN]))
+ goto_bad;
+ } else if (lv_is_cow(lv)) {
+ if (!str_list_add(mem, role, _lv_type_names[LV_TYPE_SNAPSHOT]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_THICKSNAPSHOT]))
+ goto_bad;
+ }
+
+ return 1;
+bad:
+ return 0;
+}
+
+static int _lv_layout_and_role_vdo(struct dm_pool *mem,
+ const struct logical_volume *lv,
+ struct dm_list *layout,
+ struct dm_list *role,
+ int *public_lv)
+{
+ int top_level = 0;
+
+ /* non-top-level LVs */
+ if (lv_is_vdo_pool(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VDO]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_POOL]))
+ goto_bad;
+ } else if (lv_is_vdo_pool_data(lv)) {
+ if (!str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_VDO]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_POOL]) ||
+ !str_list_add_no_dup_check(mem, role, _lv_type_names[LV_TYPE_DATA]))
+ goto_bad;
+ } else
+ top_level = 1;
+
+ if (!top_level) {
+ *public_lv = 0;
+ return 1;
+ }
+
+ /* top-level LVs */
+ if (lv_is_vdo(lv)) {
+ if (!str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_VDO]) ||
+ !str_list_add_no_dup_check(mem, layout, _lv_type_names[LV_TYPE_SPARSE]))
+ goto_bad;
+ }
+
+ return 1;
+bad:
+ return 0;
+}
+
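+/*
+ * Example: a thin snapshot volume ends up with layout "thin,sparse"
+ * and role "public,snapshot,thinsnapshot".
+ */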
+int lv_layout_and_role(struct dm_pool *mem, const struct logical_volume *lv,
+ struct dm_list **layout, struct dm_list **role)
+{
+ int linear, striped;
+ struct lv_segment *seg;
+ int public_lv = 1;
+
+ *layout = *role = NULL;
+
+ if (!(*layout = str_list_create(mem))) {
+ log_error("LV layout list allocation failed");
+ return 0;
+ }
+
+ if (!(*role = str_list_create(mem))) {
+ log_error("LV role list allocation failed");
+ goto bad;
+ }
+
+ if (lv_is_historical(lv)) {
+ if (!str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_NONE]) ||
+ !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_HISTORY]))
+ goto_bad;
+ }
+
+ /* Mirrors and related */
+ if ((lv_is_mirror_type(lv) || lv_is_pvmove(lv)) &&
+ !_lv_layout_and_role_mirror(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ /* RAIDs and related */
+ if (lv_is_raid_type(lv) &&
+ !_lv_layout_and_role_raid(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ /* Thins and related */
+ if ((lv_is_thin_type(lv) || lv_is_external_origin(lv)) &&
+ !_lv_layout_and_role_thin(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ /* Caches and related */
+ if ((lv_is_cache_type(lv) || lv_is_cache_origin(lv) || lv_is_writecache(lv) || lv_is_writecache_origin(lv)) &&
+ !_lv_layout_and_role_cache(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ /* Integrity related */
+ if ((lv_is_integrity(lv) || lv_is_integrity_origin(lv) || lv_is_integrity_metadata(lv)) &&
+ !_lv_layout_and_role_integrity(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ /* VDO and related */
+ if (lv_is_vdo_type(lv) &&
+ !_lv_layout_and_role_vdo(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ /* Pool-specific */
+ if (lv_is_pool_metadata_spare(lv)) {
+ if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_POOL]) ||
+ !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SPARE]))
+ goto_bad;
+ public_lv = 0;
+ }
+
+ /* Old-style origins/snapshots, virtual origins */
+ if (!_lv_layout_and_role_thick_origin_snapshot(mem, lv, *layout, *role, &public_lv))
+ goto_bad;
+
+ if (lv_is_lockd_sanlock_lv(lv)) {
+ if (!str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_LOCKD]) ||
+ !str_list_add_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_SANLOCK]))
+ goto_bad;
+ public_lv = 0;
+ }
+
+ /*
+ * If layout not yet determined, it must be either
+ * linear or striped or mixture of these two.
+ */
+ if (dm_list_empty(*layout)) {
+ linear = striped = 0;
+ dm_list_iterate_items(seg, &lv->segments) {
+ if (seg_is_linear(seg))
+ linear = 1;
+ else if (seg_is_striped(seg))
+ striped = 1;
+ else {
+ /*
+ * This should not happen, but if it does
+ * we'll see that an "unknown" layout is
+ * present. This means we forgot to detect
+ * the role above and need to add proper
+ * detection for it!
+ */
+ log_warn(INTERNAL_ERROR "WARNING: Failed to properly detect "
+ "layout and role for LV %s/%s.",
+ lv->vg->name, lv->name);
+ }
+ }
+
+ if (linear &&
+ !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_LINEAR]))
+ goto_bad;
+
+ if (striped &&
+ !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_STRIPED]))
+ goto_bad;
+
+ if (!linear && !striped &&
+ !str_list_add_no_dup_check(mem, *layout, _lv_type_names[LV_TYPE_UNKNOWN]))
+ goto_bad;
+ }
+
+ /* finally, add either 'public' or 'private' role to the LV */
+ if (public_lv) {
+ if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PUBLIC]))
+ goto_bad;
+ } else {
+ if (!str_list_add_h_no_dup_check(mem, *role, _lv_type_names[LV_TYPE_PRIVATE]))
+ goto_bad;
+ }
+
+ return 1;
+bad:
+ dm_pool_free(mem, *layout);
+
+ return 0;
+}
+struct dm_list_and_mempool {
+ struct dm_list *list;
+ struct dm_pool *mem;
+};
+static int _get_pv_list_for_lv(struct logical_volume *lv, void *data)
+{
+ int dup_found;
+ uint32_t s;
+ struct pv_list *pvl;
+ struct lv_segment *seg;
+ struct dm_list *pvs = ((struct dm_list_and_mempool *)data)->list;
+ struct dm_pool *mem = ((struct dm_list_and_mempool *)data)->mem;
+
+ dm_list_iterate_items(seg, &lv->segments) {
+ for (s = 0; s < seg->area_count; s++) {
+ dup_found = 0;
+
+ if (seg_type(seg, s) != AREA_PV)
+ continue;
+
+ /* do not add duplicates */
+ dm_list_iterate_items(pvl, pvs)
+ if (pvl->pv == seg_pv(seg, s))
+ dup_found = 1;
+
+ if (dup_found)
+ continue;
+
+ if (!(pvl = dm_pool_zalloc(mem, sizeof(*pvl)))) {
+ log_error("Failed to allocate memory");
+ return 0;
+ }
+
+ pvl->pv = seg_pv(seg, s);
+ log_debug_metadata(" %s/%s uses %s", lv->vg->name,
+ lv->name, pv_dev_name(pvl->pv));
+
+ dm_list_add(pvs, &pvl->list);
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * get_pv_list_for_lv
+ * @mem - mempool to allocate the list from.
+ * @lv
+ * @pvs - The list to add pv_list items to.
+ *
+ * 'pvs' is filled with 'pv_list' items for PVs that compose the LV.
+ * If the 'pvs' list already has items in it, duplicates will not be
+ * added. So, it is safe to repeatedly call this function for different
+ * LVs and build up a list of PVs for them all.
+ *
+ * Memory to create the list is obtained from the mempool provided.
+ *
+ * Returns: 1 on success, 0 on error
+ */
+int get_pv_list_for_lv(struct dm_pool *mem,
+ struct logical_volume *lv, struct dm_list *pvs)
+{
+ struct dm_list_and_mempool context = { pvs, mem };
+
+ log_debug_metadata("Generating list of PVs that %s/%s uses:",
+ lv->vg->name, lv->name);
+
+ if (!_get_pv_list_for_lv(lv, &context))
+ return_0;
+
+ return for_each_sub_lv(lv, &_get_pv_list_for_lv, &context);
+}
+
+/*
+ * get_default_region_size
+ * @cmd
+ *
+ * 'mirror_region_size' and 'raid_region_size' are effectively the same thing.
+ * However, "raid" is more inclusive than "mirror", so the name has been
+ * changed. This function checks for the old setting and warns the user if
+ * it is being overridden by the new setting (i.e. warn if both settings are
+ * present).
+ *
+ * Note that the config files give defaults in kiB terms, but we
+ * return the value in terms of sectors.
+ *
+ * Returns: default region_size in sectors
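+ *
+ * (e.g. raid_region_size=2048 in kiB terms yields 2 * 2048 = 4096 sectors)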
+ */
+static int _get_default_region_size(struct cmd_context *cmd)
+{
+ int mrs, rrs;
+
+ /*
+ * 'mirror_region_size' is the old setting. It is overridden
+ * by the new setting, 'raid_region_size'.
+ */
+ mrs = 2 * find_config_tree_int(cmd, activation_mirror_region_size_CFG, NULL);
+ rrs = 2 * find_config_tree_int(cmd, activation_raid_region_size_CFG, NULL);
+
+ if (!mrs && !rrs)
+ return DEFAULT_RAID_REGION_SIZE * 2;
+
+ if (!mrs)
+ return rrs;
+
+ if (!rrs)
+ return mrs;
+
+ if (mrs != rrs)
+ log_verbose("Overriding default 'mirror_region_size' setting"
+ " with 'raid_region_size' setting of %u kiB",
+ rrs / 2);
+
+ return rrs;
+}
+
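+/*
+ * Example: r = 100 (0b1100100) becomes 0b1111111 after the shifts;
+ * r & ~(r >> 1) then leaves only the leftmost set bit: 0b1000000 = 64.
+ */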
+static int _round_down_pow2(int r)
+{
+ /* Set all bits to the right of the leftmost set bit */
+ r |= (r >> 1);
+ r |= (r >> 2);
+ r |= (r >> 4);
+ r |= (r >> 8);
+ r |= (r >> 16);
+
+ /* Pull out the leftmost set bit */
+ return r & ~(r >> 1);
+}
+
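+/*
+ * Example: a configured 3072 sectors is not a power of 2 and is rounded
+ * down to 2048 sectors (1024 kiB); with 4 KiB pages any value not
+ * divisible by 8 sectors falls back to the default region size.
+ */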
+uint32_t get_default_region_size(struct cmd_context *cmd)
+{
+ int pagesize = lvm_getpagesize();
+ int region_size = _get_default_region_size(cmd);
+
+ if (!is_power_of_2(region_size)) {
+ region_size = _round_down_pow2(region_size);
+ log_verbose("Reducing region size to %u kiB (power of 2).",
+ region_size / 2);
+ }
+
+ if (region_size % (pagesize >> SECTOR_SHIFT)) {
+ region_size = DEFAULT_RAID_REGION_SIZE * 2;
+ log_verbose("Using default region size %u kiB (multiple of page size).",
+ region_size / 2);
+ }
+
+ return (uint32_t) region_size;
+}
+
int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
struct lv_segment *seg)
{
@@ -84,11 +867,11 @@ int add_seg_to_segs_using_this_lv(struct logical_volume *lv,
}
}
- log_very_verbose("Adding %s:%" PRIu32 " as an user of %s",
- seg->lv->name, seg->le, lv->name);
+ log_very_verbose("Adding %s:" FMTu32 " as an user of %s.",
+ display_lvname(seg->lv), seg->le, display_lvname(lv));
if (!(sl = dm_pool_zalloc(lv->vg->vgmem, sizeof(*sl)))) {
- log_error("Failed to allocate segment list");
+ log_error("Failed to allocate segment list.");
return 0;
}
@@ -110,14 +893,16 @@ int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
if (sl->count > 1)
sl->count--;
else {
- log_very_verbose("%s:%" PRIu32 " is no longer a user "
- "of %s", seg->lv->name, seg->le,
- lv->name);
+ log_very_verbose("%s:" FMTu32 " is no longer a user of %s.",
+ display_lvname(seg->lv), seg->le,
+ display_lvname(lv));
dm_list_del(&sl->list);
}
return 1;
}
+ log_error(INTERNAL_ERROR "Segment %s:" FMTu32 " is not a user of %s.",
+ display_lvname(seg->lv), seg->le, display_lvname(lv));
return 0;
}
@@ -128,28 +913,35 @@ int remove_seg_from_segs_using_this_lv(struct logical_volume *lv,
*
* In general, walk through lv->segs_using_this_lv.
*/
-struct lv_segment *get_only_segment_using_this_lv(struct logical_volume *lv)
+struct lv_segment *get_only_segment_using_this_lv(const struct logical_volume *lv)
{
struct seg_list *sl;
- if (dm_list_size(&lv->segs_using_this_lv) != 1) {
- log_error("%s is expected to have only one segment using it, "
- "while it has %d", lv->name,
- dm_list_size(&lv->segs_using_this_lv));
+ if (!lv) {
+ log_error(INTERNAL_ERROR "get_only_segment_using_this_lv() called with NULL LV.");
return NULL;
}
- dm_list_iterate_items(sl, &lv->segs_using_this_lv)
- break; /* first item */
+ dm_list_iterate_items(sl, &lv->segs_using_this_lv) {
+ /* Needs to be the only item in the list */
+ if (!dm_list_end(&lv->segs_using_this_lv, &sl->list))
+ break;
- if (sl->count != 1) {
- log_error("%s is expected to have only one segment using it, "
- "while %s:%" PRIu32 " uses it %d times",
- lv->name, sl->seg->lv->name, sl->seg->le, sl->count);
- return NULL;
+ if (sl->count != 1) {
+ log_error("%s is expected to have only one segment using it, "
+ "while %s:" FMTu32 " uses it %d times.",
+ display_lvname(lv), display_lvname(sl->seg->lv),
+ sl->seg->le, sl->count);
+ return NULL;
+ }
+
+ return sl->seg;
}
- return sl->seg;
+ log_error("%s is expected to have only one segment using it, while it has %d.",
+ display_lvname(lv), dm_list_size(&lv->segs_using_this_lv));
+
+ return NULL;
}
/*
@@ -199,18 +991,59 @@ uint32_t find_free_lvnum(struct logical_volume *lv)
return i;
}
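+/*
+ * Example: an LV with a single mirrored segment of area_len 100 and
+ * extents_copied 60 reports 60% copied.
+ */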
+dm_percent_t copy_percent(const struct logical_volume *lv)
+{
+ uint32_t numerator = 0u, denominator = 0u;
+ struct lv_segment *seg;
+
+ dm_list_iterate_items(seg, &lv->segments) {
+ denominator += seg->area_len;
+
+ /* FIXME Generalise name of 'extents_copied' field */
+ if (((seg_is_raid(seg) && !seg_is_any_raid0(seg)) || seg_is_mirrored(seg)) &&
+ (seg->area_count > 1))
+ numerator += seg->extents_copied;
+ else
+ numerator += seg->area_len;
+ }
+
+ return denominator ? dm_make_percent(numerator, denominator) : DM_PERCENT_100;
+}
+
+/* Round up extents to next stripe boundary for number of stripes */
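+/* e.g. 100 extents on 3 stripes round up to 102 when extending, down to 99 when reducing */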
+static uint32_t _round_to_stripe_boundary(struct volume_group *vg, uint32_t extents,
+ uint32_t stripes, int extend)
+{
+ uint32_t size_rest, new_extents = extents;
+
+ if (!stripes)
+ return extents;
+
+ /* Round up extents to stripe divisible amount */
+ if ((size_rest = extents % stripes)) {
+ new_extents += extend ? stripes - size_rest : -size_rest;
+ log_print_unless_silent("Rounding size %s (%u extents) %s to stripe boundary size %s (%u extents).",
+ display_size(vg->cmd, (uint64_t) extents * vg->extent_size), extents,
+ new_extents < extents ? "down" : "up",
+ display_size(vg->cmd, (uint64_t) new_extents * vg->extent_size), new_extents);
+ }
+
+ return new_extents;
+}
+
/*
* All lv_segments get created here.
*/
struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
struct logical_volume *lv,
uint32_t le, uint32_t len,
+ uint32_t reshape_len,
uint64_t status,
uint32_t stripe_size,
struct logical_volume *log_lv,
- struct logical_volume *thin_pool_lv,
uint32_t area_count,
uint32_t area_len,
+ uint32_t data_copies,
uint32_t chunk_size,
uint32_t region_size,
uint32_t extents_copied,
@@ -233,7 +1066,7 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
return_NULL;
}
- if (segtype_is_raid(segtype) &&
+ if (segtype_is_raid_with_meta(segtype) &&
!(seg->meta_areas = dm_pool_zalloc(mem, areas_sz))) {
dm_pool_free(mem, seg); /* frees everything alloced since seg */
return_NULL;
@@ -243,65 +1076,70 @@ struct lv_segment *alloc_lv_segment(const struct segment_type *segtype,
seg->lv = lv;
seg->le = le;
seg->len = len;
+ seg->reshape_len = reshape_len;
seg->status = status;
seg->stripe_size = stripe_size;
seg->area_count = area_count;
seg->area_len = area_len;
+ seg->data_copies = data_copies ? : lv_raid_data_copies(segtype, area_count);
seg->chunk_size = chunk_size;
seg->region_size = region_size;
seg->extents_copied = extents_copied;
seg->pvmove_source_seg = pvmove_source_seg;
dm_list_init(&seg->tags);
+ dm_list_init(&seg->origin_list);
dm_list_init(&seg->thin_messages);
- if (thin_pool_lv) {
- /* If this thin volume, thin snapshot is being created */
- if (lv_is_thin_volume(thin_pool_lv)) {
- seg->transaction_id = first_seg(first_seg(thin_pool_lv)->pool_lv)->transaction_id;
- if (!attach_pool_lv(seg, first_seg(thin_pool_lv)->pool_lv, thin_pool_lv))
- return_NULL;
- } else {
- seg->transaction_id = first_seg(thin_pool_lv)->transaction_id;
- if (!attach_pool_lv(seg, thin_pool_lv, NULL))
- return_NULL;
- }
- }
-
if (log_lv && !attach_mirror_log(seg, log_lv))
return_NULL;
+ if (segtype_is_mirror(segtype))
+ lv->status |= MIRROR;
+
+ if (segtype_is_mirrored(segtype))
+ lv->status |= MIRRORED;
+
return seg;
}
-struct lv_segment *alloc_snapshot_seg(struct logical_volume *lv,
- uint64_t status, uint32_t old_le_count)
+/*
+ * Temporary helper to return the number of data copies for
+ * RAID segment @seg until seg->data_copies gets added
+ */
+static uint32_t _raid_data_copies(struct lv_segment *seg)
{
- struct lv_segment *seg;
- const struct segment_type *segtype;
+ /*
+ * FIXME: needs to change once more than 2 are supported.
+ * I.e. use seg->data_copies then
+ */
+ if (seg_is_raid10(seg))
+ return 2;
- segtype = get_segtype_from_string(lv->vg->cmd, "snapshot");
- if (!segtype) {
- log_error("Failed to find snapshot segtype");
- return NULL;
- }
+ if (seg_is_raid1(seg))
+ return seg->area_count;
- if (!(seg = alloc_lv_segment(segtype, lv, old_le_count,
- lv->le_count - old_le_count, status, 0,
- NULL, NULL, 0, lv->le_count - old_le_count,
- 0, 0, 0, NULL))) {
- log_error("Couldn't allocate new snapshot segment.");
- return NULL;
- }
+ return seg->segtype->parity_devs + 1;
+}
- dm_list_add(&lv->segments, &seg->list);
- lv->status |= VIRTUAL;
+/* Data image count for RAID segment @seg */
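+/* e.g. 2 stripes for 4-area raid10 (2 data copies), 3 for 5-area raid6 */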
+static uint32_t _raid_stripes_count(struct lv_segment *seg)
+{
+ /*
+ * FIXME: raid10 needs to change once more than
+ * 2 data_copies and odd # of legs supported.
+ */
+ if (seg_is_raid10(seg))
+ return seg->area_count / _raid_data_copies(seg);
- return seg;
+ return seg->area_count - seg->segtype->parity_devs;
}
static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t s,
uint32_t area_reduction, int with_discard)
{
+ struct lv_segment *cache_seg;
+ struct logical_volume *lv = seg_lv(seg, s);
+
if (seg_type(seg, s) == AREA_UNASSIGNED)
return 1;
@@ -318,55 +1156,92 @@ static int _release_and_discard_lv_segment_area(struct lv_segment *seg, uint32_t
return 1;
}
- if ((seg_lv(seg, s)->status & MIRROR_IMAGE) ||
- (seg_lv(seg, s)->status & THIN_POOL_DATA)) {
- if (!lv_reduce(seg_lv(seg, s), area_reduction))
+ if (lv_is_mirror_image(lv) ||
+ lv_is_thin_pool_data(lv) ||
+ lv_is_vdo_pool_data(lv) ||
+ lv_is_cache_pool_data(lv)) {
+ if (!lv_reduce(lv, area_reduction))
return_0; /* FIXME: any upper level reporting */
return 1;
}
- if (seg_lv(seg, s)->status & RAID_IMAGE) {
- /*
- * FIXME: Use lv_reduce not lv_remove
- * We use lv_remove for now, because I haven't figured out
- * why lv_reduce won't remove the LV.
- lv_reduce(seg_lv(seg, s), area_reduction);
- */
- if (area_reduction != seg->area_len) {
- log_error("Unable to reduce RAID LV - operation not implemented.");
+ if (seg_is_cache_pool(seg) &&
+ !dm_list_empty(&seg->lv->segs_using_this_lv)) {
+ if (!(cache_seg = get_only_segment_using_this_lv(seg->lv)))
return_0;
- } else {
- if (!lv_remove(seg_lv(seg, s))) {
- log_error("Failed to remove RAID image %s",
- seg_lv(seg, s)->name);
- return 0;
- }
- }
- /* Remove metadata area if image has been removed */
- if (area_reduction == seg->area_len) {
- if (!lv_reduce(seg_metalv(seg, s),
- seg_metalv(seg, s)->le_count)) {
- log_error("Failed to remove RAID meta-device %s",
- seg_metalv(seg, s)->name);
+ if (!lv_cache_remove(cache_seg->lv))
+ return_0;
+ }
+
+ if (lv_is_raid_image(lv)) {
+ /* Calculate the number of extents to reduce per rmeta/rimage LV */
+ uint32_t rimage_extents;
+ struct lv_segment *seg1 = first_seg(lv);
+
+ /* FIXME: avoid extra seg_is_*() conditionals here */
+ rimage_extents = raid_rimage_extents(seg1->segtype, area_reduction,
+ seg_is_any_raid0(seg) ? 0 : _raid_stripes_count(seg),
+ seg_is_raid10(seg) ? 1 : _raid_data_copies(seg));
+ if (!rimage_extents)
+ return 0;
+
+ if (seg->meta_areas) {
+ uint32_t meta_area_reduction;
+ struct logical_volume *mlv;
+ struct volume_group *vg = lv->vg;
+
+ if (seg_metatype(seg, s) != AREA_LV ||
+ !(mlv = seg_metalv(seg, s)))
return 0;
- }
+
+ meta_area_reduction = raid_rmeta_extents_delta(vg->cmd, lv->le_count, lv->le_count - rimage_extents,
+ seg->region_size, vg->extent_size);
+ /* Limit for raid0_meta not having region size set */
+ if (meta_area_reduction > mlv->le_count ||
+ !(lv->le_count - rimage_extents))
+ meta_area_reduction = mlv->le_count;
+
+ if (meta_area_reduction &&
+ !lv_reduce(mlv, meta_area_reduction))
+ return_0; /* FIXME: any upper level reporting */
}
+
+ if (!lv_reduce(lv, rimage_extents))
+ return_0; /* FIXME: any upper level reporting */
+
return 1;
}
if (area_reduction == seg->area_len) {
- log_very_verbose("Remove %s:%" PRIu32 "[%" PRIu32 "] from "
- "the top of LV %s:%" PRIu32,
- seg->lv->name, seg->le, s,
- seg_lv(seg, s)->name, seg_le(seg, s));
+ log_very_verbose("Remove %s:" FMTu32 "[" FMTu32 "] from "
+ "the top of LV %s:" FMTu32 ".",
+ display_lvname(seg->lv), seg->le, s,
+ display_lvname(lv), seg_le(seg, s));
+
+ if (!remove_seg_from_segs_using_this_lv(lv, seg))
+ return_0;
- remove_seg_from_segs_using_this_lv(seg_lv(seg, s), seg);
seg_lv(seg, s) = NULL;
seg_le(seg, s) = 0;
seg_type(seg, s) = AREA_UNASSIGNED;
}
+ /* When the last VDO user is removed, the VDO pool is removed automatically */
+ if (lv_is_vdo_pool(lv) && dm_list_empty(&(lv->segs_using_this_lv))) {
+ struct volume_group *vg = lv->vg;
+
+ if (!lv_remove(lv)) /* FIXME: any upper level reporting */
+ return_0;
+
+ if (vg_is_shared(vg)) {
+ if (!lockd_lv_name(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args, "un", LDLV_PERSISTENT))
+ log_error("Failed to unlock vdo pool in lvmlockd.");
+ lockd_free_lv(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args);
+ }
+ return 1;
+ }
+
return 1;
}
@@ -451,14 +1326,21 @@ int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
struct logical_volume *lv, uint32_t le,
uint64_t status)
{
- log_very_verbose("Stack %s:%" PRIu32 "[%" PRIu32 "] on LV %s:%" PRIu32,
- seg->lv->name, seg->le, area_num, lv->name, le);
+ log_very_verbose("Stack %s:" FMTu32 "[" FMTu32 "] on LV %s:" FMTu32 ".",
+ display_lvname(seg->lv), seg->le, area_num,
+ display_lvname(lv), le);
- if (status & RAID_META) {
+ if (area_num >= seg->area_count) {
+ log_error(INTERNAL_ERROR "Try to set to high area number (%u >= %u) for LV %s.",
+ area_num, seg->area_count, display_lvname(seg->lv));
+ return 0;
+ }
+ lv->status |= status;
+ if (lv_is_raid_metadata(lv)) {
seg->meta_areas[area_num].type = AREA_LV;
seg_metalv(seg, area_num) = lv;
if (le) {
- log_error(INTERNAL_ERROR "Meta le != 0");
+ log_error(INTERNAL_ERROR "Meta le != 0.");
return 0;
}
seg_metale(seg, area_num) = 0;
@@ -467,7 +1349,6 @@ int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
seg_lv(seg, area_num) = lv;
seg_le(seg, area_num) = le;
}
- lv->status |= status;
if (!add_seg_to_segs_using_this_lv(lv, seg))
return_0;
@@ -478,17 +1359,19 @@ int set_lv_segment_area_lv(struct lv_segment *seg, uint32_t area_num,
/*
* Prepare for adding parallel areas to an existing segment.
*/
-static int _lv_segment_add_areas(struct logical_volume *lv,
- struct lv_segment *seg,
- uint32_t new_area_count)
+int add_lv_segment_areas(struct lv_segment *seg, uint32_t new_area_count)
{
struct lv_segment_area *newareas;
uint32_t areas_sz = new_area_count * sizeof(*newareas);
- if (!(newareas = dm_pool_zalloc(lv->vg->cmd->mem, areas_sz)))
- return_0;
+ if (!(newareas = dm_pool_zalloc(seg->lv->vg->vgmem, areas_sz))) {
+ log_error("Failed to allocate widened LV segment for %s.",
+ display_lvname(seg->lv));
+ return 0;
+ }
- memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));
+ if (seg->area_count)
+ memcpy(newareas, seg->areas, seg->area_count * sizeof(*seg->areas));
seg->areas = newareas;
seg->area_count = new_area_count;
@@ -496,22 +1379,68 @@ static int _lv_segment_add_areas(struct logical_volume *lv,
return 1;
}
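+/*
+ * Example: raid5 with area_count 4 (3 stripes + 1 parity) gives an
+ * area_multiple of 3, i.e. seg->len = 3 * area_len; plain raid1 gives 1.
+ */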
+static uint32_t _calc_area_multiple(const struct segment_type *segtype,
+ const uint32_t area_count,
+ const uint32_t stripes)
+{
+ if (!area_count)
+ return 1;
+
+ /* Striped */
+ if (segtype_is_striped(segtype))
+ return area_count;
+
+ /* Parity RAID (e.g. RAID 4/5/6) */
+ if (segtype_is_raid(segtype) && segtype->parity_devs) {
+ /*
+ * As articulated in _alloc_init, we can tell by
+ * the area_count whether a replacement drive is
+ * being allocated; and if this is the case, then
+ * there is no area_multiple that should be used.
+ */
+ if (area_count <= segtype->parity_devs)
+ return 1;
+
+ return area_count - segtype->parity_devs;
+ }
+
+ /*
+ * RAID10 - only has 2-way mirror right now.
+ * If we are to move beyond 2-way RAID10, then
+ * the 'stripes' argument will always need to
+ * be given.
+ */
+ if (segtype_is_raid10(segtype)) {
+ if (!stripes)
+ return area_count / 2;
+ return stripes;
+ }
+
+ /* Mirrored stripes */
+ if (stripes)
+ return stripes;
+
+ /* Mirrored */
+ return 1;
+}
+
/*
* Reduce the size of an lv_segment. New size can be zero.
*/
static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
{
uint32_t area_reduction, s;
+ uint32_t areas = (seg->area_count / (seg_is_raid10(seg) ? seg->data_copies : 1)) - seg->segtype->parity_devs;
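+ /* e.g. 5 data areas for 7-area raid6, 2 for 4-area raid10 with 2 data copies */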
/* Caller must ensure exact divisibility */
- if (seg_is_striped(seg)) {
- if (reduction % seg->area_count) {
+ if (seg_is_striped(seg) || seg_is_striped_raid(seg)) {
+ if (reduction % areas) {
log_error("Segment extent reduction %" PRIu32
" not divisible by #stripes %" PRIu32,
reduction, seg->area_count);
return 0;
}
- area_reduction = (reduction / seg->area_count);
+ area_reduction = reduction / areas;
} else
area_reduction = reduction;
@@ -520,7 +1449,77 @@ static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
return_0;
seg->len -= reduction;
- seg->area_len -= area_reduction;
+
+ if (seg_is_raid(seg))
+ seg->area_len = seg->len;
+ else
+ seg->area_len -= area_reduction;
+
+ return 1;
+}
+
+/* Find the bottommost resizable LV in the stack.
+ * For the command-line tool it does not matter which LV in the stack is given. */
+static struct logical_volume *_get_resizable_layer_lv(struct logical_volume *lv)
+{
+ while (lv_is_cache(lv) || /* _corig */
+ lv_is_integrity(lv) ||
+ lv_is_thin_pool(lv) || /* _tdata */
+ lv_is_vdo_pool(lv) || /* _vdata */
+ lv_is_writecache(lv)) /* _worigin */
+ lv = seg_lv(first_seg(lv), 0); /* component-level down */
+
+ return lv;
+}
+
+/* Check if LV is a component of a resizable LV.
+ * When a resize changes the size of an LV, it also changes the size of the whole stack above it.
+ * This is syntactic sugar - the user can pick any LV in the stack for the resize. */
+static int _is_layered_lv(struct logical_volume *lv)
+{
+ return (lv_is_cache_origin(lv) ||
+ lv_is_integrity_origin(lv) ||
+ lv_is_thin_pool_data(lv) ||
+ lv_is_vdo_pool_data(lv) ||
+ lv_is_writecache_origin(lv));
+}
+
+/* Find the topmost LV in the stack - usually such LV is visible. */
+static struct logical_volume *_get_top_layer_lv(struct logical_volume *lv)
+{
+ struct lv_segment *seg;
+
+ while (_is_layered_lv(lv)) {
+ if (!(seg = get_only_segment_using_this_lv(lv))) {
+ log_error(INTERNAL_ERROR "No single component user of logical volume %s.",
+ display_lvname(lv));
+ return NULL;
+ }
+ lv = seg->lv; /* component-level up */
+ }
+
+ return lv;
+}
+
+
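+/*
+ * Example: growing tdata to 200 extents walks up the stack and sets the
+ * thin-pool LV's le_count, seg->len and seg->area_len to 200 as well.
+ */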
+/* Also handles stacking. */
+static int _setup_lv_size(struct logical_volume *lv, uint32_t extents)
+{
+ struct lv_segment *seg;
+
+ lv->le_count = extents;
+ lv->size = (uint64_t) extents * lv->vg->extent_size;
+
+ while (lv->size && _is_layered_lv(lv)) {
+ if (!(seg = get_only_segment_using_this_lv(lv)))
+ return_0;
+
+ seg->lv->le_count =
+ seg->len =
+ seg->area_len = lv->le_count;
+ seg->lv->size = lv->size;
+ lv = seg->lv;
+ }
return 1;
}
@@ -530,15 +1529,42 @@ static int _lv_segment_reduce(struct lv_segment *seg, uint32_t reduction)
*/
static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
{
- struct lv_segment *seg;
+ struct lv_segment *seg = NULL;
uint32_t count = extents;
uint32_t reduction;
+ struct logical_volume *pool_lv;
+ struct logical_volume *external_lv = NULL;
+ int is_raid10 = 0;
+ uint32_t data_copies = 0;
+ struct lv_list *lvl;
+ int is_last_pool = lv_is_pool(lv);
+
+ if (!dm_list_empty(&lv->segments)) {
+ seg = first_seg(lv);
+ is_raid10 = seg_is_any_raid10(seg) && seg->reshape_len;
+ data_copies = seg->data_copies;
+ }
+
+ if (lv_is_merging_origin(lv)) {
+ log_debug_metadata("Dropping snapshot merge of %s to removed origin %s.",
+ find_snapshot(lv)->lv->name, lv->name);
+ clear_snapshot_merge(lv);
+ }
dm_list_iterate_back_items(seg, &lv->segments) {
if (!count)
break;
+ if (seg->external_lv)
+ external_lv = seg->external_lv;
+
if (seg->len <= count) {
+ if (seg->merge_lv) {
+ log_debug_metadata("Dropping snapshot merge of removed %s to origin %s.",
+ seg->lv->name, seg->merge_lv->name);
+ clear_snapshot_merge(seg->merge_lv);
+ }
+
/* remove this segment completely */
/* FIXME Check this is safe */
if (seg->log_lv && !lv_remove(seg->log_lv))
@@ -547,9 +1573,51 @@ static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
if (seg->metadata_lv && !lv_remove(seg->metadata_lv))
return_0;
- if (seg->pool_lv) {
+ /* Remove cache origin only when removing (not on lv_empty()) */
+ if (delete && seg_is_cache(seg)) {
+ if (lv_is_pending_delete(seg->lv)) {
+ /* Just dropping reference on origin when pending delete */
+ if (!remove_seg_from_segs_using_this_lv(seg_lv(seg, 0), seg))
+ return_0;
+ seg_lv(seg, 0) = NULL;
+ seg_le(seg, 0) = 0;
+ seg_type(seg, 0) = AREA_UNASSIGNED;
+ if (seg->pool_lv && !detach_pool_lv(seg))
+ return_0;
+ } else if (!lv_remove(seg_lv(seg, 0)))
+ return_0;
+ }
+
+ if (delete && seg_is_integrity(seg)) {
+ /* Remove integrity origin in addition to integrity layer. */
+ if (!lv_remove(seg_lv(seg, 0)))
+ return_0;
+ /* Remove integrity metadata. */
+ if (seg->integrity_meta_dev && !lv_remove(seg->integrity_meta_dev))
+ return_0;
+ }
+
+ if ((pool_lv = seg->pool_lv)) {
if (!detach_pool_lv(seg))
return_0;
+ /* When removing cached LV, remove pool as well */
+ if (seg_is_cache(seg) && !lv_remove(pool_lv))
+ return_0;
+ }
+
+ if (seg_is_thin_pool(seg)) {
+ /* For some segtypes the segment size may differ from the size of its layered LV,
+ * e.g. thin-pool and tdata.
+ *
+ * This can become useful once we support multiple commits
+ * while resizing a stacked LV.
+ */
+ if (seg->len != seg_lv(seg, 0)->le_count) {
+ seg->len = seg_lv(seg, 0)->le_count;
+ /* FIXME: for now log as an internal error since it should not happen. */
+ log_debug(INTERNAL_ERROR "Pool size does not match data size for %s.",
+ display_lvname(seg->lv));
+ }
}
dm_list_del(&seg->list);
@@ -562,12 +1630,24 @@ static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
count -= reduction;
}
- lv->le_count -= extents;
- lv->size = (uint64_t) lv->le_count * lv->vg->extent_size;
+ if (!_setup_lv_size(lv, lv->le_count - extents * (is_raid10 ? data_copies : 1)))
+ return_0;
+
+ if ((seg = first_seg(lv))) {
+ if (is_raid10)
+ seg->len = seg->area_len = lv->le_count;
+
+ seg->extents_copied = seg->len;
+ }
if (!delete)
return 1;
+ if (lv == lv->vg->pool_metadata_spare_lv) {
+ lv->status &= ~POOL_METADATA_SPARE;
+ lv->vg->pool_metadata_spare_lv = NULL;
+ }
+
/* Remove the LV if it is now empty */
if (!lv->le_count && !unlink_lv_from_vg(lv))
return_0;
@@ -575,6 +1655,34 @@ static int _lv_reduce(struct logical_volume *lv, uint32_t extents, int delete)
!lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
return_0;
+ /* Removal of last user enforces refresh */
+ if (external_lv && !lv_is_external_origin(external_lv) &&
+ lv_is_active(external_lv) &&
+ !lv_update_and_reload(external_lv))
+ return_0;
+
+ /* When removing last pool, automatically drop the spare volume */
+ if (is_last_pool && lv->vg->pool_metadata_spare_lv) {
+ /* TODO: maybe use a list of pools or a counter to avoid linear search through VG */
+ dm_list_iterate_items(lvl, &lv->vg->lvs)
+ if (lv_is_thin_type(lvl->lv) ||
+ lv_is_cache_type(lvl->lv)) {
+ is_last_pool = 0;
+ break;
+ }
+
+ if (is_last_pool) {
+ /* This is a purely internal LV, no question */
+ if (!deactivate_lv(lv->vg->cmd, lv->vg->pool_metadata_spare_lv)) {
+ log_error("Unable to deactivate spare logical volume %s.",
+ display_lvname(lv->vg->pool_metadata_spare_lv));
+ return 0;
+ }
+ if (!lv_remove(lv->vg->pool_metadata_spare_lv))
+ return_0;
+ }
+ }
+
return 1;
}
@@ -592,6 +1700,10 @@ int lv_empty(struct logical_volume *lv)
int replace_lv_with_error_segment(struct logical_volume *lv)
{
uint32_t len = lv->le_count;
+ struct segment_type *segtype;
+
+ if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR)))
+ return_0;
if (len && !lv_empty(lv))
return_0;
@@ -605,29 +1717,149 @@ int replace_lv_with_error_segment(struct logical_volume *lv)
* an error segment, we should also clear any flags
* that suggest it is anything other than "error".
*/
- lv->status &= ~(MIRRORED|PVMOVE);
+ /* FIXME Check for other flags that need removing */
+ lv->status &= ~(MIRROR|MIRRORED|PVMOVE|LOCKED);
- /* FIXME: Should we bug if we find a log_lv attached? */
+ /* FIXME Check for any attached LVs that will become orphans e.g. mirror logs */
- if (!lv_add_virtual_segment(lv, 0, len, get_segtype_from_string(lv->vg->cmd, "error"), NULL))
+ if (!lv_add_virtual_segment(lv, 0, len, segtype))
return_0;
return 1;
}
+static int _lv_refresh_suspend_resume(const struct logical_volume *lv)
+{
+ struct cmd_context *cmd = lv->vg->cmd;
+ int r = 1;
+
+ if (!cmd->partial_activation && lv_is_partial(lv)) {
+ log_error("Refusing refresh of partial LV %s."
+ " Use '--activationmode partial' to override.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (!suspend_lv(cmd, lv)) {
+ log_error("Failed to suspend %s.", display_lvname(lv));
+ r = 0;
+ }
+
+ if (!resume_lv(cmd, lv)) {
+ log_error("Failed to reactivate %s.", display_lvname(lv));
+ r = 0;
+ }
+
+ return r;
+}
+
+int lv_refresh_suspend_resume(const struct logical_volume *lv)
+{
+ if (!_lv_refresh_suspend_resume(lv))
+ return 0;
+
+ /*
+ * Remove any transiently activated error
+ * devices which aren't used any more.
+ */
+ if (lv_is_raid(lv) && !lv_deactivate_any_missing_subdevs(lv)) {
+ log_error("Failed to remove temporary SubLVs from %s", display_lvname(lv));
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* Remove given number of extents from LV.
*/
int lv_reduce(struct logical_volume *lv, uint32_t extents)
{
+ struct lv_segment *seg = first_seg(lv);
+
+ /* Ensure stripe boundary extents on RAID LVs */
+ if (lv_is_raid(lv) && extents != lv->le_count)
+ extents =_round_to_stripe_boundary(lv->vg, extents,
+ seg_is_raid1(seg) ? 0 : _raid_stripes_count(seg), 0);
+
+ if ((extents == lv->le_count) && lv_is_component(lv) && lv_is_active(lv)) {
+ /* When LV is removed, make sure it is inactive */
+ log_error(INTERNAL_ERROR "Removing still active LV %s.", display_lvname(lv));
+ return 0;
+ }
+
return _lv_reduce(lv, extents, 1);
}
+int historical_glv_remove(struct generic_logical_volume *glv)
+{
+ struct generic_logical_volume *origin_glv;
+ struct glv_list *glvl, *user_glvl;
+ struct historical_logical_volume *hlv;
+ int reconnected;
+
+ if (!glv || !glv->is_historical)
+ return_0;
+
+ hlv = glv->historical;
+
+ if (!(glv = find_historical_glv(hlv->vg, hlv->name, 0, &glvl))) {
+ if (!(find_historical_glv(hlv->vg, hlv->name, 1, NULL))) {
+ log_error(INTERNAL_ERROR "historical_glv_remove: historical LV %s/-%s not found ",
+ hlv->vg->name, hlv->name);
+ return 0;
+ }
+
+ log_verbose("Historical LV %s/-%s already on removed list ",
+ hlv->vg->name, hlv->name);
+ return 1;
+ }
+
+ if ((origin_glv = hlv->indirect_origin) &&
+ !remove_glv_from_indirect_glvs(origin_glv, glv))
+ return_0;
+
+ dm_list_iterate_items(user_glvl, &hlv->indirect_glvs) {
+ reconnected = 0;
+ if ((origin_glv && !origin_glv->is_historical) && !user_glvl->glv->is_historical)
+ log_verbose("Removing historical connection between %s and %s.",
+ origin_glv->live->name, user_glvl->glv->live->name);
+ else if (hlv->vg->cmd->record_historical_lvs) {
+ if (!add_glv_to_indirect_glvs(hlv->vg->vgmem, origin_glv, user_glvl->glv))
+ return_0;
+ reconnected = 1;
+ }
+
+ if (!reconnected) {
+ /*
+ * Break ancestry chain if we're removing historical LV and tracking
+ * historical LVs is switched off either via:
+ * - "metadata/record_lvs_history=0" config
+ * - "--nohistory" cmd line option
+ *
+ * Also, break the chain if we're unable to store such connection at all
+ * because we're removing the very last historical LV that was in between
+ * live LVs - pure live LVs can't store any indirect origin relation in
+ * metadata - we need at least one historical LV to do that!
+ */
+ if (user_glvl->glv->is_historical)
+ user_glvl->glv->historical->indirect_origin = NULL;
+ else
+ first_seg(user_glvl->glv->live)->indirect_origin = NULL;
+ }
+ }
+
+ dm_list_move(&hlv->vg->removed_historical_lvs, &glvl->list);
+ return 1;
+}
+
/*
* Completely remove an LV.
*/
int lv_remove(struct logical_volume *lv)
{
+ if (lv_is_historical(lv))
+ return historical_glv_remove(lv->this_glv);
if (!lv_reduce(lv, lv->le_count))
return_0;
@@ -654,9 +1886,10 @@ struct alloc_handle {
struct dm_pool *mem;
alloc_policy_t alloc; /* Overall policy */
+ int approx_alloc; /* get as much as possible up to new_extents */
uint32_t new_extents; /* Number of new extents required */
uint32_t area_count; /* Number of parallel areas */
- uint32_t parity_count; /* Adds to area_count, but not area_multiple */
+ uint32_t parity_count; /* Adds to area_count, but not area_multiple */
uint32_t area_multiple; /* seg->len = area_len * area_multiple */
uint32_t log_area_count; /* Number of parallel logs */
uint32_t metadata_area_count; /* Number of parallel metadata areas */
@@ -673,8 +1906,12 @@ struct alloc_handle {
* that is new_extents + log_len and then split that between two
* allocated areas when found. 'alloc_and_split_meta' indicates
* that this is the desired dynamic.
+ *
+ * This same idea is used by cache LVs to get the metadata device
+ * and data device allocated together.
*/
unsigned alloc_and_split_meta;
+ unsigned split_metadata_is_allocated; /* Metadata has been allocated */
const struct dm_config_node *cling_tag_list_cn;
@@ -684,57 +1921,18 @@ struct alloc_handle {
* Contains area_count lists of areas allocated to data stripes
* followed by log_area_count lists of areas allocated to log stripes.
*/
- struct dm_list alloced_areas[0];
+ struct dm_list alloced_areas[];
};
-static uint32_t _calc_area_multiple(const struct segment_type *segtype,
- const uint32_t area_count, const uint32_t stripes)
-{
- if (!area_count)
- return 1;
-
- /* Striped */
- if (segtype_is_striped(segtype))
- return area_count;
-
- /* Parity RAID (e.g. RAID 4/5/6) */
- if (segtype_is_raid(segtype) && segtype->parity_devs) {
- /*
- * As articulated in _alloc_init, we can tell by
- * the area_count whether a replacement drive is
- * being allocated; and if this is the case, then
- * there is no area_multiple that should be used.
- */
- if (area_count <= segtype->parity_devs)
- return 1;
- return area_count - segtype->parity_devs;
- }
-
- /* RAID10 - only has 2-way mirror right now */
- if (!strcmp(segtype->name, "raid10")) {
- // FIXME: I'd like the 'stripes' arg always given
- if (!stripes)
- return area_count / 2;
- return stripes;
- }
-
- /* Mirrored stripes */
- if (stripes)
- return stripes;
-
- /* Mirrored */
- return 1;
-}
-
/*
* Returns log device size in extents, algorithm from kernel code
*/
#define BYTE_SHIFT 3
-static uint32_t mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
+static uint32_t _mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint32_t area_len)
{
- size_t area_size, bitset_size, log_size, region_count;
+ uint64_t area_size, region_count, bitset_size, log_size;
- area_size = (size_t)area_len * pe_size;
+ area_size = (uint64_t) area_len * pe_size;
region_count = dm_div_up(area_size, region_size);
/* Work out how many "unsigned long"s we need to hold the bitset. */
@@ -746,186 +1944,30 @@ static uint32_t mirror_log_extents(uint32_t region_size, uint32_t pe_size, uint3
log_size >>= SECTOR_SHIFT;
log_size = dm_div_up(log_size, pe_size);
- /*
- * Kernel requires a mirror to be at least 1 region large. So,
- * if our mirror log is itself a mirror, it must be at least
- * 1 region large. This restriction may not be necessary for
- * non-mirrored logs, but we apply the rule anyway.
- *
- * (The other option is to make the region size of the log
- * mirror smaller than the mirror it is acting as a log for,
- * but that really complicates things. It's much easier to
- * keep the region_size the same for both.)
- */
- return (log_size > (region_size / pe_size)) ? log_size :
- (region_size / pe_size);
-}
-
-/*
- * Preparation for a specific allocation attempt
- * stripes and mirrors refer to the parallel areas used for data.
- * If log_area_count > 1 it is always mirrored (not striped).
- */
-static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
- struct dm_pool *mem,
- const struct segment_type *segtype,
- alloc_policy_t alloc,
- uint32_t new_extents,
- uint32_t mirrors,
- uint32_t stripes,
- uint32_t metadata_area_count,
- uint32_t extent_size,
- uint32_t region_size,
- struct dm_list *parallel_areas)
-{
- struct alloc_handle *ah;
- uint32_t s, area_count, alloc_count, parity_count;
- size_t size = 0;
-
- /* FIXME Caller should ensure this */
- if (mirrors && !stripes)
- stripes = 1;
-
- if (segtype_is_virtual(segtype))
- area_count = 0;
- else if (mirrors > 1)
- area_count = mirrors * stripes;
- else
- area_count = stripes;
-
- size = sizeof(*ah);
-
- /*
- * It is a requirement that RAID 4/5/6 are created with a number of
- * stripes that is greater than the number of parity devices. (e.g.
- * RAID4/5 must have at least 2 stripes and RAID6 must have at least
- * 3.) It is also a constraint that, when replacing individual devices
- * in a RAID 4/5/6 array, no more devices can be replaced than
- * there are parity devices. (Otherwise, there would not be enough
- * redundancy to maintain the array.) Understanding these two
- * constraints allows us to infer whether the caller of this function
- * is intending to allocate an entire array or just replacement
- * component devices. In the former case, we must account for the
- * necessary parity_count. In the latter case, we do not need to
- * account for the extra parity devices because the array already
- * exists and they only want replacement drives.
- */
- parity_count = (area_count <= segtype->parity_devs) ? 0 :
- segtype->parity_devs;
- alloc_count = area_count + parity_count;
- if (segtype_is_raid(segtype) && metadata_area_count)
- /* RAID has a meta area for each device */
- alloc_count *= 2;
- else
- /* mirrors specify their exact log count */
- alloc_count += metadata_area_count;
-
- size += sizeof(ah->alloced_areas[0]) * alloc_count;
-
- if (!(ah = dm_pool_zalloc(mem, size))) {
- log_error("allocation handle allocation failed");
- return NULL;
- }
-
- ah->cmd = cmd;
-
- if (segtype_is_virtual(segtype))
- return ah;
-
- if (!(area_count + metadata_area_count)) {
- log_error(INTERNAL_ERROR "_alloc_init called for non-virtual segment with no disk space.");
- return NULL;
- }
-
- if (!(ah->mem = dm_pool_create("allocation", 1024))) {
- log_error("allocation pool creation failed");
- return NULL;
- }
-
- if (mirrors || stripes)
- ah->new_extents = new_extents;
- else
- ah->new_extents = 0;
- ah->area_count = area_count;
- ah->parity_count = parity_count;
- ah->region_size = region_size;
- ah->alloc = alloc;
-
- /*
- * For the purposes of allocation, area_count and parity_count are
- * kept separately. However, the 'area_count' field in an
- * lv_segment includes both; and this is what '_calc_area_multiple'
- * is calculated from. So, we must pass in the total count to get
- * a correct area_multiple.
- */
- ah->area_multiple = _calc_area_multiple(segtype, area_count + parity_count, stripes);
- ah->mirror_logs_separate = find_config_tree_bool(cmd, "allocation/mirror_logs_require_separate_pvs",
- DEFAULT_MIRROR_LOGS_REQUIRE_SEPARATE_PVS);
-
- if (segtype_is_raid(segtype)) {
- if (metadata_area_count) {
- if (metadata_area_count != area_count)
- log_error(INTERNAL_ERROR
- "Bad metadata_area_count");
- ah->metadata_area_count = area_count;
- ah->alloc_and_split_meta = 1;
-
- ah->log_len = RAID_METADATA_AREA_LEN;
-
- /*
- * We need 'log_len' extents for each
- * RAID device's metadata_area
- */
- ah->new_extents += (ah->log_len * ah->area_multiple);
- } else {
- ah->log_area_count = 0;
- ah->log_len = 0;
- }
- } else if (segtype_is_thin_pool(segtype)) {
- ah->log_area_count = metadata_area_count;
- /* thin_pool uses region_size to pass metadata size in extents */
- ah->log_len = ah->region_size;
- ah->region_size = 0;
- ah->mirror_logs_separate =
- find_config_tree_bool(cmd, "allocation/thin_pool_metadata_require_separate_pvs",
- DEFAULT_THIN_POOL_METADATA_REQUIRE_SEPARATE_PVS);
- } else {
- ah->log_area_count = metadata_area_count;
- ah->log_len = !metadata_area_count ? 0 :
- mirror_log_extents(ah->region_size, extent_size,
- new_extents / ah->area_multiple);
+ if (log_size > UINT32_MAX) {
+ log_error("Log size needs too many extents "FMTu64" with region size of %u sectors.",
+ log_size, region_size);
+ log_size = UINT32_MAX;
+ /* VG likely will not have enough free space for this allocation -> error */
}
- for (s = 0; s < alloc_count; s++)
- dm_list_init(&ah->alloced_areas[s]);
-
- ah->parallel_areas = parallel_areas;
-
- ah->cling_tag_list_cn = find_config_tree_node(cmd, "allocation/cling_tag_list");
-
- ah->maximise_cling = find_config_tree_bool(cmd, "allocation/maximise_cling", DEFAULT_MAXIMISE_CLING);
-
- return ah;
-}
-
-void alloc_destroy(struct alloc_handle *ah)
-{
- if (ah->mem)
- dm_pool_destroy(ah->mem);
+ return (uint32_t) log_size;
}
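/*
 * Worked example of the calculation above (illustrative numbers, not
 * part of the patch): with 4MiB extents (pe_size = 8192 sectors),
 * region_size = 1024 sectors and area_len = 1024 extents, area_size
 * is 8388608 sectors and region_count = 8192. The bitset then needs
 * 8192 bits, i.e. 1KiB after rounding to whole 32-bit words; the log
 * header plus bitset round up to a handful of sectors, and dividing
 * by pe_size yields a log of a single extent.
 */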
/* Is there enough total space or should we give up immediately? */
static int _sufficient_pes_free(struct alloc_handle *ah, struct dm_list *pvms,
- uint32_t allocated, uint32_t extents_still_needed)
+ uint32_t allocated, uint32_t log_still_needed,
+ uint32_t extents_still_needed)
{
uint32_t area_extents_needed = (extents_still_needed - allocated) * ah->area_count / ah->area_multiple;
uint32_t parity_extents_needed = (extents_still_needed - allocated) * ah->parity_count / ah->area_multiple;
- uint32_t metadata_extents_needed = ah->metadata_area_count * RAID_METADATA_AREA_LEN; /* One each */
- uint32_t total_extents_needed = area_extents_needed + parity_extents_needed + metadata_extents_needed;
+ uint32_t metadata_extents_needed = (ah->alloc_and_split_meta ? 0 : ah->metadata_area_count * RAID_METADATA_AREA_LEN) +
+ (log_still_needed ? ah->log_len : 0); /* One each */
+ uint64_t total_extents_needed = (uint64_t)area_extents_needed + parity_extents_needed + metadata_extents_needed;
uint32_t free_pes = pv_maps_size(pvms);
if (total_extents_needed > free_pes) {
- log_error("Insufficient free space: %" PRIu32 " extents needed,"
+ log_error("Insufficient free space: %" PRIu64 " extents needed,"
" but only %" PRIu32 " available",
total_extents_needed, free_pes);
return 0;
@@ -948,7 +1990,9 @@ static uint32_t _stripes_per_mimage(struct lv_segment *seg)
return 1;
}
-static void _init_alloc_parms(struct alloc_handle *ah, struct alloc_parms *alloc_parms, alloc_policy_t alloc,
+static void _init_alloc_parms(struct alloc_handle *ah,
+ struct alloc_parms *alloc_parms,
+ alloc_policy_t alloc,
struct lv_segment *prev_lvseg, unsigned can_split,
uint32_t allocated, uint32_t extents_still_needed)
{
@@ -957,78 +2001,56 @@ static void _init_alloc_parms(struct alloc_handle *ah, struct alloc_parms *alloc
alloc_parms->flags = 0;
alloc_parms->extents_still_needed = extents_still_needed;
- /* Are there any preceding segments we must follow on from? */
- if (alloc_parms->prev_lvseg) {
- if (alloc_parms->alloc == ALLOC_CONTIGUOUS)
+ /*
+ * Only attempt contiguous/cling allocation to previous segment
+ * areas if the number of areas matches.
+ */
+ if (alloc_parms->prev_lvseg &&
+ ((ah->area_count + ah->parity_count) == prev_lvseg->area_count)) {
+ alloc_parms->flags |= A_AREA_COUNT_MATCHES;
+
+ /* Are there any preceding segments we must follow on from? */
+ if (alloc_parms->alloc == ALLOC_CONTIGUOUS) {
alloc_parms->flags |= A_CONTIGUOUS_TO_LVSEG;
- else if ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS))
+ alloc_parms->flags |= A_POSITIONAL_FILL;
+ } else if ((alloc_parms->alloc == ALLOC_CLING) ||
+ (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
alloc_parms->flags |= A_CLING_TO_LVSEG;
+ alloc_parms->flags |= A_POSITIONAL_FILL;
+ }
} else
/*
- * A cling allocation that follows a successful contiguous allocation
- * must use the same PVs (or else fail).
+ * A cling allocation that follows a successful contiguous
+ * allocation must use the same PVs (or else fail).
*/
- if ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS))
+ if ((alloc_parms->alloc == ALLOC_CLING) ||
+ (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) {
alloc_parms->flags |= A_CLING_TO_ALLOCED;
+ alloc_parms->flags |= A_POSITIONAL_FILL;
+ }
if (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)
alloc_parms->flags |= A_CLING_BY_TAGS;
+ if (!(alloc_parms->flags & A_POSITIONAL_FILL) &&
+ (alloc_parms->alloc == ALLOC_CONTIGUOUS) &&
+ ah->cling_tag_list_cn)
+ alloc_parms->flags |= A_PARTITION_BY_TAGS;
+
/*
- * For normal allocations, if any extents have already been found
+ * For normal allocations, if any extents have already been found
* for allocation, prefer to place further extents on the same disks as
* have already been used.
*/
- if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL && allocated != alloc_parms->extents_still_needed)
+ if (ah->maximise_cling &&
+ (alloc_parms->alloc == ALLOC_NORMAL) &&
+ (allocated != alloc_parms->extents_still_needed))
alloc_parms->flags |= A_CLING_TO_ALLOCED;
if (can_split)
alloc_parms->flags |= A_CAN_SPLIT;
}
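/*
 * Example of the flag logic above (hypothetical call): extending an
 * existing 2-way mirror with alloc == ALLOC_CLING, where the request
 * has the same number of areas as the previous segment, yields
 * A_AREA_COUNT_MATCHES | A_CLING_TO_LVSEG | A_POSITIONAL_FILL
 * (plus A_CAN_SPLIT when can_split is nonzero), so each parallel slot
 * is first filled positionally against the PV backing the matching
 * area of prev_lvseg before any sorted, sequential filling happens.
 */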
-static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas)
-{
- struct seg_pvs *spvs;
- struct pv_list *pvl;
- char *pvnames;
-
- if (!parallel_areas)
- return 1;
-
- dm_list_iterate_items(spvs, parallel_areas) {
- if (!dm_pool_begin_object(mem, 256)) {
- log_error("dm_pool_begin_object failed");
- return 0;
- }
-
- dm_list_iterate_items(pvl, &spvs->pvs) {
- if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
- log_error("dm_pool_grow_object failed");
- dm_pool_abandon_object(mem);
- return 0;
- }
- if (!dm_pool_grow_object(mem, " ", 1)) {
- log_error("dm_pool_grow_object failed");
- dm_pool_abandon_object(mem);
- return 0;
- }
- }
-
- if (!dm_pool_grow_object(mem, "\0", 1)) {
- log_error("dm_pool_grow_object failed");
- dm_pool_abandon_object(mem);
- return 0;
- }
-
- pvnames = dm_pool_end_object(mem);
- log_debug("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
- spvs->le, spvs->len, pvnames);
- dm_pool_free(mem, pvnames);
- }
-
- return 1;
-}
-
static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
uint32_t area_count,
uint32_t stripe_size,
@@ -1040,12 +2062,12 @@ static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
struct lv_segment *seg;
area_multiple = _calc_area_multiple(segtype, area_count, 0);
+ extents = aa[0].len * area_multiple;
- if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count,
- aa[0].len * area_multiple,
- status, stripe_size, NULL, NULL,
+ if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
+ status, stripe_size, NULL,
area_count,
- aa[0].len, 0u, region_size, 0u, NULL))) {
+ aa[0].len, 0, 0u, region_size, 0u, NULL))) {
log_error("Couldn't allocate new LV segment.");
return 0;
}
@@ -1057,11 +2079,9 @@ static int _setup_alloced_segment(struct logical_volume *lv, uint64_t status,
dm_list_add(&lv->segments, &seg->list);
extents = aa[0].len * area_multiple;
- lv->le_count += extents;
- lv->size += (uint64_t) extents *lv->vg->extent_size;
- if (segtype_is_mirrored(segtype))
- lv->status |= MIRRORED;
+ if (!_setup_lv_size(lv, lv->le_count + extents))
+ return_0;
return 1;
}
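/*
 * Example of the area_multiple arithmetic above (illustrative): for a
 * 3-way striped segment _calc_area_multiple returns 3, so aa[0].len =
 * 100 physical extents per stripe produces a segment covering 300
 * logical extents, and _setup_lv_size then grows lv->le_count (and
 * from it lv->size) by that amount.
 */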
@@ -1095,16 +2115,15 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
struct alloc_state *alloc_state, uint32_t ix_log_offset)
{
uint32_t area_len, len;
- uint32_t s;
+ uint32_t s, smeta;
uint32_t ix_log_skip = 0; /* How many areas to skip in middle of array to reach log areas */
uint32_t total_area_count;
struct alloced_area *aa;
struct pv_area *pva;
- total_area_count = ah->area_count + alloc_state->log_area_count_still_needed;
- total_area_count += ah->parity_count;
+ total_area_count = ah->area_count + ah->parity_count + alloc_state->log_area_count_still_needed;
if (!total_area_count) {
- log_error(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
+ log_warn(INTERNAL_ERROR "_alloc_parallel_area called without any allocation to do.");
return 1;
}
@@ -1115,7 +2134,7 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
if (area_len > alloc_state->areas[s].used)
area_len = alloc_state->areas[s].used;
- len = (ah->alloc_and_split_meta) ? total_area_count * 2 : total_area_count;
+ len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? total_area_count * 2 : total_area_count;
len *= sizeof(*aa);
if (!(aa = dm_pool_alloc(ah->mem, len))) {
log_error("alloced_area allocation failed");
@@ -1135,7 +2154,7 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
}
pva = alloc_state->areas[s + ix_log_skip].pva;
- if (ah->alloc_and_split_meta) {
+ if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) {
/*
* The metadata area goes at the front of the allocated
* space for now, but could easily go at the end (or
@@ -1145,23 +2164,25 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
* allocation, we store the images at the beginning
* of the areas array and the metadata at the end.
*/
- s += ah->area_count + ah->parity_count;
- aa[s].pv = pva->map->pv;
- aa[s].pe = pva->start;
- aa[s].len = ah->log_len;
-
- log_debug("Allocating parallel metadata area %" PRIu32
- " on %s start PE %" PRIu32
- " length %" PRIu32 ".",
- (s - (ah->area_count + ah->parity_count)),
- pv_dev_name(aa[s].pv), aa[s].pe,
- ah->log_len);
-
- consume_pv_area(pva, ah->log_len);
- dm_list_add(&ah->alloced_areas[s], &aa[s].list);
- s -= ah->area_count + ah->parity_count;
- }
- aa[s].len = (ah->alloc_and_split_meta) ? len - ah->log_len : len;
+ smeta = s + ah->area_count + ah->parity_count;
+ aa[smeta].pv = pva->map->pv;
+ aa[smeta].pe = pva->start;
+ aa[smeta].len = ah->log_len;
+ if (aa[smeta].len > pva->count) {
+ log_error("Metadata does not fit on a single PV.");
+ return 0;
+ }
+ log_debug_alloc("Allocating parallel metadata area %" PRIu32
+ " on %s start PE %" PRIu32
+ " length %" PRIu32 ".",
+ (smeta - (ah->area_count + ah->parity_count)),
+ pv_dev_name(aa[smeta].pv), aa[smeta].pe,
+ aa[smeta].len);
+
+ consume_pv_area(pva, aa[smeta].len);
+ dm_list_add(&ah->alloced_areas[smeta], &aa[smeta].list);
+ }
+ aa[s].len = (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? len - ah->log_len : len;
/* Skip empty allocations */
if (!aa[s].len)
continue;
@@ -1169,9 +2190,9 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
aa[s].pv = pva->map->pv;
aa[s].pe = pva->start;
- log_debug("Allocating parallel area %" PRIu32
- " on %s start PE %" PRIu32 " length %" PRIu32 ".",
- s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);
+ log_debug_alloc("Allocating parallel area %" PRIu32
+ " on %s start PE %" PRIu32 " length %" PRIu32 ".",
+ s, pv_dev_name(aa[s].pv), aa[s].pe, aa[s].len);
consume_pv_area(pva, aa[s].len);
@@ -1179,7 +2200,8 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
}
/* Only need to alloc metadata from the first batch */
- ah->alloc_and_split_meta = 0;
+ if (ah->alloc_and_split_meta)
+ ah->split_metadata_is_allocated = 1;
ah->total_area_len += area_len;
@@ -1194,6 +2216,7 @@ static int _alloc_parallel_area(struct alloc_handle *ah, uint32_t max_to_allocat
* reduced to cover only the first.
* fn should return 0 on error, 1 to continue scanning or >1 to terminate without error.
* In the last case, this function passes on the return code.
+ * FIXME I think some callers are expecting this to check all PV segments used by an LV.
*/
static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
uint32_t le, uint32_t len, struct lv_segment *seg,
@@ -1227,7 +2250,7 @@ static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
*max_seg_len = remaining_seg_len;
area_multiple = _calc_area_multiple(seg->segtype, seg->area_count, 0);
- area_len = remaining_seg_len / area_multiple ? : 1;
+ area_len = (remaining_seg_len / area_multiple) ? : 1;
/* For striped mirrors, all the areas are counted, through the mirror layer */
if (top_level_area_index == -1)
@@ -1263,7 +2286,20 @@ static int _for_each_pv(struct cmd_context *cmd, struct logical_volume *lv,
return r;
}
- /* FIXME Add snapshot cow LVs etc. */
+ /* FIXME Add snapshot cow, thin meta etc. */
+
+/*
+ if (!only_single_area_segments && !max_areas && seg_is_raid(seg)) {
+ for (s = first_area; s < seg->area_count; s++) {
+ if (seg_metalv(seg, s))
+ if (!(r = _for_each_pv(cmd, seg_metalv(seg, s), 0, seg_metalv(seg, s)->le_count, NULL,
+ NULL, 0, 0, 0, 0, fn, data)))
+ stack;
+ if (r != 1)
+ return r;
+ }
+ }
+*/
return 1;
}
@@ -1276,7 +2312,7 @@ static int _comp_area(const void *l, const void *r)
if (lhs->used < rhs->used)
return 1;
- else if (lhs->used > rhs->used)
+ if (lhs->used > rhs->used)
return -1;
return 0;
@@ -1288,9 +2324,9 @@ static int _comp_area(const void *l, const void *r)
struct pv_match {
int (*condition)(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva);
- struct pv_area_used *areas;
+ struct alloc_handle *ah;
+ struct alloc_state *alloc_state;
struct pv_area *pva;
- uint32_t areas_size;
const struct dm_config_node *cling_tag_list_cn;
int s; /* Area index of match */
};
@@ -1307,69 +2343,235 @@ static int _is_same_pv(struct pv_match *pvmatch __attribute((unused)), struct pv
}
/*
- * Does PV area have a tag listed in allocation/cling_tag_list that
- * matches a tag of the PV of the existing segment?
+ * Does PV area have a tag listed in allocation/cling_tag_list that
+ * matches EITHER a tag of the PV of the existing segment OR a tag in pv_tags?
+ * If mem is set, then instead we append a list of matching tags for printing to the object there.
*/
-static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn, struct physical_volume *pv1, struct physical_volume *pv2)
+static int _match_pv_tags(const struct dm_config_node *cling_tag_list_cn,
+ struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
+ struct physical_volume *pv2, struct dm_list *pv_tags, unsigned validate_only,
+ struct dm_pool *mem, unsigned parallel_pv)
{
const struct dm_config_value *cv;
const char *str;
const char *tag_matched;
+ struct dm_list *tags_to_match = mem ? NULL : pv_tags ? : ((pv2) ? &pv2->tags : NULL);
+ struct dm_str_list *sl;
+ unsigned first_tag = 1;
for (cv = cling_tag_list_cn->v; cv; cv = cv->next) {
if (cv->type != DM_CFG_STRING) {
- log_error("Ignoring invalid string in config file entry "
- "allocation/cling_tag_list");
+ if (validate_only)
+ log_warn("WARNING: Ignoring invalid string in config file entry "
+ "allocation/cling_tag_list");
continue;
}
str = cv->v.str;
if (!*str) {
- log_error("Ignoring empty string in config file entry "
- "allocation/cling_tag_list");
+ if (validate_only)
+ log_warn("WARNING: Ignoring empty string in config file entry "
+ "allocation/cling_tag_list");
continue;
}
if (*str != '@') {
- log_error("Ignoring string not starting with @ in config file entry "
- "allocation/cling_tag_list: %s", str);
+ if (validate_only)
+ log_warn("WARNING: Ignoring string not starting with @ in config file entry "
+ "allocation/cling_tag_list: %s", str);
continue;
}
str++;
if (!*str) {
- log_error("Ignoring empty tag in config file entry "
- "allocation/cling_tag_list");
+ if (validate_only)
+ log_warn("WARNING: Ignoring empty tag in config file entry "
+ "allocation/cling_tag_list");
continue;
}
+ if (validate_only)
+ continue;
+
/* Wildcard matches any tag against any tag. */
if (!strcmp(str, "*")) {
- if (!str_list_match_list(&pv1->tags, &pv2->tags, &tag_matched))
+ if (mem) {
+ dm_list_iterate_items(sl, &pv1->tags) {
+ if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ first_tag = 0;
+ if (!dm_pool_grow_object(mem, sl->str, 0)) {
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ }
continue;
- else {
- log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
- tag_matched, pv_dev_name(pv1), pv_dev_name(pv2));
- return 1;
}
+
+ if (tags_to_match && !str_list_match_list(&pv1->tags, tags_to_match, &tag_matched))
+ continue;
+
+ if (!pv_tags) {
+ if (parallel_pv)
+ log_debug_alloc("Not using free space on %s: Matched allocation PV tag %s on existing parallel PV %s.",
+ pv2 ? pv_dev_name(pv2) : "-", tag_matched, pv_dev_name(pv1));
+ else
+ log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
+ tag_matched, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
+ } else
+ log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
+ " from consideration: PV tag %s already used.",
+ area_num, pv_dev_name(pv1), pv1_start_pe, tag_matched);
+ return 1;
}
if (!str_list_match_item(&pv1->tags, str) ||
- !str_list_match_item(&pv2->tags, str))
+ (tags_to_match && !str_list_match_item(tags_to_match, str)))
+ continue;
+
+ if (mem) {
+ if (!first_tag && !dm_pool_grow_object(mem, ",", 0)) {
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
+ first_tag = 0;
+ if (!dm_pool_grow_object(mem, str, 0)) {
+ log_error("PV tags string extension failed.");
+ return 0;
+ }
continue;
- else {
- log_debug("Matched allocation PV tag %s on existing %s with free space on %s.",
- str, pv_dev_name(pv1), pv_dev_name(pv2));
- return 1;
}
+
+ if (!pv_tags) {
+ if (parallel_pv)
+ log_debug_alloc("Not using free space on %s: Matched allocation PV tag %s on existing parallel PV %s.",
+ pv2 ? pv_dev_name(pv2) : "-", str, pv_dev_name(pv1));
+ else
+ log_debug_alloc("Matched allocation PV tag %s on existing %s with free space on %s.",
+ str, pv_dev_name(pv1), pv2 ? pv_dev_name(pv2) : "-");
+ } else
+ log_debug_alloc("Eliminating allocation area %" PRIu32 " at PV %s start PE %" PRIu32
+ " from consideration: PV tag %s already used.",
+ area_num, pv_dev_name(pv1), pv1_start_pe, str);
+
+ return 1;
}
+ if (mem)
+ return 1;
+
return 0;
}
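/*
 * Example configuration this matching works against (hypothetical
 * tags; "@*" is the wildcard form):
 *
 *     allocation {
 *         cling_tag_list = [ "@site_a", "@site_b" ]
 *     }
 *
 * With pv_tags unset, a PV tagged site_a clings to free space on any
 * other PV tagged site_a; when a pv_tags list is supplied instead, an
 * area is eliminated once another area has already used one of its
 * tags.
 */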
+static int _validate_tag_list(const struct dm_config_node *cling_tag_list_cn)
+{
+ return _match_pv_tags(cling_tag_list_cn, NULL, 0, 0, NULL, NULL, 1, NULL, 0);
+}
+
+static int _tags_list_str(struct dm_pool *mem, struct physical_volume *pv1, const struct dm_config_node *cling_tag_list_cn)
+{
+ if (!_match_pv_tags(cling_tag_list_cn, pv1, 0, 0, NULL, NULL, 0, mem, 0)) {
+ dm_pool_abandon_object(mem);
+ return_0;
+ }
+
+ return 1;
+}
+
+/*
+ * Does PV area have a tag listed in allocation/cling_tag_list that
+ * matches a tag in the pv_tags list?
+ */
+static int _pv_has_matching_tag(const struct dm_config_node *cling_tag_list_cn,
+ struct physical_volume *pv1, uint32_t pv1_start_pe, uint32_t area_num,
+ struct dm_list *pv_tags)
+{
+ return _match_pv_tags(cling_tag_list_cn, pv1, pv1_start_pe, area_num, NULL, pv_tags, 0, NULL, 0);
+}
+
+/*
+ * Does PV area have a tag listed in allocation/cling_tag_list that
+ * matches a tag of the PV of the existing segment?
+ */
+static int _pvs_have_matching_tag(const struct dm_config_node *cling_tag_list_cn,
+ struct physical_volume *pv1, struct physical_volume *pv2,
+ unsigned parallel_pv)
+{
+ return _match_pv_tags(cling_tag_list_cn, pv1, 0, 0, pv2, NULL, 0, NULL, parallel_pv);
+}
+
static int _has_matching_pv_tag(struct pv_match *pvmatch, struct pv_segment *pvseg, struct pv_area *pva)
{
- return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv);
+ return _pvs_have_matching_tag(pvmatch->cling_tag_list_cn, pvseg->pv, pva->map->pv, 0);
+}
+
+static int _log_parallel_areas(struct dm_pool *mem, struct dm_list *parallel_areas,
+ const struct dm_config_node *cling_tag_list_cn)
+{
+ struct seg_pvs *spvs;
+ struct pv_list *pvl;
+ char *pvnames;
+ unsigned first;
+
+ if (!parallel_areas)
+ return 1;
+
+ dm_list_iterate_items(spvs, parallel_areas) {
+ first = 1;
+
+ if (!dm_pool_begin_object(mem, 256)) {
+ log_error("dm_pool_begin_object failed");
+ return 0;
+ }
+
+ dm_list_iterate_items(pvl, &spvs->pvs) {
+ if (!first && !dm_pool_grow_object(mem, " ", 1)) {
+ log_error("dm_pool_grow_object failed");
+ dm_pool_abandon_object(mem);
+ return 0;
+ }
+
+ if (!dm_pool_grow_object(mem, pv_dev_name(pvl->pv), strlen(pv_dev_name(pvl->pv)))) {
+ log_error("dm_pool_grow_object failed");
+ dm_pool_abandon_object(mem);
+ return 0;
+ }
+
+ if (cling_tag_list_cn) {
+ if (!dm_pool_grow_object(mem, "(", 1)) {
+ log_error("dm_pool_grow_object failed");
+ dm_pool_abandon_object(mem);
+ return 0;
+ }
+ if (!_tags_list_str(mem, pvl->pv, cling_tag_list_cn)) {
+ dm_pool_abandon_object(mem);
+ return_0;
+ }
+ if (!dm_pool_grow_object(mem, ")", 1)) {
+ log_error("dm_pool_grow_object failed");
+ dm_pool_abandon_object(mem);
+ return 0;
+ }
+ }
+
+ first = 0;
+ }
+
+ if (!dm_pool_grow_object(mem, "\0", 1)) {
+ log_error("dm_pool_grow_object failed");
+ dm_pool_abandon_object(mem);
+ return 0;
+ }
+
+ pvnames = dm_pool_end_object(mem);
+ log_debug_alloc("Parallel PVs at LE %" PRIu32 " length %" PRIu32 ": %s",
+ spvs->le, spvs->len, pvnames);
+ dm_pool_free(mem, pvnames);
+ }
+
+ return 1;
}
/*
@@ -1386,17 +2588,65 @@ static int _is_contiguous(struct pv_match *pvmatch __attribute((unused)), struct
return 1;
}
-static void _reserve_area(struct pv_area_used *area_used, struct pv_area *pva, uint32_t required,
- uint32_t ix_pva, uint32_t unreserved)
+static int _reserve_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
+ uint32_t required, uint32_t ix_pva, uint32_t unreserved)
{
- log_debug("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
- " length %" PRIu32 " leaving %" PRIu32 ".",
- area_used->pva ? "Changing " : "Considering",
- ix_pva - 1, area_used->pva ? "to" : "as",
- dev_name(pva->map->pv->dev), pva->start, required, unreserved);
+ struct pv_area_used *area_used = &alloc_state->areas[ix_pva];
+ const char *pv_tag_list = NULL;
+
+ if (ah->cling_tag_list_cn) {
+ if (!dm_pool_begin_object(ah->mem, 256)) {
+ log_error("PV tags string allocation failed.");
+ return 0;
+ } else if (!_tags_list_str(ah->mem, pva->map->pv, ah->cling_tag_list_cn))
+ dm_pool_abandon_object(ah->mem);
+ else if (!dm_pool_grow_object(ah->mem, "\0", 1)) {
+ dm_pool_abandon_object(ah->mem);
+ log_error("PV tags string extension failed.");
+ return 0;
+ } else
+ pv_tag_list = dm_pool_end_object(ah->mem);
+ }
+
+ log_debug_alloc("%s allocation area %" PRIu32 " %s %s start PE %" PRIu32
+ " length %" PRIu32 " leaving %" PRIu32 "%s%s.",
+ area_used->pva ? "Changing " : "Considering",
+ ix_pva, area_used->pva ? "to" : "as",
+ dev_name(pva->map->pv->dev), pva->start, required, unreserved,
+ pv_tag_list ? " with PV tags: " : "",
+ pv_tag_list ? : "");
+
+ if (pv_tag_list)
+ dm_pool_free(ah->mem, (void *)pv_tag_list);
area_used->pva = pva;
area_used->used = required;
+
+ return 1;
+}
+
+static int _reserve_required_area(struct alloc_handle *ah, struct alloc_state *alloc_state, struct pv_area *pva,
+ uint32_t required, uint32_t ix_pva, uint32_t unreserved)
+{
+ uint32_t s;
+ struct pv_area_used *new_state;
+
+ /* Expand areas array if needed after an area was split. */
+ if (ix_pva >= alloc_state->areas_size) {
+ alloc_state->areas_size *= 2;
+ if (!(new_state = realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
+ log_error("Memory reallocation for parallel areas failed.");
+ return 0;
+ }
+ alloc_state->areas = new_state;
+ for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
+ alloc_state->areas[s].pva = NULL;
+ }
+
+ if (!_reserve_area(ah, alloc_state, pva, required, ix_pva, unreserved))
+ return_0;
+
+ return 1;
}
static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
@@ -1404,21 +2654,28 @@ static int _is_condition(struct cmd_context *cmd __attribute__((unused)),
void *data)
{
struct pv_match *pvmatch = data;
+ int positional = pvmatch->alloc_state->alloc_parms->flags & A_POSITIONAL_FILL;
- if (pvmatch->areas[s].pva)
+ if (positional && pvmatch->alloc_state->areas[s].pva)
return 1; /* Area already assigned */
if (!pvmatch->condition(pvmatch, pvseg, pvmatch->pva))
return 1; /* Continue */
- if (s >= pvmatch->areas_size)
+ if (positional && (s >= pvmatch->alloc_state->num_positional_areas))
+ return 1;
+
+ /* FIXME The previous test should make this one redundant. */
+ if (positional && (s >= pvmatch->alloc_state->areas_size))
return 1;
/*
* Only used for cling and contiguous policies (which only make one allocation per PV)
* so it's safe to say all the available space is used.
*/
- _reserve_area(&pvmatch->areas[s], pvmatch->pva, pvmatch->pva->count, s + 1, 0);
+ if (positional &&
+ !_reserve_required_area(pvmatch->ah, pvmatch->alloc_state, pvmatch->pva, pvmatch->pva->count, s, 0))
+ return_0;
return 2; /* Finished */
}
@@ -1435,9 +2692,9 @@ static int _check_cling(struct alloc_handle *ah,
int r;
uint32_t le, len;
+ pvmatch.ah = ah;
pvmatch.condition = cling_tag_list_cn ? _has_matching_pv_tag : _is_same_pv;
- pvmatch.areas = alloc_state->areas;
- pvmatch.areas_size = alloc_state->areas_size;
+ pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
pvmatch.cling_tag_list_cn = cling_tag_list_cn;
@@ -1466,21 +2723,21 @@ static int _check_cling(struct alloc_handle *ah,
/*
* Is pva contiguous to any existing areas or on the same PV?
*/
-static int _check_contiguous(struct cmd_context *cmd,
+static int _check_contiguous(struct alloc_handle *ah,
struct lv_segment *prev_lvseg, struct pv_area *pva,
struct alloc_state *alloc_state)
{
struct pv_match pvmatch;
int r;
+ pvmatch.ah = ah;
pvmatch.condition = _is_contiguous;
- pvmatch.areas = alloc_state->areas;
- pvmatch.areas_size = alloc_state->areas_size;
+ pvmatch.alloc_state = alloc_state;
pvmatch.pva = pva;
pvmatch.cling_tag_list_cn = NULL;
/* FIXME Cope with stacks by flattening */
- if (!(r = _for_each_pv(cmd, prev_lvseg->lv,
+ if (!(r = _for_each_pv(ah->cmd, prev_lvseg->lv,
prev_lvseg->le + prev_lvseg->len - 1, 1, NULL, NULL,
0, 0, -1, 1,
_is_condition, &pvmatch)))
@@ -1500,6 +2757,7 @@ static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_conf
{
unsigned s;
struct alloced_area *aa;
+ int positional = alloc_state->alloc_parms->flags & A_POSITIONAL_FILL;
/*
* Ignore log areas. They are always allocated whole as part of the
@@ -1509,12 +2767,14 @@ static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_conf
return 0;
for (s = 0; s < ah->area_count; s++) {
- if (alloc_state->areas[s].pva)
+ if (positional && alloc_state->areas[s].pva)
continue; /* Area already assigned */
dm_list_iterate_items(aa, &ah->alloced_areas[s]) {
if ((!cling_tag_list_cn && (pva->map->pv == aa[0].pv)) ||
- (cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv))) {
- _reserve_area(&alloc_state->areas[s], pva, pva->count, s + 1, 0);
+ (cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pva->map->pv, aa[0].pv, 0))) {
+ if (positional &&
+ !_reserve_required_area(ah, alloc_state, pva, pva->count, s, 0))
+ return_0;
return 1;
}
}
@@ -1523,13 +2783,20 @@ static int _check_cling_to_alloced(struct alloc_handle *ah, const struct dm_conf
return 0;
}
-static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs)
+static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_pvs, const struct dm_config_node *cling_tag_list_cn)
{
struct pv_list *pvl;
- dm_list_iterate_items(pvl, parallel_pvs)
- if (pv == pvl->pv)
+ dm_list_iterate_items(pvl, parallel_pvs) {
+ if (pv == pvl->pv) {
+ log_debug_alloc("Not using free space on existing parallel PV %s.",
+ pv_dev_name(pvl->pv));
+ return 1;
+ }
+ if (cling_tag_list_cn && _pvs_have_matching_tag(cling_tag_list_cn, pvl->pv, pv, 1))
return 1;
+ }
+
return 0;
}
@@ -1539,9 +2806,10 @@ static int _pv_is_parallel(struct physical_volume *pv, struct dm_list *parallel_
* alloc_state->areas may get modified.
*/
static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint32_t still_needed,
- const struct alloc_parms *alloc_parms, struct alloc_state *alloc_state,
+ struct alloc_state *alloc_state,
unsigned already_found_one, unsigned iteration_count, unsigned log_iteration_count)
{
+ const struct alloc_parms *alloc_parms = alloc_state->alloc_parms;
unsigned s;
/* Skip fully-reserved areas (which are not currently removed from the list). */
@@ -1560,34 +2828,36 @@ static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint3
/* If maximise_cling is set, perform several checks, otherwise perform exactly one. */
if (!iteration_count && !log_iteration_count && alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG | A_CLING_TO_ALLOCED)) {
/* Contiguous? */
- if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) || (ah->maximise_cling && alloc_parms->prev_lvseg)) &&
- _check_contiguous(ah->cmd, alloc_parms->prev_lvseg, pva, alloc_state))
- return PREFERRED;
-
+ if (((alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG) ||
+ (ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) &&
+ _check_contiguous(ah, alloc_parms->prev_lvseg, pva, alloc_state))
+ goto found;
+
/* Try next area on same PV if looking for contiguous space */
if (alloc_parms->flags & A_CONTIGUOUS_TO_LVSEG)
return NEXT_AREA;
/* Cling to prev_lvseg? */
- if (((alloc_parms->flags & A_CLING_TO_LVSEG) || (ah->maximise_cling && alloc_parms->prev_lvseg)) &&
+ if (((alloc_parms->flags & A_CLING_TO_LVSEG) ||
+ (ah->maximise_cling && (alloc_parms->flags & A_AREA_COUNT_MATCHES))) &&
_check_cling(ah, NULL, alloc_parms->prev_lvseg, pva, alloc_state))
/* If this PV is suitable, use this first area */
- return PREFERRED;
+ goto found;
/* Cling_to_alloced? */
if ((alloc_parms->flags & A_CLING_TO_ALLOCED) &&
_check_cling_to_alloced(ah, NULL, pva, alloc_state))
- return PREFERRED;
+ goto found;
/* Cling_by_tags? */
if (!(alloc_parms->flags & A_CLING_BY_TAGS) || !ah->cling_tag_list_cn)
return NEXT_PV;
- if (alloc_parms->prev_lvseg) {
+ if ((alloc_parms->flags & A_AREA_COUNT_MATCHES)) {
if (_check_cling(ah, ah->cling_tag_list_cn, alloc_parms->prev_lvseg, pva, alloc_state))
- return PREFERRED;
+ goto found;
} else if (_check_cling_to_alloced(ah, ah->cling_tag_list_cn, pva, alloc_state))
- return PREFERRED;
+ goto found;
/* All areas on this PV give same result so pointless checking more */
return NEXT_PV;
@@ -1601,6 +2871,10 @@ static area_use_t _check_pva(struct alloc_handle *ah, struct pv_area *pva, uint3
(already_found_one && alloc_parms->alloc != ALLOC_ANYWHERE)))
return NEXT_PV;
+found:
+ if (alloc_parms->flags & A_POSITIONAL_FILL)
+ return PREFERRED;
+
return USE_AREA;
}
@@ -1613,12 +2887,12 @@ static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *
uint32_t required = max_to_allocate / ah->area_multiple;
/*
- * Update amount unreserved - effectively splitting an area
+ * Update amount unreserved - effectively splitting an area
* into two or more parts. If the whole stripe doesn't fit,
* reduce amount we're looking for.
*/
if (alloc == ALLOC_ANYWHERE) {
- if (ix_pva - 1 >= ah->area_count)
+ if (ix_pva >= ah->area_count + ah->parity_count)
required = ah->log_len;
} else if (required < ah->log_len)
required = ah->log_len;
@@ -1634,33 +2908,12 @@ static uint32_t _calc_required_extents(struct alloc_handle *ah, struct pv_area *
return required;
}
-static int _reserve_required_area(struct alloc_handle *ah, uint32_t max_to_allocate,
- unsigned ix_pva, struct pv_area *pva,
- struct alloc_state *alloc_state, alloc_policy_t alloc)
-{
- uint32_t required = _calc_required_extents(ah, pva, ix_pva, max_to_allocate, alloc);
- uint32_t s;
-
- /* Expand areas array if needed after an area was split. */
- if (ix_pva > alloc_state->areas_size) {
- alloc_state->areas_size *= 2;
- if (!(alloc_state->areas = dm_realloc(alloc_state->areas, sizeof(*alloc_state->areas) * (alloc_state->areas_size)))) {
- log_error("Memory reallocation for parallel areas failed.");
- return 0;
- }
- for (s = alloc_state->areas_size / 2; s < alloc_state->areas_size; s++)
- alloc_state->areas[s].pva = NULL;
- }
-
- _reserve_area(&alloc_state->areas[ix_pva - 1], pva, required, ix_pva, pva->unreserved);
-
- return 1;
-}
-
static void _clear_areas(struct alloc_state *alloc_state)
{
uint32_t s;
+ alloc_state->num_positional_areas = 0;
+
for (s = 0; s < alloc_state->areas_size; s++)
alloc_state->areas[s].pva = NULL;
}
@@ -1679,48 +2932,89 @@ static void _reset_unreserved(struct dm_list *pvms)
}
static void _report_needed_allocation_space(struct alloc_handle *ah,
- struct alloc_state *alloc_state)
+ struct alloc_state *alloc_state,
+ struct dm_list *pvms)
{
const char *metadata_type;
uint32_t parallel_areas_count, parallel_area_size;
uint32_t metadata_count, metadata_size;
- parallel_area_size = (ah->new_extents - alloc_state->allocated) / ah->area_multiple -
- ((ah->alloc_and_split_meta) ? ah->log_len : 0);
+ parallel_area_size = ah->new_extents - alloc_state->allocated;
+ parallel_area_size /= ah->area_multiple;
+ parallel_area_size -= (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0;
parallel_areas_count = ah->area_count + ah->parity_count;
metadata_size = ah->log_len;
if (ah->alloc_and_split_meta) {
- metadata_type = "RAID metadata area";
+ metadata_type = "metadata area";
metadata_count = parallel_areas_count;
+ if (ah->split_metadata_is_allocated)
+ metadata_size = 0;
} else {
metadata_type = "mirror log";
metadata_count = alloc_state->log_area_count_still_needed;
}
- log_debug("Still need %" PRIu32 " total extents:",
- parallel_area_size * parallel_areas_count + metadata_size * metadata_count);
- log_debug(" %" PRIu32 " (%" PRIu32 " data/%" PRIu32
- " parity) parallel areas of %" PRIu32 " extents each",
- parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
- log_debug(" %" PRIu32 " %ss of %" PRIu32 " extents each",
- metadata_count, metadata_type, metadata_size);
+ log_debug_alloc("Still need %s%" PRIu32 " total extents from %" PRIu32 " remaining (%" PRIu32 " positional slots):",
+ ah->approx_alloc ? "up to " : "",
+ parallel_area_size * parallel_areas_count + metadata_size * metadata_count, pv_maps_size(pvms),
+ alloc_state->num_positional_areas);
+ log_debug_alloc(" %" PRIu32 " (%" PRIu32 " data/%" PRIu32
+ " parity) parallel areas of %" PRIu32 " extents each",
+ parallel_areas_count, ah->area_count, ah->parity_count, parallel_area_size);
+ log_debug_alloc(" %" PRIu32 " %s%s of %" PRIu32 " extents each",
+ metadata_count, metadata_type,
+ (metadata_count == 1) ? "" : "s",
+ metadata_size);
}
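/*
 * Worked example (illustrative): a fresh 3-stripe raid5 allocation has
 * area_count = 3, parity_count = 1 and area_multiple = 3. If the
 * caller asked for 300 data extents, new_extents is 303 (log_len = 1
 * metadata extent per device was added as log_len * area_multiple)
 * and alloc_and_split_meta is set, so this reports 4 parallel areas of
 * 303 / 3 - 1 = 100 extents each plus 4 metadata areas of 1 extent
 * each: 404 extents still needed.
 */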
+
+/* Work through the array, removing any entries with tags already used by previous areas. */
+static int _limit_to_one_area_per_tag(struct alloc_handle *ah, struct alloc_state *alloc_state,
+ uint32_t ix_log_offset, unsigned *ix)
+{
+ uint32_t s = 0, u = 0;
+ DM_LIST_INIT(pv_tags);
+
+ while (s < alloc_state->areas_size && alloc_state->areas[s].pva) {
+ /* Start again with an empty tag list when we reach the log devices */
+ if (u == ix_log_offset)
+ dm_list_init(&pv_tags);
+ if (!_pv_has_matching_tag(ah->cling_tag_list_cn, alloc_state->areas[s].pva->map->pv, alloc_state->areas[s].pva->start, s, &pv_tags)) {
+ /* The comparison fn will ignore any non-cling tags so just add everything */
+ if (!str_list_add_list(ah->mem, &pv_tags, &alloc_state->areas[s].pva->map->pv->tags))
+ return_0;
+
+ if (s != u)
+ alloc_state->areas[u] = alloc_state->areas[s];
+
+ u++;
+ } else
+ (*ix)--; /* One area removed */
+
+ s++;
+ }
+
+ if (u < alloc_state->areas_size)
+ alloc_state->areas[u].pva = NULL;
+
+ return 1;
+}
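/*
 * Example (hypothetical tags): with cling_tag_list = [ "@*" ] and
 * candidate areas on PVs tagged A, A and B (in sorted order), the
 * second area is eliminated because tag A is already used, the array
 * is compacted in place to [A, B] and *ix drops from 3 to 2. The tag
 * list restarts when u reaches ix_log_offset, so log areas are
 * partitioned by tag independently of the data areas.
 */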
+
/*
* Returns 1 regardless of whether any space was found, except on error.
*/
-static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc_parms *alloc_parms,
+static int _find_some_parallel_space(struct alloc_handle *ah,
struct dm_list *pvms, struct alloc_state *alloc_state,
struct dm_list *parallel_pvs, uint32_t max_to_allocate)
{
+ const struct alloc_parms *alloc_parms = alloc_state->alloc_parms;
unsigned ix = 0;
unsigned last_ix;
struct pv_map *pvm;
struct pv_area *pva;
unsigned preferred_count = 0;
unsigned already_found_one;
- unsigned ix_offset = 0; /* Offset for non-preferred allocations */
unsigned ix_log_offset; /* Offset to start of areas to use for log */
unsigned too_small_for_log_count; /* How many too small for log? */
unsigned iteration_count = 0; /* cling_to_alloced may need 2 iterations */
@@ -1728,30 +3022,38 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
struct alloced_area *aa;
uint32_t s;
uint32_t devices_needed = ah->area_count + ah->parity_count;
+ uint32_t required;
- /* ix_offset holds the number of parallel allocations that must be contiguous/cling */
- /* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */
- if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG))
- ix_offset = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
+ _clear_areas(alloc_state);
+ _reset_unreserved(pvms);
- if (alloc_parms->flags & A_CLING_TO_ALLOCED)
- ix_offset = ah->area_count;
+ /* num_positional_areas holds the number of parallel allocations that must be contiguous/cling */
+ /* These appear first in the array, so it is also the offset to the non-preferred allocations */
+ /* At most one of A_CONTIGUOUS_TO_LVSEG, A_CLING_TO_LVSEG or A_CLING_TO_ALLOCED may be set */
+ if (!(alloc_parms->flags & A_POSITIONAL_FILL))
+ alloc_state->num_positional_areas = 0;
+ else if (alloc_parms->flags & (A_CONTIGUOUS_TO_LVSEG | A_CLING_TO_LVSEG))
+ alloc_state->num_positional_areas = _stripes_per_mimage(alloc_parms->prev_lvseg) * alloc_parms->prev_lvseg->area_count;
+ else if (alloc_parms->flags & A_CLING_TO_ALLOCED)
+ alloc_state->num_positional_areas = ah->area_count;
if (alloc_parms->alloc == ALLOC_NORMAL || (alloc_parms->flags & A_CLING_TO_ALLOCED))
- log_debug("Cling_to_allocated is %sset",
- alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");
+ log_debug_alloc("Cling_to_allocated is %sset",
+ alloc_parms->flags & A_CLING_TO_ALLOCED ? "" : "not ");
- _clear_areas(alloc_state);
- _reset_unreserved(pvms);
+ if (alloc_parms->flags & A_POSITIONAL_FILL)
+ log_debug_alloc("%u preferred area(s) to be filled positionally.", alloc_state->num_positional_areas);
+ else
+ log_debug_alloc("Areas to be sorted and filled sequentially.");
- _report_needed_allocation_space(ah, alloc_state);
+ _report_needed_allocation_space(ah, alloc_state, pvms);
/* ix holds the number of areas found on other PVs */
do {
if (log_iteration_count) {
- log_debug("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
+ log_debug_alloc("Found %u areas for %" PRIu32 " parallel areas and %" PRIu32 " log areas so far.", ix, devices_needed, alloc_state->log_area_count_still_needed);
} else if (iteration_count)
- log_debug("Filled %u out of %u preferred areas so far.", preferred_count, ix_offset);
+ log_debug_alloc("Filled %u out of %u preferred areas so far.", preferred_count, alloc_state->num_positional_areas);
/*
* Provide for escape from the loop if no progress is made.
@@ -1783,16 +3085,16 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
/* FIXME Split into log and non-log parallel_pvs and only check the log ones if log_iteration? */
/* (I've temporarily disabled the check.) */
/* Avoid PVs used by existing parallel areas */
- if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs))
+ if (!log_iteration_count && parallel_pvs && _pv_is_parallel(pvm->pv, parallel_pvs, ah->cling_tag_list_cn))
goto next_pv;
/*
- * Avoid PVs already set aside for log.
+ * Avoid PVs already set aside for log.
* We only reach here if there were enough PVs for the main areas but
* not enough for the logs.
*/
if (log_iteration_count) {
- for (s = devices_needed; s < ix + ix_offset; s++)
+ for (s = devices_needed; s < ix + alloc_state->num_positional_areas; s++)
if (alloc_state->areas[s].pva && alloc_state->areas[s].pva->map->pv == pvm->pv)
goto next_pv;
/* On a second pass, avoid PVs already used in an uncommitted area */
@@ -1806,11 +3108,16 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
/* First area in each list is the largest */
dm_list_iterate_items(pva, &pvm->areas) {
/*
- * There are two types of allocations, which can't be mixed at present.
+ * There are two types of allocations, which can't be mixed at present:
+ *
* PREFERRED are stored immediately in a specific parallel slot.
+ * This is only used if the A_POSITIONAL_FILL flag is set.
+ * This requires the number of slots to match, so if comparing with
+ * prev_lvseg then A_AREA_COUNT_MATCHES must be set.
+ *
* USE_AREA are stored for later, then sorted and chosen from.
*/
- switch(_check_pva(ah, pva, max_to_allocate, alloc_parms,
+ switch(_check_pva(ah, pva, max_to_allocate,
alloc_state, already_found_one, iteration_count, log_iteration_count)) {
case PREFERRED:
@@ -1835,8 +3142,8 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
}
/* Reserve required amount of pva */
- if (!_reserve_required_area(ah, max_to_allocate, ix + ix_offset,
- pva, alloc_state, alloc_parms->alloc))
+ required = _calc_required_extents(ah, pva, ix + alloc_state->num_positional_areas - 1, max_to_allocate, alloc_parms->alloc);
+ if (!_reserve_required_area(ah, alloc_state, pva, required, ix + alloc_state->num_positional_areas - 1, pva->unreserved))
return_0;
}
@@ -1847,22 +3154,23 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
/* With cling and contiguous we stop if we found a match for *all* the areas */
/* FIXME Rename these variables! */
if ((alloc_parms->alloc == ALLOC_ANYWHERE &&
- ix + ix_offset >= devices_needed + alloc_state->log_area_count_still_needed) ||
- (preferred_count == ix_offset &&
- (ix_offset == devices_needed + alloc_state->log_area_count_still_needed)))
+ ix + alloc_state->num_positional_areas >= devices_needed + alloc_state->log_area_count_still_needed) ||
+ (preferred_count == alloc_state->num_positional_areas &&
+ (alloc_state->num_positional_areas == devices_needed + alloc_state->log_area_count_still_needed)))
break;
}
} while ((alloc_parms->alloc == ALLOC_ANYWHERE && last_ix != ix && ix < devices_needed + alloc_state->log_area_count_still_needed) ||
/* With cling_to_alloced and normal, if there were gaps in the preferred areas, have a second iteration */
(alloc_parms->alloc == ALLOC_NORMAL && preferred_count &&
- (preferred_count < ix_offset || alloc_state->log_area_count_still_needed) &&
+ (preferred_count < alloc_state->num_positional_areas || alloc_state->log_area_count_still_needed) &&
(alloc_parms->flags & A_CLING_TO_ALLOCED) && !iteration_count++) ||
/* Extra iteration needed to fill log areas on PVs already used? */
- (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == ix_offset && !ah->mirror_logs_separate &&
+ (alloc_parms->alloc == ALLOC_NORMAL && preferred_count == alloc_state->num_positional_areas && !ah->mirror_logs_separate &&
(ix + preferred_count >= devices_needed) &&
(ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed) && !log_iteration_count++));
- if (preferred_count < ix_offset && !(alloc_parms->flags & A_CLING_TO_ALLOCED))
+ /* Non-zero ix means at least one USE_AREA was returned */
+ if (preferred_count < alloc_state->num_positional_areas && !(alloc_parms->flags & A_CLING_TO_ALLOCED) && !ix)
return 1;
if (ix + preferred_count < devices_needed + alloc_state->log_area_count_still_needed)
@@ -1871,26 +3179,26 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
/* Sort the areas so we allocate from the biggest */
if (log_iteration_count) {
if (ix > devices_needed + 1) {
- log_debug("Sorting %u log areas", ix - devices_needed);
+ log_debug_alloc("Sorting %u log areas", ix - devices_needed);
qsort(alloc_state->areas + devices_needed, ix - devices_needed, sizeof(*alloc_state->areas),
_comp_area);
}
} else if (ix > 1) {
- log_debug("Sorting %u areas", ix);
- qsort(alloc_state->areas + ix_offset, ix, sizeof(*alloc_state->areas),
+ log_debug_alloc("Sorting %u areas", ix);
+ qsort(alloc_state->areas + alloc_state->num_positional_areas, ix, sizeof(*alloc_state->areas),
_comp_area);
}
- /* If there are gaps in our preferred areas, fill then from the sorted part of the array */
- if (preferred_count && preferred_count != ix_offset) {
+ /* If there are gaps in our preferred areas, fill them from the sorted part of the array */
+ if (preferred_count && preferred_count != alloc_state->num_positional_areas) {
for (s = 0; s < devices_needed; s++)
if (!alloc_state->areas[s].pva) {
- alloc_state->areas[s].pva = alloc_state->areas[ix_offset].pva;
- alloc_state->areas[s].used = alloc_state->areas[ix_offset].used;
- alloc_state->areas[ix_offset++].pva = NULL;
+ alloc_state->areas[s].pva = alloc_state->areas[alloc_state->num_positional_areas].pva;
+ alloc_state->areas[s].used = alloc_state->areas[alloc_state->num_positional_areas].used;
+ alloc_state->areas[alloc_state->num_positional_areas++].pva = NULL;
}
}
-
+
/*
* First time around, if there's a log, allocate it on the
* smallest device that has space for it.
@@ -1901,19 +3209,60 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
/* FIXME This logic is due to its heritage and can be simplified! */
if (alloc_state->log_area_count_still_needed) {
/* How many areas are too small for the log? */
- while (too_small_for_log_count < ix_offset + ix &&
- (*(alloc_state->areas + ix_offset + ix - 1 -
+ while (too_small_for_log_count < alloc_state->num_positional_areas + ix &&
+ (*(alloc_state->areas + alloc_state->num_positional_areas + ix - 1 -
too_small_for_log_count)).used < ah->log_len)
too_small_for_log_count++;
- ix_log_offset = ix_offset + ix - too_small_for_log_count - ah->log_area_count;
+ if (ah->mirror_logs_separate &&
+ too_small_for_log_count &&
+ (too_small_for_log_count >= devices_needed))
+ return 1;
+ if ((alloc_state->num_positional_areas + ix) < (too_small_for_log_count + ah->log_area_count))
+ return 1;
+ ix_log_offset = alloc_state->num_positional_areas + ix - (too_small_for_log_count + ah->log_area_count);
}
- if (ix + ix_offset < devices_needed +
- (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
- too_small_for_log_count : 0))
+ if (ix + alloc_state->num_positional_areas < devices_needed)
return 1;
/*
+ * FIXME We should change the code to do separate calls for the log allocation
+ * and the data allocation so that _limit_to_one_area_per_tag doesn't have to guess
+ * where the split is going to occur.
+ */
+
+ /*
+ * This code covers the initial allocation - after that there is something to 'cling' to
+ * and we shouldn't get this far.
+ * alloc_state->num_positional_areas is assumed to be 0 with A_PARTITION_BY_TAGS.
+ *
+ * FIXME Consider a second attempt with A_PARTITION_BY_TAGS if, for example, the largest area
+ * had all the tags set, but other areas don't.
+ */
+ if ((alloc_parms->flags & A_PARTITION_BY_TAGS) && !alloc_state->num_positional_areas) {
+ if (!_limit_to_one_area_per_tag(ah, alloc_state, ix_log_offset, &ix))
+ return_0;
+
+ /* Recalculate log position because we might have removed some areas from consideration */
+ if (alloc_state->log_area_count_still_needed) {
+ /* How many areas are too small for the log? */
+ too_small_for_log_count = 0;
+ while (too_small_for_log_count < ix &&
+ (*(alloc_state->areas + ix - 1 - too_small_for_log_count)).pva &&
+ (*(alloc_state->areas + ix - 1 - too_small_for_log_count)).used < ah->log_len)
+ too_small_for_log_count++;
+ if (ix < too_small_for_log_count + ah->log_area_count)
+ return 1;
+ ix_log_offset = ix - too_small_for_log_count - ah->log_area_count;
+ }
+
+ if (ix < devices_needed +
+ (alloc_state->log_area_count_still_needed ? alloc_state->log_area_count_still_needed +
+ too_small_for_log_count : 0))
+ return 1;
+ }
+
+ /*
* Finally add the space identified to the list of areas to be used.
*/
if (!_alloc_parallel_area(ah, max_to_allocate, alloc_state, ix_log_offset))
@@ -1928,7 +3277,7 @@ static int _find_some_parallel_space(struct alloc_handle *ah, const struct alloc
}
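/*
 * Worked example of the log placement above (illustrative, assuming
 * devices_needed = 3): with num_positional_areas = 0, ix = 5 sorted
 * areas, one mirror log still needed (log_area_count = 1) and only the
 * smallest area too small to hold it (too_small_for_log_count = 1),
 * ix_log_offset = 0 + 5 - (1 + 1) = 3: the log is taken from the
 * smallest area that still fits it, leaving the largest areas at
 * indexes 0..2 for the data stripes.
 */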
/*
- * Choose sets of parallel areas to use, respecting any constraints
+ * Choose sets of parallel areas to use, respecting any constraints
* supplied in alloc_parms.
*/
static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, struct alloc_parms *alloc_parms,
@@ -1941,6 +3290,8 @@ static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, stru
struct seg_pvs *spvs;
struct dm_list *parallel_pvs;
+ alloc_state->alloc_parms = alloc_parms;
+
/* FIXME This algorithm needs a lot of cleaning up! */
/* FIXME anywhere doesn't find all space yet */
do {
@@ -1965,11 +3316,11 @@ static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, stru
* data together will be split, we must adjust
* the comparison accordingly.
*/
- if (ah->alloc_and_split_meta)
+ if (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated)
max_tmp -= ah->log_len;
if (max_tmp > (spvs->le + spvs->len) * ah->area_multiple) {
max_to_allocate = (spvs->le + spvs->len) * ah->area_multiple - alloc_state->allocated;
- max_to_allocate += ah->alloc_and_split_meta ? ah->log_len : 0;
+ max_to_allocate += (ah->alloc_and_split_meta && !ah->split_metadata_is_allocated) ? ah->log_len : 0;
}
parallel_pvs = &spvs->pvs;
break;
@@ -1978,10 +3329,13 @@ static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, stru
old_allocated = alloc_state->allocated;
- if (!_find_some_parallel_space(ah, alloc_parms, pvms, alloc_state, parallel_pvs, max_to_allocate))
+ if (!_find_some_parallel_space(ah, pvms, alloc_state, parallel_pvs, max_to_allocate))
return_0;
/*
+ * For ALLOC_CLING, if the number of areas matches and maximise_cling is
+ * set we allow two passes, first with A_POSITIONAL_FILL then without.
+ *
* If we didn't allocate anything this time with ALLOC_NORMAL and had
* A_CLING_TO_ALLOCED set, try again without it.
*
@@ -1990,14 +3344,17 @@ static int _find_max_parallel_space_for_one_policy(struct alloc_handle *ah, stru
* remain on the same disks where possible.
*/
if (old_allocated == alloc_state->allocated) {
- if ((alloc_parms->alloc == ALLOC_NORMAL) && (alloc_parms->flags & A_CLING_TO_ALLOCED))
+ if (ah->maximise_cling && ((alloc_parms->alloc == ALLOC_CLING) || (alloc_parms->alloc == ALLOC_CLING_BY_TAGS)) &&
+ (alloc_parms->flags & A_CLING_TO_LVSEG) && (alloc_parms->flags & A_POSITIONAL_FILL))
+ alloc_parms->flags &= ~A_POSITIONAL_FILL;
+ else if ((alloc_parms->alloc == ALLOC_NORMAL) && (alloc_parms->flags & A_CLING_TO_ALLOCED))
alloc_parms->flags &= ~A_CLING_TO_ALLOCED;
else
break; /* Give up */
} else if (ah->maximise_cling && alloc_parms->alloc == ALLOC_NORMAL &&
!(alloc_parms->flags & A_CLING_TO_ALLOCED))
alloc_parms->flags |= A_CLING_TO_ALLOCED;
- } while ((alloc_parms->alloc != ALLOC_CONTIGUOUS) && alloc_state->allocated != alloc_parms->extents_still_needed && (alloc_parms->flags & A_CAN_SPLIT));
+ } while ((alloc_parms->alloc != ALLOC_CONTIGUOUS) && alloc_state->allocated != alloc_parms->extents_still_needed && (alloc_parms->flags & A_CAN_SPLIT) && (!ah->approx_alloc || pv_maps_size(pvms)));
return 1;
}
@@ -2024,13 +3381,13 @@ static int _allocate(struct alloc_handle *ah,
alloc_state.allocated = lv ? lv->le_count : 0;
if (alloc_state.allocated >= ah->new_extents && !ah->log_area_count) {
- log_error("_allocate called with no work to do!");
+ log_warn("_allocate called with no work to do!");
return 1;
}
if (ah->area_multiple > 1 &&
(ah->new_extents - alloc_state.allocated) % ah->area_multiple) {
- log_error("Number of extents requested (%d) needs to be divisible by %d.",
+ log_error("Number of extents requested (" FMTu32 ") needs to be divisible by " FMTu32 ".",
ah->new_extents - alloc_state.allocated,
ah->area_multiple);
return 0;
@@ -2041,16 +3398,15 @@ static int _allocate(struct alloc_handle *ah,
if (ah->alloc == ALLOC_CONTIGUOUS)
can_split = 0;
- if (lv && !dm_list_empty(&lv->segments))
- prev_lvseg = dm_list_item(dm_list_last(&lv->segments),
- struct lv_segment);
+ if (lv)
+ prev_lvseg = last_seg(lv);
/*
* Build the sets of available areas on the pv's.
*/
if (!(pvms = create_pv_maps(ah->mem, vg, allocatable_pvs)))
return_0;
- if (!_log_parallel_areas(ah->mem, ah->parallel_areas))
+ if (!_log_parallel_areas(ah->mem, ah->parallel_areas, ah->cling_tag_list_cn))
stack;
alloc_state.areas_size = dm_list_size(pvms);
@@ -2071,7 +3427,7 @@ static int _allocate(struct alloc_handle *ah,
alloc_state.areas_size += _stripes_per_mimage(prev_lvseg) * prev_lvseg->area_count;
/* Allocate an array of pv_areas to hold the largest space on each PV */
- if (!(alloc_state.areas = dm_malloc(sizeof(*alloc_state.areas) * alloc_state.areas_size))) {
+ if (!(alloc_state.areas = malloc(sizeof(*alloc_state.areas) * alloc_state.areas_size))) {
log_error("Couldn't allocate areas array.");
return 0;
}
@@ -2089,9 +3445,11 @@ static int _allocate(struct alloc_handle *ah,
if (alloc == ALLOC_CLING_BY_TAGS && !ah->cling_tag_list_cn)
continue;
old_allocated = alloc_state.allocated;
- log_debug("Trying allocation using %s policy.", get_alloc_string(alloc));
+ log_debug_alloc("Trying allocation using %s policy.", get_alloc_string(alloc));
- if (!_sufficient_pes_free(ah, pvms, alloc_state.allocated, ah->new_extents))
+ if (!ah->approx_alloc && !_sufficient_pes_free(ah, pvms, alloc_state.allocated,
+ alloc_state.log_area_count_still_needed,
+ ah->new_extents))
goto_out;
_init_alloc_parms(ah, &alloc_parms, alloc, prev_lvseg,
@@ -2101,19 +3459,45 @@ static int _allocate(struct alloc_handle *ah,
if (!_find_max_parallel_space_for_one_policy(ah, &alloc_parms, pvms, &alloc_state))
goto_out;
- if ((alloc_state.allocated == ah->new_extents && !alloc_state.log_area_count_still_needed) ||
+ /* As a workaround, if only the log is missing now, fall through and try later policies up to normal. */
+ /* FIXME Change the core algorithm so the log extents cling to parallel LVs instead of avoiding them. */
+ if (alloc_state.allocated == ah->new_extents &&
+ alloc_state.log_area_count_still_needed &&
+ ah->alloc < ALLOC_NORMAL) {
+ ah->alloc = ALLOC_NORMAL;
+ continue;
+ }
+
+ if ((alloc_state.allocated == ah->new_extents &&
+ !alloc_state.log_area_count_still_needed) ||
(!can_split && (alloc_state.allocated != old_allocated)))
break;
}
if (alloc_state.allocated != ah->new_extents) {
- log_error("Insufficient suitable %sallocatable extents "
- "for logical volume %s: %u more required",
- can_split ? "" : "contiguous ",
- lv ? lv->name : "",
- (ah->new_extents - alloc_state.allocated) * ah->area_count
- / ah->area_multiple);
- goto out;
+ if (!ah->approx_alloc) {
+ log_error("Insufficient suitable %sallocatable extents "
+ "for logical volume %s: %u more required",
+ can_split ? "" : "contiguous ",
+ lv ? lv->name : "",
+ (ah->new_extents - alloc_state.allocated) *
+ ah->area_count / ah->area_multiple);
+ goto out;
+ }
+ if (!alloc_state.allocated) {
+ log_error("Insufficient suitable %sallocatable extents "
+ "found for logical volume %s.",
+ can_split ? "" : "contiguous ",
+ lv ? lv->name : "");
+ goto out;
+ }
+ log_verbose("Found fewer %sallocatable extents "
+ "for logical volume %s than requested: using %" PRIu32 " extents (reduced by %u).",
+ can_split ? "" : "contiguous ",
+ lv ? lv->name : "",
+ alloc_state.allocated,
+ (ah->new_extents - alloc_state.allocated) * ah->area_count / ah->area_multiple);
+ ah->new_extents = alloc_state.allocated;
}
if (alloc_state.log_area_count_still_needed) {
@@ -2126,61 +3510,268 @@ static int _allocate(struct alloc_handle *ah,
r = 1;
out:
- dm_free(alloc_state.areas);
+ free(alloc_state.areas);
return r;
}
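For illustration (hypothetical numbers): with approx_alloc set, a request for 1000 extents on a linear LV against only 600 free extents no longer fails — the loop above runs out of PV space, logs that it is using 600 extents (reduced by 400), and rewrites ah->new_extents to the reduced total so the later mapping code sizes the LV accordingly.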
-int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
- uint32_t extents, const struct segment_type *segtype,
- const char *thin_pool_name)
+/*
+ * FIXME: Add proper allocation function for VDO segment on top
+ * of VDO pool with virtual size.
+ *
+ * Note: at the moment lvm2 can't resize a VDO device, so only a single segment can be added.
+ */
+static int _lv_add_vdo_segment(struct logical_volume *lv, uint64_t status,
+ uint32_t extents, const struct segment_type *segtype)
{
struct lv_segment *seg;
- struct logical_volume *thin_pool_lv = NULL;
- struct lv_list *lvl;
- uint32_t size;
- if (thin_pool_name) {
- if (!(lvl = find_lv_in_vg(lv->vg, thin_pool_name))) {
- log_error("Unable to find existing pool LV %s in VG %s.",
- thin_pool_name, lv->vg->name);
+ if (!dm_list_empty(&lv->segments) &&
+ (seg = last_seg(lv)) && (seg->segtype == segtype)) {
+ seg->area_len += extents;
+ seg->len += extents;
+ } else {
+ if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
+ status, 0, NULL, 1,
+ extents, 0, 0, 0, 0, NULL))) {
+ log_error("Couldn't allocate new %s segment.", segtype->name);
return 0;
}
- thin_pool_lv = lvl->lv;
- size = first_seg(thin_pool_lv)->chunk_size;
- if (lv->vg->extent_size < size) {
- /* Align extents on chunk boundary size */
- size = ((uint64_t)lv->vg->extent_size * extents + size - 1) /
- size * size / lv->vg->extent_size;
- if (size != extents) {
- log_print_unless_silent("Rounding size (%d extents) up to chunk boundary "
- "size (%d extents).", extents, size);
- extents = size;
- }
- }
+ lv->status |= LV_VDO;
+ dm_list_add(&lv->segments, &seg->list);
}
+ lv->le_count += extents;
+ lv->size += (uint64_t) extents * lv->vg->extent_size;
+
+ if (seg_lv(seg, 0) &&
+ !update_vdo_pool_virtual_size(first_seg(seg_lv(seg, 0))))
+ return_0;
+
+ return 1;
+}
+
+int lv_add_virtual_segment(struct logical_volume *lv, uint64_t status,
+ uint32_t extents, const struct segment_type *segtype)
+{
+ struct lv_segment *seg;
+
+ if (segtype_is_vdo(segtype))
+ return _lv_add_vdo_segment(lv, 0u, extents, segtype);
+
if (!dm_list_empty(&lv->segments) &&
(seg = last_seg(lv)) && (seg->segtype == segtype)) {
seg->area_len += extents;
seg->len += extents;
} else {
- if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents,
- status, 0, NULL, thin_pool_lv, 0,
- extents, 0, 0, 0, NULL))) {
- log_error("Couldn't allocate new zero segment.");
+ if (!(seg = alloc_lv_segment(segtype, lv, lv->le_count, extents, 0,
+ status, 0, NULL, 0,
+ extents, 0, 0, 0, 0, NULL))) {
+ log_error("Couldn't allocate new %s segment.", segtype->name);
return 0;
}
lv->status |= VIRTUAL;
dm_list_add(&lv->segments, &seg->list);
}
- lv->le_count += extents;
- lv->size += (uint64_t) extents *lv->vg->extent_size;
+ if (!_setup_lv_size(lv, lv->le_count + extents))
+ return_0;
return 1;
}
/*
+ * Preparation for a specific allocation attempt.
+ * 'stripes' and 'mirrors' refer to the parallel areas used for data.
+ * If log_area_count > 1 it is always mirrored (not striped).
+ */
+static struct alloc_handle *_alloc_init(struct cmd_context *cmd,
+ const struct segment_type *segtype,
+ alloc_policy_t alloc, int approx_alloc,
+ uint32_t existing_extents,
+ uint32_t new_extents,
+ uint32_t mirrors,
+ uint32_t stripes,
+ uint32_t metadata_area_count,
+ uint32_t extent_size,
+ uint32_t region_size,
+ struct dm_list *parallel_areas)
+{
+ struct dm_pool *mem;
+ struct alloc_handle *ah;
+ uint32_t s, area_count, alloc_count, parity_count, total_extents;
+ size_t size = 0;
+
+ if (segtype_is_virtual(segtype)) {
+ log_error(INTERNAL_ERROR "_alloc_init called for virtual segment.");
+ return NULL;
+ }
+
+ /* FIXME Caller should ensure this */
+ if (mirrors && !stripes)
+ stripes = 1;
+
+ if (mirrors > 1)
+ area_count = mirrors * stripes;
+ else
+ area_count = stripes;
+
+ if (!(area_count + metadata_area_count)) {
+ log_error(INTERNAL_ERROR "_alloc_init called for non-virtual segment with no disk space.");
+ return NULL;
+ }
+
+ size = sizeof(*ah);
+
+ /*
+ * It is a requirement that RAID 4/5/6 are created with a number of
+ * stripes that is greater than the number of parity devices. (e.g.
+ * RAID4/5 must have at least 2 stripes and RAID6 must have at least
+ * 3.) It is also a constraint that, when replacing individual devices
+ * in a RAID 4/5/6 array, no more devices can be replaced than
+ * there are parity devices. (Otherwise, there would not be enough
+ * redundancy to maintain the array.) Understanding these two
+ * constraints allows us to infer whether the caller of this function
+ * is intending to allocate an entire array or just replacement
+ * component devices. In the former case, we must account for the
+ * necessary parity_count. In the latter case, we do not need to
+ * account for the extra parity devices because the array already
+ * exists and they only want replacement drives.
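+ *
+ * Worked example (hypothetical numbers): creating a raid6 array
+ * with 3 stripes gives area_count = 3 > parity_devs = 2, so
+ * parity_count = 2 and five areas are allocated; replacing two
+ * failed legs gives area_count = 2 <= 2, so parity_count = 0.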
+ */
+ parity_count = (area_count <= segtype->parity_devs) ? 0 : segtype->parity_devs;
+ alloc_count = area_count + parity_count;
+ if (segtype_is_raid(segtype) && metadata_area_count)
+ /* RAID has a meta area for each device */
+ alloc_count *= 2;
+ else
+ /* mirrors specify their exact log count */
+ alloc_count += metadata_area_count;
+
+ size += sizeof(ah->alloced_areas[0]) * alloc_count;
+
+ if (!(mem = dm_pool_create("allocation", 1024))) {
+ log_error("allocation pool creation failed");
+ return NULL;
+ }
+
+ if (!(ah = dm_pool_zalloc(mem, size))) {
+ log_error("allocation handle allocation failed");
+ dm_pool_destroy(mem);
+ return NULL;
+ }
+
+ ah->cmd = cmd;
+ ah->mem = mem;
+ ah->area_count = area_count;
+ ah->parity_count = parity_count;
+ ah->region_size = region_size;
+ ah->alloc = alloc;
+
+ /*
+ * For the purposes of allocation, area_count and parity_count are
+ * kept separately. However, the 'area_count' field in an
+ * lv_segment includes both; and this is what '_calc_area_multiple'
+ * is calculated from. So, we must pass in the total count to get
+ * a correct area_multiple.
+ */
+ ah->area_multiple = _calc_area_multiple(segtype, area_count + parity_count, stripes);
+ //FIXME: s/mirror_logs_separate/metadata_separate/ so it can be used by others?
+ ah->mirror_logs_separate = find_config_tree_bool(cmd, allocation_mirror_logs_require_separate_pvs_CFG, NULL);
+
+ if (mirrors || stripes)
+ total_extents = new_extents;
+ else
+ total_extents = 0;
+
+ if (segtype_is_raid(segtype)) {
+ if (metadata_area_count) {
+ uint32_t cur_rimage_extents, new_rimage_extents;
+
+ if (metadata_area_count != area_count)
+ log_error(INTERNAL_ERROR
+ "Bad metadata_area_count");
+
+ /* Calculate log_len (i.e. length of each rmeta device) for RAID */
+ cur_rimage_extents = raid_rimage_extents(segtype, existing_extents, stripes, mirrors);
+ new_rimage_extents = raid_rimage_extents(segtype, existing_extents + new_extents, stripes, mirrors);
+ ah->log_len = raid_rmeta_extents_delta(cmd, cur_rimage_extents, new_rimage_extents,
+ region_size, extent_size);
+ ah->metadata_area_count = metadata_area_count;
+ ah->alloc_and_split_meta = !!ah->log_len;
+ /*
+ * We need 'log_len' extents for each
+ * RAID device's metadata_area
+ */
+ total_extents += ah->log_len * (segtype_is_raid1(segtype) ? 1 : ah->area_multiple);
+ } else {
+ ah->log_area_count = 0;
+ ah->log_len = 0;
+ }
+ } else if (segtype_is_thin_pool(segtype)) {
+ /*
+ * thin_pool uses ah->region_size to
+ * pass metadata size in extents
+ */
+ ah->log_len = ah->region_size;
+ ah->log_area_count = metadata_area_count;
+ ah->region_size = 0;
+ ah->mirror_logs_separate =
+ find_config_tree_bool(cmd, allocation_thin_pool_metadata_require_separate_pvs_CFG, NULL);
+ } else if (segtype_is_cache_pool(segtype)) {
+ /*
+ * Like thin_pool, cache_pool uses ah->region_size to
+ * pass metadata size in extents
+ */
+ ah->log_len = ah->region_size;
+ /* use metadata_area_count, not log_area_count */
+ ah->metadata_area_count = metadata_area_count;
+ ah->region_size = 0;
+ ah->mirror_logs_separate =
+ find_config_tree_bool(cmd, allocation_cache_pool_metadata_require_separate_pvs_CFG, NULL);
+ if (!ah->mirror_logs_separate) {
+ ah->alloc_and_split_meta = 1;
+ total_extents += ah->log_len;
+ }
+ } else {
+ ah->log_area_count = metadata_area_count;
+ ah->log_len = !metadata_area_count ? 0 :
+ _mirror_log_extents(ah->region_size, extent_size,
+ (existing_extents + new_extents) / ah->area_multiple);
+ }
+
+ if (total_extents || existing_extents)
+ log_debug("Adjusted allocation request to " FMTu32 " logical extents. Existing size " FMTu32 ". New size " FMTu32 ".",
+ total_extents, existing_extents, total_extents + existing_extents);
+ if (ah->log_len)
+ log_debug("Mirror log of " FMTu32 " extents of size " FMTu32 " sectors needed for region size %s.",
+ ah->log_len, extent_size, display_size(cmd, (uint64_t)ah->region_size));
+
+ if (mirrors || stripes)
+ total_extents += existing_extents;
+
+ ah->new_extents = total_extents;
+
+ for (s = 0; s < alloc_count; s++)
+ dm_list_init(&ah->alloced_areas[s]);
+
+ ah->parallel_areas = parallel_areas;
+
+ if ((ah->cling_tag_list_cn = find_config_tree_array(cmd, allocation_cling_tag_list_CFG, NULL)))
+ (void) _validate_tag_list(ah->cling_tag_list_cn);
+
+ ah->maximise_cling = find_config_tree_bool(cmd, allocation_maximise_cling_CFG, NULL);
+
+ ah->approx_alloc = approx_alloc;
+
+ return ah;
+}
+
+void alloc_destroy(struct alloc_handle *ah)
+{
+ if (ah)
+ dm_pool_destroy(ah->mem);
+}
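To make the handle lifecycle concrete, here is a minimal caller-side sketch (hypothetical helper; the stripe count, stripe size and policy are illustrative and error handling is abbreviated) using only the entry points in this diff:

static int example_extend_striped(struct logical_volume *lv,
                                  const struct segment_type *striped,
                                  struct dm_list *allocatable_pvs)
{
        struct alloc_handle *ah;
        int r;

        /* 2 stripes, no mirrors or log, exact (non-approximate) allocation */
        if (!(ah = allocate_extents(lv->vg, lv, striped, 2 /* stripes */,
                                    1 /* mirrors */, 0 /* log_count */,
                                    0 /* region_size */, 100 /* extents */,
                                    allocatable_pvs, ALLOC_INHERIT,
                                    0 /* approx_alloc */, NULL)))
                return_0;

        /* Map the allocated parallel areas onto the LV as striped segments */
        r = lv_add_segment(ah, 0, ah->area_count, lv, striped,
                           128 /* stripe_size, sectors */, 0, 0);

        alloc_destroy(ah);      /* frees the handle's private memory pool */

        return r;
}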
+
+/*
* Entry point for all extent allocations.
*/
struct alloc_handle *allocate_extents(struct volume_group *vg,
@@ -2190,11 +3781,10 @@ struct alloc_handle *allocate_extents(struct volume_group *vg,
uint32_t mirrors, uint32_t log_count,
uint32_t region_size, uint32_t extents,
struct dm_list *allocatable_pvs,
- alloc_policy_t alloc,
+ alloc_policy_t alloc, int approx_alloc,
struct dm_list *parallel_areas)
{
struct alloc_handle *ah;
- uint32_t new_extents;
if (segtype_is_virtual(segtype)) {
log_error("allocate_extents does not handle virtual segments");
@@ -2219,9 +3809,8 @@ struct alloc_handle *allocate_extents(struct volume_group *vg,
if (alloc >= ALLOC_INHERIT)
alloc = vg->alloc;
- new_extents = (lv ? lv->le_count : 0) + extents;
- if (!(ah = _alloc_init(vg->cmd, vg->cmd->mem, segtype, alloc,
- new_extents, mirrors, stripes, log_count,
+ if (!(ah = _alloc_init(vg->cmd, segtype, alloc, approx_alloc,
+ lv ? lv->le_count : 0, extents, mirrors, stripes, log_count,
vg->extent_size, region_size,
parallel_areas)))
return_NULL;
@@ -2255,7 +3844,7 @@ int lv_add_segment(struct alloc_handle *ah,
return 0;
}
- if ((status & MIRROR_LOG) && dm_list_size(&lv->segments)) {
+ if ((status & MIRROR_LOG) && !dm_list_empty(&lv->segments)) {
log_error("Log segments can only be added to an empty LV");
return 0;
}
@@ -2266,7 +3855,7 @@ int lv_add_segment(struct alloc_handle *ah,
region_size))
return_0;
- if ((segtype->flags & SEG_CAN_SPLIT) && !lv_merge_segments(lv)) {
+ if (segtype_can_split(segtype) && !lv_merge_segments(lv)) {
log_error("Couldn't merge segments after extending "
"logical volume.");
return 0;
@@ -2302,14 +3891,14 @@ static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg,
return NULL;
}
- if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, "mirror"),
- seg->lv, seg->le, seg->len,
+ if (!(newseg = alloc_lv_segment(get_segtype_from_string(seg->lv->vg->cmd, SEG_TYPE_NAME_MIRROR),
+ seg->lv, seg->le, seg->len, 0,
seg->status, seg->stripe_size,
- log_lv, NULL,
- seg->area_count, seg->area_len,
+ log_lv,
+ seg->area_count, seg->area_len, 0,
seg->chunk_size, region_size,
seg->extents_copied, NULL))) {
- log_error("Couldn't allocate converted LV segment");
+ log_error("Couldn't allocate converted LV segment.");
return NULL;
}
@@ -2328,6 +3917,116 @@ static struct lv_segment *_convert_seg_to_mirror(struct lv_segment *seg,
/*
* Add new areas to mirrored segments
*/
+int lv_add_segmented_mirror_image(struct alloc_handle *ah,
+ struct logical_volume *lv, uint32_t le,
+ uint32_t region_size)
+{
+ char *image_name;
+ struct alloced_area *aa;
+ struct lv_segment *seg, *new_seg;
+ uint32_t current_le = le;
+ uint32_t s;
+ struct segment_type *segtype;
+ struct logical_volume *orig_lv, *copy_lv;
+
+ if (!lv_is_pvmove(lv)) {
+ log_error(INTERNAL_ERROR
+ "Non-pvmove LV, %s, passed as argument.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ if (seg_type(first_seg(lv), 0) != AREA_PV) {
+ log_error(INTERNAL_ERROR
+ "Bad segment type for first segment area.");
+ return 0;
+ }
+
+ /*
+ * If the allocator provided two or more PV allocations for any
+ * single segment of the original LV, that LV segment must be
+ * split up to match.
+ */
+ dm_list_iterate_items(aa, &ah->alloced_areas[0]) {
+ if (!(seg = find_seg_by_le(lv, current_le))) {
+ log_error("Failed to find segment for %s extent " FMTu32 ".",
+ display_lvname(lv), current_le);
+ return 0;
+ }
+
+ /* Allocator assures aa[0].len <= seg->area_len */
+ if (aa[0].len < seg->area_len) {
+ if (!lv_split_segment(lv, seg->le + aa[0].len)) {
+ log_error("Failed to split segment at %s "
+ "extent " FMTu32 ".",
+ display_lvname(lv), le);
+ return 0;
+ }
+ }
+ current_le += seg->area_len;
+ }
+
+ current_le = le;
+
+ if (!insert_layer_for_lv(lv->vg->cmd, lv, PVMOVE, "_mimage_0")) {
+ log_error("Failed to build pvmove LV-type mirror %s.",
+ display_lvname(lv));
+ return 0;
+ }
+ orig_lv = seg_lv(first_seg(lv), 0);
+ if (!(image_name = dm_pool_strdup(lv->vg->vgmem, orig_lv->name)))
+ return_0;
+ image_name[strlen(image_name) - 1] = '1';
+
+ if (!(copy_lv = lv_create_empty(image_name, NULL,
+ orig_lv->status,
+ ALLOC_INHERIT, lv->vg)))
+ return_0;
+
+ if (!lv_add_mirror_lvs(lv, &copy_lv, 1, MIRROR_IMAGE, region_size))
+ return_0;
+
+ if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ return_0;
+
+ dm_list_iterate_items(aa, &ah->alloced_areas[0]) {
+ if (!(seg = find_seg_by_le(orig_lv, current_le))) {
+ log_error("Failed to find segment for %s extent " FMTu32 ".",
+ display_lvname(lv), current_le);
+ return 0;
+ }
+
+ if (!(new_seg = alloc_lv_segment(segtype, copy_lv,
+ seg->le, seg->len, 0, PVMOVE, 0,
+ NULL, 1, seg->len, 0,
+ 0, 0, 0, NULL)))
+ return_0;
+
+ for (s = 0; s < ah->area_count; s++) {
+ if (!set_lv_segment_area_pv(new_seg, s,
+ aa[s].pv, aa[s].pe))
+ return_0;
+ }
+
+ dm_list_add(&copy_lv->segments, &new_seg->list);
+
+ current_le += seg->area_len;
+ copy_lv->le_count += seg->area_len;
+ }
+ lv->status |= MIRRORED;
+
+ /* FIXME: add log */
+
+ if (lv->vg->fid->fmt->ops->lv_setup &&
+ !lv->vg->fid->fmt->ops->lv_setup(lv->vg->fid, lv))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Add new areas to mirrored segments
+ */
int lv_add_mirror_areas(struct alloc_handle *ah,
struct logical_volume *lv, uint32_t le,
uint32_t region_size)
@@ -2339,16 +4038,16 @@ int lv_add_mirror_areas(struct alloc_handle *ah,
dm_list_iterate_items(aa, &ah->alloced_areas[0]) {
if (!(seg = find_seg_by_le(lv, current_le))) {
- log_error("Failed to find segment for %s extent %"
- PRIu32, lv->name, current_le);
+ log_error("Failed to find segment for %s extent " FMTu32 ".",
+ display_lvname(lv), current_le);
return 0;
}
/* Allocator assures aa[0].len <= seg->area_len */
if (aa[0].len < seg->area_len) {
if (!lv_split_segment(lv, seg->le + aa[0].len)) {
- log_error("Failed to split segment at %s "
- "extent %" PRIu32, lv->name, le);
+ log_error("Failed to split segment at %s extent " FMTu32 ".",
+ display_lvname(lv), le);
return 0;
}
}
@@ -2360,7 +4059,7 @@ int lv_add_mirror_areas(struct alloc_handle *ah,
old_area_count = seg->area_count;
new_area_count = old_area_count + ah->area_count;
- if (!_lv_segment_add_areas(lv, seg, new_area_count))
+ if (!add_lv_segment_areas(seg, new_area_count))
return_0;
for (s = 0; s < ah->area_count; s++) {
@@ -2389,36 +4088,31 @@ int lv_add_mirror_lvs(struct logical_volume *lv,
uint32_t num_extra_areas,
uint64_t status, uint32_t region_size)
{
- struct lv_segment *seg;
- uint32_t old_area_count, new_area_count;
uint32_t m;
+ uint32_t old_area_count, new_area_count;
struct segment_type *mirror_segtype;
-
- seg = first_seg(lv);
+ struct lv_segment *seg = first_seg(lv);
if (dm_list_size(&lv->segments) != 1 || seg_type(seg, 0) != AREA_LV) {
- log_error("Mirror layer must be inserted before adding mirrors");
+ log_error(INTERNAL_ERROR "Mirror layer must be inserted before adding mirrors.");
return 0;
}
- mirror_segtype = get_segtype_from_string(lv->vg->cmd, "mirror");
+ mirror_segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_MIRROR);
if (seg->segtype != mirror_segtype)
if (!(seg = _convert_seg_to_mirror(seg, region_size, NULL)))
return_0;
if (region_size && region_size != seg->region_size) {
- log_error("Conflicting region_size");
+ log_error("Conflicting region_size %u != %u.", region_size, seg->region_size);
return 0;
}
old_area_count = seg->area_count;
new_area_count = old_area_count + num_extra_areas;
- if (!_lv_segment_add_areas(lv, seg, new_area_count)) {
- log_error("Failed to allocate widened LV segment for %s.",
- lv->name);
- return 0;
- }
+ if (!add_lv_segment_areas(seg, new_area_count))
+ return_0;
for (m = 0; m < old_area_count; m++)
seg_lv(seg, m)->status |= status;
@@ -2460,8 +4154,7 @@ int lv_add_log_segment(struct alloc_handle *ah, uint32_t first_area,
{
return lv_add_segment(ah, ah->area_count + first_area, 1, log_lv,
- get_segtype_from_string(log_lv->vg->cmd,
- "striped"),
+ get_segtype_from_string(log_lv->vg->cmd, SEG_TYPE_NAME_STRIPED),
0, status, 0);
}
@@ -2474,8 +4167,7 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv,
uint32_t i;
uint64_t sub_lv_status = 0;
const char *layer_name;
- size_t len = strlen(lv->name) + 32;
- char img_name[len];
+ char img_name[NAME_LEN];
struct lv_segment *mapseg;
if (lv->le_count || !dm_list_empty(&lv->segments)) {
@@ -2498,10 +4190,11 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv,
/*
* First, create our top-level segment for our top-level LV
*/
- if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, lv->status,
- stripe_size, NULL, NULL,
- devices, 0, 0, region_size, 0, NULL))) {
- log_error("Failed to create mapping segment for %s", lv->name);
+ if (!(mapseg = alloc_lv_segment(segtype, lv, 0, 0, 0, lv->status,
+ stripe_size, NULL,
+ devices, 0, 0, 0, region_size, 0, NULL))) {
+ log_error("Failed to create mapping segment for %s.",
+ display_lvname(lv));
return 0;
}
@@ -2511,80 +4204,122 @@ static int _lv_insert_empty_sublvs(struct logical_volume *lv,
for (i = 0; i < devices; i++) {
/* Data LVs */
if (devices > 1) {
- if (dm_snprintf(img_name, len, "%s_%s_%u",
+ if (dm_snprintf(img_name, sizeof(img_name), "%s_%s_%u",
lv->name, layer_name, i) < 0)
- return_0;
+ goto_bad;
} else {
- if (dm_snprintf(img_name, len, "%s_%s",
+ if (dm_snprintf(img_name, sizeof(img_name), "%s_%s",
lv->name, layer_name) < 0)
- return_0;
+ goto_bad;
}
/* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
if (!(sub_lv = lv_create_empty(img_name, NULL,
- LVM_READ | LVM_WRITE,
- lv->alloc, lv->vg)))
+ LVM_READ | LVM_WRITE,
+ lv->alloc, lv->vg)))
return_0;
if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, sub_lv_status))
return_0;
/* Metadata LVs for raid */
- if (segtype_is_raid(segtype)) {
- if (dm_snprintf(img_name, len, "%s_rmeta_%u", lv->name, i) < 0)
+ if (segtype_is_raid_with_meta(segtype)) {
+ if (dm_snprintf(img_name, sizeof(img_name), "%s_rmeta_%u",
+ lv->name, i) < 0)
+ goto_bad;
+ /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
+ if (!(sub_lv = lv_create_empty(img_name, NULL,
+ LVM_READ | LVM_WRITE,
+ lv->alloc, lv->vg)))
return_0;
- } else
- continue;
-
- /* FIXME Should use ALLOC_INHERIT here and inherit from parent LV */
- if (!(sub_lv = lv_create_empty(img_name, NULL,
- LVM_READ | LVM_WRITE,
- lv->alloc, lv->vg)))
- return_0;
- if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, RAID_META))
+ if (!set_lv_segment_area_lv(mapseg, i, sub_lv, 0, RAID_META))
return_0;
+ }
}
dm_list_add(&lv->segments, &mapseg->list);
return 1;
+
+bad:
+ log_error("Failed to create sub LV name for LV %s.",
+ display_lvname(lv));
+
+ return 0;
+}
+
+/* Add all rmeta SubLVs for @seg to @lvs and return the allocated @lvl for the caller to free. */
+static struct lv_list *_raid_list_metalvs(struct lv_segment *seg, struct dm_list *lvs)
+{
+ uint32_t s;
+ struct lv_list *lvl;
+
+ dm_list_init(lvs);
+
+ if (!(lvl = dm_pool_alloc(seg->lv->vg->vgmem, sizeof(*lvl) * seg->area_count)))
+ return_NULL;
+
+ for (s = 0; s < seg->area_count; s++) {
+ lvl[s].lv = seg_metalv(seg, s);
+ dm_list_add(lvs, &lvl[s].list);
+ }
+
+ return lvl;
}
static int _lv_extend_layered_lv(struct alloc_handle *ah,
struct logical_volume *lv,
uint32_t extents, uint32_t first_area,
- uint32_t stripes, uint32_t stripe_size)
+ uint32_t mirrors, uint32_t stripes, uint32_t stripe_size)
{
+ struct logical_volume *sub_lvs[DEFAULT_RAID_MAX_IMAGES];
const struct segment_type *segtype;
- struct logical_volume *sub_lv, *meta_lv;
- struct lv_segment *seg;
+ struct logical_volume *meta_lv, *sub_lv;
+ struct lv_segment *seg = first_seg(lv);
+ struct lv_segment *sub_lv_seg;
uint32_t fa, s;
int clear_metadata = 0;
+ int integrity_sub_lvs = 0;
+ uint32_t area_multiple = 1;
- segtype = get_segtype_from_string(lv->vg->cmd, "striped");
+ if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ return_0;
/*
* The component devices of a "striped" LV all go in the same
* LV. However, RAID has an LV for each device - making the
* 'stripes' and 'stripe_size' parameters meaningless.
*/
- if (seg_is_raid(first_seg(lv))) {
+ if (seg_is_raid(seg)) {
stripes = 1;
stripe_size = 0;
+ if (seg_is_any_raid0(seg))
+ area_multiple = seg->area_count;
+ }
+
+ for (s = 0; s < seg->area_count; s++) {
+ sub_lv = seg_lv(seg, s);
+ sub_lv_seg = sub_lv ? first_seg(sub_lv) : NULL;
+
+ if (sub_lv_seg && seg_is_integrity(sub_lv_seg)) {
+ sub_lvs[s] = seg_lv(sub_lv_seg, 0);
+ integrity_sub_lvs = 1;
+ } else
+ sub_lvs[s] = sub_lv;
}
- seg = first_seg(lv);
for (fa = first_area, s = 0; s < seg->area_count; s++) {
- if (is_temporary_mirror_layer(seg_lv(seg, s))) {
- if (!_lv_extend_layered_lv(ah, seg_lv(seg, s), extents,
- fa, stripes, stripe_size))
+ sub_lv = sub_lvs[s];
+
+ if (is_temporary_mirror_layer(sub_lv)) {
+ if (!_lv_extend_layered_lv(ah, sub_lv, extents / area_multiple,
+ fa, mirrors, stripes, stripe_size))
return_0;
- fa += lv_mirror_count(seg_lv(seg, s));
+ fa += lv_mirror_count(sub_lv);
continue;
}
- sub_lv = seg_lv(seg, s);
if (!lv_add_segment(ah, fa, stripes, sub_lv, segtype,
stripe_size, sub_lv->status, 0)) {
log_error("Aborting. Failed to extend %s in %s.",
@@ -2592,8 +4327,10 @@ static int _lv_extend_layered_lv(struct alloc_handle *ah,
return 0;
}
+ last_seg(lv)->data_copies = mirrors;
+
/* Extend metadata LVs only on initial creation */
- if (seg_is_raid(seg) && !lv->le_count) {
+ if (seg_is_raid_with_meta(seg) && !lv->le_count) {
if (!seg->meta_areas) {
log_error("No meta_areas for RAID type");
return 0;
@@ -2608,115 +4345,224 @@ static int _lv_extend_layered_lv(struct alloc_handle *ah,
return 0;
}
lv_set_visible(meta_lv);
+
+ /*
+ * Copy any tags from the new LV to the metadata LV so
+ * it can be activated temporarily.
+ */
+ if (!str_list_dup(meta_lv->vg->vgmem, &meta_lv->tags, &lv->tags)) {
+ log_error("Failed to copy tags onto LV %s to clear metadata.", display_lvname(meta_lv));
+ return 0;
+ }
+
clear_metadata = 1;
}
fa += stripes;
}
- if (clear_metadata) {
- /*
- * We must clear the metadata areas upon creation.
- */
- if (!vg_write(lv->vg) || !vg_commit(lv->vg))
- return_0;
-
+ /*
+ * In raid+integrity, the lv_iorig raid images have been extended above.
+ * Now propagate the new lv_iorig sizes up to the integrity LV layers
+ * that are referencing the lv_iorig.
+ */
+ if (integrity_sub_lvs) {
for (s = 0; s < seg->area_count; s++) {
- meta_lv = seg_metalv(seg, s);
+ struct logical_volume *lv_image;
+ struct logical_volume *lv_iorig;
+ struct lv_segment *seg_image;
- if (test_mode()) {
- lv_set_hidden(meta_lv);
- continue;
- }
+ lv_image = seg_lv(seg, s);
+ seg_image = first_seg(lv_image);
- if (!activate_lv(meta_lv->vg->cmd, meta_lv)) {
- log_error("Failed to activate %s/%s for clearing",
- meta_lv->vg->name, meta_lv->name);
+ if (!seg_image->integrity_meta_dev) {
+ log_error("1");
return 0;
}
- log_verbose("Clearing metadata area of %s/%s",
- meta_lv->vg->name, meta_lv->name);
- /*
- * Rather than wiping meta_lv->size, we can simply
- * wipe '1' to remove the superblock of any previous
- * RAID devices. It is much quicker.
- */
- if (!set_lv(meta_lv->vg->cmd, meta_lv, 1, 0)) {
- log_error("Failed to zero %s/%s",
- meta_lv->vg->name, meta_lv->name);
+ if (!(lv_iorig = seg_lv(seg_image, 0))) {
+ log_error("2");
return 0;
}
- if (!deactivate_lv(meta_lv->vg->cmd, meta_lv)) {
- log_error("Failed to deactivate %s/%s",
- meta_lv->vg->name, meta_lv->name);
+ /* new size in sectors */
+ lv_image->size = lv_iorig->size;
+ seg_image->integrity_data_sectors = lv_iorig->size;
+ /* new size in extents */
+ lv_image->le_count = lv_iorig->le_count;
+ seg_image->len = lv_iorig->le_count;
+ seg_image->area_len = lv_iorig->le_count;
+ }
+ }
+
+ seg->len += extents;
+ if (seg_is_raid(seg))
+ seg->area_len = seg->len;
+ else
+ seg->area_len += extents / area_multiple;
+
+ if (!_setup_lv_size(lv, lv->le_count + extents))
+ return_0;
+
+ if (clear_metadata) {
+ struct volume_group *vg = lv->vg;
+
+ /*
+ * We must clear the metadata areas upon creation.
+ */
+
+ /*
+ * Declare the new RaidLV as temporary, to avoid visible SubLV
+ * failures on activation until after they have been wiped, so
+ * that we avoid activating crashed, potentially partially
+ * wiped RaidLVs.
+ */
+ lv->status |= LV_ACTIVATION_SKIP;
+
+ if (test_mode()) {
+ /* FIXME VG is not in a fully-consistent state here and should not be committed! */
+ if (!vg_write(vg) || !vg_commit(vg))
+ return_0;
+
+ log_verbose("Test mode: Skipping wiping of metadata areas.");
+ } else {
+ struct dm_list meta_lvs;
+ struct lv_list *lvl;
+
+ if (!(lvl = _raid_list_metalvs(seg, &meta_lvs)))
return 0;
+
+ /* Wipe lv list committing metadata */
+ if (!activate_and_wipe_lvlist(&meta_lvs, 1)) {
+ /* If we failed clearing rmeta SubLVs, try removing the new RaidLV */
+ if (!lv_remove(lv))
+ log_error("Failed to remove LV");
+ else if (!vg_write(vg) || !vg_commit(vg))
+ log_error("Failed to commit VG %s", vg->name);
+ return_0;
}
- lv_set_hidden(meta_lv);
+
+ dm_pool_free(vg->vgmem, lvl);
}
+
+ for (s = 0; s < seg->area_count; s++)
+ lv_set_hidden(seg_metalv(seg, s));
+
+ lv->status &= ~LV_ACTIVATION_SKIP;
}
- seg->area_len += extents;
- seg->len += extents;
- lv->le_count += extents;
- lv->size += (uint64_t) extents * lv->vg->extent_size;
+ return 1;
+}
- /*
- * The MD bitmap is limited to being able to track 2^21 regions.
- * The region_size must be adjusted to meet that criteria.
- */
- while (seg_is_raid(seg) && (seg->region_size < (lv->size / (1 << 21)))) {
- seg->region_size *= 2;
- log_very_verbose("Forced to adjust RAID region_size to %uS",
- seg->region_size);
+/* Check that either RAID images or metas are being allocated redundantly. */
+static int _lv_raid_redundant(struct logical_volume *lv,
+ struct dm_list *allocatable_pvs, int meta)
+{
+ uint32_t nlvs, s;
+ struct lv_segment *seg = first_seg(lv);
+ struct pv_list *pvl;
+
+ if (meta && !seg->meta_areas)
+ return 1;
+
+ dm_list_iterate_items(pvl, allocatable_pvs) {
+ nlvs = 0;
+
+ for (s = 0; s < seg->area_count; s++) {
+ struct logical_volume *slv = meta ? seg_metalv(seg, s) : seg_lv(seg, s);
+
+ if (slv && lv_is_on_pv(slv, pvl->pv) && nlvs++) {
+ log_error("LV %s using PV %s is not redundant.",
+ display_lvname(slv), dev_name(pvl->pv->dev));
+ return 0;
+ }
+ }
}
return 1;
}
+/* Check both RAID images and metas are being allocated redundantly. */
+static int _lv_raid_redundant_allocation(struct logical_volume *lv, struct dm_list *allocatable_pvs)
+{
+ return _lv_raid_redundant(lv, allocatable_pvs, 0) &&
+ _lv_raid_redundant(lv, allocatable_pvs, 1);
+}
+
/*
* Entry point for single-step LV allocation + extension.
+ * 'extents' is the number of logical extents to append to the LV, unless
+ * approx_alloc is set, in which case it is an upper limit for the total
+ * number of extents to use from the VG.
+ *
+ * FIXME The approx_alloc raid/stripe conversion should be performed
+ * before calling this function.
*/
int lv_extend(struct logical_volume *lv,
const struct segment_type *segtype,
uint32_t stripes, uint32_t stripe_size,
uint32_t mirrors, uint32_t region_size,
- uint32_t extents, const char *thin_pool_name,
- struct dm_list *allocatable_pvs, alloc_policy_t alloc)
+ uint32_t extents,
+ struct dm_list *allocatable_pvs, alloc_policy_t alloc,
+ int approx_alloc)
{
int r = 1;
int log_count = 0;
struct alloc_handle *ah;
uint32_t sub_lv_count;
+ uint32_t old_extents;
+ uint32_t new_extents; /* Total logical size after extension. */
+ uint64_t raid_size;
- log_very_verbose("Extending segment type, %s", segtype->name);
+ log_very_verbose("Adding segment of type %s to LV %s.", segtype->name, lv->name);
if (segtype_is_virtual(segtype))
- return lv_add_virtual_segment(lv, 0u, extents, segtype, thin_pool_name);
-
- if (!lv->le_count && segtype_is_thin_pool(segtype)) {
- /* Thin pool allocation treats its metadata device like a mirror log. */
- /* FIXME Allow pool and data on same device with NORMAL */
- /* FIXME Support striped metadata pool */
- log_count = 1;
- } else if (segtype_is_raid(segtype) && !lv->le_count)
- log_count = mirrors * stripes;
+ return lv_add_virtual_segment(lv, 0u, extents, segtype);
+
+ if (!lv->le_count) {
+ if (segtype_is_pool(segtype))
+ /*
+ * Pool allocations treat the metadata device like a mirror log.
+ */
+ /* FIXME Support striped metadata pool */
+ log_count = 1;
+ else if (segtype_is_raid0_meta(segtype))
+ /* Extend raid0 metadata LVs too */
+ log_count = stripes;
+ else if (segtype_is_raid_with_meta(segtype))
+ log_count = mirrors * stripes;
+ }
/* FIXME log_count should be 1 for mirrors */
+ if (segtype_is_raid(segtype) && !segtype_is_any_raid0(segtype)) {
+ raid_size = ((uint64_t) lv->le_count + extents) * lv->vg->extent_size;
+
+ /*
+ * The MD bitmap is limited to being able to track 2^21 regions.
+ * The region_size must be adjusted to meet that criteria
+ * unless raid0/raid0_meta, which doesn't have a bitmap.
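+ *
+ * For example, a 1 TiB RaidLV is 2^31 sectors, so region_size
+ * must be at least 2^31 / 2^21 = 1024 sectors (512 KiB).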
+ */
+
+ region_size = raid_ensure_min_region_size(lv, raid_size, region_size);
+
+ if (first_seg(lv))
+ first_seg(lv)->region_size = region_size;
+
+ }
+
if (!(ah = allocate_extents(lv->vg, lv, segtype, stripes, mirrors,
log_count, region_size, extents,
- allocatable_pvs, alloc, NULL)))
+ allocatable_pvs, alloc, approx_alloc, NULL)))
return_0;
- if (segtype_is_thin_pool(segtype)) {
- if (!lv->le_count) {
- if (!(r = extend_pool(lv, segtype, ah, stripes, stripe_size)))
- stack;
- } else if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
- stripes, stripe_size)))
+ new_extents = ah->new_extents;
+ if (segtype_is_raid_with_meta(segtype))
+ new_extents -= ah->log_len * ah->area_multiple;
+
+ if (segtype_is_pool(segtype)) {
+ if (!(r = create_pool(lv, segtype, ah, stripes, stripe_size)))
stack;
- } else if (!segtype_is_mirrored(segtype) && !segtype_is_raid(segtype)) {
+ } else if (!segtype_is_mirror(segtype) && !segtype_is_raid(segtype)) {
if (!(r = lv_add_segment(ah, 0, ah->area_count, lv, segtype,
stripe_size, 0u, 0)))
stack;
@@ -2732,6 +4578,8 @@ int lv_extend(struct logical_volume *lv,
else
sub_lv_count = mirrors;
+ old_extents = lv->le_count;
+
if (!lv->le_count &&
!(r = _lv_insert_empty_sublvs(lv, segtype, stripe_size,
region_size, sub_lv_count))) {
@@ -2739,48 +4587,64 @@ int lv_extend(struct logical_volume *lv,
goto out;
}
- if (!(r = _lv_extend_layered_lv(ah, lv, extents, 0,
- stripes, stripe_size)))
+ if (!(r = _lv_extend_layered_lv(ah, lv, new_extents - lv->le_count, 0,
+ mirrors, stripes, stripe_size)))
goto_out;
+ if (segtype_is_raid(segtype) &&
+ alloc != ALLOC_ANYWHERE &&
+ !(r = _lv_raid_redundant_allocation(lv, allocatable_pvs))) {
+ log_error("Insufficient suitable allocatable extents for logical volume %s", display_lvname(lv));
+ if (!lv_remove(lv) || !vg_write(lv->vg) || !vg_commit(lv->vg))
+ return_0;
+ goto out;
+ }
+
+ if (lv_raid_has_integrity(lv)) {
+ if (!lv_extend_integrity_in_raid(lv, allocatable_pvs)) {
+ r = 0;
+ goto_out;
+ }
+ }
+
/*
* If we are expanding an existing mirror, we can skip the
* resync of the extension if the LV is currently in-sync
* and the LV has the LV_NOTSYNCED flag set.
*/
- if ((lv->le_count != extents) &&
+ if (old_extents &&
segtype_is_mirrored(segtype) &&
- (lv->status & LV_NOTSYNCED)) {
- percent_t sync_percent = PERCENT_INVALID;
+ (lv_is_not_synced(lv))) {
+ dm_percent_t sync_percent = DM_PERCENT_INVALID;
if (!lv_is_active(lv)) {
- log_error("%s/%s is not active."
- " Unable to get sync percent.",
- lv->vg->name, lv->name);
+ log_error("Unable to read sync percent while LV %s "
+ "is not locally active.", display_lvname(lv));
/* FIXME Support --force */
if (yes_no_prompt("Do full resync of extended "
- "portion of %s/%s? [y/n]: ",
- lv->vg->name, lv->name) == 'y')
- goto out;
- r = 0;
+ "portion of %s? [y/n]: ",
+ display_lvname(lv)) == 'n') {
+ r = 0;
+ goto_out;
+ }
goto out;
}
if (!(r = lv_mirror_percent(lv->vg->cmd, lv, 0,
&sync_percent, NULL))) {
- log_error("Failed to get sync percent for %s/%s",
- lv->vg->name, lv->name);
+ log_error("Failed to get sync percent for %s.",
+ display_lvname(lv));
goto out;
- } else if (sync_percent == PERCENT_100) {
+ } else if (lv_is_not_synced(lv) ||
+ sync_percent == DM_PERCENT_100) {
log_verbose("Skipping initial resync for "
- "extended portion of %s/%s",
- lv->vg->name, lv->name);
+ "extended portion of %s",
+ display_lvname(lv));
init_mirror_in_sync(1);
lv->status |= LV_NOTSYNCED;
} else {
- log_error("%s/%s cannot be extended while"
- " it is recovering.",
- lv->vg->name, lv->name);
+ log_error("LV %s cannot be extended while it "
+ "is recovering.", display_lvname(lv));
r = 0;
goto out;
}
@@ -2795,19 +4659,21 @@ out:
/*
* Minimal LV renaming function.
* Metadata transaction should be made by caller.
- * Assumes new_name is allocated from cmd->mem pool.
+ * Assumes new_name is allocated from lv->vg->vgmem pool.
*/
static int _rename_single_lv(struct logical_volume *lv, char *new_name)
{
struct volume_group *vg = lv->vg;
+ int historical;
- if (find_lv_in_vg(vg, new_name)) {
- log_error("Logical volume \"%s\" already exists in "
- "volume group \"%s\"", new_name, vg->name);
+ if (lv_name_is_used_in_vg(vg, new_name, &historical)) {
+ log_error("%sLogical Volume \"%s\" already exists in "
+ "volume group \"%s\"", historical ? "historical " : "",
+ new_name, vg->name);
return 0;
}
- if (lv->status & LOCKED) {
+ if (lv_is_locked(lv)) {
log_error("Cannot rename locked LV %s", lv->name);
return 0;
}
@@ -2821,8 +4687,7 @@ static int _rename_single_lv(struct logical_volume *lv, char *new_name)
* Rename sub LV.
* 'lv_name_old' and 'lv_name_new' are old and new names of the main LV.
*/
-static int _rename_sub_lv(struct cmd_context *cmd,
- struct logical_volume *lv,
+static int _rename_sub_lv(struct logical_volume *lv,
const char *lv_name_old, const char *lv_name_new)
{
const char *suffix;
@@ -2849,7 +4714,7 @@ static int _rename_sub_lv(struct cmd_context *cmd,
* a new name for main LV is "lvol1"
*/
len = strlen(lv_name_new) + strlen(suffix) + 1;
- new_name = dm_pool_alloc(cmd->mem, len);
+ new_name = dm_pool_alloc(lv->vg->vgmem, len);
if (!new_name) {
log_error("Failed to allocate space for new name");
return 0;
@@ -2859,73 +4724,100 @@ static int _rename_sub_lv(struct cmd_context *cmd,
return 0;
}
+ if (!validate_name(new_name)) {
+ log_error("Cannot rename \"%s\". New logical volume name \"%s\" is invalid.",
+ lv->name, new_name);
+ return 0;
+ }
+
/* Rename it */
return _rename_single_lv(lv, new_name);
}
/* Callback for for_each_sub_lv */
-static int _rename_cb(struct cmd_context *cmd, struct logical_volume *lv,
- void *data)
+static int _rename_cb(struct logical_volume *lv, void *data)
{
struct lv_names *lv_names = (struct lv_names *) data;
- return _rename_sub_lv(cmd, lv, lv_names->old, lv_names->new);
+ return _rename_sub_lv(lv, lv_names->old, lv_names->new);
+}
+
+static int _rename_skip_pools_externals_cb(struct logical_volume *lv, void *data)
+{
+ if (lv_is_pool(lv) ||
+ lv_is_vdo_pool(lv) ||
+ lv_is_cache_vol(lv) ||
+ lv_is_external_origin(lv))
+ return -1; /* and skip subLVs */
+
+ return _rename_cb(lv, data);
}
/*
* Loop down sub LVs and call fn for each.
* fn is responsible to log necessary information on failure.
+ * Return value '0' stops whole traversal.
+ * Return value '-1' stops subtree traversal.
*/
-int for_each_sub_lv(struct cmd_context *cmd, struct logical_volume *lv,
- int (*fn)(struct cmd_context *cmd,
- struct logical_volume *lv, void *data),
- void *data)
+static int _for_each_sub_lv(struct logical_volume *lv, int level,
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data)
{
struct logical_volume *org;
struct lv_segment *seg;
uint32_t s;
+ int r;
- if (lv_is_cow(lv) && lv_is_virtual_origin(org = origin_from_cow(lv))) {
- if (!fn(cmd, org, data))
+ if (!lv)
+ return 1;
+
+ if (level++) {
+ if (!(r = fn(lv, data)))
return_0;
- if (!for_each_sub_lv(cmd, org, fn, data))
+ if (r == -1)
+ return 1;
+ /* Only r != -1 continues with for_each_sub_lv()... */
+ }
+
+ if (lv_is_cow(lv) && lv_is_virtual_origin(org = origin_from_cow(lv))) {
+ if (!_for_each_sub_lv(org, level, fn, data))
return_0;
}
dm_list_iterate_items(seg, &lv->segments) {
- if (seg->log_lv) {
- if (!fn(cmd, seg->log_lv, data))
- return_0;
- if (!for_each_sub_lv(cmd, seg->log_lv, fn, data))
- return_0;
- }
+ if (!_for_each_sub_lv(seg->external_lv, level, fn, data))
+ return_0;
- if (seg->metadata_lv) {
- if (!fn(cmd, seg->metadata_lv, data))
- return_0;
- if (!for_each_sub_lv(cmd, seg->metadata_lv, fn, data))
- return_0;
- }
+ if (!_for_each_sub_lv(seg->log_lv, level, fn, data))
+ return_0;
+
+ if (!_for_each_sub_lv(seg->metadata_lv, level, fn, data))
+ return_0;
+
+ if (!_for_each_sub_lv(seg->pool_lv, level, fn, data))
+ return_0;
+
+ if (!_for_each_sub_lv(seg->writecache, level, fn, data))
+ return_0;
+
+ if (!_for_each_sub_lv(seg->integrity_meta_dev, level, fn, data))
+ return_0;
for (s = 0; s < seg->area_count; s++) {
if (seg_type(seg, s) != AREA_LV)
continue;
- if (!fn(cmd, seg_lv(seg, s), data))
- return_0;
- if (!for_each_sub_lv(cmd, seg_lv(seg, s), fn, data))
+ if (!_for_each_sub_lv(seg_lv(seg, s), level, fn, data))
return_0;
}
- if (!seg_is_raid(seg))
+ if (!seg_is_raid_with_meta(seg))
continue;
/* RAID has meta_areas */
for (s = 0; s < seg->area_count; s++) {
- if (seg_metatype(seg, s) != AREA_LV)
+ if ((seg_metatype(seg, s) != AREA_LV) || !seg_metalv(seg, s))
continue;
- if (!fn(cmd, seg_metalv(seg, s), data))
- return_0;
- if (!for_each_sub_lv(cmd, seg_metalv(seg, s), fn, data))
+ if (!_for_each_sub_lv(seg_metalv(seg, s), level, fn, data))
return_0;
}
}
@@ -2933,6 +4825,12 @@ int for_each_sub_lv(struct cmd_context *cmd, struct logical_volume *lv,
return 1;
}
+int for_each_sub_lv(struct logical_volume *lv,
+ int (*fn)(struct logical_volume *lv, void *data),
+ void *data)
+{
+ return _for_each_sub_lv(lv, 0, fn, data);
+}
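The return-value contract above invites a short sketch: a hypothetical callback that counts sub LVs while pruning pool stacks, relying only on the semantics documented here (0 aborts the whole walk, -1 prunes the current subtree, 1 continues):

static int _count_sub_lvs_cb(struct logical_volume *lv, void *data)
{
        uint32_t *count = data;

        if (lv_is_pool(lv))
                return -1;      /* skip this subtree, keep walking */

        (*count)++;

        return 1;               /* returning 0 would abort the traversal */
}

/* Usage: uint32_t n = 0; (void) for_each_sub_lv(lv, _count_sub_lvs_cb, &n); */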
/*
* Core of LV renaming routine.
@@ -2942,90 +4840,2318 @@ int lv_rename_update(struct cmd_context *cmd, struct logical_volume *lv,
const char *new_name, int update_mda)
{
struct volume_group *vg = lv->vg;
- struct lv_names lv_names;
- DM_LIST_INIT(lvs_changed);
- struct lv_list lvl, lvl2, *lvlp;
- int r = 0;
+ struct lv_names lv_names = { .old = lv->name };
+ int old_lv_is_historical = lv_is_historical(lv);
+ int historical;
+ unsigned attrs;
+ const struct segment_type *segtype;
- /* rename is not allowed on sub LVs */
- if (!lv_is_visible(lv)) {
+ /*
+ * rename is not allowed on sub LVs except for pools
+ * (a thin pool is 'visible', but a cache pool may not be)
+ */
+ if (!lv_is_pool(lv) &&
+ !lv_is_vdo_pool(lv) &&
+ !lv_is_visible(lv)) {
log_error("Cannot rename internal LV \"%s\".", lv->name);
return 0;
}
- if (find_lv_in_vg(vg, new_name)) {
- log_error("Logical volume \"%s\" already exists in "
- "volume group \"%s\"", new_name, vg->name);
+ if (lv_name_is_used_in_vg(vg, new_name, &historical)) {
+ log_error("%sLogical Volume \"%s\" already exists in "
+ "volume group \"%s\"", historical ? "Historical " : "",
+ new_name, vg->name);
return 0;
}
- if (lv->status & LOCKED) {
+ if (lv_is_locked(lv)) {
log_error("Cannot rename locked LV %s", lv->name);
return 0;
}
- if (update_mda && !archive(vg))
- return 0;
+ if (lv_is_vdo_pool(lv) && lv_is_active(lv_lock_holder(lv))) {
+ segtype = first_seg(lv)->segtype;
+ if (!segtype->ops->target_present ||
+ !segtype->ops->target_present(lv->vg->cmd, NULL, &attrs) ||
+ !(attrs & VDO_FEATURE_ONLINE_RENAME)) {
+ log_error("Cannot rename active VDOPOOL volume %s, "
+ "VDO target feature support is missing.",
+ display_lvname(lv));
+ return 0;
+ }
+ }
+
+ if (old_lv_is_historical) {
+ /*
+ * Historical LVs have neither sub LVs nor any
+ * devices to reload, so just update metadata.
+ */
+ lv->this_glv->historical->name = lv->name = new_name;
+ if (update_mda &&
+ (!vg_write(vg) || !vg_commit(vg)))
+ return_0;
+ } else {
+ if (!(lv_names.new = dm_pool_strdup(cmd->mem, new_name))) {
+ log_error("Failed to allocate space for new name.");
+ return 0;
+ }
+
+ /* rename sub LVs */
+ if (!for_each_sub_lv(lv, _rename_skip_pools_externals_cb, (void *) &lv_names))
+ return_0;
+
+ /* rename main LV */
+ lv->name = lv_names.new;
+
+ if (lv_is_cow(lv))
+ lv = origin_from_cow(lv);
+
+ if (update_mda && !lv_update_and_reload((struct logical_volume *)lv_lock_holder(lv)))
+ return_0;
+ }
+
+ return 1;
+}
+
+/*
+ * Rename LV to a new name; if the name is occupied, an lvol%d name is generated.
+ * VG must be locked by caller.
+ */
+int lv_uniq_rename_update(struct cmd_context *cmd, struct logical_volume *lv,
+ const char *new_name, int update_mda)
+{
+ char uniq_name[NAME_LEN];
- /* rename sub LVs */
- lv_names.old = lv->name;
- lv_names.new = new_name;
- if (!for_each_sub_lv(cmd, lv, _rename_cb, (void *) &lv_names))
+ /* If the name is in use, generate new lvol%d */
+ if (lv_name_is_used_in_vg(lv->vg, new_name, NULL)) {
+ if (!generate_lv_name(lv->vg, "lvol%d", uniq_name, sizeof(uniq_name))) {
+ log_error("Failed to generate unique name for unused logical volume.");
+ return 0;
+ }
+ new_name = uniq_name;
+ }
+
+ if (!lv_rename_update(cmd, lv, new_name, 0))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Core of LV renaming routine.
+ * VG must be locked by caller.
+ */
+int lv_rename(struct cmd_context *cmd, struct logical_volume *lv,
+ const char *new_name)
+{
+ return lv_rename_update(cmd, lv, new_name, 1);
+}
+
+/*
+ * Core lv resize code
+ */
+
+#define SIZE_BUF 128
+
+/* TODO: unify stripe size validation across source code */
+static int _validate_stripesize(const struct volume_group *vg,
+ struct lvresize_params *lp)
+{
+ if (lp->stripe_size > (STRIPE_SIZE_LIMIT * 2)) {
+ log_error("Stripe size cannot be larger than %s.",
+ display_size(vg->cmd, (uint64_t) STRIPE_SIZE_LIMIT));
return 0;
+ }
- /* rename main LV */
- if (!(lv->name = dm_pool_strdup(cmd->mem, new_name))) {
- log_error("Failed to allocate space for new name");
+ if (lp->stripe_size > vg->extent_size) {
+ log_print_unless_silent("Reducing stripe size %s to maximum, "
+ "physical extent size %s.",
+ display_size(vg->cmd, lp->stripe_size),
+ display_size(vg->cmd, vg->extent_size));
+ lp->stripe_size = vg->extent_size;
+ }
+
+ if (!is_power_of_2(lp->stripe_size)) {
+ log_error("Stripe size must be power of 2.");
return 0;
}
- lvl.lv = lv;
- dm_list_add(&lvs_changed, &lvl.list);
+ return 1;
+}
+
+static int _lv_reduce_confirmation(struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ const struct volume_group *vg = lv->vg;
+ struct lvinfo info = { 0 };
- /* rename active virtual origin too */
- if (lv_is_cow(lv) && lv_is_virtual_origin(lvl2.lv = origin_from_cow(lv)))
- dm_list_add_h(&lvs_changed, &lvl2.list);
+ if (!lv_info(vg->cmd, lv, 0, &info, 1, 0) && driver_version(NULL, 0)) {
+ log_error("lv_info failed: aborting.");
+ return 0;
+ }
- if (!update_mda)
+ if (!info.exists)
return 1;
- log_verbose("Writing out updated volume group");
- if (!vg_write(vg))
+ log_warn("WARNING: Reducing active%s logical volume to %s.",
+ info.open_count ? " and open" : "",
+ display_size(vg->cmd, (uint64_t) lp->extents * vg->extent_size));
+
+ log_warn("THIS MAY DESTROY YOUR DATA (filesystem etc.)");
+
+ if (!lp->force && !lp->yes) {
+ if (yes_no_prompt("Do you really want to reduce %s? [y/n]: ",
+ display_lvname(lv)) == 'n') {
+ log_error("Logical volume %s NOT reduced.",
+ display_lvname(lv));
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+enum fsadm_cmd_e { FSADM_CMD_CHECK, FSADM_CMD_RESIZE };
+
+#define FSADM_CMD_MAX_ARGS 10
+#define FSADM_CHECK_FAILS_FOR_MOUNTED 3 /* shell exit status code */
+
+/*
+ * fsadm --dry-run --verbose --force check lv_path
+ * fsadm --dry-run --verbose --force resize lv_path size
+ */
+static int _fsadm_cmd(enum fsadm_cmd_e fcmd,
+ struct logical_volume *lv,
+ uint32_t extents,
+ int yes,
+ int force,
+ int *status)
+{
+ struct volume_group *vg = lv->vg;
+ struct cmd_context *cmd = vg->cmd;
+ char lv_path[PATH_MAX];
+ char size_buf[SIZE_BUF];
+ unsigned i = 1;
+ const char *argv[FSADM_CMD_MAX_ARGS] = {
+ find_config_tree_str(cmd, global_fsadm_executable_CFG, NULL)
+ };
+
+ if (!argv[0] || !*argv[0]) {
+ log_error("Cannot use misconfigured fsadm executable to resize %s.", display_lvname(lv));
return 0;
+ }
- if (!suspend_lvs(cmd, &lvs_changed, vg))
- goto_out;
+ if (test_mode())
+ argv[i++] = "--dry-run";
+
+ if (verbose_level() >= _LOG_NOTICE)
+ argv[i++] = "--verbose";
+
+ if (yes)
+ argv[i++] = "--yes";
+
+ if (force)
+ argv[i++] = "--force";
+
+ argv[i++] = (fcmd == FSADM_CMD_RESIZE) ? "resize" : "check";
+
+ if (status)
+ *status = -1;
+
+ if (dm_snprintf(lv_path, sizeof(lv_path), "%s%s/%s", cmd->dev_dir,
+ vg->name, lv->name) < 0) {
+ log_error("Couldn't create LV path for %s.", display_lvname(lv));
+ return 0;
+ }
+
+ argv[i++] = lv_path;
+
+ if (fcmd == FSADM_CMD_RESIZE) {
+ if (dm_snprintf(size_buf, sizeof(size_buf), FMTu64 "K",
+ (uint64_t) extents * (vg->extent_size / 2)) < 0) {
+ log_error("Couldn't generate new LV size string.");
+ return 0;
+ }
+
+ argv[i++] = size_buf;
+ }
+
+ return exec_cmd(cmd, argv, status, 1);
+}
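For example (illustrative values): with a 4 MiB extent size (8192 sectors), resizing to 256 extents builds size_buf as "1048576K" — extents * (extent_size / 2) converts 512-byte sectors to KiB — so the executed command is of the form 'fsadm --yes resize /dev/vg0/lvol0 1048576K'.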
+
+static uint32_t _adjust_amount(dm_percent_t percent, int policy_threshold, int policy_amount)
+{
+ if (!((50 * DM_PERCENT_1) < percent && percent <= DM_PERCENT_100) ||
+ percent <= (policy_threshold * DM_PERCENT_1))
+ return 0; /* nothing to do */
+ /*
+ * Evaluate the minimal amount needed to get below the threshold.
+ * Keep using DM_PERCENT_1 units for better precision.
+ * Round up to the needed percentage value.
+ */
+ policy_threshold *= (DM_PERCENT_1 / 100);
+ percent = (percent + policy_threshold - 1) / policy_threshold - 100;
+
+ /* Use it if current policy amount is smaller */
+ return (policy_amount < percent) ? (uint32_t) percent : (uint32_t) policy_amount;
+}
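Worked example, assuming libdevmapper's DM_PERCENT_1 = 100000: with usage at 75% (percent = 7500000), policy_threshold = 70 and policy_amount = 20, the scaled threshold is 70000 and (7500000 + 69999) / 70000 - 100 = 8, i.e. at least 8% growth is needed to drop usage below 70%; since the configured 20% is larger, 20 is returned.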
+
+/* "amount" here is percent */
+int lv_extend_policy_calculate_percent(struct logical_volume *lv,
+ uint32_t *amount, uint32_t *meta_amount)
+{
+ struct cmd_context *cmd = lv->vg->cmd;
+ dm_percent_t percent;
+ dm_percent_t min_threshold;
+ int policy_threshold, policy_amount;
+ struct lv_status_thin_pool *thin_pool_status;
+
+ *amount = *meta_amount = 0;
+
+ if (lv_is_thin_pool(lv)) {
+ policy_threshold =
+ find_config_tree_int(cmd, activation_thin_pool_autoextend_threshold_CFG,
+ lv_config_profile(lv));
+ policy_amount =
+ find_config_tree_int(cmd, activation_thin_pool_autoextend_percent_CFG,
+ lv_config_profile(lv));
+ if (policy_threshold < 50) {
+ log_warn("WARNING: Thin pool autoextend threshold %d%% is set below "
+ "minimum supported 50%%.", policy_threshold);
+ policy_threshold = 50;
+ }
+ } else if (lv_is_vdo_pool(lv)) {
+ policy_threshold =
+ find_config_tree_int(cmd, activation_vdo_pool_autoextend_threshold_CFG,
+ lv_config_profile(lv));
+ policy_amount =
+ find_config_tree_int(cmd, activation_vdo_pool_autoextend_percent_CFG,
+ lv_config_profile(lv));
+ if (policy_threshold < 50) {
+ log_warn("WARNING: VDO pool autoextend threshold %d%% is set below "
+ "minimum supported 50%%.", policy_threshold);
+ policy_threshold = 50;
+ }
+ } else {
+ policy_threshold =
+ find_config_tree_int(cmd, activation_snapshot_autoextend_threshold_CFG, NULL);
+ policy_amount =
+ find_config_tree_int(cmd, activation_snapshot_autoextend_percent_CFG, NULL);
+ if (policy_threshold < 50) {
+ log_warn("WARNING: Snapshot autoextend threshold %d%% is set bellow "
+ "minimal supported value 50%%.", policy_threshold);
+ policy_threshold = 50;
+ }
+ }
+
+ if (policy_threshold >= 100) {
+ log_debug("lvextend policy disabled by threshold 100");
+ return 1; /* nothing to do */
+ }
+
+ if (!policy_amount) {
+ log_error("Can't extend %s with %s autoextend percent set to 0%%.",
+ display_lvname(lv), lvseg_name(first_seg(lv)));
+ return 0;
+ }
+
+ if (lv_is_thin_pool(lv)) {
+ if (!lv_thin_pool_status(lv, 0, &thin_pool_status))
+ goto_bad;
+
+ /* Resize below the minimal usable value */
+ min_threshold = thin_pool_metadata_min_threshold(first_seg(lv)) / DM_PERCENT_1;
+ *meta_amount = _adjust_amount(thin_pool_status->metadata_usage,
+ (min_threshold < policy_threshold) ?
+ min_threshold : policy_threshold, policy_amount);
+ if (*meta_amount)
+ /* Compensate for possible extra space consumption by the kernel on resize */
+ (*meta_amount)++;
+ percent = thin_pool_status->data_usage;
+ dm_pool_destroy(thin_pool_status->mem);
+ } else if (lv_is_vdo_pool(lv)) {
+ if (!lv_vdo_pool_percent(lv, &percent))
+ goto_bad;
+ } else if (!lv_snapshot_percent(lv, &percent))
+ goto_bad;
+ else if (!lv_is_active(lv)) {
+ bad:
+ log_error("Can't read state of locally inactive LV %s.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ *amount = _adjust_amount(percent, policy_threshold, policy_amount);
+
+ log_debug("lvextend policy calculated percentages main %u meta %u from threshold %d percent %d",
+ *amount, *meta_amount, policy_threshold, policy_amount);
+ return 1;
+}
+
+static uint32_t _lvseg_get_stripes(struct lv_segment *seg, uint32_t *stripesize)
+{
+ uint32_t s;
+ struct lv_segment *seg_get, *seg_image, *seg_iorig;
+ struct logical_volume *lv_image, *lv_iorig;
+
+ /* If segment mirrored, check if images are striped */
+ if (seg_is_mirrored(seg)) {
+ for (s = 0; s < seg->area_count; s++) {
+ if (seg_type(seg, s) != AREA_LV)
+ continue;
- if (!(r = vg_commit(vg)))
+ lv_image = seg_lv(seg, s);
+ seg_image = first_seg(lv_image);
+ seg_get = NULL;
+
+ if (seg_is_integrity(seg_image)) {
+ /* Get stripe values from the iorig layer. */
+ lv_iorig = seg_lv(seg_image, 0);
+ seg_iorig = first_seg(lv_iorig);
+ seg_get = seg_iorig;
+ } else {
+ /* Get stripe values from the image layer. */
+ seg_get = seg_image;
+ }
+
+ if (seg_get && seg_is_striped(seg_get)) {
+ seg = seg_get;
+ break;
+ }
+ }
+ }
+
+ if (seg_is_striped(seg)) {
+ *stripesize = seg->stripe_size;
+ return seg->area_count;
+ }
+
+ if (seg_is_raid(seg)) {
+ *stripesize = seg->stripe_size;
+ return _raid_stripes_count(seg);
+ }
+
+ *stripesize = 0;
+ return 0;
+}
+
+static int _lvresize_adjust_size(struct volume_group *vg,
+ uint64_t size, sign_t sign,
+ uint32_t *extents)
+{
+ uint32_t extent_size = vg->extent_size;
+ uint32_t adjust;
+
+ /*
+ * First adjust to an exact multiple of extent size.
+ * When changing to an absolute size, we round that size up.
+ * When extending by a relative amount we round that amount up.
+ * When reducing by a relative amount we remove at most that amount.
+ */
+ if ((adjust = (size % extent_size))) {
+ if (sign != SIGN_MINUS) /* not reducing */
+ size += extent_size;
+
+ size -= adjust;
+ log_print_unless_silent("Rounding size to boundary between physical extents: %s.",
+ display_size(vg->cmd, size));
+ }
+
+ if (!(*extents = extents_from_size(vg->cmd, size, extent_size)))
+ return_0;
+
+ return 1;
+}
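For example, with a 4 MiB extent size: an absolute request of 10 MiB rounds up to 12 MiB, '-L +10M' likewise rounds the increment up to 12 MiB, while '-L -10M' removes only 8 MiB, because a reduction amount is rounded down.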
+
+/*
+ * If percent options were used, convert them into actual numbers of extents.
+ * FIXME: fix cases where lp->extents is initially used as a percentage,
+ * and is then rewritten to be a number of extents (simply save the percent
+ * value elsewhere.)
+ */
+static int _lvresize_extents_from_percent(const struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ const struct volume_group *vg = lv->vg;
+ uint32_t pv_extent_count;
+ uint32_t old_extents = lp->extents;
+
+ log_debug("lvresize_extents_from_percent type %d extents %u percent_value %u",
+ lp->percent, lp->extents, lp->percent_value);
+
+ switch (lp->percent) {
+ case PERCENT_VG:
+ /* rewrites lp->extents from percentage to extents */
+ lp->extents = percent_of_extents(lp->extents, vg->extent_count,
+ (lp->sign != SIGN_MINUS));
+ if ((lp->sign == SIGN_NONE) && (lp->extents > (lv->le_count + vg->free_count))) {
+ lp->extents = lv->le_count + vg->free_count;
+ log_print_unless_silent("Reducing %u%%VG to remaining free space %s in VG.",
+ old_extents,
+ display_size(vg->cmd, (uint64_t)vg->extent_size * lp->extents));
+ }
+ break;
+ case PERCENT_FREE:
+ /* rewrites lp->extents from percentage to extents */
+ lp->extents = percent_of_extents(lp->extents, vg->free_count,
+ (lp->sign != SIGN_MINUS));
+ break;
+ case PERCENT_LV:
+ if (lp->extents) {
+ /* rewrites lp->extents from percentage to extents */
+ lp->extents = percent_of_extents(lp->extents, lv->le_count,
+ (lp->sign != SIGN_MINUS));
+ } else if (lp->percent_value) {
+ old_extents = lp->percent_value;
+ lp->extents = percent_of_extents(lp->percent_value, lv->le_count,
+ (lp->sign != SIGN_MINUS));
+ }
+ break;
+ case PERCENT_PVS:
+ if (lp->pvh != &vg->pvs) {
+ pv_extent_count = pv_list_extents_free(lp->pvh);
+ if (lp->extents) {
+ /* rewrites lp->extents from percentage to extents */
+ lp->extents = percent_of_extents(lp->extents, pv_extent_count,
+ (lp->sign != SIGN_MINUS));
+ } else if (lp->percent_value) {
+ /* lvresize has PV args and no size or extents options */
+ old_extents = lp->percent_value;
+ lp->extents = percent_of_extents(lp->percent_value, pv_extent_count,
+ (lp->sign != SIGN_MINUS));
+ }
+ } else {
+ if (lp->extents) {
+ /* rewrites lp->extents from percentage to extents */
+ lp->extents = percent_of_extents(lp->extents, vg->extent_count,
+ (lp->sign != SIGN_MINUS));
+ } else if (lp->percent_value) {
+ old_extents = lp->percent_value;
+ lp->extents = percent_of_extents(lp->percent_value, vg->extent_count,
+ (lp->sign != SIGN_MINUS));
+ }
+ }
+ break;
+ case PERCENT_ORIGIN:
+ if (!lv_is_cow(lv)) {
+ log_error("Specified LV does not have an origin LV.");
+ return 0;
+ }
+ lp->extents = percent_of_extents(lp->extents, origin_from_cow(lv)->le_count,
+ (lp->sign != SIGN_MINUS));
+ break;
+ case PERCENT_NONE:
+ return 1; /* Nothing to do */
+ default:
+ log_error(INTERNAL_ERROR "Unsupported percent type %u.", lp->percent);
+ return 0;
+ }
+
+ if (lp->percent == PERCENT_VG || lp->percent == PERCENT_FREE || lp->percent == PERCENT_PVS)
+ lp->extents_are_pes = 1;
+
+ if (lp->sign == SIGN_NONE && (lp->percent == PERCENT_VG || lp->percent == PERCENT_FREE || lp->percent == PERCENT_PVS))
+ lp->approx_alloc = 1;
+
+ if (lp->sign == SIGN_PLUS && lp->percent == PERCENT_FREE)
+ lp->approx_alloc = 1;
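+
+ /* approx_alloc means the allocation may legitimately complete with
+ * fewer extents than computed ("at most"), e.g. for "+100%FREE". */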
+
+ log_verbose("Converted %" PRIu32 "%%%s into %s%" PRIu32 " %s extents.", old_extents, get_percent_string(lp->percent),
+ lp->approx_alloc ? "at most " : "", lp->extents, lp->extents_are_pes ? "physical" : "logical");
+
+ return 1;
+}
+
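+/* Sum the physical extents backing one LV; called directly and as a
+ * for_each_sub_lv() callback by _lv_pe_count() below. */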
+static int _add_pes(struct logical_volume *lv, void *data)
+{
+ uint32_t *pe_total = data;
+ struct lv_segment *seg;
+ uint32_t s;
+
+ dm_list_iterate_items(seg, &lv->segments) {
+ for (s = 0; s < seg->area_count; s++) {
+ if (seg_type(seg, s) != AREA_PV)
+ continue;
+
+ *pe_total += seg_pvseg(seg, s)->len;
+ }
+ }
+
+ return 1;
+}
+
+static uint32_t _lv_pe_count(struct logical_volume *lv)
+{
+ uint32_t pe_total = 0;
+
+ /* Top-level LV first */
+ if (!_add_pes(lv, &pe_total))
+ stack;
+
+ /* Any sub-LVs */
+ if (!for_each_sub_lv(lv, _add_pes, &pe_total))
stack;
+ return pe_total;
+}
+
+/* FIXME Avoid having variables like lp->extents mean different things at different places */
+static int _lvresize_adjust_extents(struct logical_volume *lv,
+ struct lvresize_params *lp,
+ int *matches_existing)
+{
+ struct volume_group *vg = lv->vg;
+ struct cmd_context *cmd = vg->cmd;
+ uint32_t logical_extents_used = 0;
+ uint32_t physical_extents_used = 0;
+ uint32_t seg_stripes = 0, seg_stripesize = 0;
+ uint32_t seg_mirrors = 0;
+ struct lv_segment *seg, *seg_last;
+ uint32_t sz, str;
+ uint32_t seg_logical_extents;
+ uint32_t seg_physical_extents;
+ uint32_t area_multiple;
+ uint32_t stripes_extents;
+ uint32_t size_rest;
+ uint32_t existing_logical_extents = lv->le_count;
+ uint32_t existing_physical_extents, saved_existing_physical_extents;
+ uint32_t existing_extents;
+ uint32_t seg_size = 0;
+ uint32_t new_extents;
+ uint64_t max_metadata_size;
+ thin_crop_metadata_t crop;
+ int reducing = 0;
+
+ seg_last = last_seg(lv);
+
+ if (!lp->segtype)
+ /* Use segment type of last segment */
+ lp->segtype = seg_last->segtype;
+ else if (lp->segtype != seg_last->segtype) {
+ /* Support newseg error or zero with lastseg striped
+ * and newseg striped with lastseg error or zero */
+ if ((segtype_is_error(lp->segtype) || segtype_is_zero(lp->segtype) ||
+ segtype_is_striped(lp->segtype)) &&
+ (segtype_is_striped(seg_last->segtype) ||
+ segtype_is_error(seg_last->segtype) || segtype_is_zero(seg_last->segtype))) {
+ if (!lp->stripes)
+ lp->stripes = 1;
+ } else {
+ log_error("VolumeType does not match (%s).", lp->segtype->name);
+ return 0;
+ }
+ /* FIXME Support more LVs with mixed segment types */
+ log_print_unless_silent("Logical volume %s is using mixing segment types %s and %s.",
+ display_lvname(lv), seg_last->segtype->name, lp->segtype->name);
+ }
+
+ /* For virtual devices, just pretend the physical size matches. */
+ existing_physical_extents = saved_existing_physical_extents = _lv_pe_count(lv);
+ if (!existing_physical_extents) {
+ existing_physical_extents = lv->le_count;
+ lp->extents_are_pes = 0;
+ }
+
+ existing_extents = (lp->extents_are_pes)
+ ? existing_physical_extents : existing_logical_extents;
+
+ /* Initial decision on whether we are extending or reducing */
+ if (lp->sign == SIGN_MINUS ||
+ (lp->sign == SIGN_NONE && (lp->extents < existing_extents)))
+ reducing = 1;
+
+ /* If extending, find properties of last segment */
+ if (!reducing) {
+ seg_mirrors = seg_is_mirrored(seg_last) ? lv_mirror_count(lv) : 0;
+
+ if (!lp->mirrors && seg_mirrors) {
+ log_print_unless_silent("Extending %" PRIu32 " mirror images.", seg_mirrors);
+ lp->mirrors = seg_mirrors;
+ } else if ((lp->mirrors || seg_mirrors) && (lp->mirrors != seg_mirrors)) {
+ log_error("Cannot vary number of mirrors in LV yet.");
+ return 0;
+ }
+
+ if (seg_is_raid10(seg_last)) {
+ if (!seg_mirrors) {
+ log_error(INTERNAL_ERROR "Missing mirror segments for %s.",
+ display_lvname(lv));
+ return 0;
+ }
+ /* FIXME Warn if command line values are being overridden? */
+ lp->stripes = seg_last->area_count / seg_mirrors;
+ lp->stripe_size = seg_last->stripe_size;
+ } else if (!(lp->stripes == 1 || (lp->stripes > 1 && lp->stripe_size))) {
+ /* If extending, find stripes, stripesize & size of last segment */
+ /* FIXME Don't assume mirror seg will always be AREA_LV */
+ /* FIXME We will need to support resize for metadata LV as well,
+ * and the data LV could be any type (e.g. mirror) */
+ dm_list_iterate_items(seg, seg_mirrors ? &seg_lv(seg_last, 0)->segments : &lv->segments) {
+ /* Allow through "striped" and RAID 4/5/6/10 */
+ if (!seg_is_striped(seg) &&
+ (!seg_is_raid(seg) || seg_is_mirrored(seg)) &&
+ !seg_is_raid10(seg))
+ continue;
+
+ sz = seg->stripe_size;
+ str = seg->area_count - lp->segtype->parity_devs;
+
+ if ((seg_stripesize && seg_stripesize != sz &&
+ sz && !lp->stripe_size) ||
+ (seg_stripes && seg_stripes != str && !lp->stripes)) {
+ log_error("Please specify number of "
+ "stripes (-i) and stripesize (-I)");
+ return 0;
+ }
+
+ seg_stripesize = sz;
+ seg_stripes = str;
+ }
+
+ if (!lp->stripes)
+ lp->stripes = seg_stripes;
+ else if (seg_is_raid(first_seg(lv)) &&
+ (lp->stripes != seg_stripes)) {
+ log_error("Unable to extend \"%s\" segment type with different number of stripes.",
+ lvseg_name(first_seg(lv)));
+ return 0;
+ }
+
+ if (!lp->stripe_size && lp->stripes > 1) {
+ if (seg_stripesize) {
+ log_print_unless_silent("Using stripesize of last segment %s",
+ display_size(cmd, (uint64_t) seg_stripesize));
+ lp->stripe_size = seg_stripesize;
+ } else {
+ lp->stripe_size =
+ find_config_tree_int(cmd, metadata_stripesize_CFG, NULL) * 2;
+ log_print_unless_silent("Using default stripesize %s",
+ display_size(cmd, (uint64_t) lp->stripe_size));
+ }
+ }
+ }
+
+ if (lp->stripes > 1 && !lp->stripe_size) {
+ log_error("Stripesize for striped segment should not be 0!");
+ return 0;
+ }
+
+ /* Determine the amount to extend by */
+ if (lp->sign == SIGN_PLUS)
+ seg_size = lp->extents;
+ else
+ seg_size = lp->extents - existing_extents;
+
+ if (lv_is_vdo_pool_data(lv)) {
+ if (!(seg = get_only_segment_using_this_lv(lv)))
+ return_0;
+ /* Min growth is defined this way: max(1 slab, 128M + 128K (recovery journal + slab summary)) */
+ new_extents = max(seg->vdo_params.slab_size_mb * 1024, UINT32_C(128 * 1024 + 128));
+ new_extents *= (1024 >> SECTOR_SHIFT); /* minimal growth (~128MiB..32GiB) in sectors */
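+ /* e.g. (illustrative) 2 GiB slabs: max(2097152, 131200) KiB = 2 GiB,
+ * then multiplied by 2 to convert KiB to 512-byte sectors. */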
+
+ if (new_extents > vg->extent_size) {
+ /* Minimal growth in extent size units */
+ new_extents = (new_extents + vg->extent_size - 1) / vg->extent_size;
+
+ if (new_extents > seg_size) {
+ /* Notify user about extra increase of extension */
+ log_print_unless_silent("Increasing incremention size from %s to %s to fit new VDO slab.",
+ display_size(cmd, (uint64_t)seg_size * vg->extent_size),
+ display_size(cmd, (uint64_t)new_extents * vg->extent_size));
+ seg_size = new_extents;
+ }
+ }
+ }
+
+ /* Convert PEs to LEs */
+ if (lp->extents_are_pes && !seg_is_striped(seg_last) && !seg_is_virtual(seg_last)) {
+ area_multiple = _calc_area_multiple(seg_last->segtype, seg_last->area_count, 0);
+ seg_size = seg_size * area_multiple / (seg_last->area_count - seg_last->segtype->parity_devs);
+ seg_size = (seg_size / area_multiple) * area_multiple;
+ }
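+ /* e.g. (illustrative) a 2-way mirror has area_multiple 1, area_count 2
+ * and no parity, so 10 physical extents become 10 * 1 / 2 = 5 logical. */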
+
+ if (seg_size >= (MAX_EXTENT_COUNT - existing_logical_extents)) {
+ log_error("Unable to extend %s by %u logical extents: exceeds limit (%u).",
+ display_lvname(lv), seg_size, MAX_EXTENT_COUNT);
+ return 0;
+ }
+
+ lp->extents = existing_logical_extents + seg_size;
+
+ /* Don't allow a cow to grow larger than necessary. */
+ if (lv_is_cow(lv)) {
+ logical_extents_used = cow_max_extents(origin_from_cow(lv), find_snapshot(lv)->chunk_size);
+ if (logical_extents_used < lp->extents) {
+ log_print_unless_silent("Reached maximum COW size %s (%" PRIu32 " extents).",
+ display_size(vg->cmd, (uint64_t) vg->extent_size * logical_extents_used),
+ logical_extents_used);
+ lp->extents = logical_extents_used; // CHANGES lp->extents
+ seg_size = lp->extents - existing_logical_extents; // Recalculate
+ if (lp->extents == existing_logical_extents) {
+ /* Signal that normal resizing is not required */
+ lp->size_changed = 1;
+ return 1;
+ }
+ }
+ } else if (lv_is_thin_pool_metadata(lv)) {
+ if (!(seg = get_only_segment_using_this_lv(lv)))
+ return_0;
+
+ max_metadata_size = get_thin_pool_max_metadata_size(cmd, lv_config_profile(lv), &crop);
+
+ if (((uint64_t)lp->extents * vg->extent_size) > max_metadata_size) {
+ lp->extents = (max_metadata_size + vg->extent_size - 1) / vg->extent_size;
+ log_print_unless_silent("Reached maximum pool metadata size %s (%" PRIu32 " extents).",
+ display_size(vg->cmd, max_metadata_size), lp->extents);
+ }
+
+ if (existing_logical_extents >= lp->extents)
+ lp->extents = existing_logical_extents;
+
+ crop = get_thin_pool_crop_metadata(cmd, crop, (uint64_t)lp->extents * vg->extent_size);
+
+ if (seg->crop_metadata != crop) {
+ seg->crop_metadata = crop;
+ seg->lv->status |= LV_CROP_METADATA;
+ /* A crop change requires a reload even if there is no size change */
+ lp->size_changed = 1;
+ log_print_unless_silent("Thin pool will use metadata without cropping.");
+ }
+
+ if (!(seg_size = lp->extents - existing_logical_extents))
+ return 1; /* No change in metadata size */
+ }
+ } else {
+ /* If reducing, find stripes, stripesize & size of last segment */
+
+ if (lp->sign == SIGN_MINUS) {
+ if (lp->extents >= existing_extents) {
+ log_error("Unable to reduce %s below 1 extent.",
+ display_lvname(lv));
+ return 0;
+ }
+ new_extents = existing_extents - lp->extents;
+ } else
+ new_extents = lp->extents;
+
+ dm_list_iterate_items(seg, &lv->segments) {
+ seg_logical_extents = seg->len;
+ seg_physical_extents = seg->area_len * seg->area_count; /* FIXME Also metadata, cow etc. */
+
+ /* Check for underlying stripe sizes */
+ seg_stripes = _lvseg_get_stripes(seg, &seg_stripesize);
+
+ if (seg_is_mirrored(seg))
+ seg_mirrors = lv_mirror_count(seg->lv);
+ else
+ seg_mirrors = 0;
+
+ /* Have we reached the final segment of the new LV? */
+ if (lp->extents_are_pes) {
+ if (new_extents <= physical_extents_used + seg_physical_extents) {
+ seg_size = new_extents - physical_extents_used;
+ if (seg_mirrors)
+ seg_size /= seg_mirrors;
+ lp->extents = logical_extents_used + seg_size;
+ break;
+ }
+ } else if (new_extents <= logical_extents_used + seg_logical_extents) {
+ seg_size = new_extents - logical_extents_used;
+ lp->extents = new_extents;
+ break;
+ }
+
+ logical_extents_used += seg_logical_extents;
+ physical_extents_used += seg_physical_extents;
+ }
+
+ lp->stripe_size = seg_stripesize;
+ lp->stripes = seg_stripes;
+ lp->mirrors = seg_mirrors;
+ }
+
+ /* At this point, lp->extents should hold the correct NEW logical size required. */
+
+ if (!lp->extents) {
+ log_error("New size of 0 not permitted.");
+ return 0;
+ }
+
+ if ((lp->extents == existing_logical_extents) && !lp->use_policies) {
+ log_print_unless_silent("New size (%d extents) matches existing size (%d extents).",
+ lp->extents, existing_logical_extents);
+ if (lp->resize == LV_ANY)
+ lp->resize = LV_EXTEND; /* let's pretend a zero-size extension */
+ *matches_existing = 1;
+ return 1;
+ }
+
+ /* Perform any rounding to produce complete stripes. */
+ if (lp->stripes > 1) {
+ if (lp->stripe_size < STRIPE_SIZE_MIN) {
+ log_error("Invalid stripe size %s.",
+ display_size(cmd, (uint64_t) lp->stripe_size));
+ return 0;
+ }
+
+ /* Segment size in extents must be divisible by stripes */
+ stripes_extents = lp->stripes;
+ if (lp->stripe_size > vg->extent_size)
+ /* Stripe size bigger than the extent size needs more extents */
+ stripes_extents *= (lp->stripe_size / vg->extent_size);
+
+ size_rest = seg_size % stripes_extents;
+ /* Round toward the original size. */
+ if (size_rest &&
+ ((lp->extents < existing_logical_extents) ||
+ !lp->percent ||
+ (vg->free_count >= (lp->extents - existing_logical_extents - size_rest +
+ stripes_extents)))) {
+ log_print_unless_silent("Rounding size (%d extents) up to stripe "
+ "boundary size for segment (%d extents).",
+ lp->extents,
+ lp->extents - size_rest + stripes_extents);
+ lp->extents = lp->extents - size_rest + stripes_extents;
+ } else if (size_rest) {
+ log_print_unless_silent("Rounding size (%d extents) down to stripe "
+ "boundary size for segment (%d extents)",
+ lp->extents, lp->extents - size_rest);
+ lp->extents = lp->extents - size_rest;
+ }
+ }
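+ /* e.g. (illustrative) stripes=3 with stripe_size <= extent_size gives
+ * stripes_extents=3, so a 10-extent segment rounds up to 12 or down to 9. */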
+
+ /* Final sanity checking */
+ if (lp->extents < existing_logical_extents) {
+ if (lp->resize == LV_EXTEND) {
+ log_error("New size given (%d extents) not larger "
+ "than existing size (%d extents)",
+ lp->extents, existing_logical_extents);
+ return 0;
+ }
+ lp->resize = LV_REDUCE;
+ } else if (lp->extents > existing_logical_extents) {
+ if (lp->resize == LV_REDUCE) {
+ log_error("New size given (%d extents) not less than "
+ "existing size (%d extents)", lp->extents,
+ existing_logical_extents);
+ return 0;
+ }
+ lp->resize = LV_EXTEND;
+ } else if ((lp->extents == existing_logical_extents) && !lp->use_policies) {
+ log_print_unless_silent("New size (%d extents) matches existing size (%d extents)",
+ lp->extents, existing_logical_extents);
+ if (lp->resize == LV_ANY)
+ lp->resize = LV_EXTEND;
+ *matches_existing = 1;
+ return 1;
+ }
+
/*
- * FIXME: resume LVs in reverse order to prevent memory
- * lock imbalance when resuming virtual snapshot origin
- * (resume of snapshot resumes origin too)
+ * Has the user specified that they would like the additional
+ * extents of a mirror not to have an initial sync?
*/
- dm_list_iterate_back_items(lvlp, &lvs_changed)
- if (!resume_lv(cmd, lvlp->lv))
- stack;
-out:
- backup(vg);
- return r;
+ if ((lp->extents > existing_logical_extents) &&
+     seg_is_mirrored(first_seg(lv)) && lp->nosync)
+ lv->status |= LV_NOTSYNCED;
+
+ log_debug("New size for %s: %" PRIu32 ". Existing logical extents: %" PRIu32 " / physical extents: %" PRIu32 ".",
+ display_lvname(lv), lp->extents, existing_logical_extents, saved_existing_physical_extents);
+
+ return 1;
+}
+
+static int _lv_reduce_vdo_discard(struct cmd_context *cmd,
+ struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ char name[PATH_MAX];
+ struct device *dev;
+ struct volume_group *vg = lv->vg;
+
+ /* FIXME: stop using dev-cache and struct device here, dev-cache
+ should only be used for scanning headers/metadata to find PVs. */
+
+ if (dm_snprintf(name, sizeof(name), "%s%s/%s", cmd->dev_dir,
+ vg->name, lv->name) < 0) {
+ log_error("Name too long - device not discarded (%s)", lv->name);
+ return 0;
+ }
+
+ if (!(dev = dev_cache_get(cmd, name, NULL))) {
+ log_error("%s: not found: device not discarded.", name);
+ return 0;
+ }
+
+ if (!dev_discard_max_bytes(cmd->dev_types, dev) ||
+ !dev_discard_granularity(cmd->dev_types, dev)) {
+ log_error("%s: max bytes and granularity query fails.", name);
+ dev_destroy_file(dev);
+ return 0;
+ }
+
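+ /* The discard region is the tail being cut off: byte offset = new LV
+ * size, length = old size minus new size (extents converted to bytes). */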
+ log_warn("WARNING: %s: Discarding %s at offset " FMTu64 ", please wait...",
+ name, display_size(cmd, (uint64_t)(lv->le_count - lp->extents) * vg->extent_size),
+ ((uint64_t)lp->extents * vg->extent_size) << SECTOR_SHIFT);
+
+ if (!dev_discard_blocks(dev, ((uint64_t)lp->extents * vg->extent_size) << SECTOR_SHIFT,
+ ((uint64_t)(lv->le_count - lp->extents) * vg->extent_size) << SECTOR_SHIFT)) {
+ log_error("%s: discarding failed.", name);
+ dev_destroy_file(dev);
+ return 0;
+ }
+
+ dev_destroy_file(dev);
+ return 1;
+}
+
+static int _lv_resize_check_type(struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ struct lv_segment *seg;
+
+ if (lv_is_origin(lv)) {
+ if (lp->resize == LV_REDUCE) {
+ log_error("Snapshot origin volumes cannot be reduced in size yet.");
+ return 0;
+ }
+
+ if (lv_is_active(lv)) {
+ log_error("Snapshot origin volumes can be resized "
+ "only while inactive: try lvchange -an.");
+ return 0;
+ }
+ }
+
+ if (lv_is_raid_image(lv) || lv_is_raid_metadata(lv)) {
+ log_error("Cannot resize a RAID %s directly for %s",
+ lv_is_raid_image(lv) ? "image" : "metadata area",
+ display_lvname(lv));
+ return 0;
+ }
+
+ seg = first_seg(lv);
+ if ((seg_is_raid4(seg) || seg_is_any_raid5(seg)) && seg->area_count < 3) {
+ log_error("Cannot resize %s LV %s. Convert to more stripes first.",
+ lvseg_name(seg), display_lvname(lv));
+ return 0;
+ }
+
+ if (lp->resize == LV_REDUCE) {
+ if (lv_is_thin_pool_data(lv)) {
+ log_error("Thin pool volumes %s cannot be reduced in size yet.",
+ display_lvname(lv));
+ return 0;
+ }
+ if (lv_is_thin_pool_metadata(lv)) {
+ log_error("Thin pool metadata volumes cannot be reduced.");
+ return 0;
+ }
+ if (lv_is_vdo_pool_data(lv)) {
+ log_error("Cannot reduce VDO pool data volume %s.",
+ display_lvname(lv));
+ return 0;
+ }
+ if (lv_is_writecache(lv)) {
+ /* TODO: detect kernel with support for reduction */
+ log_error("Reduce not yet allowed on LVs with writecache attached.");
+ return 0;
+ }
+ if (lv_is_raid(lv)) {
+ unsigned attrs = 0;
+ const struct segment_type *segtype = first_seg(lv)->segtype;
+
+ if (!segtype->ops->target_present ||
+ !segtype->ops->target_present(lv->vg->cmd, NULL, &attrs) ||
+ !(attrs & RAID_FEATURE_SHRINK)) {
+ log_error("RAID module does not support shrinking.");
+ return 0;
+ }
+ }
+ if (lv_is_integrity(lv) || lv_raid_has_integrity(lv)) {
+ log_error("Cannot reduce LV with integrity.");
+ return 0;
+ }
+ } else if (lp->resize == LV_EXTEND) {
+ if (lv_is_thin_pool_metadata(lv) &&
+ (!(seg = find_pool_seg(first_seg(lv))) ||
+ !thin_pool_feature_supported(seg->lv, THIN_FEATURE_METADATA_RESIZE))) {
+ log_error("Support for online metadata resize of %s not detected.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /* Validate that the thin target supports a thin volume larger than its external origin */
+ if (lv_is_thin_volume(lv) && first_seg(lv)->external_lv &&
+ (lp->extents > first_seg(lv)->external_lv->le_count) &&
+ !thin_pool_feature_supported(first_seg(lv)->pool_lv, THIN_FEATURE_EXTERNAL_ORIGIN_EXTEND)) {
+ log_error("Thin target does not support external origin smaller then thin volume.");
+ return 0;
+ }
+ }
+
+ /* Prevent resizing on out-of-sync reshapable raid */
+ if (first_seg(lv)->reshape_len && !lv_raid_in_sync(lv)) {
+ log_error("Can't resize reshaping LV %s.", display_lvname(lv));
+ return 0;
+ }
+
+ if ((lp->resize == LV_REDUCE) && (lp->pvh != &lv->vg->pvs))
+ log_print_unless_silent("Ignoring PVs on command line when reducing.");
+
+ return 1;
+}
+
+static int _lv_resize_volume(struct logical_volume *lv,
+ struct lvresize_params *lp,
+ struct dm_list *pvh)
+{
+ struct volume_group *vg = lv->vg;
+ struct cmd_context *cmd = vg->cmd;
+ uint32_t old_extents;
+ alloc_policy_t alloc = lp->alloc ? : lv->alloc;
+
+ old_extents = lv->le_count;
+ log_verbose("%sing logical volume %s to %s%s",
+ (lp->resize == LV_REDUCE) ? "Reduc" : "Extend",
+ display_lvname(lv), lp->approx_alloc ? "up to " : "",
+ display_size(cmd, (uint64_t) lp->extents * vg->extent_size));
+
+ if (lp->resize == LV_REDUCE) {
+ if (!lv_reduce(lv, lv->le_count - lp->extents))
+ return_0;
+ } else if ((lp->extents > lv->le_count) && /* Ensure we extend */
+ !lv_extend(lv, lp->segtype,
+ lp->stripes, lp->stripe_size,
+ lp->mirrors, first_seg(lv)->region_size,
+ lp->extents - lv->le_count,
+ pvh, alloc, lp->approx_alloc))
+ return_0;
+
+ if (old_extents == lv->le_count)
+ log_print_unless_silent("Size of logical volume %s unchanged from %s (%" PRIu32 " extents).",
+ display_lvname(lv),
+ display_size(cmd, (uint64_t) old_extents * vg->extent_size), old_extents);
+ else {
+ lp->size_changed = 1;
+ log_print_unless_silent("Size of logical volume %s changed from %s (%" PRIu32 " extents) to %s (%" PRIu32 " extents).",
+ display_lvname(lv),
+ display_size(cmd, (uint64_t) old_extents * vg->extent_size), old_extents,
+ display_size(cmd, (uint64_t) lv->le_count * vg->extent_size), lv->le_count);
+ }
+
+ return 1;
+}
+
+static int _lv_resize_adjust_size(struct logical_volume *lv,
+ struct lvresize_params *lp,
+ int *matches_existing)
+{
+ /* Resolve extents from size */
+ if (lp->size) {
+ if (!_lvresize_adjust_size(lv->vg, lp->size, lp->sign, &lp->extents))
+ return_0;
+ }
+
+ /* set lp->extents based on lp->percent_value */
+ else if (lp->percent_value) {
+ if (!_lvresize_extents_from_percent(lv, lp))
+ return_0;
+ }
+
+ /* rewrites lp->extents from percentage to extents */
+ else if (lp->extents && (lp->percent != PERCENT_NONE)) {
+ if (!_lvresize_extents_from_percent(lv, lp))
+ return_0;
+ }
+
+ /* Ensure stripe boundary extents! */
+ if (!lp->percent && lv_is_raid(lv))
+ lp->extents = _round_to_stripe_boundary(lv->vg, lp->extents,
+ seg_is_raid1(first_seg(lv)) ? 0 : _raid_stripes_count(first_seg(lv)),
+ lp->resize == LV_REDUCE ? 0 : 1);
+
+ if (!_lvresize_adjust_extents(lv, lp, matches_existing))
+ return_0;
+
+ return 1;
+}
+
+/* Set thin pool metadata properties, we can't use those from command line */
+static void _setup_params_for_extend_metadata(struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ struct lv_segment *mseg = last_seg(lv);
+
+ lp->alloc = lv->alloc;
+ lp->percent = PERCENT_NONE;
+ lp->segtype = mseg->segtype;
+ lp->mirrors = seg_is_mirrored(mseg) ? lv_mirror_count(lv) : 0;
+ lp->fsopt[0] = '\0';
+ lp->stripes = lp->mirrors ? mseg->area_count / lp->mirrors : 0;
+ lp->stripe_size = mseg->stripe_size;
+}
+
+
+static int _lv_resize_check_used(struct logical_volume *lv)
+{
+ if (!lv) {
+ log_error(INTERNAL_ERROR "LV is not specified.");
+ return 0;
+ }
+
+ if (lv_is_locked(lv)) {
+ log_error("Can't resize locked logical volume %s.", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_is_converting(lv)) {
+ log_error("Can't resize logical volume %s while lvconvert in progress.", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_component_is_active(lv)) {
+ log_error("Cannot resize logical volume %s with active component LV(s).", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_is_raid_with_tracking(lv)) {
+ log_error("Cannot resize logical volume %s while it is tracking a split image.", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_is_vdo(lv) && !lv_is_active(lv)) {
+ log_error("Cannot resize inactive VDO logical volume %s.", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_is_vdo_pool(lv) && !lv_is_active(lv_lock_holder(lv))) {
+ log_error("Cannot resize inactive VDO POOL volume %s.", display_lvname(lv));
+ return 0;
+ }
+
+ if (lv_is_external_origin(lv)) {
+ /*
+ * Since external-origin can be activated read-only,
+ * there is no way to use extended areas.
+ */
+ log_error("Cannot resize external origin logical volume %s.",
+ display_lvname(lv));
+ return 0;
+ }
+
+ return 1;
}
/*
- * Core of LV renaming routine.
- * VG must be locked by caller.
+ * --fs checksize: check fs size and allow the lv to reduce if the fs is not
+ * using the affected space, i.e. the fs does not need to be
+ * resized. fail the command without reducing the fs or lv if
+ * the fs is using the affected space.
+ *
+ * --fs resize --fsmode manage: resize the fs, mounting/unmounting the fs
+ * as needed, but avoiding mounting/unmounted when possible.
+ *
+ * --fs resize --fsmode nochange: resize the fs without changing the current
+ * mount/unmount state. fail the command without reducing the
+ * fs or lv if the fs resize would require mounting or unmounting.
+ *
+ * --fs resize --fsmode offline: resize the fs only while it's unmounted
+ * unmounting the fs if needed. fail the command without
+ * reducing the fs or lv if the fs resize would require having
+ * the fs mounted.
+ *
+ * --fs resize_fsadm: old method using fsadm script to do everything
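+ *
+ * Example (hypothetical names): "lvreduce -L-1G --fs resize --fsmode manage
+ * vg0/lv0" first shrinks the file system, mounting or unmounting as needed,
+ * and only then reduces the LV.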
*/
-int lv_rename(struct cmd_context *cmd, struct logical_volume *lv,
- const char *new_name)
+static int _fs_reduce_allow(struct cmd_context *cmd, struct logical_volume *lv,
+ struct lvresize_params *lp, uint64_t newsize_bytes_lv,
+ uint64_t newsize_bytes_fs, struct fs_info *fsi)
{
- return lv_rename_update(cmd, lv, new_name, 1);
+ const char *fs_reduce_cmd = "";
+ const char *cmp_desc = "";
+ int equal = 0, smaller = 0, larger = 0;
+ int is_ext_fstype = 0;
+ int confirm_mount_change = 0;
+
+ /*
+ * Allow reducing the LV for other fs types if the fs is not using
+ * space that's being reduced.
+ */
+ if (!strcmp(fsi->fstype, "ext2") ||
+ !strcmp(fsi->fstype, "ext3") ||
+ !strcmp(fsi->fstype, "ext4") ||
+ !strcmp(fsi->fstype, "xfs")) {
+ log_debug("Found fs %s last_byte %llu newsize_bytes_fs %llu",
+ fsi->fstype,
+ (unsigned long long)fsi->fs_last_byte,
+ (unsigned long long)newsize_bytes_fs);
+ if (!strncmp(fsi->fstype, "ext", 3)) {
+ is_ext_fstype = 1;
+ fs_reduce_cmd = " resize2fs";
+ }
+ }
+
+ if (!fsi->mounted)
+ log_print_unless_silent("File system %s%s found on %s.",
+ fsi->fstype, fsi->needs_crypt ? "+crypto_LUKS" : "",
+ display_lvname(lv));
+ else
+ log_print_unless_silent("File system %s%s found on %s mounted at %s.",
+ fsi->fstype, fsi->needs_crypt ? "+crypto_LUKS" : "",
+ display_lvname(lv), fsi->mount_dir);
+
+ if (!fsi->fs_last_byte) {
+ if (!strcmp(fsi->fstype, "reiserfs")) {
+ log_error("File system reduce for reiserfs requires --fs resize_fsadm.");
+ return 0;
+ }
+ log_error("File system device usage is not available from libblkid.");
+ return 0;
+ }
+
+ if ((equal = (fsi->fs_last_byte == newsize_bytes_fs)))
+ cmp_desc = "equal to";
+ else if ((smaller = (fsi->fs_last_byte < newsize_bytes_fs)))
+ cmp_desc = "smaller than";
+ else if ((larger = (fsi->fs_last_byte > newsize_bytes_fs)))
+ cmp_desc = "larger than";
+
+ log_print_unless_silent("File system size (%s) is %s the requested size (%s).",
+ display_size(cmd, fsi->fs_last_byte/512), cmp_desc,
+ display_size(cmd, newsize_bytes_fs/512));
+
+ /*
+ * FS reduce is not needed, it's not using the affected space.
+ */
+ if (smaller || equal) {
+ log_print_unless_silent("File system reduce is not needed, skipping.");
+ fsi->needs_reduce = 0;
+ return 1;
+ }
+
+ /*
+ * FS reduce is required, but checksize does not allow it.
+ */
+ if (!strcmp(lp->fsopt, "checksize")) {
+ if (is_ext_fstype)
+ log_error("File system reduce is required (see resize2fs or --resizefs.)");
+ else
+ log_error("File system reduce is required and not supported (%s).", fsi->fstype);
+ return 0;
+ }
+
+ /*
+ * FS reduce required, ext* supports it, xfs does not.
+ */
+ if (is_ext_fstype) {
+ log_print_unless_silent("File system reduce is required using resize2fs.");
+ } else if (!strcmp(fsi->fstype, "reiserfs")) {
+ log_error("File system reduce for reiserfs requires --fs resize_fsadm.");
+ return 0;
+ } else {
+ log_error("File system reduce is required and not supported (%s).", fsi->fstype);
+ return 0;
+ }
+
+ /*
+ * Set fstype-specific requirements for running fs resize command.
+ * ext2,3,4 require the fs to be unmounted to shrink with resize2fs,
+ * and they require e2fsck to be run first, unless resize2fs -f is used.
+ */
+ if (is_ext_fstype) {
+ /* it's traditional to run fsck before shrink */
+ if (!lp->nofsck)
+ fsi->needs_fsck = 1;
+
+ /* ext2,3,4 require fs to be unmounted to shrink */
+ if (fsi->mounted)
+ fsi->needs_unmount = 1;
+
+ fsi->needs_reduce = 1;
+ } else {
+ /*
+ * Shouldn't reach here since no other fs types get this far.
+ * A future fs supporting shrink may require the fs to be
+ * mounted or unmounted to run the fs shrink command.
+ * set fsi->needs_unmount or fsi->needs_mount according to
+ * the fs-specific shrink command's requirement.
+ */
+ log_error("File system %s: fs reduce not implemented.", fsi->fstype);
+ return 0;
+ }
+
+ /*
+ * FS reduce may require mounting or unmounting, check the fsopt value
+ * from the user, and the current mount state to decide if fs resize
+ * can be done.
+ */
+ if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "nochange")) {
+ /* can't mount|unmount to run fs resize */
+ if (fsi->needs_mount) {
+ log_error("File system needs to be mounted to reduce fs (see --fsmode).");
+ return 0;
+ }
+ if (fsi->needs_unmount) {
+ log_error("File system needs to be unmounted to reduce fs (see --fsmode).");
+ return 0;
+ }
+ } else if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "offline")) {
+ /* we can unmount if needed to run fs resize */
+ if (fsi->needs_mount) {
+ log_error("File system needs to be mounted to reduce fs (see --fsmode).");
+ return 0;
+ }
+ } else if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "manage")) {
+ /* we can mount|unmount as needed to run fs resize */
+ /* confirm mount change unless --fsmode manage is set explicitly */
+
+ if (fsi->needs_mount || fsi->needs_unmount)
+ confirm_mount_change = 1;
+
+ if (lp->user_set_fsmode)
+ confirm_mount_change = 0;
+ } else {
+ log_error("Unknown file system resize options: --fs %s --fsmode %s", lp->fsopt, lp->fsmode);
+ return 0;
+ }
+
+ /*
+ * If future file systems can be reduced while mounted, then suppress
+ * needs_fsck here if the fs is already mounted.
+ */
+
+ if (fsi->needs_unmount)
+ log_print_unless_silent("File system unmount is needed for reduce.");
+ if (fsi->needs_fsck)
+ log_print_unless_silent("File system fsck will be run before reduce.");
+ if (fsi->needs_mount)
+ log_print_unless_silent("File system mount is needed for reduce.");
+ if (fsi->needs_crypt)
+ log_print_unless_silent("cryptsetup resize is needed for reduce.");
+
+ /*
+ * Use a confirmation prompt because mount|unmount is needed, and
+ * no specific --fsmode was set (i.e. the user did not give specific
+ * direction about how to handle mounting|unmounting with --fsmode.)
+ */
+ if (!lp->yes && confirm_mount_change) {
+ if (yes_no_prompt("Continue with %s file system reduce steps:%s%s%s%s%s? [y/n]:",
+ fsi->fstype,
+ fsi->needs_unmount ? " unmount," : "",
+ fsi->needs_fsck ? " fsck," : "",
+ fsi->needs_mount ? " mount," : "",
+ fsi->needs_crypt ? " cryptsetup," : "",
+ fsi->needs_reduce ? fs_reduce_cmd : "") == 'n') {
+ log_error("File system not reduced.");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int _fs_extend_allow(struct cmd_context *cmd, struct logical_volume *lv,
+ struct lvresize_params *lp, struct fs_info *fsi)
+{
+ const char *fs_extend_cmd = "";
+ int is_ext_fstype = 0;
+ int confirm_mount_change = 0;
+
+ if (!strcmp(fsi->fstype, "ext2") ||
+ !strcmp(fsi->fstype, "ext3") ||
+ !strcmp(fsi->fstype, "ext4") ||
+ !strcmp(fsi->fstype, "xfs")) {
+ log_debug("Found fs %s last_byte %llu",
+ fsi->fstype, (unsigned long long)fsi->fs_last_byte);
+ if (!strncmp(fsi->fstype, "ext", 3))
+ is_ext_fstype = 1;
+ } else if (!strcmp(fsi->fstype, "reiserfs")) {
+ log_error("File system extend for reiserfs requires --fs resize_fsadm.");
+ return 0;
+ } else {
+ log_error("File system extend is not supported (%s).", fsi->fstype);
+ return 0;
+ }
+
+ if (!fsi->mounted)
+ log_print_unless_silent("File system %s%s found on %s.",
+ fsi->fstype, fsi->needs_crypt ? "+crypto_LUKS" : "",
+ display_lvname(lv));
+ else
+ log_print_unless_silent("File system %s%s found on %s mounted at %s.",
+ fsi->fstype, fsi->needs_crypt ? "+crypto_LUKS" : "",
+ display_lvname(lv), fsi->mount_dir);
+
+ /*
+ * FS extend may require mounting or unmounting, check the fsopt value
+ * from the user, and the current mount state to decide if fs extend
+ * can be done.
+ */
+
+ if (is_ext_fstype) {
+ fs_extend_cmd = " resize2fs";
+
+ /*
+ * ext* can be extended while it's mounted or unmounted. If
+ * the fs is unmounted, it's traditional to run fsck before
+ * running the fs extend.
+ *
+ * --fs resize --fsmode nochange: don't change mount condition.
+ * if mounted: fs_extend
+ * if unmounted: fsck, fs_extend
+ *
+ * --fs resize --fsmode offline: extend offline, so unmount first if mounted.
+ * if mounted: unmount, fsck, fs_extend
+ * if unmounted: fsck, fs_extend
+ *
+ * --fs resize --fsmode manage: do any mount or unmount that's necessary,
+ * avoiding unnecessary mounting/unmounting.
+ * if mounted: fs_extend
+ * if unmounted: fsck, fs_extend
+ */
+ if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "nochange")) {
+ if (fsi->mounted)
+ fsi->needs_extend = 1;
+ else if (fsi->unmounted) {
+ fsi->needs_fsck = 1;
+ fsi->needs_extend = 1;
+ }
+ } else if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "offline")) {
+ if (fsi->mounted) {
+ fsi->needs_unmount = 1;
+ fsi->needs_fsck = 1;
+ fsi->needs_extend = 1;
+ } else if (fsi->unmounted) {
+ fsi->needs_fsck = 1;
+ fsi->needs_extend = 1;
+ }
+ } else if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "manage")) {
+ if (fsi->mounted)
+ fsi->needs_extend = 1;
+ else if (fsi->unmounted) {
+ fsi->needs_fsck = 1;
+ fsi->needs_extend = 1;
+ }
+ }
+
+ if (lp->nofsck)
+ fsi->needs_fsck = 0;
+
+ } else if (!strcmp(fsi->fstype, "xfs")) {
+ fs_extend_cmd = " xfs_growfs";
+
+ /*
+ * xfs must be mounted to extend.
+ *
+ * --fs resize --fsmode nochange: don't change mount condition.
+ * if mounted: fs_extend
+ * if unmounted: fail
+ *
+ * --fs resize --fsmode offline: extend offline, so unmount first if mounted.
+ * if mounted: fail
+ * if unmounted: fail
+ *
+ * --fs resize --fsmode manage: do any mount or unmount that's necessary,
+ * avoiding unnecessary mounting/unmounting.
+ * if mounted: fs_extend
+ * if unmounted: mount, fs_extend
+ */
+ if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "nochange")) {
+ if (fsi->mounted)
+ fsi->needs_extend = 1;
+ else if (fsi->unmounted) {
+ log_error("File system must be mounted to extend (see --fsmode).");
+ return 0;
+ }
+ } else if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "offline")) {
+ log_error("File system must be mounted to extend (see --fsmode).");
+ return 0;
+ } else if (!strcmp(lp->fsopt, "resize") && !strcmp(lp->fsmode, "manage")) {
+ if (fsi->mounted)
+ fsi->needs_extend = 1;
+ else if (fsi->unmounted) {
+ fsi->needs_mount = 1;
+ fsi->needs_extend = 1;
+ }
+ }
+
+ } else {
+ /* shouldn't reach here */
+ log_error("File system type %s not handled.", fsi->fstype);
+ return 0;
+ }
+
+ /*
+ * Skip needs_fsck if the fs is mounted and we can extend the fs while
+ * it's mounted.
+ */
+ if (fsi->mounted && !fsi->needs_unmount && fsi->needs_fsck) {
+ log_print_unless_silent("File system fsck skipped for extending mounted fs.");
+ fsi->needs_fsck = 0;
+ }
+
+ if (fsi->needs_unmount)
+ log_print_unless_silent("File system unmount is needed for extend.");
+ if (fsi->needs_fsck)
+ log_print_unless_silent("File system fsck will be run before extend.");
+ if (fsi->needs_mount)
+ log_print_unless_silent("File system mount is needed for extend.");
+ if (fsi->needs_crypt)
+ log_print_unless_silent("cryptsetup resize is needed for extend.");
+
+ /*
+ * Use a confirmation prompt when mount|unmount is needed if
+ * the user did not give specific direction about how to handle
+ * mounting|unmounting with --fsmode.
+ */
+ if (!strcmp(lp->fsopt, "resize") && !lp->user_set_fsmode &&
+ (fsi->needs_mount || fsi->needs_unmount))
+ confirm_mount_change = 1;
+
+ if (!lp->yes && confirm_mount_change) {
+ if (yes_no_prompt("Continue with %s file system extend steps:%s%s%s%s%s? [y/n]:",
+ fsi->fstype,
+ fsi->needs_unmount ? " unmount," : "",
+ fsi->needs_fsck ? " fsck," : "",
+ fsi->needs_mount ? " mount," : "",
+ fsi->needs_crypt ? " cryptsetup," : "",
+ fsi->needs_extend ? fs_extend_cmd : "") == 'n') {
+ log_error("File system not extended.");
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static int _fs_reduce(struct cmd_context *cmd, struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ struct fs_info fsinfo;
+ struct fs_info fsinfo2;
+ uint64_t newsize_bytes_lv;
+ uint64_t newsize_bytes_fs;
+ int ret = 0;
+
+ memset(&fsinfo, 0, sizeof(fsinfo));
+ memset(&fsinfo2, 0, sizeof(fsinfo2));
+
+ if (!fs_get_info(cmd, lv, &fsinfo, 1))
+ goto_out;
+
+ if (fsinfo.nofs) {
+ ret = 1;
+ goto_out;
+ }
+
+ /* extent_size is in 512-byte sectors (SECTOR_SIZE); convert extents to bytes */
+ newsize_bytes_lv = (uint64_t) lp->extents * lv->vg->extent_size * SECTOR_SIZE;
+ newsize_bytes_fs = newsize_bytes_lv;
+
+ /*
+ * If needs_crypt, then newsize_bytes passed to fs_reduce_script() and
+ * crypt_resize_script() needs to be decreased by the offset of crypt
+ * data on the LV (usually the size of the LUKS header which is usually
+ * 2MB for LUKS1 and 16MB for LUKS2.)
+ */
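+ /* e.g. (typical, not guaranteed) with a 16 MiB LUKS2 header, a 1 GiB LV
+ * exposes roughly 1 GiB - 16 MiB to the file system. */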
+ if (fsinfo.needs_crypt) {
+ newsize_bytes_fs -= fsinfo.crypt_offset_bytes;
+ log_print_unless_silent("File system size %llub is adjusted for crypt data offset %ub.",
+ (unsigned long long)newsize_bytes_fs, fsinfo.crypt_offset_bytes);
+ }
+
+ /*
+ * Based on the --fs command option, the fs type, the last block used,
+ * and the mount state, determine if LV reduce is allowed. If not
+ * returns 0 and lvreduce should fail. If allowed, returns 1 and sets
+ * fsinfo.needs_* for any steps that are required to reduce the LV.
+ */
+ if (!_fs_reduce_allow(cmd, lv, lp, newsize_bytes_lv, newsize_bytes_fs, &fsinfo))
+ goto_out;
+
+ /*
+ * Uncommon special case in which the FS does not need to be shrunk,
+ * but the crypt dev over the LV should be shrunk to correspond with
+ * the LV size, so that the FS does not see an incorrect device size.
+ */
+ if (!fsinfo.needs_reduce && fsinfo.needs_crypt) {
+ /* Check if the crypt device is already sufficiently reduced. */
+ if (fsinfo.crypt_dev_size_bytes <= newsize_bytes_fs) {
+ log_print_unless_silent("crypt device is already reduced to %llu bytes.",
+ (unsigned long long)fsinfo.crypt_dev_size_bytes);
+ ret = 1;
+ goto out;
+ }
+ if (!strcmp(lp->fsopt, "checksize")) {
+ log_error("crypt reduce is required (see --resizefs or cryptsetup resize.)");
+ ret = 0;
+ goto out;
+ }
+ if (test_mode()) {
+ ret = 1;
+ goto_out;
+ }
+ ret = crypt_resize_script(cmd, lv, &fsinfo, newsize_bytes_fs);
+ goto out;
+ }
+
+ /*
+ * fs reduce is not needed to reduce the LV.
+ */
+ if (!fsinfo.needs_reduce) {
+ ret = 1;
+ goto_out;
+ }
+
+ if (test_mode()) {
+ if (fsinfo.needs_unmount)
+ log_print_unless_silent("Skip unmount in test mode.");
+ if (fsinfo.needs_fsck)
+ log_print_unless_silent("Skip fsck in test mode.");
+ if (fsinfo.needs_mount)
+ log_print_unless_silent("Skip mount in test mode.");
+ if (fsinfo.needs_crypt)
+ log_print_unless_silent("Skip cryptsetup in test mode.");
+ log_print_unless_silent("Skip fs reduce in test mode.");
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * mounting, unmounting, fsck, and shrink command can all take a long
+ * time to run, and this lvm command should not block other lvm
+ * commands from running during that time, so release the vg lock
+ * around the long-running steps, and reacquire after.
+ */
+ unlock_vg(cmd, lv->vg, lv->vg->name);
+
+ if (!fs_reduce_script(cmd, lv, &fsinfo, newsize_bytes_fs, lp->fsmode))
+ goto_out;
+
+ if (!lock_vol(cmd, lv->vg->name, LCK_VG_WRITE, NULL)) {
+ log_error("Failed to lock VG, cannot reduce LV.");
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * Check that the vg wasn't changed while it was unlocked.
+ * (can_use_one_scan: check just one mda in the vg for changes)
+ */
+ cmd->can_use_one_scan = 1;
+ if (scan_text_mismatch(cmd, lv->vg->name, NULL)) {
+ log_print_unless_silent("VG was changed during fs operations, restarting.");
+ lp->vg_changed_error = 1;
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * Re-check the fs last block which should now be less than the
+ * requested (reduced) LV size.
+ */
+ if (!fs_get_info(cmd, lv, &fsinfo2, 0))
+ goto_out;
+
+ if (fsinfo.fs_last_byte && (fsinfo2.fs_last_byte > newsize_bytes_fs)) {
+ log_error("File system last byte %llu is greater than new size %llu bytes.",
+ (unsigned long long)fsinfo2.fs_last_byte,
+ (unsigned long long)newsize_bytes_fs);
+ goto_out;
+ }
+
+ ret = 1;
+ out:
+ return ret;
+}
+
+static int _fs_extend(struct cmd_context *cmd, struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ struct fs_info fsinfo;
+ uint64_t newsize_bytes_lv;
+ uint64_t newsize_bytes_fs;
+ int ret = 0;
+
+ memset(&fsinfo, 0, sizeof(fsinfo));
+
+ if (!fs_get_info(cmd, lv, &fsinfo, 1))
+ goto_out;
+
+ if (fsinfo.nofs) {
+ ret = 1;
+ goto_out;
+ }
+
+ /*
+ * Note: here in the case of extend, newsize_bytes_lv/newsize_bytes_fs
+ * are only calculated and used for log messages. The extend commands
+ * do not use these values, they just extend to the new LV size that
+ * is visible to them.
+ */
+
+ /* extent_size is in 512-byte sectors (SECTOR_SIZE); convert extents to bytes */
+ newsize_bytes_lv = (uint64_t) lp->extents * lv->vg->extent_size * SECTOR_SIZE;
+ newsize_bytes_fs = newsize_bytes_lv;
+ if (fsinfo.needs_crypt) {
+ newsize_bytes_fs -= fsinfo.crypt_offset_bytes;
+ log_print_unless_silent("File system size %llub is adjusted for crypt data offset %ub.",
+ (unsigned long long)newsize_bytes_fs, fsinfo.crypt_offset_bytes);
+ }
+
+ /*
+ * Decide if fs should be extended based on the --fs option,
+ * the fs type and the mount state.
+ */
+ if (!_fs_extend_allow(cmd, lv, lp, &fsinfo))
+ goto_out;
+
+ /*
+ * fs extend is not needed
+ */
+ if (!fsinfo.needs_extend) {
+ ret = 1;
+ goto_out;
+ }
+
+ if (test_mode()) {
+ if (fsinfo.needs_unmount)
+ log_print_unless_silent("Skip unmount in test mode.");
+ if (fsinfo.needs_fsck)
+ log_print_unless_silent("Skip fsck in test mode.");
+ if (fsinfo.needs_mount)
+ log_print_unless_silent("Skip mount in test mode.");
+ if (fsinfo.needs_crypt)
+ log_print_unless_silent("Skip cryptsetup in test mode.");
+ log_print_unless_silent("Skip fs extend in test mode.");
+ ret = 1;
+ goto out;
+ }
+
+ /*
+ * mounting, unmounting and extend command can all take a long
+ * time to run, and this lvm command should not block other lvm
+ * commands from running during that time, so release the vg
+ * lock around the long-running steps.
+ */
+ unlock_vg(cmd, lv->vg, lv->vg->name);
+
+ if (!fs_extend_script(cmd, lv, &fsinfo, newsize_bytes_fs, lp->fsmode))
+ goto_out;
+
+ ret = 1;
+ out:
+ return ret;
+}
+
+int lv_resize(struct cmd_context *cmd, struct logical_volume *lv,
+ struct lvresize_params *lp)
+{
+ struct lvresize_params lp_meta;
+ struct volume_group *vg = lv->vg;
+ struct lv_segment *seg = first_seg(lv);
+ struct logical_volume *lv_top = NULL;
+ struct logical_volume *lv_main = NULL;
+ struct logical_volume *lv_meta = NULL;
+ struct logical_volume *lv_main_layer = NULL;
+ struct logical_volume *lv_meta_layer = NULL;
+ int main_size_matches = 0;
+ int meta_size_matches = 0;
+ int is_extend = (lp->resize == LV_EXTEND);
+ int is_reduce = (lp->resize == LV_REDUCE);
+ int is_active = 0;
+ int activated = 0;
+ int activated_checksize = 0;
+ int status;
+ int ret = 0;
+
+ memset(&lp_meta, 0, sizeof(lp_meta));
+
+ /*
+ * Some checks apply to the LV command arg (don't require top/bottom
+ * LVs in a stack), and don't require knowing if the command is doing
+ * extend or reduce (determined later).
+ */
+
+ if (lp->stripe_size && !_validate_stripesize(vg, lp))
+ return_0;
+
+ /*
+ * The only types of !visible/internal/non-top LVs that can be directly
+ * resized via the command arg. Other internal LVs are resized
+ * indirectly when resizing a top LV.
+ */
+ if (!lv_is_visible(lv) &&
+ !lv_is_thin_pool_data(lv) &&
+ !lv_is_thin_pool_metadata(lv) &&
+ !lv_is_vdo_pool_data(lv) &&
+ !lv_is_lockd_sanlock_lv(lv)) {
+ log_error("Can't resize internal logical volume %s.", display_lvname(lv));
+ return 0;
+ }
+
+ /*
+ * Figure out which LVs are going to be extended, and set params
+ * to the requested extents/size for each. Some LVs are extended
+ * only by extending an underlying LV. Extending some top level
+ * LVs results in extending multiple underlying LVs.
+ *
+ * lv_top is the top level LV in stack.
+ * lv_main is the main LV to be resized.
+ * lv_meta is always a thin pool metadata LV.
+ *
+ * lv_main_layer/lv_meta_layer may be LV types (like cache) that are
+ * layered over the main/meta LVs. These layer LVs are skipped over
+ * by get_resizable_layer_lv() which finds the bottom-most layer
+ * which is originally resized. The layer LVs are resized indirectly
+ * as a result of the lower data-holding LVs being resized.
+ *
+ * In the simplest case there is no layering/stacking, and
+ * lv == lv_main == lv_main_layer == lv_top
+ */
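+ /*
+ * Example (hypothetical stack): resizing a cached LV "vg/lv":
+ *   lv_top = vg/lv (reloaded after resizing)
+ *   lv_main_layer = vg/lv (the cache layer itself)
+ *   lv_main = vg/lv_corig (bottom data LV that is actually resized)
+ */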
+
+ if (cmd->command_enum == lvextend_policy_CMD) {
+ /* lvextend --use-policies may extend main or meta or both */
+ lv_top = lv;
+ if (lv_is_thin_pool(lv)) {
+ if (lp->policy_percent_main) {
+ lv_main = seg_lv(first_seg(lv), 0); /* thin pool data */
+ lp->percent_value = lp->policy_percent_main;
+ }
+ if (lp->policy_percent_meta) {
+ lv_meta = first_seg(lv)->metadata_lv; /* thin pool metadata */
+ _setup_params_for_extend_metadata(lv_meta, &lp_meta);
+ /* override setup function which isn't right for policy use */
+ lp_meta.percent = PERCENT_LV;
+ lp_meta.sign = SIGN_PLUS;
+ lp_meta.percent_value = lp->policy_percent_meta;
+ lp_meta.pvh = lp->pvh;
+ }
+ } else if (lv_is_vdo_pool(lv)) {
+ lv_main = seg_lv(first_seg(lv), 0); /* vdo pool data */
+ lp->percent_value = lp->policy_percent_main;
+ } else if (lv_is_cow(lv)) {
+ lv_main = lv;
+ lp->percent_value = lp->policy_percent_main;
+ } else
+ return_0;
+
+ } else if ((cmd->command_enum == lvextend_pool_metadata_CMD) ||
+ (cmd->command_enum == lvresize_pool_metadata_CMD)) {
+ /* lvresize|lvextend --poolmetadatasize, extends only thin pool metadata */
+ if (lv_is_thin_pool(lv)) {
+ lv_top = lv;
+ lv_meta = first_seg(lv)->metadata_lv; /* thin pool metadata */
+ } else if (lv_is_thin_pool_metadata(lv)) {
+ lv_top = _get_top_layer_lv(lv); /* thin pool LV */
+ lv_meta = lv;
+ } else {
+ log_error("--poolmetadatasize can be used only with thin pools.");
+ return 0;
+ }
+ lp_meta = *lp;
+ _setup_params_for_extend_metadata(lv_meta, &lp_meta);
+ lp_meta.size = lp->poolmetadata_size;
+ lp_meta.sign = lp->poolmetadata_sign;
+ lp->poolmetadata_size = 0;
+ lp->poolmetadata_sign = 0;
+
+ } else if (lv_is_thin_pool(lv) && lp->poolmetadata_size) {
+ /* extend both thin pool data and metadata */
+ lv_top = lv;
+ lv_main = seg_lv(first_seg(lv), 0); /* thin pool data */
+ lv_meta = first_seg(lv)->metadata_lv; /* thin pool metadata */
+ lp_meta = *lp;
+ _setup_params_for_extend_metadata(lv_meta, &lp_meta);
+ lp_meta.size = lp->poolmetadata_size;
+ lp_meta.sign = lp->poolmetadata_sign;
+ lp->poolmetadata_size = 0;
+ lp->poolmetadata_sign = 0;
+
+ } else if (lv_is_thin_pool_metadata(lv)) {
+ /* extend only thin pool metadata */
+ lv_top = _get_top_layer_lv(lv); /* thin pool LV */
+ lv_meta = lv;
+ lp_meta = *lp;
+ _setup_params_for_extend_metadata(lv_meta, &lp_meta);
+ if (lp->poolmetadata_size) {
+ lp_meta.size = lp->poolmetadata_size;
+ lp_meta.sign = lp->poolmetadata_sign;
+ lp->poolmetadata_size = 0;
+ lp->poolmetadata_sign = 0;
+ }
+ /* else lp_meta.extents|size from lp->extents|size above */
+
+ } else if (lv_is_thin_pool(lv)) {
+ /* extend thin pool data and possibly metadata */
+ lv_top = lv;
+ lv_main = seg_lv(first_seg(lv), 0);
+ /* Do not set lv_meta to the thin pool metadata here.
+ See below "Possibly enable lv_meta extend". */
+ }
+
+ /*
+ * None of the special cases above (selecting which LVs to extend
+ * depending on options set and type of LV) have applied, so this
+ * is the standard case.
+ */
+ if (!lv_main && !lv_meta) {
+ lv_top = _get_top_layer_lv(lv);
+ lv_main_layer = lv;
+ lv_main = _get_resizable_layer_lv(lv_main_layer);
+ } else {
+ lv_main_layer = lv_main;
+ lv_meta_layer = lv_meta;
+ if (lv_main)
+ lv_main = _get_resizable_layer_lv(lv_main_layer);
+ if (lv_meta)
+ lv_meta = _get_resizable_layer_lv(lv_meta_layer);
+ }
+ /* Clear layer variables if no layer exists. */
+ if (lv_main_layer == lv_main)
+ lv_main_layer = NULL;
+ if (lv_meta_layer == lv_meta)
+ lv_meta_layer = NULL;
+
+ /*
+ * LVs to work with are now determined:
+ * lv_top is always set, it is not used to resize, but is used
+ * to reload dm devices for the lv.
+ * If lv_main is set, it is resized.
+ * If lv_meta is set, it is resized.
+ * If lv_meta is not set, it may be set below and resized.
+ */
+
+ if (!_lv_resize_check_used(lv_top))
+ return_0;
+ if (lv_main && (lv_main != lv_top) && !_lv_resize_check_used(lv_main))
+ return_0;
+
+ /*
+ * Set a new size for lv_main.
+ */
+ if (lv_main) {
+ /* sets lp extents and lp resize */
+ if (!_lv_resize_adjust_size(lv_main, lp, &main_size_matches))
+ return_0;
+ /* sanity check the result of adjust_size */
+ if (lp->extents == 0)
+ return_0;
+ /* adjust_size resolves LV_ANY to EXTEND|REDUCE */
+ if (lp->resize == LV_ANY)
+ return_0;
+ if (is_extend && (lp->resize != LV_EXTEND))
+ return_0;
+ if (is_reduce && (lp->resize != LV_REDUCE))
+ return_0;
+ is_extend = (lp->resize == LV_EXTEND);
+ is_reduce = (lp->resize == LV_REDUCE);
+
+ if (!_lv_resize_check_type(lv_main, lp))
+ return_0;
+ }
+
+ /*
+ * Possibly enable lv_meta extend if not already enabled. If lv_meta
+ * for a thin pool is not already being extended, and user requested
+ * extending the thin pool, then we may need to automatically include
+ * extending lv_meta in addition to lv_main (data), so that the
+ * metadata size is sufficient for the extended data size.
+ *
+ * If specific PVs were named to extend, this is taken to mean that
+ * only the thin pool data should be extended (using those PVs), and
+ * the thin pool metadata should not be automatically extended (since
+ * it would likely want to be extended using different PVs.)
+ */
+ if (lv_is_thin_pool(lv_top) && is_extend && lv_main && !lv_meta && (&vg->pvs == lp->pvh)) {
+ struct lv_segment *tpseg = first_seg(lv_top);
+ uint64_t meta_size = estimate_thin_pool_metadata_size(lp->extents, vg->extent_size, tpseg->chunk_size);
+ if (meta_size > tpseg->metadata_lv->size) {
+ log_verbose("Extending thin pool metadata to %llu for larger data", (unsigned long long)meta_size);
+ lv_meta = tpseg->metadata_lv;
+ lp_meta = *lp;
+ _setup_params_for_extend_metadata(lv_meta, &lp_meta);
+ lp_meta.size = meta_size;
+ lp_meta.sign = SIGN_NONE;
+ /* meta may have a layer over it */
+ lv_meta_layer = lv_meta;
+ lv_meta = _get_resizable_layer_lv(lv_meta_layer);
+ if (lv_meta == lv_meta_layer)
+ lv_meta_layer = NULL;
+ }
+ }
+
+ /*
+ * Set a new size for lv_meta (extend only.)
+ */
+ if (lv_meta) {
+ /* sets lp extents and lp resize */
+ if (!_lv_resize_adjust_size(lv_meta, &lp_meta, &meta_size_matches))
+ return_0;
+ /* sanity check the result of adjust_size */
+ if (lp_meta.extents == 0)
+ return_0;
+ /* adjust_size resolves lp_meta.resize to EXTEND|REDUCE */
+ /* _lv_resize_check_type errors if resize is EXTEND for thin meta */
+ if (!_lv_resize_check_type(lv_meta, &lp_meta))
+ return_0;
+ }
+
+ /*
+ * No resizing is needed.
+ */
+ if ((main_size_matches && meta_size_matches) ||
+ (main_size_matches && !lv_meta) ||
+ (meta_size_matches && !lv_main)) {
+ log_error("No size change.");
+ return 0;
+ }
+
+ /*
+ * If the LV is locked due to being active, this lock call is a no-op.
+ * Otherwise, this acquires a transient lock on the lv (not PERSISTENT)
+ */
+ if (!lockd_lv_resize(cmd, lv_top, "ex", 0, lp))
+ return_0;
+
+ /*
+ * Active 'hidden' -tpool can be waiting for resize, but the pool LV
+ * itself might be inactive. Here plain suspend/resume would not work.
+ * So temporarily activate the pool LV (with on-disk metadata), then
+ * use suspend/resume and deactivate the pool LV, instead of searching
+ * for an active thin volume.
+ *
+ * FIXME: why are thin pools activated where other LV types return
+ * error if inactive?
+ */
+ if (lv_is_thin_pool(lv_top) && !lv_is_active(lv_top)) {
+ if (!activation()) {
+ log_error("Cannot activate to resize %s without using device-mapper kernel driver.",
+ display_lvname(lv_top));
+ return 0;
+ }
+ if (!activate_lv(cmd, lv_top)) {
+ log_error("Failed to activate %s.", display_lvname(lv_top));
+ return 0;
+ }
+ if (!sync_local_dev_names(cmd))
+ stack;
+ activated = 1;
+ }
+
+ /*
+ * Disable fsopt checksize for lvextend.
+ */
+ if (is_extend && !strcmp(lp->fsopt, "checksize"))
+ lp->fsopt[0] = '\0';
+
+ /*
+ * Disable fsopt if LV type cannot hold a file system.
+ */
+ if (lp->fsopt[0] &&
+ !(lv_is_linear(lv) || lv_is_striped(lv) || lv_is_raid(lv) ||
+ lv_is_mirror(lv) || lv_is_thin_volume(lv) || lv_is_vdo(lv) ||
+ lv_is_cache(lv) || lv_is_writecache(lv))) {
+ log_print_unless_silent("Ignoring fs resizing options for LV type %s.",
+ seg ? seg->segtype->name : "unknown");
+ lp->fsopt[0] = '\0';
+ }
+
+ /*
+ * Using an option to resize the fs has always/traditionally required
+ * the LV to already be active, so keep that behavior. Reducing an
+ * inactive LV will activate the LV to check for a fs that the
+ * reduction would damage.
+ */
+ is_active = lv_is_active(lv_top);
+
+ if (is_reduce && !is_active && !strcmp(lp->fsopt, "checksize")) {
+ if (!lp->user_set_fs) {
+ log_error("The LV must be active to safely reduce (see --fs options.)");
+ goto out;
+ }
+ lv_top->status |= LV_TEMPORARY;
+ if (!activate_lv(cmd, lv_top)) {
+ log_error("Failed to activate %s to check for fs.", display_lvname(lv_top));
+ goto out;
+ }
+ lv_top->status &= ~LV_TEMPORARY;
+ if (!sync_local_dev_names(cmd))
+ stack;
+ activated_checksize = 1;
+
+ } else if (lp->fsopt[0] && !is_active) {
+ log_error("Logical volume %s must be active for file system %s.",
+ display_lvname(lv_top), lp->fsopt);
+ goto out;
+ }
+
+ /*
+ * Return an error without resizing the LV if the user requested
+ * a file system resize when no file system exists on the LV.
+ * (fs checksize does not require a fs to exist.)
+ */
+ if (lp->fsopt[0] && strcmp(lp->fsopt, "checksize") && lp->user_set_fs) {
+ char lv_path[PATH_MAX];
+ char fstype[FSTYPE_MAX];
+ int nofs = 0;
+
+ if (dm_snprintf(lv_path, sizeof(lv_path), "%s%s/%s", cmd->dev_dir,
+ lv_top->vg->name, lv_top->name) < 0) {
+ log_error("Couldn't create LV path for %s.", display_lvname(lv_top));
+ goto out;
+ }
+ if (!fs_block_size_and_type(lv_path, NULL, fstype, &nofs) || nofs) {
+ log_error("File system not found for --resizefs or --fs options.");
+ goto out;
+ }
+ if (!strcmp(fstype, "crypto_LUKS") && !lv_crypt_is_active(cmd, lv_path)) {
+ log_error("LUKS dm-crypt device must be active for fs resize.");
+ goto out;
+ }
+ /* FS utils will fail if LVs were renamed while mounted. */
+ if (fs_mount_state_is_misnamed(cmd, lv_top, lv_path, fstype))
+ goto_out;
+ }
+
+ /*
+ * Warn and confirm if checksize has been disabled for reduce.
+ */
+ if (is_reduce && !lp->fsopt[0] && !_lv_reduce_confirmation(lv_top, lp))
+ goto_out;
+
+ /* Part of old approach to fs handling using fsadm. */
+ if (!strcmp(lp->fsopt, "resize_fsadm") && !lp->nofsck &&
+ !_fsadm_cmd(FSADM_CMD_CHECK, lv_top, 0, lp->yes, lp->force, &status)) {
+ if (status != FSADM_CHECK_FAILS_FOR_MOUNTED) {
+ log_error("Filesystem check failed.");
+ goto out;
+ }
+ }
+
+ if (is_reduce && lp->fsopt[0]) {
+ if (!strcmp(lp->fsopt, "resize_fsadm")) {
+ /* Old approach to fs handling using fsadm. */
+ if (!_fsadm_cmd(FSADM_CMD_RESIZE, lv_top, lp->extents, lp->yes, lp->force, NULL)) {
+ log_error("Filesystem resize failed.");
+ goto out;
+ }
+ } else {
+ /* New approach to fs handling using fs info. */
+ if (!_fs_reduce(cmd, lv_top, lp))
+ goto_out;
+ }
+
+ if (activated_checksize && !deactivate_lv(cmd, lv_top))
+ log_warn("Problem deactivating %s.", display_lvname(lv_top));
+ }
+
+ /*
+ * Send DISCARD/TRIM to reduced area of VDO volumes
+ * TODO: enable thin and provide
+ * TODO2: we need polling method
+ */
+ if (is_reduce && lv_is_vdo(lv_top) && !_lv_reduce_vdo_discard(cmd, lv_top, lp))
+ goto_out;
+
+ /*
+ * Remove any striped raid reshape space for LV resizing (not common).
+ */
+ if (lv_meta && first_seg(lv_meta)->reshape_len && !lv_raid_free_reshape_space(lv_meta))
+ goto_out;
+ if (lv_main && first_seg(lv_main)->reshape_len && !lv_raid_free_reshape_space(lv_main))
+ goto_out;
+
+ /*
+ * The core of the actual lv resizing.
+ * Allocate or free extents in the VG, adjust LV segments to reflect
+ * new requested size, write VG metadata, reload the dm device stack
+ * (reload from the top LV.) Do lv_meta first.
+ * When extending lv_meta, also extend (or create) the pool's spare
+ * meta lv to match the size of lv_meta (only do this when the
+ * command is not limited to allocating from specific PVs.)
+ */
+
+ if (!lv_meta)
+ goto do_main;
+ if (!_lv_resize_volume(lv_meta, &lp_meta, lp->pvh))
+ goto_out;
+ if (!lp_meta.size_changed)
+ goto do_main;
+ if ((&vg->pvs == lp->pvh) && !handle_pool_metadata_spare(vg, 0, lp->pvh, 1))
+ stack;
+ if (!lv_update_and_reload(lv_top))
+ goto_out;
+ log_debug("Resized thin pool metadata %s to %u extents.", display_lvname(lv_meta), lp_meta.extents);
+
+ do_main:
+
+ if (!lv_main)
+ goto end_main;
+ if (!_lv_resize_volume(lv_main, lp, lp->pvh))
+ goto_out;
+ if (!lp->size_changed)
+ goto_out;
+ if (!lv_update_and_reload(lv_top))
+ goto_out;
+ log_debug("Resized %s to %u extents.", display_lvname(lv_main), lp->extents);
+
+ end_main:
+
+ /*
+ * other maintenance:
+ * - update lvm pool metadata (drop messages).
+ * - print warnings about overprovisioning.
+ * - stop monitoring cow snapshot larger than origin
+ */
+ if (lv_is_thin_pool(lv_top)) {
+ if (!update_thin_pool_lv(lv_top, 1))
+ goto_out;
+ }
+ if (lv_is_thin_type(lv_top) && is_extend)
+ thin_pool_check_overprovisioning(lv_top);
+
+ if (lv_main && lv_is_cow_covering_origin(lv_main)) {
+ if (!monitor_dev_for_events(cmd, lv_main, 0, 0))
+ stack;
+ }
+
+ if (is_extend && lp->fsopt[0]) {
+ if (!strcmp(lp->fsopt, "resize_fsadm")) {
+ /* Old approach to fs handling using fsadm. */
+ if (!_fsadm_cmd(FSADM_CMD_RESIZE, lv_top, lp->extents, lp->yes, lp->force, NULL)) {
+ log_error("File system extend error.");
+ lp->extend_fs_error = 1;
+ goto out;
+ }
+ } else {
+ /* New approach to fs handling using fs info. */
+ if (!_fs_extend(cmd, lv_top, lp)) {
+ log_error("File system extend error.");
+ lp->extend_fs_error = 1;
+ goto out;
+ }
+ }
+ }
+
+ ret = 1;
+
+ out:
+ if (activated || activated_checksize) {
+ if (!sync_local_dev_names(cmd))
+ stack;
+ if (!deactivate_lv(cmd, lv_top))
+ log_warn("Problem deactivating %s.", display_lvname(lv_top));
+ }
+
+ return ret;
}
char *generate_lv_name(struct volume_group *vg, const char *format,
char *buffer, size_t len)
{
struct lv_list *lvl;
+ struct glv_list *glvl;
int high = -1, i;
dm_list_iterate_items(lvl, &vg->lvs) {
@@ -3036,26 +7162,114 @@ char *generate_lv_name(struct volume_group *vg, const char *format,
high = i;
}
+ dm_list_iterate_items(glvl, &vg->historical_lvs) {
+ if (sscanf(glvl->glv->historical->name, format, &i) != 1)
+ continue;
+
+ if (i > high)
+ high = i;
+ }
+
if (dm_snprintf(buffer, len, format, high + 1) < 0)
return NULL;
return buffer;
}
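
A brief usage sketch may help here (hypothetical caller; "lvol%d" is the conventional default format and NAME_LEN comes from the LVM2 headers):

	char new_name[NAME_LEN];

	/* Scans live and historical names against the format and returns
	 * e.g. "lvol3" when lvol0..lvol2 already exist in the VG. */
	if (!generate_lv_name(vg, "lvol%d", new_name, sizeof(new_name))) {
		log_error("Failed to generate unique LV name.");
		return NULL;
	}
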
-int vg_max_lv_reached(struct volume_group *vg)
+struct generic_logical_volume *get_or_create_glv(struct dm_pool *mem, struct logical_volume *lv, int *glv_created)
{
- if (!vg->max_lv)
- return 0;
+ struct generic_logical_volume *glv;
+
+ if (!(glv = lv->this_glv)) {
+ if (!(glv = dm_pool_zalloc(mem, sizeof(struct generic_logical_volume)))) {
+ log_error("Failed to allocate generic logical volume structure.");
+ return NULL;
+ }
+ glv->live = lv;
+ lv->this_glv = glv;
+ if (glv_created)
+ *glv_created = 1;
+ } else if (glv_created)
+ *glv_created = 0;
+
+ return glv;
+}
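
As a sketch of the intended call pattern (hypothetical caller; the pool is typically vg->vgmem, as in the list variant below):

	int created = 0;
	struct generic_logical_volume *glv;

	/* Idempotent: returns lv->this_glv if it exists, else allocates one. */
	if (!(glv = get_or_create_glv(vg->vgmem, lv, &created)))
		return 0;
	if (created)
		log_debug("Allocated glv wrapper for %s.", lv->name);
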
+
+struct glv_list *get_or_create_glvl(struct dm_pool *mem, struct logical_volume *lv, int *glv_created)
+{
+ struct glv_list *glvl;
+
+ if (!(glvl = dm_pool_zalloc(mem, sizeof(struct glv_list)))) {
+ log_error("Failed to allocate generic logical volume list item.");
+ return NULL;
+ }
- if (vg->max_lv > vg_visible_lvs(vg))
+ if (!(glvl->glv = get_or_create_glv(mem, lv, glv_created))) {
+ dm_pool_free(mem, glvl);
+ return_NULL;
+ }
+
+ return glvl;
+}
+
+int add_glv_to_indirect_glvs(struct dm_pool *mem,
+ struct generic_logical_volume *origin_glv,
+ struct generic_logical_volume *glv)
+{
+ struct glv_list *glvl;
+
+ if (!(glvl = dm_pool_zalloc(mem, sizeof(struct glv_list)))) {
+ log_error("Failed to allocate generic volume list item "
+ "for indirect glv %s", glv->is_historical ? glv->historical->name
+ : glv->live->name);
return 0;
+ }
- log_verbose("Maximum number of logical volumes (%u) reached "
- "in volume group %s", vg->max_lv, vg->name);
+ glvl->glv = glv;
+
+ if (glv->is_historical)
+ glv->historical->indirect_origin = origin_glv;
+ else
+ first_seg(glv->live)->indirect_origin = origin_glv;
+
+ if (origin_glv) {
+ if (origin_glv->is_historical)
+ dm_list_add(&origin_glv->historical->indirect_glvs, &glvl->list);
+ else
+ dm_list_add(&origin_glv->live->indirect_glvs, &glvl->list);
+ }
return 1;
}
+int remove_glv_from_indirect_glvs(struct generic_logical_volume *origin_glv,
+ struct generic_logical_volume *glv)
+{
+ struct glv_list *glvl, *tglvl;
+ struct dm_list *list = origin_glv->is_historical ? &origin_glv->historical->indirect_glvs
+ : &origin_glv->live->indirect_glvs;
+
+ dm_list_iterate_items_safe(glvl, tglvl, list) {
+ if (glvl->glv != glv)
+ continue;
+
+ dm_list_del(&glvl->list);
+
+ if (glvl->glv->is_historical)
+ glvl->glv->historical->indirect_origin = NULL;
+ else
+ first_seg(glvl->glv->live)->indirect_origin = NULL;
+
+ return 1;
+ }
+
+ log_error(INTERNAL_ERROR "%s logical volume %s is not a user of %s.",
+ glv->is_historical ? "historical" : "Live",
+ glv->is_historical ? glv->historical->name : glv->live->name,
+ origin_glv->is_historical ? origin_glv->historical->name : origin_glv->live->name);
+ return 0;
+}
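
A minimal sketch of how the two helpers pair up when tracking historical ancestry (hypothetical caller; both glv pointers obtained via get_or_create_glv()):

	/* Record that 'glv' indirectly descends from 'origin_glv'... */
	if (!add_glv_to_indirect_glvs(vg->vgmem, origin_glv, glv))
		return_0;

	/* ...and drop the link again, e.g. when re-parenting. */
	if (!remove_glv_from_indirect_glvs(origin_glv, glv))
		return_0;
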
+
struct logical_volume *alloc_lv(struct dm_pool *mem)
{
struct logical_volume *lv;
@@ -3065,12 +7279,11 @@ struct logical_volume *alloc_lv(struct dm_pool *mem)
return NULL;
}
- lv->snapshot = NULL;
dm_list_init(&lv->snapshot_segs);
dm_list_init(&lv->segments);
dm_list_init(&lv->tags);
dm_list_init(&lv->segs_using_this_lv);
- dm_list_init(&lv->rsites);
+ dm_list_init(&lv->indirect_glvs);
return lv;
}
@@ -3087,6 +7300,7 @@ struct logical_volume *lv_create_empty(const char *name,
struct format_instance *fi = vg->fid;
struct logical_volume *lv;
char dname[NAME_LEN];
+ int historical;
if (vg_max_lv_reached(vg))
stack;
@@ -3096,9 +7310,12 @@ struct logical_volume *lv_create_empty(const char *name,
log_error("Failed to generate unique name for the new "
"logical volume");
return NULL;
- } else if (find_lv_in_vg(vg, name)) {
+ }
+
+ if (lv_name_is_used_in_vg(vg, name, &historical)) {
log_error("Unable to create LV %s in Volume Group %s: "
- "name already in use.", name, vg->name);
+ "name already in use%s.", name, vg->name,
+ historical ? " by historical LV" : "");
return NULL;
}
@@ -3126,10 +7343,13 @@ struct logical_volume *lv_create_empty(const char *name,
if (!lv_set_creation(lv, NULL, 0))
goto_bad;
-
+
if (fi->fmt->ops->lv_setup && !fi->fmt->ops->lv_setup(fi, lv))
goto_bad;
-
+
+ if (vg->fid->fmt->features & FMT_CONFIG_PROFILE)
+ lv->profile = vg->cmd->profile_params->global_metadata_profile;
+
return lv;
bad:
dm_pool_free(vg->vgmem, lv);
@@ -3144,9 +7364,9 @@ static int _add_pvs(struct cmd_context *cmd, struct pv_segment *peg,
/* Don't add again if it's already on list. */
if (find_pv_in_pv_list(&spvs->pvs, peg->pv))
- return 1;
+ return 1;
- if (!(pvl = dm_pool_alloc(cmd->mem, sizeof(*pvl)))) {
+ if (!(pvl = dm_pool_zalloc(cmd->mem, sizeof(*pvl)))) {
log_error("pv_list allocation failed");
return 0;
}
@@ -3159,20 +7379,35 @@ static int _add_pvs(struct cmd_context *cmd, struct pv_segment *peg,
}
/*
- * Construct dm_list of segments of LVs showing which PVs they use.
- * For pvmove we use the *parent* LV so we can pick up stripes & existing mirrors etc.
+ * build_parallel_areas_from_lv
+ * @lv
+ * @use_pvmove_parent_lv
+ * @create_single_list
+ *
+ * For each segment in an LV, create a list of PVs used by the segment.
+ * Thus, the returned list is really a list of segments (seg_pvs)
+ * containing a list of PVs that are in use by that segment.
+ *
+ * use_pvmove_parent_lv: For pvmove we use the *parent* LV so we can
+ * pick up stripes & existing mirrors etc.
+ * create_single_list : Instead of creating a list of segments that
+ * each contain a list of PVs, return a list
+ * containing just one segment (i.e. seg_pvs)
+ * that contains a list of all the PVs used by
+ * the entire LV and all its segments.
*/
struct dm_list *build_parallel_areas_from_lv(struct logical_volume *lv,
- unsigned use_pvmove_parent_lv)
+ unsigned use_pvmove_parent_lv,
+ unsigned create_single_list)
{
struct cmd_context *cmd = lv->vg->cmd;
struct dm_list *parallel_areas;
- struct seg_pvs *spvs;
+ struct seg_pvs *spvs = NULL;
uint32_t current_le = 0;
uint32_t raid_multiple;
struct lv_segment *seg = first_seg(lv);
- if (!(parallel_areas = dm_pool_alloc(cmd->mem, sizeof(*parallel_areas)))) {
+ if (!(parallel_areas = dm_pool_alloc(lv->vg->vgmem, sizeof(*parallel_areas)))) {
log_error("parallel_areas allocation failed");
return NULL;
}
@@ -3180,19 +7415,20 @@ struct dm_list *build_parallel_areas_from_lv(struct logical_volume *lv,
dm_list_init(parallel_areas);
do {
- if (!(spvs = dm_pool_zalloc(cmd->mem, sizeof(*spvs)))) {
- log_error("allocation failed");
- return NULL;
- }
-
- dm_list_init(&spvs->pvs);
+ if (!spvs || !create_single_list) {
+ if (!(spvs = dm_pool_zalloc(lv->vg->vgmem, sizeof(*spvs)))) {
+ log_error("allocation failed");
+ return NULL;
+ }
+ dm_list_init(&spvs->pvs);
+ dm_list_add(parallel_areas, &spvs->list);
+ }
spvs->le = current_le;
spvs->len = lv->le_count - current_le;
- dm_list_add(parallel_areas, &spvs->list);
-
- if (use_pvmove_parent_lv && !(seg = find_seg_by_le(lv, current_le))) {
+ if (use_pvmove_parent_lv &&
+ !(seg = find_seg_by_le(lv, current_le))) {
log_error("Failed to find segment for %s extent %" PRIu32,
lv->name, current_le);
return 0;
@@ -3213,38 +7449,18 @@ struct dm_list *build_parallel_areas_from_lv(struct logical_volume *lv,
seg->area_count - seg->segtype->parity_devs : 1;
} while ((current_le * raid_multiple) < lv->le_count);
- /* FIXME Merge adjacent segments with identical PV lists (avoids need for contiguous allocation attempts between successful allocations) */
-
- return parallel_areas;
-}
-
-int link_lv_to_vg(struct volume_group *vg, struct logical_volume *lv)
-{
- struct lv_list *lvl;
-
- if (vg_max_lv_reached(vg))
- stack;
-
- if (!(lvl = dm_pool_zalloc(vg->vgmem, sizeof(*lvl))))
- return_0;
-
- lvl->lv = lv;
- lv->vg = vg;
- dm_list_add(&vg->lvs, &lvl->list);
-
- return 1;
-}
-
-int unlink_lv_from_vg(struct logical_volume *lv)
-{
- struct lv_list *lvl;
-
- if (!(lvl = find_lv_in_vg(lv->vg, lv->name)))
- return_0;
+ if (create_single_list) {
+ spvs->le = 0;
+ spvs->len = lv->le_count;
+ }
- dm_list_del(&lvl->list);
+ /*
+ * FIXME: Merge adjacent segments with identical PV lists
+ * (avoids need for contiguous allocation attempts between
+ * successful allocations)
+ */
- return 1;
+ return parallel_areas;
}
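
For orientation, a hedged sketch of consuming the result (assuming the seg_pvs layout described above: an 'le'/'len' extent range plus a 'pvs' list of pv_list entries):

	struct dm_list *parallel_areas;
	struct seg_pvs *spvs;
	struct pv_list *pvl;

	if (!(parallel_areas = build_parallel_areas_from_lv(lv, 0, 0)))
		return_0;

	dm_list_iterate_items(spvs, parallel_areas)	/* one entry per LE range */
		dm_list_iterate_items(pvl, &spvs->pvs)	/* PVs backing that range */
			log_debug("LEs %u+%u use %s.",
				  spvs->le, spvs->len, pv_dev_name(pvl->pv));
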
void lv_set_visible(struct logical_volume *lv)
@@ -3254,7 +7470,7 @@ void lv_set_visible(struct logical_volume *lv)
lv->status |= VISIBLE_LV;
- log_debug("LV %s in VG %s is now visible.", lv->name, lv->vg->name);
+ log_debug_metadata("LV %s in VG %s is now visible.", lv->name, lv->vg->name);
}
void lv_set_hidden(struct logical_volume *lv)
@@ -3264,19 +7480,79 @@ void lv_set_hidden(struct logical_volume *lv)
lv->status &= ~VISIBLE_LV;
- log_debug("LV %s in VG %s is now hidden.", lv->name, lv->vg->name);
+ log_debug_metadata("LV %s in VG %s is now hidden.", lv->name, lv->vg->name);
+}
+
+static int _lv_remove_check_in_use(struct logical_volume *lv, force_t force)
+{
+ struct volume_group *vg = lv->vg;
+ const char *volume_type = "";
+ char buffer[50 + NAME_LEN * 2] = "";
+ int active;
+ int issue_discards =
+ (vg->cmd->current_settings.issue_discards &&
+ !lv_is_thin_volume(lv) &&
+ !lv_is_vdo(lv) &&
+ !lv_is_virtual_origin(lv)) ? 1 : 0;
+
+ switch (lv_check_not_in_use(lv, 1)) {
+ case 2: /* Not active, prompt when discarding real LVs */
+ if (!issue_discards ||
+ lv_is_historical(lv))
+ return 1;
+ active = 0;
+ break;
+ case 1: /* Active, not in use, prompt when visible */
+ if (!lv_is_visible(lv) ||
+ lv_is_pending_delete(lv))
+ return 1;
+ active = 1;
+ break;
+ default: /* Active, in use, can't remove */
+ return_0;
+ }
+
+ if (force == PROMPT) {
+ if (vg->needs_write_and_commit && (!vg_write(vg) || !vg_commit(vg)))
+ return_0;
+
+ if (lv_is_origin(lv)) {
+ volume_type = " origin";
+ (void) dm_snprintf(buffer, sizeof(buffer), " with %u snapshots(s)",
+ lv->origin_count);
+ } else if (lv_is_merging_origin(lv)) {
+ volume_type = " merging origin";
+ (void) dm_snprintf(buffer, sizeof(buffer), " with snapshot %s",
+ display_lvname(find_snapshot(lv)->lv));
+ }
+
+ if (yes_no_prompt("Do you really want to remove%s%s%s%s "
+ "logical volume %s%s? [y/n]: ",
+ issue_discards ? " and DISCARD" : "",
+ active ? " active" : "",
+ vg_is_clustered(vg) ? " clustered" : "",
+ volume_type, display_lvname(lv),
+ buffer) == 'n') {
+ lv->to_remove = 0;
+ log_error("Logical volume %s not removed.", display_lvname(lv));
+ return 0;
+ }
+ }
+
+ return 1;
}
int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
- const force_t force)
+ force_t force, int suppress_remove_message)
{
struct volume_group *vg;
- struct lvinfo info;
- struct logical_volume *format1_origin = NULL;
- int format1_reload_required = 0;
- int visible;
+ int visible, historical;
struct logical_volume *pool_lv = NULL;
- int ask_discard;
+ struct logical_volume *lock_lv = lv;
+ struct lv_segment *cache_seg = NULL;
+ struct seg_list *sl;
+ struct lv_segment *seg = first_seg(lv);
+ char msg[NAME_LEN + 300], *msg_dup;
vg = lv->vg;
@@ -3284,227 +7560,450 @@ int lv_remove_single(struct cmd_context *cmd, struct logical_volume *lv,
return_0;
if (lv_is_origin(lv)) {
- log_error("Can't remove logical volume \"%s\" under snapshot",
- lv->name);
+ log_error("Can't remove logical volume %s under snapshot.",
+ display_lvname(lv));
return 0;
}
- if (lv->status & MIRROR_IMAGE) {
- log_error("Can't remove logical volume %s used by a mirror",
- lv->name);
+ if (lv_is_external_origin(lv)) {
+ log_error("Can't remove external origin logical volume %s.",
+ display_lvname(lv));
return 0;
}
- if (lv->status & MIRROR_LOG) {
- log_error("Can't remove logical volume %s used as mirror log",
- lv->name);
+ if (lv_is_mirror_image(lv)) {
+ log_error("Can't remove logical volume %s used by a mirror.",
+ display_lvname(lv));
return 0;
}
- if (lv->status & (RAID_META | RAID_IMAGE)) {
- log_error("Can't remove logical volume %s used as RAID device",
- lv->name);
+ if (lv_is_mirror_log(lv)) {
+ log_error("Can't remove logical volume %s used as mirror log.",
+ display_lvname(lv));
return 0;
}
- if (lv_is_thin_pool_data(lv) || lv_is_thin_pool_metadata(lv)) {
- log_error("Can't remove logical volume %s used by a thin pool.",
- lv->name);
+ if (lv_is_raid_metadata(lv) || lv_is_raid_image(lv)) {
+ log_error("Can't remove logical volume %s used as RAID device.",
+ display_lvname(lv));
return 0;
- } else if (lv_is_thin_volume(lv))
- pool_lv = first_seg(lv)->pool_lv;
+ }
- if (lv->status & LOCKED) {
- log_error("Can't remove locked LV %s", lv->name);
+ if (lv_is_thin_pool_data(lv) || lv_is_thin_pool_metadata(lv) ||
+ lv_is_cache_pool_data(lv) || lv_is_cache_pool_metadata(lv)) {
+ log_error("Can't remove logical volume %s used by a pool.",
+ display_lvname(lv));
return 0;
}
- /* FIXME Ensure not referred to by another existing LVs */
- ask_discard = find_config_tree_bool(cmd,
- "devices/issue_discards", DEFAULT_ISSUE_DISCARDS);
+ if (lv_is_thin_volume(lv)) {
+ if (!(pool_lv = first_seg(lv)->pool_lv)) {
+ log_error(INTERNAL_ERROR "Thin LV %s without pool.",
+ display_lvname(lv));
+ return 0;
+ }
+ lock_lv = pool_lv;
+ if (pool_lv->to_remove)
+ /* Thin pool is to be removed so skip updating it when possible */
+ pool_lv = NULL;
+ }
- if (lv_info(cmd, lv, 0, &info, 1, 0)) {
- if (!lv_check_not_in_use(cmd, lv, &info))
+ if (lv_is_locked(lv)) {
+ log_error("Can't remove locked logical volume %s.", display_lvname(lv));
+ return 0;
+ }
+
+ if (!lockd_lv(cmd, lock_lv, "ex", LDLV_PERSISTENT))
+ return_0;
+
+ if (!lv_is_cache_vol(lv)) {
+ if (!_lv_remove_check_in_use(lv, force))
return_0;
+ }
+
+ /* if thin pool data lv is writecache, then detach and remove the writecache */
+ if (lv_is_thin_pool(lv)) {
+ struct logical_volume *data_lv = data_lv_from_thin_pool(lv);
- if ((force == PROMPT) &&
- lv_is_visible(lv) &&
- lv_is_active(lv)) {
- if (yes_no_prompt("Do you really want to remove%s active "
- "%slogical volume %s? [y/n]: ",
- ask_discard ? " and DISCARD" : "",
- vg_is_clustered(vg) ? "clustered " : "",
- lv->name) == 'n') {
- log_error("Logical volume %s not removed", lv->name);
+ if (data_lv && lv_is_writecache(data_lv)) {
+ struct logical_volume *cachevol_lv = first_seg(data_lv)->writecache;
+
+ if (!lv_detach_writecache_cachevol(data_lv, 1)) {
+ log_error("Failed to detach writecache from %s", display_lvname(data_lv));
+ return 0;
+ }
+
+ if (!lv_remove_single(cmd, cachevol_lv, force, 1)) {
+ log_error("Failed to remove cachevol %s.", display_lvname(cachevol_lv));
return 0;
- } else {
- ask_discard = 0;
}
}
}
- if ((force == PROMPT) && ask_discard &&
- yes_no_prompt("Do you really want to remove and DISCARD "
- "logical volume %s? [y/n]: ",
- lv->name) == 'n') {
- log_error("Logical volume %s not removed", lv->name);
- return 0;
+ if (lv_is_writecache(lv)) {
+ struct logical_volume *cachevol_lv = first_seg(lv)->writecache;
+
+ if (!deactivate_lv(cmd, lv)) {
+ log_error("Failed to deactivate LV %s", display_lvname(lv));
+ return 0;
+ }
+
+ if (!lv_detach_writecache_cachevol(lv, 1)) {
+ log_error("Failed to detach writecache from %s", display_lvname(lv));
+ return 0;
+ }
+
+ if (!lv_remove_single(cmd, cachevol_lv, force, suppress_remove_message)) {
+ log_error("Failed to remove cachevol %s.", display_lvname(cachevol_lv));
+ return 0;
+ }
}
- if (!archive(vg))
- return 0;
- if (lv_is_cow(lv)) {
- /* Old format1 code */
- if (!(lv->vg->fid->fmt->features & FMT_MDAS))
- format1_origin = origin_from_cow(lv);
+ /* Used cache pool, COW or historical LV cannot be activated */
+ if (!lv_is_used_cache_pool(lv) &&
+ !lv_is_cache_vol(lv) &&
+ !lv_is_cow(lv) && !lv_is_historical(lv) &&
+ !deactivate_lv_with_sub_lv(lv))
+ /* FIXME Review and fix the snapshot error paths! */
+ return_0;
+
+ /* Special case removing a striped raid LV with allocated reshape space */
+ if (seg && seg->reshape_len) {
+ if (!(seg->segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_STRIPED)))
+ return_0;
+ lv->le_count = seg->len = seg->area_len = seg_lv(seg, 0)->le_count * seg->area_count;
+ }
+
+ /* Clear thin pool stacked messages */
+ if (pool_lv && thin_pool_has_message(first_seg(pool_lv), lv, 0) &&
+ !update_thin_pool_lv(pool_lv, 1)) {
+ if (force < DONT_PROMPT_OVERRIDE) {
+ log_error("Failed to update pool %s.", display_lvname(pool_lv));
+ return 0;
+ }
+ log_print_unless_silent("Ignoring update failure of pool %s.",
+ display_lvname(pool_lv));
+ pool_lv = NULL; /* Do not retry */
+ }
+
+ /* When referenced by the LV with pending delete flag, remove this deleted LV first */
+ dm_list_iterate_items(sl, &lv->segs_using_this_lv)
+ if (lv_is_pending_delete(sl->seg->lv) && !lv_remove(sl->seg->lv)) {
+ log_error("Error releasing logical volume %s with pending delete.",
+ display_lvname(sl->seg->lv));
+ return 0;
+ }
- log_verbose("Removing snapshot %s", lv->name);
+ if (lv_is_cow(lv)) {
+ log_verbose("Removing snapshot volume %s.", display_lvname(lv));
/* vg_remove_snapshot() will preload origin/former snapshots */
if (!vg_remove_snapshot(lv))
return_0;
+
+ if (!deactivate_lv(cmd, lv)) {
+ /* FIXME Review and fix the snapshot error paths! */
+ log_error("Unable to deactivate logical volume %s.",
+ display_lvname(lv));
+ return 0;
+ }
}
- /* FIXME Review and fix the snapshot error paths! */
- if (!deactivate_lv(cmd, lv)) {
- log_error("Unable to deactivate logical volume \"%s\"",
- lv->name);
- return 0;
+ if (lv_is_cache_vol(lv)) {
+ if ((cache_seg = get_only_segment_using_this_lv(lv))) {
+ /* When used with cache, lvremove on cachevol also removes the cache! */
+ if (seg_is_cache(cache_seg)) {
+ if (!lv_cache_remove(cache_seg->lv))
+ return_0;
+ } else if (seg_is_writecache(cache_seg)) {
+ log_error("Detach cachevol before removing.");
+ return 0;
+ }
+ }
}
- /* Clear thin pool stacked messages */
- if (pool_lv && !pool_has_message(first_seg(pool_lv), lv, 0) &&
- !update_pool_lv(pool_lv, 1)) {
- log_error("Failed to update thin pool %s.", pool_lv->name);
- return 0;
+ if (lv_is_used_cache_pool(lv)) {
+ /* Cache pool removal drops the cache layer.
+ * If the cache pool is not linked, we can simply remove it. */
+ if (!(cache_seg = get_only_segment_using_this_lv(lv)))
+ return_0;
+ /* TODO: polling */
+ if (!lv_cache_remove(cache_seg->lv))
+ return_0;
}
visible = lv_is_visible(lv);
+ historical = lv_is_historical(lv);
- log_verbose("Releasing logical volume \"%s\"", lv->name);
+ log_verbose("Releasing %slogical volume \"%s\"",
+ historical ? "historical " : "",
+ historical ? lv->this_glv->historical->name : lv->name);
if (!lv_remove(lv)) {
- log_error("Error releasing logical volume \"%s\"", lv->name);
+ log_error("Error releasing %slogical volume \"%s\"",
+ historical ? "historical ": "",
+ historical ? lv->this_glv->historical->name : lv->name);
return 0;
}
- /*
- * Old format1 code: If no snapshots left reload without -real.
- */
- if (format1_origin && !lv_is_origin(format1_origin)) {
- log_warn("WARNING: Support for snapshots with old LVM1-style metadata is deprecated.");
- log_warn("WARNING: Please use lvconvert to update to lvm2 metadata at your convenience.");
- format1_reload_required = 1;
- }
-
- /* store it on disks */
- if (!vg_write(vg))
- return_0;
-
- /* format1 */
- if (format1_reload_required && !suspend_lv(cmd, format1_origin))
- log_error("Failed to refresh %s without snapshot.", format1_origin->name);
-
- if (!vg_commit(vg))
+ if (!pool_lv && (!strcmp(cmd->name, "lvremove") || !strcmp(cmd->name, "vgremove"))) {
+ /* With lvremove & vgremove try to postpone commit after last such LV */
+ vg->needs_write_and_commit = 1;
+ log_debug_metadata("Postponing write and commit.");
+ } else if (!vg_write(vg) || !vg_commit(vg)) /* store it on disks */
return_0;
-
- /* format1 */
- if (format1_reload_required && !resume_lv(cmd, format1_origin)) {
- log_error("Failed to resume %s.", format1_origin->name);
- return 0;
- }
/* Release unneeded blocks in thin pool */
/* TODO: defer when multiple LVs are released at once */
- if (pool_lv && !update_pool_lv(pool_lv, 1)) {
- log_error("Failed to update thin pool %s.", pool_lv->name);
- return 0;
+ if (pool_lv && !update_thin_pool_lv(pool_lv, 1)) {
+ if (force < DONT_PROMPT_OVERRIDE) {
+ log_error("Failed to update thin pool %s.", display_lvname(pool_lv));
+ return 0;
+ }
+ log_print_unless_silent("Ignoring update failure of pool %s.",
+ display_lvname(pool_lv));
+ }
+
+ if (!lockd_lv(cmd, lv, "un", LDLV_PERSISTENT))
+ log_warn("WARNING: Failed to unlock %s.", display_lvname(lv));
+ lockd_free_lv(cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args);
+
+ if (!suppress_remove_message && (visible || historical)) {
+ (void) dm_snprintf(msg, sizeof(msg),
+ "%sogical volume \"%s\" successfully removed.",
+ historical ? "Historical l" : "L",
+ historical ? lv->this_glv->historical->name : lv->name);
+ if (!vg->needs_write_and_commit)
+ log_print_unless_silent("%s", msg);
+ /* Keep print message for later display with next vg_write() and vg_commit() */
+ else if (!(msg_dup = dm_pool_strdup(vg->vgmem, msg)) ||
+ !str_list_add_no_dup_check(vg->vgmem, &vg->msg_list, msg_dup))
+ return_0;
}
- backup(vg);
+ return 1;
+}
- if (visible)
- log_print_unless_silent("Logical volume \"%s\" successfully removed", lv->name);
+static int _lv_remove_segs_using_this_lv(struct cmd_context *cmd, struct logical_volume *lv,
+ const force_t force, unsigned level,
+ const char *lv_type)
+{
+ struct seg_list *sl;
+
+ if ((force == PROMPT) &&
+ yes_no_prompt("Removing %s %s will remove %u dependent volume(s). "
+ "Proceed? [y/n]: ", lv_type, display_lvname(lv),
+ dm_list_size(&lv->segs_using_this_lv)) == 'n') {
+ lv->to_remove = 0;
+ log_error("Logical volume %s not removed.", display_lvname(lv));
+ return 0;
+ }
+
+ /*
+ * Not using _safe iterator here - since we may delete whole subtree
+ * (similar as process_each_lv_in_vg())
+ * the code is roughly equivalent to this:
+ *
+ * while (!dm_list_empty(&lv->segs_using_this_lv))
+ * dm_list_iterate_items(sl, &lv->segs_using_this_lv)
+ * break;
+ */
+ dm_list_iterate_items(sl, &lv->segs_using_this_lv)
+ if (!lv_remove_with_dependencies(cmd, sl->seg->lv,
+ force, level + 1))
+ return_0;
return 1;
}
-
/*
* remove LVs with its dependencies - LV leaf nodes should be removed first
*/
int lv_remove_with_dependencies(struct cmd_context *cmd, struct logical_volume *lv,
const force_t force, unsigned level)
{
- percent_t snap_percent;
+ dm_percent_t snap_percent;
struct dm_list *snh, *snht;
- struct seg_list *sl, *tsl;
struct lvinfo info;
+ struct lv_list *lvl;
+ struct logical_volume *origin;
- if (lv_is_cow(lv)) {
+ /* Make users of this LV aware that it is going to be removed,
+ * so they can skip any updates of it */
+ lv->to_remove = 1;
+
+ if (!level && lv_is_cow(lv)) {
/*
* A merging snapshot cannot be removed directly unless
* it has been invalidated or failed merge removal is requested.
*/
- if (lv_is_merging_cow(lv) && !level) {
+ if (lv_is_merging_cow(lv)) {
if (lv_info(lv->vg->cmd, lv, 0, &info, 1, 0) &&
info.exists && info.live_table) {
if (!lv_snapshot_percent(lv, &snap_percent)) {
- log_error("Failed to obtain merging snapshot progress percentage for logical volume %s.",
- lv->name);
+ log_error("Failed to obtain merging snapshot progress "
+ "percentage for logical volume %s.",
+ display_lvname(lv));
return 0;
}
- if ((snap_percent != PERCENT_INVALID) &&
- (snap_percent != PERCENT_MERGE_FAILED)) {
- log_error("Can't remove merging snapshot logical volume \"%s\"",
- lv->name);
- return 0;
- } else if ((snap_percent == PERCENT_MERGE_FAILED) &&
- (force == PROMPT) &&
- yes_no_prompt("Removing snapshot \"%s\" that failed to merge may leave origin \"%s\" inconsistent. "
- "Proceed? [y/n]: ", lv->name, origin_from_cow(lv)->name) == 'n') {
- log_error("Logical volume %s not removed.", lv->name);
+
+ if ((snap_percent != DM_PERCENT_INVALID) &&
+ (snap_percent != LVM_PERCENT_MERGE_FAILED)) {
+ log_error("Can't remove merging snapshot logical volume %s.",
+ display_lvname(lv));
return 0;
}
+
+ if ((snap_percent == LVM_PERCENT_MERGE_FAILED) &&
+ (force == PROMPT) &&
+ yes_no_prompt("Removing snapshot %s that failed to merge "
+ "may leave origin %s inconsistent. Proceed? [y/n]: ",
+ display_lvname(lv),
+ display_lvname(origin_from_cow(lv))) == 'n')
+ goto no_remove;
}
- }
+ } else if (lv_is_virtual_origin(origin = origin_from_cow(lv)))
+ /* If this is a sparse device, remove its origin too. */
+ /* Stacking is not supported */
+ lv = origin;
}
if (lv_is_origin(lv)) {
/* Remove snapshot LVs first */
- if ((force == PROMPT) &&
- /* Active snapshot already needs to confirm each active LV */
- !lv_is_active(lv) &&
- yes_no_prompt("Removing origin %s will also remove %u "
- "snapshots(s). Proceed? [y/n]: ",
- lv->name, lv->origin_count) == 'n') {
- log_error("Logical volume %s not removed.", lv->name);
- return 0;
- }
+ if (!_lv_remove_check_in_use(lv, force))
+ return_0;
+
+ if (!deactivate_lv(cmd, lv))
+ goto no_remove;
+
+ log_verbose("Removing origin logical volume %s with %u snapshots(s).",
+ display_lvname(lv), lv->origin_count);
dm_list_iterate_safe(snh, snht, &lv->snapshot_segs)
if (!lv_remove_with_dependencies(cmd, dm_list_struct_base(snh, struct lv_segment,
origin_list)->cow,
force, level + 1))
return_0;
+ } else if (lv_is_merging_origin(lv)) {
+ /* Removing a thin merging origin requires removing its merging snapshot first */
+ if (!_lv_remove_check_in_use(lv, force))
+ return_0;
+
+ if (!deactivate_lv(cmd, lv))
+ goto no_remove;
+
+ log_verbose("Removing merging origin logical volume %s.", display_lvname(lv));
+
+ if (!lv_remove_with_dependencies(cmd, find_snapshot(lv)->lv,
+ force, level + 1))
+ return_0;
+ }
+
+ if (!level && lv_is_merging_thin_snapshot(lv)) {
+ /* Merged snapshot LV is no longer available for the user */
+ log_error("Unable to remove %s, volume is merged to %s.",
+ display_lvname(lv), display_lvname(first_seg(lv)->merge_lv));
+ return 0;
+ }
+
+ if (lv_is_cache_origin(lv) || lv_is_writecache_origin(lv)) {
+ if (!_lv_remove_segs_using_this_lv(cmd, lv, force, level, "cache origin"))
+ return_0;
+ /* Removal of cache LV also removes caching origin */
+ return 1;
}
- if (lv_is_used_thin_pool(lv)) {
- /* Remove thin LVs first */
- if ((force == PROMPT) &&
- yes_no_prompt("Removing pool %s will also remove %u "
- "thin volume(s). OK? [y/n]: ", lv->name,
- /* Note: Snaphosts not included */
- dm_list_size(&lv->segs_using_this_lv)) == 'n') {
- log_error("Logical volume %s not removed.", lv->name);
+ if (lv_is_external_origin(lv) &&
+ !_lv_remove_segs_using_this_lv(cmd, lv, force, level, "external origin"))
+ return_0;
+
+ if (lv_is_used_thin_pool(lv) &&
+ !_lv_remove_segs_using_this_lv(cmd, lv, force, level, "pool"))
+ return_0;
+
+ if (lv_is_vdo_pool(lv)) {
+ if (!_lv_remove_segs_using_this_lv(cmd, lv, force, level, "VDO pool"))
+ return_0;
+ /* Last user removes VDO pool itself, lv no longer exists */
+ return 1;
+ }
+
+ if (lv_is_cache_pool(lv) && !lv_is_used_cache_pool(lv)) {
+ if (!deactivate_lv(cmd, first_seg(lv)->metadata_lv) ||
+ !deactivate_lv(cmd, seg_lv(first_seg(lv),0))) {
+ log_error("Unable to fully deactivate unused cache-pool %s.",
+ display_lvname(lv));
return 0;
}
+ }
- dm_list_iterate_items_safe(sl, tsl, &lv->segs_using_this_lv)
- if (!lv_remove_with_dependencies(cmd, sl->seg->lv,
- force, level + 1))
- return_0;
+ if (lv_is_pool_metadata_spare(lv) &&
+ (force == PROMPT)) {
+ dm_list_iterate_items(lvl, &lv->vg->lvs)
+ if (lv_is_pool_metadata(lvl->lv)) {
+ if (yes_no_prompt("Removal of pool metadata spare logical volume "
+ "%s disables automatic recovery attempts "
+ "after damage to a thin or cache pool. "
+ "Proceed? [y/n]: ", display_lvname(lv)) == 'n')
+ goto no_remove;
+ break;
+ }
+ }
+
+ return lv_remove_single(cmd, lv, force, 0);
+
+no_remove:
+ log_error("Logical volume %s not removed.", display_lvname(lv));
+
+ return 0;
+}
+
+static int _lv_update_and_reload(struct logical_volume *lv, int origin_only)
+{
+ struct volume_group *vg = lv->vg;
+ int r = 0;
+ const struct logical_volume *lock_lv = lv_lock_holder(lv);
+
+ log_very_verbose("Updating logical volume %s on disk(s)%s.",
+ display_lvname(lock_lv), origin_only ? " (origin only)": "");
+ if (!vg_write(vg))
+ return_0;
+
+ if (origin_only && (lock_lv != lv)) {
+ log_debug_activation("Dropping origin_only for %s as lock holds %s",
+ display_lvname(lv), display_lvname(lock_lv));
+ origin_only = 0;
}
- return lv_remove_single(cmd, lv, force);
+ if (!(origin_only ? suspend_lv_origin(vg->cmd, lock_lv) : suspend_lv(vg->cmd, lock_lv))) {
+ log_error("Failed to suspend logical volume %s.",
+ display_lvname(lock_lv));
+ vg_revert(vg);
+ if (!revert_lv(vg->cmd, lock_lv))
+ log_error("Failed to revert logical volume %s.",
+ display_lvname(lock_lv));
+ return 0;
+ } else if (!(r = vg_commit(vg)))
+ stack; /* !vg_commit() has implicit vg_revert() */
+
+ log_very_verbose("Updating logical volume %s in kernel.",
+ display_lvname(lock_lv));
+
+ if (!(origin_only ? resume_lv_origin(vg->cmd, lock_lv) : resume_lv(vg->cmd, lock_lv))) {
+ log_error("Problem reactivating logical volume %s.",
+ display_lvname(lock_lv));
+ r = 0;
+ }
+
+ return r;
+}
+
+int lv_update_and_reload(struct logical_volume *lv)
+{
+ return _lv_update_and_reload(lv, 0);
+}
+
+int lv_update_and_reload_origin(struct logical_volume *lv)
+{
+ return _lv_update_and_reload(lv, 1);
}
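
The helper bundles the canonical vg_write/suspend/vg_commit/resume sequence, so a typical caller only does something like the following (sketch; SOME_CHANGE is a placeholder for any in-memory metadata edit, not a real flag):

	/* 1. Mutate the in-memory metadata... */
	first_seg(lv)->status |= SOME_CHANGE;	/* placeholder edit */

	/* 2. ...then persist and reload the device stack in one call;
	 * on failure the metadata is reverted and the LV resumed. */
	if (!lv_update_and_reload(lv))
		return_0;
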
/*
@@ -3534,16 +8033,16 @@ static int _split_parent_area(struct lv_segment *seg, uint32_t s,
while (parent_area_len > 0) {
/* Find the layer segment pointed at */
if (!(spvs = _find_seg_pvs_by_le(layer_seg_pvs, layer_le))) {
- log_error("layer segment for %s:%" PRIu32 " not found",
- seg->lv->name, parent_le);
+ log_error("layer segment for %s:" FMTu32 " not found.",
+ display_lvname(seg->lv), parent_le);
return 0;
}
if (spvs->le != layer_le) {
log_error("Incompatible layer boundary: "
- "%s:%" PRIu32 "[%" PRIu32 "] on %s:%" PRIu32,
- seg->lv->name, parent_le, s,
- seg_lv(seg, s)->name, layer_le);
+ "%s:" FMTu32 "[" FMTu32 "] on %s:" FMTu32 ".",
+ display_lvname(seg->lv), parent_le, s,
+ display_lvname(seg_lv(seg, s)), layer_le);
return 0;
}
@@ -3572,7 +8071,7 @@ int split_parent_segments_for_layer(struct cmd_context *cmd,
uint32_t s;
struct dm_list *parallel_areas;
- if (!(parallel_areas = build_parallel_areas_from_lv(layer_lv, 0)))
+ if (!(parallel_areas = build_parallel_areas_from_lv(layer_lv, 0, 0)))
return_0;
/* Loop through all LVs except itself */
@@ -3643,7 +8142,7 @@ int remove_layers_for_segments(struct cmd_context *cmd,
log_error("Layer boundary mismatch: "
"%s:%" PRIu32 "-%" PRIu32 " on "
"%s:%" PRIu32 " / "
- "%" PRIu32 "-%" PRIu32 " / ",
+ FMTu32 "-" FMTu32 " / ",
lv->name, seg->le, seg->area_len,
layer_lv->name, seg_le(seg, s),
lseg->le, lseg->area_len);
@@ -3655,7 +8154,7 @@ int remove_layers_for_segments(struct cmd_context *cmd,
/* Replace mirror with error segment */
if (!(lseg->segtype =
- get_segtype_from_string(lv->vg->cmd, "error"))) {
+ get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR))) {
log_error("Missing error segtype");
return 0;
}
@@ -3702,6 +8201,14 @@ int remove_layers_for_segments_all(struct cmd_context *cmd,
if (!lv_empty(layer_lv))
return_0;
+ /* ATM assumed to be used only by PVMOVE when unlocking LVs */
+ dm_list_iterate_items(lvl, lvs_changed) {
+ /* FIXME Assumes only one pvmove at a time! */
+ lvl->lv->status &= ~LOCKED;
+ if (!lv_merge_segments(lvl->lv))
+ return_0;
+ }
+
return 1;
}
@@ -3709,6 +8216,17 @@ int move_lv_segments(struct logical_volume *lv_to,
struct logical_volume *lv_from,
uint64_t set_status, uint64_t reset_status)
{
+ const uint64_t MOVE_BITS = (CACHE |
+ CACHE_POOL |
+ INTEGRITY |
+ LV_CACHE_VOL |
+ LV_VDO |
+ LV_VDO_POOL |
+ MIRROR |
+ RAID |
+ THIN_POOL |
+ THIN_VOLUME |
+ WRITECACHE);
struct lv_segment *seg;
dm_list_iterate_items(seg, &lv_to->segments)
@@ -3726,6 +8244,16 @@ int move_lv_segments(struct logical_volume *lv_to,
seg->status |= set_status;
}
+ /*
+ * Move LV status bits for selected types along with their segments,
+ * i.e. when inserting a layer into a cache LV, we move raid segments
+ * to a new place, thus the 'raid' LV property now belongs to this LV.
+ *
+ * Bits should match those that appear after reading from disk.
+ */
+ lv_to->status |= lv_from->status & MOVE_BITS;
+ lv_from->status &= ~MOVE_BITS;
+
lv_to->le_count = lv_from->le_count;
lv_to->size = lv_from->size;
@@ -3739,41 +8267,98 @@ int move_lv_segments(struct logical_volume *lv_to,
int remove_layer_from_lv(struct logical_volume *lv,
struct logical_volume *layer_lv)
{
- struct logical_volume *parent;
+ static const char _suffixes[][8] = { "_tdata", "_cdata", "_corig", "_wcorig", "_vdata" };
+ struct logical_volume *parent_lv;
struct lv_segment *parent_seg;
struct segment_type *segtype;
+ struct lv_names lv_names;
+ unsigned r;
log_very_verbose("Removing layer %s for %s", layer_lv->name, lv->name);
if (!(parent_seg = get_only_segment_using_this_lv(layer_lv))) {
log_error("Failed to find layer %s in %s",
- layer_lv->name, lv->name);
+ layer_lv->name, lv->name);
+ return 0;
+ }
+ parent_lv = parent_seg->lv;
+ if (parent_lv != lv) {
+ log_error(INTERNAL_ERROR "Wrong layer %s in %s",
+ layer_lv->name, lv->name);
return 0;
}
- parent = parent_seg->lv;
/*
* Before removal, the layer should be cleaned up,
* i.e. additional segments and areas should have been removed.
*/
- if (dm_list_size(&parent->segments) != 1 ||
- parent_seg->area_count != 1 ||
- seg_type(parent_seg, 0) != AREA_LV ||
- layer_lv != seg_lv(parent_seg, 0) ||
- parent->le_count != layer_lv->le_count)
- return_0;
+ /* FIXME:
+ * These are all INTERNAL_ERROR, but ATM there is
+ * some internal API problem and this code is wrongly
+ * executed with certain mirror manipulations.
+ * So we need to fix mirror code first, then switch...
+ */
+ if (dm_list_size(&parent_lv->segments) != 1) {
+ log_error("Invalid %d segments in %s, expected only 1.",
+ dm_list_size(&parent_lv->segments),
+ display_lvname(parent_lv));
+ return 0;
+ }
+
+ if (parent_seg->area_count != 1) {
+ log_error("Invalid %d area count(s) in %s, expected only 1.",
+ parent_seg->area_count, display_lvname(parent_lv));
+ return 0;
+ }
+
+ if (seg_type(parent_seg, 0) != AREA_LV) {
+ log_error("Invalid seg_type %d in %s, expected LV.",
+ seg_type(parent_seg, 0), display_lvname(parent_lv));
+ return 0;
+ }
+
+ if (layer_lv != seg_lv(parent_seg, 0)) {
+ log_error("Layer doesn't match segment in %s.",
+ display_lvname(parent_lv));
+ return 0;
+ }
+
+ if (parent_lv->le_count != layer_lv->le_count) {
+ log_error("Inconsistent extent count (%u != %u) of layer %s.",
+ parent_lv->le_count, layer_lv->le_count,
+ display_lvname(parent_lv));
+ return 0;
+ }
- if (!lv_empty(parent))
+ if (!lv_empty(parent_lv))
return_0;
- if (!move_lv_segments(parent, layer_lv, 0, 0))
+ if (!move_lv_segments(parent_lv, layer_lv, 0, 0))
return_0;
/* Replace the empty layer with error segment */
- segtype = get_segtype_from_string(lv->vg->cmd, "error");
- if (!lv_add_virtual_segment(layer_lv, 0, parent->le_count, segtype, NULL))
+ if (!(segtype = get_segtype_from_string(lv->vg->cmd, SEG_TYPE_NAME_ERROR)))
+ return_0;
+ if (!lv_add_virtual_segment(layer_lv, 0, parent_lv->le_count, segtype))
return_0;
+ /*
+ * recursively rename sub LVs
+ * currently supported only for thin data layer
+ * FIXME: without strcmp it breaks mirrors....
+ */
+ if (!strstr(layer_lv->name, "_mimage")) {
+ for (r = 0; r < DM_ARRAY_SIZE(_suffixes); ++r) {
+ if (strstr(layer_lv->name, _suffixes[r])) {
+ lv_names.old = layer_lv->name;
+ lv_names.new = parent_lv->name;
+ if (!for_each_sub_lv(parent_lv, _rename_skip_pools_externals_cb, (void *) &lv_names))
+ return_0;
+ break;
+ }
+ }
+ }
+
return 1;
}
@@ -3788,45 +8373,36 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
uint64_t status,
const char *layer_suffix)
{
+ static const char _suffixes[][10] = { "_tdata", "_cdata", "_corig", "_wcorig", "_vdata", "_tpool%d" };
int r;
- char *name;
- size_t len;
- struct str_list *sl;
+ char name[NAME_LEN];
+ struct dm_str_list *sl;
struct logical_volume *layer_lv;
struct segment_type *segtype;
struct lv_segment *mapseg;
struct lv_names lv_names;
- unsigned exclusive = 0;
+ unsigned i;
/* create an empty layer LV */
- len = strlen(lv_where->name) + 32;
- if (!(name = alloca(len))) {
- log_error("layer name allocation failed. "
- "Remove new LV and retry.");
- return NULL;
- }
-
- if (dm_snprintf(name, len, "%s%s", lv_where->name, layer_suffix) < 0) {
- log_error("layer name allocation failed. "
- "Remove new LV and retry.");
+ if (dm_snprintf(name, sizeof(name), "%s%s", lv_where->name, layer_suffix) < 0) {
+ log_error("Layered name is too long. Please use shorter LV name.");
return NULL;
}
- if (!(layer_lv = lv_create_empty(name, NULL, LVM_READ | LVM_WRITE,
+ if (!(layer_lv = lv_create_empty(name, NULL,
+ /* Preserve read-only flag */
+ LVM_READ | (lv_where->status & LVM_WRITE),
ALLOC_INHERIT, lv_where->vg))) {
log_error("Creation of layer LV failed");
return NULL;
}
- if (lv_is_active_exclusive_locally(lv_where))
- exclusive = 1;
-
- if (lv_is_active(lv_where) && strstr(name, "_mimagetmp")) {
+ if (lv_is_active(lv_where) && strstr(name, MIRROR_SYNC_LAYER)) {
log_very_verbose("Creating transient LV %s for mirror conversion in VG %s.", name, lv_where->vg->name);
- segtype = get_segtype_from_string(cmd, "error");
+ segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_ERROR);
- if (!lv_add_virtual_segment(layer_lv, 0, lv_where->le_count, segtype, NULL)) {
+ if (!lv_add_virtual_segment(layer_lv, 0, lv_where->le_count, segtype)) {
log_error("Creation of transient LV %s for mirror conversion in VG %s failed.", name, lv_where->vg->name);
return NULL;
}
@@ -3846,14 +8422,10 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
if (!vg_commit(lv_where->vg)) {
log_error("Failed to commit intermediate VG %s metadata for mirror conversion.", lv_where->vg->name);
- vg_revert(lv_where->vg);
return NULL;
}
- if (exclusive)
- r = activate_lv_excl(cmd, layer_lv);
- else
- r = activate_lv(cmd, layer_lv);
+ r = activate_lv(cmd, layer_lv);
if (!r) {
log_error("Failed to resume transient LV"
@@ -3865,7 +8437,6 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
/* Remove the temporary tags */
dm_list_iterate_items(sl, &lv_where->tags)
str_list_del(&layer_lv->tags, sl->str);
-
}
log_very_verbose("Inserting layer %s for %s",
@@ -3874,12 +8445,12 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
if (!move_lv_segments(layer_lv, lv_where, 0, 0))
return_NULL;
- if (!(segtype = get_segtype_from_string(cmd, "striped")))
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_STRIPED)))
return_NULL;
/* allocate a new linear segment */
- if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count,
- status, 0, NULL, NULL, 1, layer_lv->le_count,
+ if (!(mapseg = alloc_lv_segment(segtype, lv_where, 0, layer_lv->le_count, 0,
+ status, 0, NULL, 1, layer_lv->le_count, 0,
0, 0, 0, NULL)))
return_NULL;
@@ -3892,17 +8463,22 @@ struct logical_volume *insert_layer_for_lv(struct cmd_context *cmd,
lv_where->le_count = layer_lv->le_count;
lv_where->size = (uint64_t) lv_where->le_count * lv_where->vg->extent_size;
+ if (lv_where->vg->fid->fmt->features & FMT_CONFIG_PROFILE)
+ lv_where->profile = lv_where->vg->cmd->profile_params->global_metadata_profile;
+
/*
* recursively rename sub LVs
* currently supported only for thin data layer
* FIXME: without strcmp it breaks mirrors....
*/
- if (strcmp(layer_suffix, "_tdata") == 0) {
- lv_names.old = lv_where->name;
- lv_names.new = layer_lv->name;
- if (!for_each_sub_lv(cmd, layer_lv, _rename_cb, (void *) &lv_names))
- return 0;
- }
+ for (i = 0; i < DM_ARRAY_SIZE(_suffixes); ++i)
+ if (strcmp(layer_suffix, _suffixes[i]) == 0) {
+ lv_names.old = lv_where->name;
+ lv_names.new = layer_lv->name;
+ if (!for_each_sub_lv(layer_lv, _rename_skip_pools_externals_cb, (void *) &lv_names))
+ return_NULL;
+ break;
+ }
return layer_lv;
}
@@ -3922,7 +8498,7 @@ static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv,
if (seg_type(seg, s) != AREA_PV && seg_type(seg, s) != AREA_LV)
return_0;
- if (!(segtype = get_segtype_from_string(layer_lv->vg->cmd, "striped")))
+ if (!(segtype = get_segtype_from_string(layer_lv->vg->cmd, SEG_TYPE_NAME_STRIPED)))
return_0;
/* FIXME Incomplete message? Needs more context */
@@ -3933,8 +8509,8 @@ static int _extend_layer_lv_for_segment(struct logical_volume *layer_lv,
/* allocate a new segment */
if (!(mapseg = alloc_lv_segment(segtype, layer_lv, layer_lv->le_count,
- seg->area_len, status, 0,
- NULL, NULL, 1, seg->area_len, 0, 0, 0, seg)))
+ seg->area_len, 0, status, 0,
+ NULL, 1, seg->area_len, 0, 0, 0, 0, seg)))
return_0;
/* map the new segment to the original underlying area */
@@ -3980,10 +8556,10 @@ static int _match_seg_area_to_pe_range(struct lv_segment *seg, uint32_t s,
continue;
/* FIXME Missing context in this message - add LV/seg details */
- log_debug("Matched PE range %s:%" PRIu32 "-%" PRIu32 " against "
- "%s %" PRIu32 " len %" PRIu32, dev_name(pvl->pv->dev),
- per->start, per_end, dev_name(seg_dev(seg, s)),
- seg_pe(seg, s), seg->area_len);
+ log_debug_alloc("Matched PE range %s:%" PRIu32 "-%" PRIu32 " against "
+ "%s %" PRIu32 " len %" PRIu32, dev_name(pvl->pv->dev),
+ per->start, per_end, dev_name(seg_dev(seg, s)),
+ seg_pe(seg, s), seg->area_len);
return 1;
}
@@ -4066,14 +8642,25 @@ int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
struct lv_list *lvl;
int lv_used = 0;
uint32_t s;
+ struct logical_volume *holder = (struct logical_volume *) lv_lock_holder(lv_where);
log_very_verbose("Inserting layer %s for segments of %s on %s",
layer_lv->name, lv_where->name,
pvl ? pv_dev_name(pvl->pv) : "any");
+ /* Temporarily hide layer_lv from the vg->lvs list
+ * so that lv_split_segment() passes vg_validate(),
+ * since layer_lv has an empty segment list here */
+ if (!(lvl = find_lv_in_vg(lv_where->vg, layer_lv->name)))
+ return_0;
+ dm_list_del(&lvl->list);
+
if (!_align_segment_boundary_to_pe_range(lv_where, pvl))
return_0;
+ /* Put layer_lv back into vg->lvs */
+ dm_list_add(&lv_where->vg->lvs, &lvl->list);
+
/* Work through all segments on the supplied PV */
dm_list_iterate_items(seg, &lv_where->segments) {
for (s = 0; s < seg->area_count; s++) {
@@ -4082,13 +8669,23 @@ int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
/* First time, add LV to list of LVs affected */
if (!lv_used && lvs_changed) {
- if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
- log_error("lv_list alloc failed");
- return 0;
+ /* First check if LV is listed already */
+ dm_list_iterate_items(lvl, lvs_changed)
+ if (lvl->lv == holder) {
+ lv_used = 1;
+ break;
+ }
+
+ if (!lv_used) {
+ if (!(lvl = dm_pool_alloc(cmd->mem, sizeof(*lvl)))) {
+ log_error("lv_list alloc failed.");
+ return 0;
+ }
+
+ lvl->lv = holder;
+ dm_list_add(lvs_changed, &lvl->list);
+ lv_used = 1;
}
- lvl->lv = lv_where;
- dm_list_add(lvs_changed, &lvl->list);
- lv_used = 1;
}
if (!_extend_layer_lv_for_segment(layer_lv, seg, s,
@@ -4108,11 +8705,29 @@ int insert_layer_for_segments_on_pv(struct cmd_context *cmd,
/*
* Initialize the LV according to 'wp': optionally wipe signatures
* and/or zero the start (or, for metadata LVs, all) of the device.
*/
-int set_lv(struct cmd_context *cmd, struct logical_volume *lv,
- uint64_t sectors, int value)
+int wipe_lv(struct logical_volume *lv, struct wipe_params wp)
{
struct device *dev;
- char *name;
+ char name[PATH_MAX];
+ uint64_t zero_sectors;
+ int zero_metadata;
+
+ if (!wp.do_zero && !wp.do_wipe_signatures && !wp.is_metadata)
+ /* nothing to do */
+ return 1;
+
+ if (!lv_is_active(lv)) {
+ log_error("Volume %s is not active locally (volume_list activation filter?).",
+ display_lvname(lv));
+ return 0;
+ }
+
+ /* Wait until devices are available */
+ if (!sync_local_dev_names(lv->vg->cmd)) {
+ log_error("Failed to sync local devices before wiping volume %s.",
+ display_lvname(lv));
+ return 0;
+ }
/*
* FIXME:
@@ -4121,44 +8736,198 @@ int set_lv(struct cmd_context *cmd, struct logical_volume *lv,
* <ejt_> k, I'll drop a fixme to that effect
* (I know the device is at least 4k, but not 32k)
*/
- if (!(name = dm_pool_alloc(cmd->mem, PATH_MAX))) {
- log_error("Name allocation failed - device not cleared");
+ if (dm_snprintf(name, sizeof(name), "%s%s/%s", lv->vg->cmd->dev_dir,
+ lv->vg->name, lv->name) < 0) {
+ log_error("Name too long - device not cleared (%s)", lv->name);
return 0;
}
- if (dm_snprintf(name, PATH_MAX, "%s%s/%s", cmd->dev_dir,
- lv->vg->name, lv->name) < 0) {
- log_error("Name too long - device not cleared (%s)", lv->name);
+ if (!(dev = dev_cache_get(lv->vg->cmd, name, NULL))) {
+ log_error("%s: not found: device not cleared", name);
return 0;
}
- sync_local_dev_names(cmd); /* Wait until devices are available */
+ if (!label_scan_open_rw(dev)) {
+ log_error("Failed to open %s for wiping and zeroing.", display_lvname(lv));
+ return 0;
+ }
- log_verbose("Clearing start of logical volume \"%s\"", lv->name);
+ sigint_allow();
+ if (wp.do_wipe_signatures) {
+ log_verbose("Wiping known signatures on logical volume %s.",
+ display_lvname(lv));
+ if (!wipe_known_signatures(lv->vg->cmd, dev, name, 0,
+ TYPE_DM_SNAPSHOT_COW,
+ wp.yes, wp.force, NULL)) {
+ sigint_restore();
+ label_scan_invalidate(dev);
+ log_error("%s logical volume %s.",
+ sigint_caught() ?
+ "Interrupted initialization of" : "Failed to wipe signatures on",
+ display_lvname(lv));
+ return 0;
+ }
+ }
- if (!(dev = dev_cache_get(name, NULL))) {
- log_error("%s: not found: device not cleared", name);
- return 0;
+ if (wp.do_zero || wp.is_metadata) {
+ zero_metadata = !wp.is_metadata ? 0 :
+ find_config_tree_bool(lv->vg->cmd, allocation_zero_metadata_CFG, NULL);
+ if (zero_metadata) {
+ log_debug("Metadata logical volume %s will be fully zeroed.",
+ display_lvname(lv));
+ zero_sectors = lv->size;
+ wp.zero_value = 0;
+ } else {
+ if (wp.is_metadata) /* Verbosely notify metadata will not be fully zeroed */
+ log_verbose("Metadata logical volume %s not fully zeroed and may contain stale data.",
+ display_lvname(lv));
+ zero_sectors = UINT64_C(4096) >> SECTOR_SHIFT;
+ if (wp.zero_sectors > zero_sectors)
+ zero_sectors = wp.zero_sectors;
+
+ if (zero_sectors > lv->size)
+ zero_sectors = lv->size;
+ }
+
+ log_verbose("Initializing %s of logical volume %s with value %d.",
+ display_size(lv->vg->cmd, zero_sectors),
+ display_lvname(lv), wp.zero_value);
+
+#ifdef HAVE_BLKZEROOUT
+ if (!test_mode() && !wp.zero_value && (zero_sectors > 16)) {
+ /* TODO: maybe integrate with bcache_zero_set() */
+ const uint64_t end = zero_sectors << SECTOR_SHIFT;
+ uint64_t range[2] = { 0, 1024 * 1024 }; /* zeroing with 1M steps (for better ^C support) */
+ for (/* empty */ ; range[0] < end; range[0] += range[1]) {
+ if ((range[0] + range[1]) > end)
+ range[1] = end - range[0];
+
+ if (ioctl(dev->bcache_fd, BLKZEROOUT, &range)) {
+ if (errno == EINVAL)
+ goto retry_with_dev_set; /* Kernel without support for BLKZEROOUT */
+ log_sys_debug("ioctl", "BLKZEROOUT");
+ sigint_restore();
+ label_scan_invalidate(dev);
+ log_error("%s logical volume %s at position " FMTu64 " and size " FMTu64 ".",
+ sigint_caught() ? "Interrupted initialization of" : "Failed to initialize",
+ display_lvname(lv), range[0], range[1]);
+ return 0;
+ }
+ }
+ } else
+retry_with_dev_set:
+#endif
+ if (!dev_set_bytes(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT, wp.zero_value)) {
+ sigint_restore();
+ log_error("%s logical volume %s with value %d and size %s.",
+ sigint_caught() ? "Interrupted initialization" : "Failed to initialize",
+ display_lvname(lv), wp.zero_value,
+ display_size(lv->vg->cmd, zero_sectors));
+ return 0;
+ }
}
+ sigint_restore();
+
+ label_scan_invalidate(dev);
- if (!dev_open_quiet(dev))
+ lv->status &= ~LV_NOSCAN;
+
+ return 1;
+}
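
Outside the LVM internals, the BLKZEROOUT path above can be exercised with a small standalone program (Linux-only; the ioctl takes a {start, length} byte range, and a kernel without support fails with EINVAL, which is exactly what the retry_with_dev_set fallback handles):

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/fs.h>

	/* Zero the first MiB of a block device using BLKZEROOUT. */
	int zero_first_mib(const char *dev_path)
	{
		uint64_t range[2] = { 0, 1024 * 1024 };	/* { start, length } in bytes */
		int fd, r = 0;

		if ((fd = open(dev_path, O_RDWR)) < 0) {
			perror("open");
			return 1;
		}

		if (ioctl(fd, BLKZEROOUT, &range)) {
			perror("ioctl(BLKZEROOUT)");	/* EINVAL: kernel lacks support */
			r = 1;
		}

		(void) close(fd);
		return r;
	}
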
+
+/*
+ * Optionally makes on-disk metadata changes if @commit
+ *
+ * If LV is active:
+ * wipe any signatures and clear first sector of LVs listed on @lv_list
+ * otherwise:
+ * activate, wipe (as above), deactivate
+ *
+ * Returns: 1 on success, 0 on failure
+ */
+int activate_and_wipe_lvlist(struct dm_list *lv_list, int commit)
+{
+ struct lv_list *lvl;
+ struct volume_group *vg = NULL;
+ unsigned i = 0, sz = dm_list_size(lv_list);
+ char *was_active;
+ int r = 1;
+
+ if (!sz) {
+ log_debug_metadata(INTERNAL_ERROR "Empty list of LVs given for wiping.");
+ return 1;
+ }
+
+ dm_list_iterate_items(lvl, lv_list) {
+ if (!lv_is_visible(lvl->lv)) {
+ log_error(INTERNAL_ERROR
+ "LVs must be set visible before wiping.");
+ return 0;
+ }
+ vg = lvl->lv->vg;
+ }
+
+ if (test_mode())
+ return 1;
+
+ /*
+ * FIXME: only vg_[write|commit] if LVs are not already written
+ * as visible in the LVM metadata (which is never the case yet).
+ */
+ if (commit &&
+ (!vg || !vg_write(vg) || !vg_commit(vg)))
return_0;
- if (!sectors)
- sectors = UINT64_C(4096) >> SECTOR_SHIFT;
+ was_active = alloca(sz);
- if (sectors > lv->size)
- sectors = lv->size;
+ dm_list_iterate_items(lvl, lv_list)
+ if (!(was_active[i++] = lv_is_active(lvl->lv))) {
+ lvl->lv->status |= LV_TEMPORARY;
+ if (!activate_lv(vg->cmd, lvl->lv)) {
+ log_error("Failed to activate localy %s for wiping.",
+ display_lvname(lvl->lv));
+ r = 0;
+ goto out;
+ }
+ lvl->lv->status &= ~LV_TEMPORARY;
+ }
- if (!dev_set(dev, UINT64_C(0), (size_t) sectors << SECTOR_SHIFT, value))
- stack;
+ dm_list_iterate_items(lvl, lv_list) {
+ /* Wipe any known signatures */
+ if (!wipe_lv(lvl->lv, (struct wipe_params) { .do_zero = 1 /* TODO: is_metadata = 1 */ })) {
+ r = 0;
+ goto_out;
+ }
+ }
+out:
+ /* TODO: deactivation is only needed with clustered locking;
+ * in the normal case we should keep the device active
+ */
+ sz = 0;
+ dm_list_iterate_items(lvl, lv_list)
+ if ((i > sz) && !was_active[sz++] &&
+ !deactivate_lv(vg->cmd, lvl->lv)) {
+ log_error("Failed to deactivate %s.", display_lvname(lvl->lv));
+ r = 0; /* Continue deactivating as many as possible. */
+ }
- dev_flush(dev);
+ if (!sync_local_dev_names(vg->cmd))
+ log_debug("Failed to sync local device names after deactivation of wiped volumes.");
- if (!dev_close_immediate(dev))
- stack;
+ return r;
+}
- return 1;
+/* Wipe logical volume @lv, optionally with @commit of metadata */
+int activate_and_wipe_lv(struct logical_volume *lv, int commit)
+{
+ struct dm_list lv_list;
+ struct lv_list lvl;
+
+ lvl.lv = lv;
+ dm_list_init(&lv_list);
+ dm_list_add(&lv_list, &lvl.list);
+
+ return activate_and_wipe_lvlist(&lv_list, commit);
}
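
Typical call shape (sketch; meta_lv is a hypothetical caller-side LV, and commit=0 is what a caller passes when it batches its own vg_write/vg_commit afterwards):

	/* Activate (if needed), wipe/zero the start, deactivate again. */
	if (!activate_and_wipe_lv(meta_lv, 0))
		return_0;
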
static struct logical_volume *_create_virtual_origin(struct cmd_context *cmd,
@@ -4168,19 +8937,16 @@ static struct logical_volume *_create_virtual_origin(struct cmd_context *cmd,
uint64_t voriginextents)
{
const struct segment_type *segtype;
- size_t len;
- char *vorigin_name;
+ char vorigin_name[NAME_LEN];
struct logical_volume *lv;
- if (!(segtype = get_segtype_from_string(cmd, "zero"))) {
+ if (!(segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_ZERO))) {
log_error("Zero segment type for virtual origin not found");
return NULL;
}
- len = strlen(lv_name) + 32;
- if (!(vorigin_name = alloca(len)) ||
- dm_snprintf(vorigin_name, len, "%s_vorigin", lv_name) < 0) {
- log_error("Virtual origin name allocation failed.");
+ if (dm_snprintf(vorigin_name, sizeof(vorigin_name), "%s_vorigin", lv_name) < 0) {
+ log_error("Virtual origin name is too long.");
return NULL;
}
@@ -4189,289 +8955,446 @@ static struct logical_volume *_create_virtual_origin(struct cmd_context *cmd,
return_NULL;
if (!lv_extend(lv, segtype, 1, 0, 1, 0, voriginextents,
- NULL, NULL, ALLOC_INHERIT))
+ NULL, ALLOC_INHERIT, 0))
return_NULL;
- /* store vg on disk(s) */
- if (!vg_write(vg) || !vg_commit(vg))
- return_NULL;
+ return lv;
+}
+
+/*
+ * Automatically set ACTIVATION_SKIP flag for the LV supplied - this
+ * is default behaviour. If override_default is set, then override
+ * the default behaviour and add/clear the flag based on 'add_skip' arg
+ * supplied instead.
+ */
+void lv_set_activation_skip(struct logical_volume *lv, int override_default,
+ int add_skip)
+{
+ int skip = 0;
- backup(vg);
+ /* override default behaviour */
+ if (override_default)
+ skip = add_skip;
+ /* default behaviour */
+ else if (lv->vg->cmd->auto_set_activation_skip) {
+ /* skip activation for thin snapshots by default */
+ if (lv_is_thin_volume(lv) && first_seg(lv)->origin)
+ skip = 1;
+ }
- return lv;
+ if (skip)
+ lv->status |= LV_ACTIVATION_SKIP;
+ else
+ lv->status &= ~LV_ACTIVATION_SKIP;
}
-/* Thin notes:
- * If lp->thin OR lp->activate is AY*, activate the pool if not already active.
- * If lp->thin, create thin LV within the pool - as a snapshot if lp->snapshot.
- * If lp->activate is AY*, activate it.
- * If lp->activate was AN* and the pool was originally inactive, deactivate it.
+/*
+ * Get indication whether the LV should be skipped during activation
+ * based on the ACTIVATION_SKIP flag (deactivation is never skipped!).
+ * If 'override_lv_skip_flag' is set, then override it based on the value
+ * of the 'skip' arg supplied instead.
*/
-static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, struct lvcreate_params *lp,
- const char *new_lv_name)
+int lv_activation_skip(struct logical_volume *lv, activation_change_t activate,
+ int override_lv_skip_flag)
{
- struct cmd_context *cmd = vg->cmd;
- uint32_t size_rest;
- uint64_t status = UINT64_C(0);
- struct logical_volume *lv, *org = NULL;
- struct logical_volume *pool_lv;
- struct lv_list *lvl;
- int origin_active = 0;
- struct lvinfo info;
+ if (!(lv->status & LV_ACTIVATION_SKIP) ||
+ !is_change_activating(activate) || /* Do not skip deactivation */
+ override_lv_skip_flag)
+ return 0;
- if (new_lv_name && find_lv_in_vg(vg, new_lv_name)) {
- log_error("Logical volume \"%s\" already exists in "
- "volume group \"%s\"", new_lv_name, lp->vg_name);
- return NULL;
+ log_verbose("ACTIVATION_SKIP flag set for LV %s/%s, skipping activation.",
+ lv->vg->name, lv->name);
+ return 1;
+}
+
+static int _should_wipe_lv(struct lvcreate_params *lp,
+ struct logical_volume *lv, int warn)
+{
+ /* Unzeroable segment */
+ if (seg_cannot_be_zeroed(first_seg(lv)))
+ return 0;
+
+ /* Thin snapshots need not be zeroed */
+ /* Thin pool with zeroing doesn't need zeroing or wiping */
+ if (lv_is_thin_volume(lv) &&
+ (first_seg(lv)->origin ||
+ first_seg(first_seg(lv)->pool_lv)->zero_new_blocks))
+ return 0;
+
+ /* VDO LVs do not need to be zeroed */
+ if (lv_is_vdo(lv))
+ return 0;
+
+ if (warn && (lv_passes_readonly_filter(lv))) {
+ log_warn("WARNING: Read-only activated logical volume %s not zeroed.",
+ display_lvname(lv));
+ return 0;
}
+ /* Only writable volumes can be zeroed or wiped */
+ if ((lv->status & LVM_WRITE) &&
+ (lp->zero || lp->wipe_signatures))
+ return 1;
+
+ if (warn && (!lp->zero || !(lv->status & LVM_WRITE)))
+ log_warn("WARNING: Logical volume %s not zeroed.",
+ display_lvname(lv));
+ if (warn && (!lp->wipe_signatures || !(lv->status & LVM_WRITE)))
+ log_verbose("Signature wiping on logical volume %s not requested.",
+ display_lvname(lv));
+
+ return 0;
+}
+
+/* Check if VG metadata supports needed features */
+static int _vg_check_features(struct volume_group *vg,
+ struct lvcreate_params *lp)
+{
+ uint32_t features = vg->fid->fmt->features;
+
if (vg_max_lv_reached(vg)) {
log_error("Maximum number of logical volumes (%u) reached "
"in volume group %s", vg->max_lv, vg->name);
- return NULL;
+ return 0;
}
- if ((segtype_is_mirrored(lp->segtype) ||
- segtype_is_raid(lp->segtype) || segtype_is_thin(lp->segtype)) &&
- !(vg->fid->fmt->features & FMT_SEGMENTS)) {
+ if (!(features & FMT_SEGMENTS) &&
+ (seg_is_cache(lp) ||
+ seg_is_cache_pool(lp) ||
+ seg_is_mirror(lp) ||
+ seg_is_raid(lp) ||
+ seg_is_thin(lp))) {
log_error("Metadata does not support %s segments.",
lp->segtype->name);
- return NULL;
+ return 0;
+ }
+
+ if (!(features & FMT_TAGS) && !dm_list_empty(&lp->tags)) {
+ log_error("Volume group %s does not support tags.", vg->name);
+ return 0;
}
- if (lp->read_ahead != DM_READ_AHEAD_AUTO &&
+ if ((features & FMT_RESTRICTED_READAHEAD) &&
+ lp->read_ahead != DM_READ_AHEAD_AUTO &&
lp->read_ahead != DM_READ_AHEAD_NONE &&
- (vg->fid->fmt->features & FMT_RESTRICTED_READAHEAD) &&
(lp->read_ahead < 2 || lp->read_ahead > 120)) {
log_error("Metadata only supports readahead values between 2 and 120.");
- return NULL;
- }
-
- if (lp->stripe_size > vg->extent_size) {
- log_error("Reducing requested stripe size %s to maximum, "
- "physical extent size %s",
- display_size(cmd, (uint64_t) lp->stripe_size),
- display_size(cmd, (uint64_t) vg->extent_size));
- lp->stripe_size = vg->extent_size;
+ return 0;
}
/* Need to check the vg's format to verify this - the cmd format isn't setup properly yet */
- if (lp->stripes > 1 &&
- !(vg->fid->fmt->features & FMT_UNLIMITED_STRIPESIZE) &&
- (lp->stripe_size > STRIPE_SIZE_MAX)) {
- log_error("Stripe size may not exceed %s",
- display_size(cmd, (uint64_t) STRIPE_SIZE_MAX));
- return NULL;
+ if (!(features & FMT_UNLIMITED_STRIPESIZE) &&
+ (lp->stripes > 1) && (lp->stripe_size > STRIPE_SIZE_MAX)) {
+ log_error("Stripe size may not exceed %s.",
+ display_size(vg->cmd, (uint64_t) STRIPE_SIZE_MAX));
+ return 0;
}
- if ((size_rest = lp->extents % lp->stripes)) {
- log_print_unless_silent("Rounding size (%d extents) up to stripe boundary "
- "size (%d extents)", lp->extents,
- lp->extents - size_rest + lp->stripes);
- lp->extents = lp->extents - size_rest + lp->stripes;
- }
+ return 1;
+}
- /* Does LV need to be zeroed? Thin handles this as a per-pool in-kernel setting. */
- if (lp->zero && !segtype_is_thin(lp->segtype) && !activation()) {
- log_error("Can't wipe start of new LV without using "
- "device-mapper kernel driver");
+/* Thin notes:
+ * If lp->thin OR lp->activate is AY*, activate the pool if not already active.
+ * If lp->thin, create thin LV within the pool - as a snapshot if lp->snapshot.
+ * If lp->activate is AY*, activate it.
+ * If lp->activate is AN* and the pool was originally not active, deactivate it.
+ */
+static struct logical_volume *_lv_create_an_lv(struct volume_group *vg,
+ struct lvcreate_params *lp,
+ const char *new_lv_name)
+{
+ struct cmd_context *cmd = vg->cmd;
+ uint32_t size;
+ uint64_t status = lp->permission | VISIBLE_LV;
+ const struct segment_type *create_segtype = lp->segtype;
+ struct logical_volume *lv, *origin_lv = NULL;
+ struct logical_volume *pool_lv = NULL;
+ struct logical_volume *tmp_lv;
+ struct lv_segment *seg = NULL, *pool_seg;
+ int thin_pool_was_active = -1; /* not scanned, inactive, active */
+ int historical;
+ uint64_t transaction_id;
+ int ret;
+
+ if (new_lv_name && lv_name_is_used_in_vg(vg, new_lv_name, &historical)) {
+		log_error("%sLogical volume \"%s\" already exists in "
+			"volume group \"%s\".", historical ? "historical " : "",
+ new_lv_name, vg->name);
return NULL;
}
- status |= lp->permission | VISIBLE_LV;
+ if (!_vg_check_features(vg, lp))
+ return_NULL;
- if (lp->snapshot && lp->thin) {
- if (!(org = find_lv(vg, lp->origin))) {
- log_error("Couldn't find origin volume '%s'.",
- lp->origin);
+ if (!activation()) {
+ if (seg_is_cache(lp) ||
+ seg_is_mirror(lp) ||
+ (seg_is_raid(lp) && !seg_is_raid0(lp)) ||
+ seg_is_thin(lp) ||
+ seg_is_vdo(lp) ||
+ lp->snapshot) {
+ /*
+ * FIXME: For thin pool add some code to allow delayed
+ * initialization of empty thin pool volume.
+ * i.e. using some LV flag, fake message,...
+ * and testing for metadata pool header signature?
+ */
+ log_error("Can't create %s without using "
+ "device-mapper kernel driver.",
+ lp->segtype->name);
return NULL;
}
-
- if (org->status & LOCKED) {
- log_error("Snapshots of locked devices are not supported.");
- return NULL;
+ /* Does LV need to be zeroed? */
+ if (lp->zero) {
+			log_warn("WARNING: Skipping zeroing and wiping, compiled without activation support.");
+ lp->zero = 0;
+ lp->wipe_signatures = 0;
}
+ }
- lp->voriginextents = org->le_count;
- } else if (lp->snapshot) {
- if (!activation()) {
- log_error("Can't create snapshot without using "
- "device-mapper kernel driver");
+ if (lp->stripe_size > vg->extent_size) {
+ if (seg_is_raid(lp) && (vg->extent_size < STRIPE_SIZE_MIN)) {
+ /*
+ * FIXME: RAID will simply fail to load the table if
+ * this is the case, but we should probably
+ * honor the stripe minimum for regular stripe
+			 * volumes as well. We avoid doing that now
+			 * only to minimize the change.
+ */
+ log_error("The extent size in volume group %s is too "
+ "small to support striped RAID volumes.",
+ vg->name);
return NULL;
}
- /* Must zero cow */
- status |= LVM_WRITE;
-
- if (lp->voriginsize)
- origin_active = 1;
- else {
-
- if (!(org = find_lv(vg, lp->origin))) {
- log_error("Couldn't find origin volume '%s'.",
- lp->origin);
- return NULL;
- }
- if (lv_is_virtual_origin(org)) {
- log_error("Can't share virtual origins. "
- "Use --virtualsize.");
- return NULL;
- }
- if (lv_is_cow(org)) {
- log_error("Snapshots of snapshots are not "
- "supported yet.");
- return NULL;
- }
- if (org->status & LOCKED) {
- log_error("Snapshots of locked devices are not "
- "supported yet");
- return NULL;
- }
- if (lv_is_merging_origin(org)) {
- log_error("Snapshots of an origin that has a "
- "merging snapshot is not supported");
- return NULL;
- }
-
- if (lv_is_thin_type(org) && !lv_is_thin_volume(org)) {
- log_error("Snapshots of thin pool %sdevices "
- "are not supported.",
- lv_is_thin_pool_data(org) ? "data " :
- lv_is_thin_pool_metadata(org) ?
- "metadata " : "");
- return NULL;
- }
-
- if (lv_is_mirror_type(org) &&
- !seg_is_raid(first_seg(org))) {
- log_warn("WARNING: Snapshots of mirrors can deadlock under rare device failures.");
- log_warn("WARNING: Consider using the raid1 mirror type to avoid this.");
- log_warn("WARNING: See global/mirror_segtype_default in lvm.conf.");
- }
-
- if (!lv_info(cmd, org, 0, &info, 0, 0)) {
- log_error("Check for existence of active snapshot "
- "origin '%s' failed.", org->name);
- return NULL;
- }
- origin_active = info.exists;
-
- if (vg_is_clustered(vg) &&
- !lv_is_active_exclusive_locally(org)) {
- log_error("%s must be active exclusively to"
- " create snapshot", org->name);
- return NULL;
- }
- }
- }
-
- if (!seg_is_thin_volume(lp) && !lp->extents) {
- log_error("Unable to create new logical volume with no extents");
- return NULL;
+ log_print_unless_silent("Reducing requested stripe size %s to maximum, "
+ "physical extent size %s.",
+ display_size(cmd, (uint64_t) lp->stripe_size),
+ display_size(cmd, (uint64_t) vg->extent_size));
+ lp->stripe_size = vg->extent_size;
}
- if (seg_is_thin_pool(lp) &&
- ((uint64_t)lp->extents * vg->extent_size < lp->chunk_size)) {
- log_error("Unable to create thin pool smaller than 1 chunk.");
- return NULL;
- }
+ lp->extents = _round_to_stripe_boundary(vg, lp->extents, lp->stripes, 1);
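+	/*
+	 * Illustrative example: with lp->stripes = 3 and lp->extents = 100,
+	 * 100 % 3 == 1, so the size is rounded up to 102 extents to keep all
+	 * stripes the same length.
+	 */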
- if (lp->snapshot && !lp->thin && ((uint64_t)lp->extents * vg->extent_size < 2 * lp->chunk_size)) {
- log_error("Unable to create a snapshot smaller than 2 chunks.");
+ if (!lp->extents && !seg_is_virtual(lp)) {
+ log_error(INTERNAL_ERROR "Unable to create new logical volume with no extents.");
return NULL;
}
- if (!seg_is_virtual(lp) &&
- vg->free_count < lp->extents) {
- log_error("Volume group \"%s\" has insufficient free space "
- "(%u extents): %u required.",
- vg->name, vg->free_count, lp->extents);
+ if ((seg_is_pool(lp) || seg_is_cache(lp)) &&
+ ((uint64_t)lp->extents * vg->extent_size < lp->chunk_size)) {
+ log_error("Unable to create %s smaller than 1 chunk.",
+ lp->segtype->name);
return NULL;
}
- if (lp->stripes > dm_list_size(lp->pvh) && lp->alloc != ALLOC_ANYWHERE) {
+ if ((lp->alloc != ALLOC_ANYWHERE) && (lp->stripes > dm_list_size(lp->pvh))) {
log_error("Number of stripes (%u) must not exceed "
"number of physical volumes (%d)", lp->stripes,
dm_list_size(lp->pvh));
return NULL;
}
- if (!activation() &&
- (seg_is_mirrored(lp) ||
- seg_is_raid(lp) ||
- seg_is_thin_pool(lp))) {
- /*
- * FIXME: For thin pool add some code to allow delayed
- * initialization of empty thin pool volume.
- * i.e. using some LV flag, fake message,...
- * and testing for metadata pool header signature?
- */
- log_error("Can't create %s without using "
- "device-mapper kernel driver.",
- segtype_is_raid(lp->segtype) ? lp->segtype->name :
- segtype_is_mirrored(lp->segtype) ? "mirror" :
- "thin pool volume");
- return NULL;
- }
-
- /* The snapshot segment gets created later */
- if (lp->snapshot && !lp->thin &&
- !(lp->segtype = get_segtype_from_string(cmd, "striped")))
- return_NULL;
-
- if (!archive(vg))
- return_NULL;
+ if (seg_is_pool(lp))
+ status |= LVM_WRITE; /* Pool is always writable */
+ else if (seg_is_cache(lp) || seg_is_thin_volume(lp) || seg_is_vdo(lp)) {
+ /* Resolve pool volume */
+ if (!lp->pool_name) {
+ /* Should be already checked */
+ log_error(INTERNAL_ERROR "Cannot create %s volume without %s pool.",
+ lp->segtype->name, lp->segtype->name);
+ return NULL;
+ }
- if (!dm_list_empty(&lp->tags)) {
- if (!(vg->fid->fmt->features & FMT_TAGS)) {
- log_error("Volume group %s does not support tags",
- vg->name);
+ if (!(pool_lv = find_lv(vg, lp->pool_name))) {
+		log_error("Couldn't find volume %s in volume group %s.",
+ lp->pool_name, vg->name);
return NULL;
}
- }
- if (seg_is_thin_volume(lp) &&
- ((lp->activate == CHANGE_AY) ||
- (lp->activate == CHANGE_AE) ||
- (lp->activate == CHANGE_ALY))) {
- /* Ensure all stacked messages are submitted */
- if (!(lvl = find_lv_in_vg(vg, lp->pool))) {
- log_error("Unable to find existing pool LV %s in VG %s.",
- lp->pool, vg->name);
+ if (lv_is_locked(pool_lv)) {
+ log_error("Cannot use locked pool volume %s.",
+ display_lvname(pool_lv));
return NULL;
}
- if (!update_pool_lv(lvl->lv, 1))
+
+ if (seg_is_thin_volume(lp)) {
+			/* Validate volume size to align on chunk boundary for small extents */
+ size = first_seg(pool_lv)->chunk_size;
+ if (size > vg->extent_size) {
+ /* Align extents on chunk boundary size */
+ size = ((uint64_t)vg->extent_size * lp->extents + size - 1) /
+ size * size / vg->extent_size;
+ if (size != lp->extents) {
+ log_print_unless_silent("Rounding size (%d extents) up to chunk boundary "
+ "size (%d extents).", lp->extents, size);
+ lp->extents = size;
+ }
+ }
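+			/*
+			 * Illustrative example: with a 64KiB extent
+			 * (vg->extent_size = 128 sectors) and a 256KiB chunk
+			 * (chunk_size = 512 sectors), a request for 10 extents
+			 * (1280 sectors) is rounded up to 1536 sectors,
+			 * i.e. 12 extents.
+			 */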
+
+ thin_pool_was_active = lv_is_active(pool_lv);
+ if (lv_is_new_thin_pool(pool_lv)) {
+ if (!check_new_thin_pool(pool_lv))
+ return_NULL;
+ /* New pool is now inactive */
+ } else {
+ if (!activate_lv(cmd, pool_lv)) {
+ log_error("Aborting. Failed to locally activate thin pool %s.",
+ display_lvname(pool_lv));
+ return NULL;
+ }
+ if (!thin_pool_below_threshold(first_seg(pool_lv))) {
+ log_error("Cannot create new thin volume, free space in "
+ "thin pool %s reached threshold.",
+ display_lvname(pool_lv));
+ return NULL;
+ }
+ }
+ }
+
+ if (seg_is_cache(lp) &&
+ !wipe_cache_pool(pool_lv))
return_NULL;
}
- if (vg_is_clustered(vg) && segtype_is_raid(lp->segtype)) {
- /*
- * FIXME:
- * We could allow a RAID LV to be created as long as it
- * is activated exclusively. Any subsequent activations
- * would have to be enforced as exclusive also.
- *
- * For now, we disallow the existence of RAID LVs in a
- * cluster VG
- */
- log_error("Unable to create a %s logical volume in a cluster.",
- lp->segtype->name);
+ /* Resolve origin volume */
+ if (lp->origin_name &&
+ !(origin_lv = find_lv(vg, lp->origin_name))) {
+ log_error("Origin volume %s not found in Volume group %s.",
+ lp->origin_name, vg->name);
return NULL;
}
- if (segtype_is_mirrored(lp->segtype) || segtype_is_raid(lp->segtype)) {
+ if (origin_lv && seg_is_cache_pool(lp)) {
+		/* Converting existing origin and creating cache pool */
+ if (!validate_lv_cache_create_origin(origin_lv))
+ return_NULL;
+
+ if (origin_lv->size < lp->chunk_size) {
+			log_error("Caching of an origin volume smaller than the chunk size is unsupported.");
+ return NULL;
+ }
+ } else if (seg_is_cache(lp)) {
+ if (!pool_lv) {
+ log_error(INTERNAL_ERROR "Pool LV for cache is missing.");
+ return NULL;
+ }
+ if (!lv_is_cache_pool(pool_lv)) {
+ log_error("Logical volume %s is not a cache pool.",
+ display_lvname(pool_lv));
+ return NULL;
+ }
+ /* Create cache origin for cache pool */
+ /* FIXME Eventually support raid/mirrors with -m */
+ if (!(create_segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_STRIPED)))
+			return_NULL;
+
+ } else if (seg_is_integrity(lp)) {
+ if (!(create_segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_STRIPED)))
+			return_NULL;
+
+ } else if (seg_is_mirrored(lp) || (seg_is_raid(lp) && !seg_is_any_raid0(lp))) {
+ if (!(lp->region_size = adjusted_mirror_region_size(vg->cmd,
+ vg->extent_size,
+ lp->extents,
+ lp->region_size, 0,
+ vg_is_clustered(vg))))
+ return_NULL;
+
+ /* FIXME This will not pass cluster lock! */
init_mirror_in_sync(lp->nosync);
if (lp->nosync) {
- log_warn("WARNING: New %s won't be synchronised. "
+ log_warn("WARNING: New %s won't be synchronized. "
"Don't read what you didn't write!",
lp->segtype->name);
status |= LV_NOTSYNCED;
}
+ } else if (pool_lv && seg_is_thin_volume(lp)) {
+ if (!lv_is_thin_pool(pool_lv)) {
+ log_error("Logical volume %s is not a thin pool.",
+ display_lvname(pool_lv));
+ return NULL;
+ }
+
+ if (origin_lv) {
+ if (lv_is_locked(origin_lv)) {
+ log_error("Snapshots of locked devices are not supported.");
+ return NULL;
+ }
+
+ lp->virtual_extents = origin_lv->le_count;
+
+ /*
+ * Check if using 'external origin' or the 'normal' snapshot
+ * within the same thin pool
+ */
+ if (first_seg(origin_lv)->pool_lv != pool_lv) {
+ if (!thin_pool_supports_external_origin(first_seg(pool_lv), origin_lv))
+ return_NULL;
+ if (origin_lv->status & LVM_WRITE) {
+ log_error("Cannot use writable LV as the external origin.");
+ return NULL; /* FIXME conversion for inactive */
+ }
+ if (lv_is_active(origin_lv) && !lv_is_external_origin(origin_lv)) {
+ log_error("Cannot use active LV for the external origin.");
+ return NULL; /* We can't be sure device is read-only */
+ }
+ }
+ }
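+		/*
+		 * Illustrative example: "lvcreate -s vg/thinlv" snapshots within
+		 * the same pool, while "lvcreate -s vg/rolv --thinpool vg/pool"
+		 * (rolv being inactive and read-only) uses rolv as an external
+		 * origin, subject to the checks above.
+		 */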
+ } else if (lp->snapshot) {
+ if (!lp->virtual_extents) {
+ if (!origin_lv) {
+ log_error("Couldn't find origin volume '%s'.",
+ lp->origin_name);
+ return NULL;
+ }
+ if (lv_is_virtual_origin(origin_lv)) {
+ log_error("Can't share virtual origins. "
+ "Use --virtualsize.");
+ return NULL;
+ }
+
+ if (!validate_snapshot_origin(origin_lv))
+				return_NULL;
+ }
+
+ if (!cow_has_min_chunks(vg, lp->extents, lp->chunk_size))
+ return_NULL;
+
+ /* The snapshot segment gets created later */
+ if (!(create_segtype = get_segtype_from_string(cmd, SEG_TYPE_NAME_STRIPED)))
+ return_NULL;
+
+ /* Must zero cow */
+ status |= LVM_WRITE;
+ lp->zero = 1;
+ lp->wipe_signatures = 0;
+ } else if (seg_is_vdo_pool(lp)) {
+ if (!lp->virtual_extents)
+ log_verbose("Virtual size matching available free logical size in VDO pool.");
+
+ if (!(create_segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_STRIPED)))
+ return_NULL;
+
+ /* Must zero and format data area */
+ status |= LVM_WRITE;
+ lp->zero = 1;
+ }
+
+ if (!segtype_is_virtual(create_segtype) && !lp->approx_alloc &&
+ (vg->free_count < lp->extents)) {
+ log_error("Volume group \"%s\" has insufficient free space "
+ "(%u extents): %u required.",
+ vg->name, vg->free_count, lp->extents);
+ return NULL;
+ }
- lp->region_size = adjusted_mirror_region_size(vg->extent_size,
- lp->extents,
- lp->region_size);
+ if (pool_lv && segtype_is_thin_volume(create_segtype)) {
+ /* Ensure all stacked messages are submitted */
+ if ((thin_pool_is_active(pool_lv) || is_change_activating(lp->activate)) &&
+ !update_thin_pool_lv(pool_lv, 1))
+ return_NULL;
}
if (!(lv = lv_create_empty(new_lv_name ? : "lvol%d", NULL,
@@ -4479,53 +9402,113 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, struct l
return_NULL;
if (lp->read_ahead != lv->read_ahead) {
- log_verbose("Setting read ahead sectors");
lv->read_ahead = lp->read_ahead;
+ log_debug_metadata("Setting read ahead sectors %u.", lv->read_ahead);
}
- if (!seg_is_thin_pool(lp) && lp->minor >= 0) {
+ if (!segtype_is_pool(create_segtype) &&
+ !segtype_is_vdo_pool(create_segtype) &&
+ lp->minor >= 0) {
lv->major = lp->major;
lv->minor = lp->minor;
lv->status |= FIXED_MINOR;
- log_verbose("Setting device number to (%d, %d)", lv->major,
- lv->minor);
+ log_debug_metadata("Setting device number to (%d, %d).",
+ lv->major, lv->minor);
}
+ /*
+ * The specific LV may not use a lock. lockd_init_lv() sets
+ * lv->lock_args to NULL if this LV does not use its own lock.
+ */
+
+ if (!lockd_init_lv(vg->cmd, vg, lv, lp))
+ return_NULL;
+
dm_list_splice(&lv->tags, &lp->tags);
- if (!lv_extend(lv, lp->segtype,
+ if (!lv_extend(lv, create_segtype,
lp->stripes, lp->stripe_size,
lp->mirrors,
- seg_is_thin_pool(lp) ? lp->poolmetadataextents : lp->region_size,
- seg_is_thin_volume(lp) ? lp->voriginextents : lp->extents,
- seg_is_thin_volume(lp) ? (org ? org->name : lp->pool) : NULL, lp->pvh, lp->alloc))
+ segtype_is_pool(create_segtype) ? lp->pool_metadata_extents : lp->region_size,
+ (segtype_is_thin_volume(create_segtype) ||
+ segtype_is_vdo(create_segtype)) ? lp->virtual_extents : lp->extents,
+ lp->pvh, lp->alloc, lp->approx_alloc)) {
+ unlink_lv_from_vg(lv); /* Keep VG consistent and remove LV without any segment */
return_NULL;
+ }
- if (seg_is_thin_pool(lp)) {
- first_seg(lv)->zero_new_blocks = lp->zero ? 1 : 0;
- first_seg(lv)->chunk_size = lp->chunk_size;
- first_seg(lv)->discards = lp->discards;
- /* FIXME: use lowwatermark via lvm.conf global for all thinpools ? */
- first_seg(lv)->low_water_mark = 0;
- } else if (seg_is_thin_volume(lp)) {
- pool_lv = first_seg(lv)->pool_lv;
+ /* rhbz1269533: allow for 100%FREE allocation to work with "mirror" and a disk log */
+ if (segtype_is_mirror(create_segtype) &&
+ lp->log_count &&
+ !vg->free_count &&
+ lv->le_count > 1)
+ lv_reduce(lv, 1);
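+	/*
+	 * Illustrative example: "lvcreate --type mirror -m1 --mirrorlog disk
+	 * -l100%FREE vg" consumes every free extent for the mirror images,
+	 * so one extent is given back here to make room for the disk log.
+	 */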
+
+ /* Unlock memory if possible */
+ memlock_unlock(vg->cmd);
- if (!(first_seg(lv)->device_id =
- get_free_pool_device_id(first_seg(pool_lv)))) {
+ if (pool_lv && segtype_is_vdo(create_segtype))
+ if (!set_lv_segment_area_lv(first_seg(lv), 0, pool_lv, 0, LV_VDO_POOL))
+ return_NULL;
+
+ if (lv_is_cache_pool(lv)) {
+ if (!cache_set_params(first_seg(lv),
+ lp->chunk_size,
+ lp->cache_metadata_format,
+ lp->cache_mode,
+ lp->policy_name,
+ lp->policy_settings)) {
stack;
goto revert_new_lv;
}
-
- if (!attach_pool_message(first_seg(pool_lv),
- DM_THIN_MESSAGE_CREATE_THIN, lv, 0, 0)) {
+ } else if (lv_is_raid(lv) && !seg_is_any_raid0(first_seg(lv))) {
+ first_seg(lv)->min_recovery_rate = lp->min_recovery_rate;
+ first_seg(lv)->max_recovery_rate = lp->max_recovery_rate;
+ } else if (lv_is_thin_pool(lv)) {
+ first_seg(lv)->chunk_size = lp->chunk_size;
+ first_seg(lv)->zero_new_blocks = lp->zero_new_blocks;
+ first_seg(lv)->discards = lp->discards;
+ if ((first_seg(lv)->crop_metadata = lp->crop_metadata) == THIN_CROP_METADATA_NO)
+ lv->status |= LV_CROP_METADATA;
+ if (!recalculate_pool_chunk_size_with_dev_hints(lv, seg_lv(first_seg(lv), 0),
+ lp->thin_chunk_size_calc_policy)) {
stack;
goto revert_new_lv;
}
+ if (lp->error_when_full)
+ lv->status |= LV_ERROR_WHEN_FULL;
+ } else if (pool_lv && lv_is_virtual(lv) && /* not yet thin LV */
+ (seg = first_seg(lv)) &&
+ seg_is_thin(seg)) { /* going to be a thin volume */
+ pool_seg = first_seg(pool_lv);
+ if (!(seg->device_id = get_free_thin_pool_device_id(pool_seg)))
+ return_NULL;
+ seg->transaction_id = pool_seg->transaction_id;
+ if (origin_lv && lv_is_thin_volume(origin_lv) &&
+ (first_seg(origin_lv)->pool_lv == pool_lv)) {
+ /* For thin snapshot pool must match */
+ if (!attach_pool_lv(seg, pool_lv, origin_lv, NULL, NULL))
+ return_NULL;
+ /* Use the same external origin */
+ if (!attach_thin_external_origin(seg, first_seg(origin_lv)->external_lv))
+ return_NULL;
+ } else {
+ if (!attach_pool_lv(seg, pool_lv, NULL, NULL, NULL))
+ return_NULL;
+ /* If there is an external origin... */
+ if (!attach_thin_external_origin(seg, origin_lv))
+ return_NULL;
+ }
+
+ if (!attach_thin_pool_message(pool_seg, DM_THIN_MESSAGE_CREATE_THIN, lv, 0, 0))
+ return_NULL;
}
+ if (!thin_pool_check_overprovisioning(lv))
+ return_NULL;
+
/* FIXME Log allocation and attachment should have happened inside lv_extend. */
- if (lp->log_count &&
- !seg_is_raid(first_seg(lv)) && seg_is_mirrored(first_seg(lv))) {
+ if (lp->log_count && segtype_is_mirror(create_segtype)) {
if (!add_mirror_log(cmd, lv, lp->log_count,
first_seg(lv)->region_size,
lp->pvh, lp->alloc)) {
@@ -4534,11 +9517,11 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, struct l
}
}
- /* store vg on disk(s) */
- if (!vg_write(vg) || !vg_commit(vg))
- return_NULL;
+ lv_set_activation_skip(lv, lp->activation_skip & ACTIVATION_SKIP_SET,
+ lp->activation_skip & ACTIVATION_SKIP_SET_ENABLED);
- backup(vg);
+ if (lp->noautoactivate)
+ lv->status |= LV_NOAUTOACTIVATE;
/*
* Check for autoactivation.
@@ -4546,184 +9529,407 @@ static struct logical_volume *_lv_create_an_lv(struct volume_group *vg, struct l
* it just as if CHANGE_AY was used, CHANGE_AN otherwise.
*/
if (lp->activate == CHANGE_AAY)
- lp->activate = lv_passes_auto_activation_filter(cmd, lv) ?
- CHANGE_ALY : CHANGE_ALN;
+ lp->activate = lv_passes_auto_activation_filter(cmd, lv)
+ ? CHANGE_ALY : CHANGE_ALN;
+
+ if (lv_activation_skip(lv, lp->activate, lp->activation_skip & ACTIVATION_SKIP_IGNORE))
+ lp->activate = CHANGE_AN;
+
+ /* store vg on disk(s) */
+ if (!vg_write(vg) || !vg_commit(vg))
+		/* A pool may have created its metadata LV by now, but better to avoid recovery when vg_write/commit fails */
+ return_NULL;
if (test_mode()) {
- log_verbose("Test mode: Skipping activation and zeroing.");
+ log_verbose("Test mode: Skipping activation, zeroing and signature wiping.");
goto out;
}
- if (seg_is_thin(lp)) {
+ if (seg_is_raid(lp) && lp->raidintegrity) {
+ log_debug("Adding integrity to new LV");
+
+ if (!lv_add_integrity_to_raid(lv, &lp->integrity_settings, lp->pvh, NULL))
+ goto revert_new_lv;
+ }
+
+ /* Do not scan this LV until properly zeroed/wiped. */
+ if (_should_wipe_lv(lp, lv, 0))
+ lv->status |= LV_NOSCAN;
+
+ if (lp->temporary)
+ lv->status |= LV_TEMPORARY;
+
+ if (seg_is_cache(lp)) {
+ if (vg_is_shared(vg)) {
+ if (is_change_activating(lp->activate)) {
+ if (!lv_active_change(cmd, lv, CHANGE_AEY)) {
+ log_error("Aborting. Failed to activate LV %s.",
+ display_lvname(lv));
+ goto revert_new_lv;
+ }
+ }
+ }
+
+ /* FIXME Support remote exclusive activation? */
+		/* Not yet a 'cache' LV - it is a striped volume used for wiping */
+
+ else if (is_change_activating(lp->activate) && !activate_lv(cmd, lv)) {
+ log_error("Aborting. Failed to activate LV %s locally exclusively.",
+ display_lvname(lv));
+ goto revert_new_lv;
+ }
+ } else if (lv_is_cache_pool(lv)) {
+		/* Cache pool cannot be activated and zeroed */
+ log_very_verbose("Cache pool is prepared.");
+ } else if (lv_is_thin_volume(lv)) {
/* For snapshot, suspend active thin origin first */
- if (org && lv_is_active(org)) {
- if (!pool_below_threshold(first_seg(first_seg(org)->pool_lv))) {
- log_error("Cannot create thin snapshot. Pool %s/%s is filled "
- "over the autoextend threshold.",
- org->vg->name, first_seg(org)->pool_lv->name);
- goto revert_new_lv;
+ if (origin_lv && lv_is_thin_volume(origin_lv) && lv_is_active(origin_lv)) {
+ if (!(ret = suspend_lv_origin(cmd, origin_lv))) {
+ log_error("Failed to suspend thin snapshot origin %s.",
+ display_lvname(origin_lv));
}
- if (!suspend_lv_origin(cmd, org)) {
- log_error("Failed to suspend thin snapshot origin %s/%s.",
- org->vg->name, org->name);
- goto revert_new_lv;
+ /* Note: always proceed with resume_lv() to leave critical_section */
+ if (!resume_lv_origin(cmd, origin_lv)) { /* deptree updates thin-pool */
+ log_error("Failed to resume thin snapshot origin %s.",
+ display_lvname(origin_lv));
+ if (ret)
+ /* suspend with message was OK, only resume failed */
+ goto revert_new_lv; /* hard to fix things here */
}
- if (!resume_lv_origin(cmd, org)) { /* deptree updates thin-pool */
- log_error("Failed to resume thin snapshot origin %s/%s.",
- org->vg->name, org->name);
+ if (!ret) {
+				/* The pool transaction_id has been incremented for this canceled
+				 * transaction and needs to be restored to the value recorded in
+				 * this canceled segment.
+				 * TODO: there is a low chance the actual suspend has failed
+				 */
+ struct lv_status_thin_pool *tpstatus;
+ if (!lv_thin_pool_status(pool_lv, 1, &tpstatus))
+ log_error("Aborting. Failed to read transaction_id from thin pool %s.",
+ display_lvname(pool_lv)); /* Can't even get thin pool transaction id ??? */
+ else {
+ transaction_id = tpstatus->thin_pool->transaction_id;
+ dm_pool_destroy(tpstatus->mem);
+
+ if ((transaction_id != first_seg(pool_lv)->transaction_id) &&
+ (transaction_id != seg->transaction_id))
+					log_warn("WARNING: Metadata for thin pool %s has transaction_id " FMTu64
+ ", but active pool has " FMTu64 ".",
+ display_lvname(pool_lv), seg->transaction_id, transaction_id);
+ log_debug_metadata("Restoring previous transaction_id " FMTu64 " for thin pool %s.",
+ seg->transaction_id, display_lvname(pool_lv));
+ first_seg(pool_lv)->transaction_id = seg->transaction_id;
+ first_seg(lv)->device_id = 0; /* no delete of never existing thin device */
+ }
goto revert_new_lv;
}
/* At this point remove pool messages, snapshot is active */
- if (!update_pool_lv(first_seg(org)->pool_lv, 0)) {
+ if (!update_thin_pool_lv(pool_lv, 0)) {
stack;
- goto deactivate_and_revert_new_lv;
+ goto revert_new_lv;
}
- }
- if (((lp->activate == CHANGE_AY) ||
- (lp->activate == CHANGE_AE) ||
- (lp->activate == CHANGE_ALY))) {
- /* At this point send message to kernel thin mda */
- pool_lv = lv_is_thin_pool(lv) ? lv : first_seg(lv)->pool_lv;
- if (!update_pool_lv(pool_lv, 1)) {
- stack;
- goto deactivate_and_revert_new_lv;
+ } else if (!dm_list_empty(&first_seg(pool_lv)->thin_messages)) {
+ /* Send message so that table preload knows new thin */
+ if (!lv_is_active(pool_lv)) {
+ /* Avoid multiple thin-pool activations in this case */
+ if (thin_pool_was_active < 0)
+ thin_pool_was_active = 0;
+ if (!activate_lv(cmd, pool_lv)) {
+ log_error("Failed to activate thin pool %s.",
+ display_lvname(pool_lv));
+ goto revert_new_lv;
+ }
+ if (!lv_is_active(pool_lv)) {
+ log_error("Cannot activate thin pool %s, perhaps skipped in lvm.conf volume_list?",
+ display_lvname(pool_lv));
+				return NULL;
+ }
}
- if (!activate_lv_excl(cmd, lv)) {
- log_error("Aborting. Failed to activate thin %s.",
- lv->name);
- goto deactivate_and_revert_new_lv;
+ /* Keep thin pool active until thin volume is activated */
+ if (!update_thin_pool_lv(pool_lv, 1)) {
+ stack;
+ goto revert_new_lv;
}
}
+
+ if (!lv_active_change(cmd, lv, lp->activate)) {
+ log_error("Failed to activate thin %s.", lv->name);
+ goto deactivate_and_revert_new_lv;
+ }
+
+ /* Restore inactive state if needed */
+ if (!thin_pool_was_active &&
+ !deactivate_lv(cmd, pool_lv)) {
+ log_error("Failed to deactivate thin pool %s.",
+ display_lvname(pool_lv));
+ return NULL;
+ }
} else if (lp->snapshot) {
- if (!activate_lv_excl(cmd, lv)) {
+ lv->status |= LV_TEMPORARY;
+ if (!activate_lv(cmd, lv)) {
log_error("Aborting. Failed to activate snapshot "
"exception store.");
goto revert_new_lv;
}
- } else if ((lp->activate == CHANGE_AY && !activate_lv(cmd, lv)) ||
- (lp->activate == CHANGE_AE && !activate_lv_excl(cmd, lv)) ||
- (lp->activate == CHANGE_ALY && !activate_lv_local(cmd, lv))) {
- log_error("Failed to activate new LV.");
- if (lp->zero)
- goto deactivate_and_revert_new_lv;
- return NULL;
- }
-
- if (!seg_is_thin(lp) && !lp->zero && !lp->snapshot)
- log_warn("WARNING: \"%s\" not zeroed", lv->name);
- else if ((!seg_is_thin(lp) ||
- (lv_is_thin_volume(lv) &&
- !first_seg(first_seg(lv)->pool_lv)->zero_new_blocks)) &&
- !set_lv(cmd, lv, UINT64_C(0), 0)) {
- log_error("Aborting. Failed to wipe %s.",
- lp->snapshot ? "snapshot exception store" :
- "start of new LV");
+ lv->status &= ~LV_TEMPORARY;
+ } else if (seg_is_vdo_pool(lp)) {
+ lv->status |= LV_TEMPORARY;
+ if (!activate_lv(cmd, lv)) {
+ log_error("Aborting. Failed to activate temporary "
+ "volume for VDO pool creation.");
+ goto revert_new_lv;
+ }
+ lv->status &= ~LV_TEMPORARY;
+ } else if (!lv_active_change(cmd, lv, lp->activate)) {
+ log_error("Failed to activate new LV %s.", display_lvname(lv));
goto deactivate_and_revert_new_lv;
}
- if (lp->snapshot && !lp->thin) {
- /* Reset permission after zeroing */
- if (!(lp->permission & LVM_WRITE))
- lv->status &= ~LVM_WRITE;
-
- /* COW area must be deactivated if origin is not active */
- if (!origin_active && !deactivate_lv(cmd, lv)) {
- log_error("Aborting. Couldn't deactivate snapshot "
- "COW area. Manual intervention required.");
- return NULL;
+ if (_should_wipe_lv(lp, lv, !lp->suppress_zero_warn)) {
+ if (!wipe_lv(lv, (struct wipe_params)
+ {
+ .do_zero = lp->zero,
+ .do_wipe_signatures = lp->wipe_signatures,
+ .yes = lp->yes,
+ .force = lp->force,
+ .is_metadata = lp->is_metadata,
+ })) {
+ log_error("Aborting. Failed to wipe %s.", lp->snapshot
+ ? "snapshot exception store" : "start of new LV");
+ goto deactivate_and_revert_new_lv;
}
+ }
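+	/*
+	 * Note (illustrative): the C99 compound literal above leaves every
+	 * wipe_params member that is not named explicitly zero-initialized,
+	 * so only the requested zeroing/wiping options are enabled.
+	 */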
- /* A virtual origin must be activated explicitly. */
- if (lp->voriginsize &&
- (!(org = _create_virtual_origin(cmd, vg, lv->name,
- lp->permission,
- lp->voriginextents)) ||
- !activate_lv_excl(cmd, org))) {
- log_error("Couldn't create virtual origin for LV %s",
- lv->name);
- if (org && !lv_remove(org))
- stack;
+ if (seg_is_vdo_pool(lp)) {
+ if (!convert_vdo_pool_lv(lv, &lp->vdo_params, &lp->virtual_extents,
+ 1, lp->vdo_pool_header_size)) {
+ stack;
goto deactivate_and_revert_new_lv;
}
+ if ((lv->status & LV_ACTIVATION_SKIP) &&
+ !deactivate_lv(cmd, lv)) {
+ log_error("Aborting. Couldn't deactivate VDO LV %s with skipped activation.",
+ display_lvname(lv));
+ return NULL; /* Let's retry on error path */
+ }
+ } else if (seg_is_cache(lp) || (origin_lv && lv_is_cache_pool(lv))) {
+ /* Finish cache conversion magic */
+ if (origin_lv) {
+ /* Convert origin to cached LV */
+ if (!(tmp_lv = lv_cache_create(lv, origin_lv))) {
+ /* FIXME Do a better revert */
+ log_error("Aborting. Leaving cache pool %s and uncached origin volume %s.",
+ display_lvname(lv), display_lvname(origin_lv));
+ return NULL;
+ }
+ } else {
+ if (!(tmp_lv = lv_cache_create(pool_lv, lv))) {
+ /* 'lv' still keeps created new LV */
+ stack;
+ goto deactivate_and_revert_new_lv;
+ }
+ }
+ lv = tmp_lv;
- /* cow LV remains active and becomes snapshot LV */
+ if (!cache_set_params(first_seg(lv),
+ lp->chunk_size,
+ lp->cache_metadata_format,
+ lp->cache_mode,
+ lp->policy_name,
+ lp->policy_settings))
+ return_NULL; /* revert? */
+
+ if (!lv_update_and_reload(lv)) {
+ char name[NAME_LEN];
+
+ log_debug("Reverting created caching layer.");
+
+ tmp_lv = seg_lv(first_seg(lv), 0); /* tmp corigin */
+ pool_lv = first_seg(lv)->pool_lv;
+
+ if (!detach_pool_lv(first_seg(lv)))
+ return_NULL;
+ if (!remove_layer_from_lv(lv, tmp_lv))
+ return_NULL;
+ if (!lv_remove(tmp_lv))
+ return_NULL;
+
+			/* Either preserve the existing LV and remove the created cache pool LV,
+			 * or preserve the existing cache pool LV and remove the created new LV. */
+ if (origin_lv)
+				lv = pool_lv; /* created cache pool to be reverted as new LV */
+ else {
+ /* Cut off suffix _cpool from preserved existing cache pool */
+ if (!drop_lvname_suffix(name, pool_lv->name, "cpool")) {
+ /* likely older instance of metadata */
+ log_debug("LV %s has no suffix for cachepool (skipping rename).",
+ display_lvname(pool_lv));
+ } else if (!lv_uniq_rename_update(cmd, pool_lv, name, 0))
+ return_NULL;
+ }
- if (!vg_add_snapshot(org, lv, NULL,
- org->le_count, lp->chunk_size)) {
- log_error("Couldn't create snapshot.");
goto deactivate_and_revert_new_lv;
}
+ } else if (lp->snapshot) {
+ /* Deactivate zeroed COW, avoid any race usage */
+ if (!deactivate_lv(cmd, lv)) {
+ log_error("Aborting. Couldn't deactivate snapshot COW area %s.",
+ display_lvname(lv));
+ goto deactivate_and_revert_new_lv; /* Let's retry on error path */
+ }
- /* store vg on disk(s) */
- if (!vg_write(vg))
- return_NULL;
+ /* Get in sync with deactivation, before reusing LV as snapshot */
+ if (!sync_local_dev_names(lv->vg->cmd)) {
+ log_error("Failed to sync local devices before creating snapshot using %s.",
+ display_lvname(lv));
+ goto revert_new_lv;
+ }
- if (!suspend_lv(cmd, org)) {
- log_error("Failed to suspend origin %s", org->name);
- vg_revert(vg);
- return NULL;
+ /* Create zero origin volume for spare snapshot */
+ if (lp->virtual_extents &&
+ !(origin_lv = _create_virtual_origin(cmd, vg, lv->name,
+ (lp->permission & ~LVM_WRITE),
+ lp->virtual_extents)))
+ goto revert_new_lv;
+
+ /* Reset permission after zeroing */
+ if (!(lp->permission & LVM_WRITE))
+ lv->status &= ~LVM_WRITE;
+
+ /*
+ * COW LV is activated via implicit activation of origin LV
+ * Only the snapshot origin holds the LV lock in cluster
+ */
+ if (!origin_lv ||
+ !vg_add_snapshot(origin_lv, lv, NULL,
+ origin_lv->le_count, lp->chunk_size)) {
+ log_error("Couldn't create snapshot.");
+ goto deactivate_and_revert_new_lv;
}
- if (!vg_commit(vg))
- return_NULL;
+ if (lp->virtual_extents) {
+ /* Store vg on disk(s) */
+ if (!vg_write(vg) || !vg_commit(vg))
+				return_NULL; /* Metadata update failed, deep trouble */
- if (!resume_lv(cmd, org)) {
- log_error("Problem reactivating origin %s", org->name);
- return NULL;
+ /*
+ * FIXME We do not actually need snapshot-origin as an active device,
+ * as virtual origin is already 'hidden' private device without
+ * vg/lv links. As such it is not supposed to be used by any user.
+ * Also it would save one dm table entry, but it needs quite a few
+ * changes in the libdm/lvm2 code base to support it.
+ */
+
+			/* Activate the sparse snapshot once it is a complete LV */
+			if (!lv_active_change(cmd, origin_lv, lp->activate)) {
+				log_error("Failed to activate sparse volume %s.",
+ display_lvname(origin_lv));
+ return NULL;
+ }
+ } else if (!lv_update_and_reload(origin_lv)) {
+ log_error("Aborting. Manual intervention required.");
+ return NULL; /* FIXME: revert */
}
}
- /* FIXME out of sequence */
- backup(vg);
-
out:
return lv;
deactivate_and_revert_new_lv:
+ if (!sync_local_dev_names(lv->vg->cmd))
+ log_error("Failed to sync local devices before reverting %s.",
+ display_lvname(lv));
if (!deactivate_lv(cmd, lv)) {
- log_error("Unable to deactivate failed new LV. "
- "Manual intervention required.");
+ log_error("Unable to deactivate failed new LV %s. "
+ "Manual intervention required.", display_lvname(lv));
return NULL;
}
revert_new_lv:
+ if (!lockd_lv(cmd, lv, "un", LDLV_PERSISTENT))
+ log_warn("WARNING: Failed to unlock %s.", display_lvname(lv));
+ lockd_free_lv(vg->cmd, vg, lv->name, &lv->lvid.id[1], lv->lock_args);
+
/* FIXME Better to revert to backup of metadata? */
- if (!lv_remove(lv) || !vg_write(vg) || !vg_commit(vg))
+ /* Do not remove anything for create during conversion operation */
+ if (!strncmp(cmd->name, "lvconvert", 9) ||
+ !lv_remove(lv) || !vg_write(vg) || !vg_commit(vg))
log_error("Manual intervention may be required to remove "
"abandoned LV(s) before retrying.");
- else
- backup(vg);
return NULL;
}
-int lv_create_single(struct volume_group *vg,
- struct lvcreate_params *lp)
+struct logical_volume *lv_create_single(struct volume_group *vg,
+ struct lvcreate_params *lp)
{
+ const struct segment_type *segtype;
struct logical_volume *lv;
- /* Create thin pool first if necessary */
- if (lp->create_thin_pool) {
- if (!seg_is_thin_pool(lp) &&
- !(lp->segtype = get_segtype_from_string(vg->cmd, "thin-pool")))
- return_0;
+ /* Create pool first if necessary */
+ if (lp->create_pool && !seg_is_pool(lp)) {
+ segtype = lp->segtype;
+ if (seg_is_thin_volume(lp)) {
+ if (!(lp->segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_THIN_POOL)))
+ return_NULL;
- if (!(lv = _lv_create_an_lv(vg, lp, lp->pool)))
- return_0;
+ /* We want a lockd lock for the new thin pool, but not the thin lv. */
+ lp->needs_lockd_init = 1;
- if (!lp->thin)
- goto out;
+ if (!(lv = _lv_create_an_lv(vg, lp, lp->pool_name)))
+ return_NULL;
- lp->pool = lv->name;
+ lp->needs_lockd_init = 0;
- if (!(lp->segtype = get_segtype_from_string(vg->cmd, "thin")))
- return_0;
+ } else if (seg_is_cache(lp)) {
+ if (!lp->origin_name) {
+ /* Until we have --pooldatasize we are lost */
+ log_error(INTERNAL_ERROR "Unsupported creation of cache and cache pool volume.");
+ return NULL;
+ }
+ /* origin_name is defined -> creates cache LV with new cache pool */
+ if (!(lp->segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_CACHE_POOL)))
+ return_NULL;
+
+ if (!(lv = _lv_create_an_lv(vg, lp, lp->pool_name)))
+ return_NULL;
+
+ if (!lv_is_cache(lv)) {
+				log_error(INTERNAL_ERROR "Logical volume %s is not a cache LV.",
+					  display_lvname(lv));
+ return NULL;
+ }
+
+			/* Conversion via lvcreate */
+ log_print_unless_silent("Logical volume %s is now cached.",
+ display_lvname(lv));
+ return lv;
+ } else if (seg_is_vdo(lp)) {
+			/* The VDO segment needs a VDO pool, which is a layer above the created striped data LV */
+ if (!(lp->segtype = get_segtype_from_string(vg->cmd, SEG_TYPE_NAME_VDO_POOL)))
+ return_NULL;
+
+ /* We want a lockd lock for the new vdo pool, but not the vdo lv. */
+ lp->needs_lockd_init = 1;
+
+ /* Use vpool names for vdo-pool */
+ if (!(lv = _lv_create_an_lv(vg, lp, lp->pool_name ? : "vpool%d")))
+ return_NULL;
+
+ lp->needs_lockd_init = 0;
+ } else {
+ log_error(INTERNAL_ERROR "Creation of pool for unsupported segment type %s.",
+ lp->segtype->name);
+ return NULL;
+ }
+ lp->pool_name = lv->name;
+ lp->segtype = segtype;
}
if (!(lv = _lv_create_an_lv(vg, lp, lp->lv_name)))
- return_0;
+ return_NULL;
-out:
- log_print_unless_silent("Logical volume \"%s\" created", lv->name);
+ if (lp->temporary)
+ log_verbose("Temporary logical volume \"%s\" created.", lv->name);
+ else
+ log_print_unless_silent("Logical volume \"%s\" created.", lv->name);
- return 1;
+ return lv;
}
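+
+/*
+ * Illustrative usage of the pool-first logic above (a sketch; exact
+ * options depend on the tool version): "lvcreate --type thin -L10G -V1G
+ * --thinpool pool vg" first creates vg/pool with the segtype switched to
+ * thin-pool, then restores the thin segtype and creates the thin volume;
+ * "lvcreate --type cache -L1G vg/origin" takes the cache branch and
+ * returns the now-cached origin directly.
+ */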