#ifndef _FS_CEPH_OSDMAP_H
#define _FS_CEPH_OSDMAP_H

#include <linux/rbtree.h>
#include <linux/ceph/types.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/ceph_fs.h>
#include <linux/crush/crush.h>
/*
 * The osd map describes the current membership of the osd cluster and
 * specifies the mapping of objects to placement groups and placement
 * groups to (sets of) osds.  That is, it completely specifies the
 * (desired) distribution of all data objects in the system at some
 * point in time.
 *
 * Each map version is identified by an epoch, which increases monotonically.
 *
 * The map can be updated either via an incremental map (diff) describing
 * the change between two successive epochs, or as a fully encoded map.
 */
/*
 * Placement group identifier, made of a pool id and a seed.
 * ceph_decode_pgid() fills both fields from an encoded buffer.
 */
struct ceph_pg {
	uint64_t pool;	/* decoded from a 64-bit field */
	uint32_t seed;	/* decoded from a 32-bit field */
};

/*
 * Pool flag bit.
 * NOTE(review): presumably tested against ceph_pg_pool_info.flags -- confirm.
 */
#define CEPH_POOL_FLAG_HASHPSPOOL  1

struct ceph_pg_pool_info {
30
	struct rb_node node;
31 32 33 34 35 36 37 38
	s64 id;
	u8 type;
	u8 size;
	u8 crush_ruleset;
	u8 object_hash;
	u32 pg_num, pgp_num;
	int pg_num_mask, pgp_num_mask;
	u64 flags;
39
	char *name;
S
Sage Weil 已提交
40 41
};

/*
 * Object locator: a pool id plus a key string pointer.
 * NOTE(review): ownership and meaning of @key are not visible in this
 * header -- confirm against the users of this struct.
 */
struct ceph_object_locator {
	uint64_t pool;
	char *key;
};

struct ceph_pg_mapping {
	struct rb_node node;
49
	struct ceph_pg pgid;
S
Sage Weil 已提交
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
	int len;
	int osds[];
};

struct ceph_osdmap {
	struct ceph_fsid fsid;
	u32 epoch;
	u32 mkfs_epoch;
	struct ceph_timespec created, modified;

	u32 flags;         /* CEPH_OSDMAP_* */

	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
	u8 *osd_state;     /* CEPH_OSD_* */
	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
	struct ceph_entity_addr *osd_addr;

	struct rb_root pg_temp;
68 69
	struct rb_root pg_pools;
	u32 pool_max;
S
Sage Weil 已提交
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122

	/* the CRUSH map specifies the mapping of placement groups to
	 * the list of osds that store+replicate them. */
	struct crush_map *crush;
};

/*
 * file layout helpers
 */
/*
 * Each accessor takes a layout struct expression (not a pointer), converts
 * the named little-endian 32-bit field to CPU byte order and casts the
 * result to __s32.
 */
#define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit))
#define ceph_file_layout_stripe_count(l) \
	((__s32)le32_to_cpu((l).fl_stripe_count))
#define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size))
#define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash))
#define ceph_file_layout_object_su(l) \
	((__s32)le32_to_cpu((l).fl_object_stripe_unit))
#define ceph_file_layout_pg_pool(l) \
	((__s32)le32_to_cpu((l).fl_pg_pool))

/*
 * Stripe width of a layout: stripe unit multiplied by stripe count, both
 * converted from little-endian first.  The product is returned as a plain
 * unsigned value.
 */
static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l)
{
	unsigned int stripe_unit = le32_to_cpu(l->fl_stripe_unit);
	unsigned int stripe_count = le32_to_cpu(l->fl_stripe_count);

	return stripe_unit * stripe_count;
}

/* "period" == bytes before i start on a new set of objects */
static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l)
{
	/* object size times stripe count, both read as little-endian 32-bit */
	unsigned int object_size = le32_to_cpu(l->fl_object_size);
	unsigned int stripe_count = le32_to_cpu(l->fl_stripe_count);

	return object_size * stripe_count;
}


/*
 * Return 1 if @osd is within the map's osd arrays and its state has
 * CEPH_OSD_UP set, 0 otherwise.  @map must not be NULL.
 */
static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
{
	/* ids at or beyond max_osd have no osd_state slot */
	if (osd >= map->max_osd)
		return 0;

	return (map->osd_state[osd] & CEPH_OSD_UP) != 0;
}

/*
 * Test whether any bit of @flag is set in the map's flags.
 * A NULL @map is accepted and reports false.
 */
static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
{
	if (!map)
		return false;

	return (map->flags & flag) != 0;
}

/*
 * Defined outside this header.
 * NOTE(review): presumably renders the CEPH_OSD_* bits of @state into the
 * @len-byte buffer @str and returns it -- confirm against the definition.
 */
extern char *ceph_osdmap_state_str(char *str, int len, int state);

/*
 * Look up the address slot of @osd in @map.
 *
 * Returns a pointer into map->osd_addr, or NULL when @osd is not below
 * map->max_osd.  @map must not be NULL.
 */
static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
						     int osd)
{
	return osd < map->max_osd ? &map->osd_addr[osd] : NULL;
}

/*
 * Decode a placement group id from the buffer at *p into @pgid.
 *
 * The encoding consumed here is a 1-byte version, a 64-bit pool, a 32-bit
 * seed and a 4-byte deprecated "preferred" value that is skipped.  Room
 * for all 1 + 8 + 4 + 4 bytes is checked up front against @end.
 *
 * Returns 0 on success, or -EINVAL if the buffer is too short or the
 * encoding version is greater than 1.
 */
static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
{
	__u8 version;

	if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
		/* terminate the message so the log record is complete */
		pr_warning("incomplete pg encoding\n");
		return -EINVAL;
	}
	version = ceph_decode_8(p);
	if (version > 1) {
		pr_warning("do not understand pg encoding %d > 1\n",
			(int)version);
		return -EINVAL;
	}

	pgid->pool = ceph_decode_64(p);
	pgid->seed = ceph_decode_32(p);
	*p += 4;	/* skip deprecated preferred value */

	return 0;
}

/*
 * Map construction and teardown; all three are defined outside this header.
 * NOTE(review): presumably osdmap_decode() builds a map from a fully
 * encoded buffer [*p, end), osdmap_apply_incremental() applies an encoded
 * diff to @map, and ceph_osdmap_destroy() releases a map -- confirm return
 * and ownership conventions against the definitions.
 */
extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
					    struct ceph_osdmap *map,
					    struct ceph_messenger *msgr);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

/* calculate mapping of a file extent to an object */
S
Sage Weil 已提交
153
extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
154
					 u64 off, u64 len,
S
Sage Weil 已提交
155
					 u64 *bno, u64 *oxoff, u64 *oxlen);
/* calculate mapping of object to a placement group */
158 159
extern int ceph_calc_ceph_pg(struct ceph_pg *pg, const char *oid,
			  struct ceph_osdmap *osdmap, uint64_t pool);
160
extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
161
			       struct ceph_pg pgid,
162
			       int *acting);
163
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
164
				struct ceph_pg pgid);
/*
 * Pool lookups by id and by name; both are defined outside this header.
 * NOTE(review): the behaviour for an unknown id or name is not visible
 * here -- confirm against the definitions.
 */
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);

#endif