Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

38
fs/dlm/Kconfig Normal file
View File

@@ -0,0 +1,38 @@
menu "Distributed Lock Manager"
depends on EXPERIMENTAL && INET
config DLM
tristate "Distributed Lock Manager (DLM)"
depends on SYSFS && (IPV6 || IPV6=n)
select CONFIGFS_FS
select IP_SCTP if DLM_SCTP
help
A general purpose distributed lock manager for kernel or userspace
applications.
choice
prompt "Select DLM communications protocol"
depends on DLM
default DLM_TCP
help
The DLM Can use TCP or SCTP for it's network communications.
SCTP supports multi-homed operations whereas TCP doesn't.
However, SCTP seems to have stability problems at the moment.
config DLM_TCP
bool "TCP/IP"
config DLM_SCTP
bool "SCTP"
endchoice
config DLM_DEBUG
bool "DLM debugging"
depends on DLM
help
Under the debugfs mount point, the name of each lockspace will
appear as a file in the "dlm" directory. The output is the
list of resource and locks the local node knows about.
endmenu

21
fs/dlm/Makefile Normal file
View File

@@ -0,0 +1,21 @@
obj-$(CONFIG_DLM) += dlm.o
dlm-y := ast.o \
config.o \
dir.o \
lock.o \
lockspace.o \
main.o \
member.o \
memory.o \
midcomms.o \
rcom.o \
recover.o \
recoverd.o \
requestqueue.o \
user.o \
util.o
dlm-$(CONFIG_DLM_DEBUG) += debug_fs.o
dlm-$(CONFIG_DLM_TCP) += lowcomms-tcp.o
dlm-$(CONFIG_DLM_SCTP) += lowcomms-sctp.o

173
fs/dlm/ast.c Normal file
View File

@@ -0,0 +1,173 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lock.h"
#include "user.h"
#define WAKE_ASTS 0
static struct list_head ast_queue;
static spinlock_t ast_queue_lock;
static struct task_struct * astd_task;
static unsigned long astd_wakeflags;
static struct mutex astd_running;
void dlm_del_ast(struct dlm_lkb *lkb)
{
spin_lock(&ast_queue_lock);
if (lkb->lkb_ast_type & (AST_COMP | AST_BAST))
list_del(&lkb->lkb_astqueue);
spin_unlock(&ast_queue_lock);
}
void dlm_add_ast(struct dlm_lkb *lkb, int type)
{
if (lkb->lkb_flags & DLM_IFL_USER) {
dlm_user_add_ast(lkb, type);
return;
}
DLM_ASSERT(lkb->lkb_astaddr != DLM_FAKE_USER_AST, dlm_print_lkb(lkb););
spin_lock(&ast_queue_lock);
if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
kref_get(&lkb->lkb_ref);
list_add_tail(&lkb->lkb_astqueue, &ast_queue);
}
lkb->lkb_ast_type |= type;
spin_unlock(&ast_queue_lock);
set_bit(WAKE_ASTS, &astd_wakeflags);
wake_up_process(astd_task);
}
static void process_asts(void)
{
struct dlm_ls *ls = NULL;
struct dlm_rsb *r = NULL;
struct dlm_lkb *lkb;
void (*cast) (long param);
void (*bast) (long param, int mode);
int type = 0, found, bmode;
for (;;) {
found = 0;
spin_lock(&ast_queue_lock);
list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
r = lkb->lkb_resource;
ls = r->res_ls;
if (dlm_locking_stopped(ls))
continue;
list_del(&lkb->lkb_astqueue);
type = lkb->lkb_ast_type;
lkb->lkb_ast_type = 0;
found = 1;
break;
}
spin_unlock(&ast_queue_lock);
if (!found)
break;
cast = lkb->lkb_astaddr;
bast = lkb->lkb_bastaddr;
bmode = lkb->lkb_bastmode;
if ((type & AST_COMP) && cast)
cast(lkb->lkb_astparam);
/* FIXME: Is it safe to look at lkb_grmode here
without doing a lock_rsb() ?
Look at other checks in v1 to avoid basts. */
if ((type & AST_BAST) && bast)
if (!dlm_modes_compat(lkb->lkb_grmode, bmode))
bast(lkb->lkb_astparam, bmode);
/* this removes the reference added by dlm_add_ast
and may result in the lkb being freed */
dlm_put_lkb(lkb);
schedule();
}
}
static inline int no_asts(void)
{
int ret;
spin_lock(&ast_queue_lock);
ret = list_empty(&ast_queue);
spin_unlock(&ast_queue_lock);
return ret;
}
static int dlm_astd(void *data)
{
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(WAKE_ASTS, &astd_wakeflags))
schedule();
set_current_state(TASK_RUNNING);
mutex_lock(&astd_running);
if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags))
process_asts();
mutex_unlock(&astd_running);
}
return 0;
}
void dlm_astd_wake(void)
{
if (!no_asts()) {
set_bit(WAKE_ASTS, &astd_wakeflags);
wake_up_process(astd_task);
}
}
int dlm_astd_start(void)
{
struct task_struct *p;
int error = 0;
INIT_LIST_HEAD(&ast_queue);
spin_lock_init(&ast_queue_lock);
mutex_init(&astd_running);
p = kthread_run(dlm_astd, NULL, "dlm_astd");
if (IS_ERR(p))
error = PTR_ERR(p);
else
astd_task = p;
return error;
}
void dlm_astd_stop(void)
{
kthread_stop(astd_task);
}
void dlm_astd_suspend(void)
{
mutex_lock(&astd_running);
}
void dlm_astd_resume(void)
{
mutex_unlock(&astd_running);
}

26
fs/dlm/ast.h Normal file
View File

@@ -0,0 +1,26 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __ASTD_DOT_H__
#define __ASTD_DOT_H__
void dlm_add_ast(struct dlm_lkb *lkb, int type);
void dlm_del_ast(struct dlm_lkb *lkb);
void dlm_astd_wake(void);
int dlm_astd_start(void);
void dlm_astd_stop(void);
void dlm_astd_suspend(void);
void dlm_astd_resume(void);
#endif

919
fs/dlm/config.c Normal file
View File

@@ -0,0 +1,919 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/configfs.h>
#include <net/sock.h>
#include "config.h"
#include "lowcomms.h"
/*
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/nodeid
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
* /config/dlm/<cluster>/comms/<comm>/nodeid
* /config/dlm/<cluster>/comms/<comm>/local
* /config/dlm/<cluster>/comms/<comm>/addr
* The <cluster> level is useless, but I haven't figured out how to avoid it.
*/
static struct config_group *space_list;
static struct config_group *comm_list;
static struct comm *local_comm;
struct clusters;
struct cluster;
struct spaces;
struct space;
struct comms;
struct comm;
struct nodes;
struct node;
static struct config_group *make_cluster(struct config_group *, const char *);
static void drop_cluster(struct config_group *, struct config_item *);
static void release_cluster(struct config_item *);
static struct config_group *make_space(struct config_group *, const char *);
static void drop_space(struct config_group *, struct config_item *);
static void release_space(struct config_item *);
static struct config_item *make_comm(struct config_group *, const char *);
static void drop_comm(struct config_group *, struct config_item *);
static void release_comm(struct config_item *);
static struct config_item *make_node(struct config_group *, const char *);
static void drop_node(struct config_group *, struct config_item *);
static void release_node(struct config_item *);
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len);
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len);
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len);
static ssize_t comm_nodeid_read(struct comm *cm, char *buf);
static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len);
static ssize_t comm_local_read(struct comm *cm, char *buf);
static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len);
static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len);
static ssize_t node_nodeid_read(struct node *nd, char *buf);
static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
static ssize_t node_weight_read(struct node *nd, char *buf);
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
struct cluster {
struct config_group group;
unsigned int cl_tcp_port;
unsigned int cl_buffer_size;
unsigned int cl_rsbtbl_size;
unsigned int cl_lkbtbl_size;
unsigned int cl_dirtbl_size;
unsigned int cl_recover_timer;
unsigned int cl_toss_secs;
unsigned int cl_scan_secs;
unsigned int cl_log_debug;
};
enum {
CLUSTER_ATTR_TCP_PORT = 0,
CLUSTER_ATTR_BUFFER_SIZE,
CLUSTER_ATTR_RSBTBL_SIZE,
CLUSTER_ATTR_LKBTBL_SIZE,
CLUSTER_ATTR_DIRTBL_SIZE,
CLUSTER_ATTR_RECOVER_TIMER,
CLUSTER_ATTR_TOSS_SECS,
CLUSTER_ATTR_SCAN_SECS,
CLUSTER_ATTR_LOG_DEBUG,
};
struct cluster_attribute {
struct configfs_attribute attr;
ssize_t (*show)(struct cluster *, char *);
ssize_t (*store)(struct cluster *, const char *, size_t);
};
static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
unsigned int *info_field, int check_zero,
const char *buf, size_t len)
{
unsigned int x;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
x = simple_strtoul(buf, NULL, 0);
if (check_zero && !x)
return -EINVAL;
*cl_field = x;
*info_field = x;
return len;
}
#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
.attr = { .ca_name = __stringify(_name), \
.ca_mode = _mode, \
.ca_owner = THIS_MODULE }, \
.show = _read, \
.store = _write, \
}
#define CLUSTER_ATTR(name, check_zero) \
static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
{ \
return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
check_zero, buf, len); \
} \
static ssize_t name##_read(struct cluster *cl, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
} \
static struct cluster_attribute cluster_attr_##name = \
__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
CLUSTER_ATTR(tcp_port, 1);
CLUSTER_ATTR(buffer_size, 1);
CLUSTER_ATTR(rsbtbl_size, 1);
CLUSTER_ATTR(lkbtbl_size, 1);
CLUSTER_ATTR(dirtbl_size, 1);
CLUSTER_ATTR(recover_timer, 1);
CLUSTER_ATTR(toss_secs, 1);
CLUSTER_ATTR(scan_secs, 1);
CLUSTER_ATTR(log_debug, 0);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
[CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
NULL,
};
enum {
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
COMM_ATTR_ADDR,
};
struct comm_attribute {
struct configfs_attribute attr;
ssize_t (*show)(struct comm *, char *);
ssize_t (*store)(struct comm *, const char *, size_t);
};
static struct comm_attribute comm_attr_nodeid = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "nodeid",
.ca_mode = S_IRUGO | S_IWUSR },
.show = comm_nodeid_read,
.store = comm_nodeid_write,
};
static struct comm_attribute comm_attr_local = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "local",
.ca_mode = S_IRUGO | S_IWUSR },
.show = comm_local_read,
.store = comm_local_write,
};
static struct comm_attribute comm_attr_addr = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "addr",
.ca_mode = S_IRUGO | S_IWUSR },
.store = comm_addr_write,
};
static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_NODEID] = &comm_attr_nodeid.attr,
[COMM_ATTR_LOCAL] = &comm_attr_local.attr,
[COMM_ATTR_ADDR] = &comm_attr_addr.attr,
NULL,
};
enum {
NODE_ATTR_NODEID = 0,
NODE_ATTR_WEIGHT,
};
struct node_attribute {
struct configfs_attribute attr;
ssize_t (*show)(struct node *, char *);
ssize_t (*store)(struct node *, const char *, size_t);
};
static struct node_attribute node_attr_nodeid = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "nodeid",
.ca_mode = S_IRUGO | S_IWUSR },
.show = node_nodeid_read,
.store = node_nodeid_write,
};
static struct node_attribute node_attr_weight = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "weight",
.ca_mode = S_IRUGO | S_IWUSR },
.show = node_weight_read,
.store = node_weight_write,
};
static struct configfs_attribute *node_attrs[] = {
[NODE_ATTR_NODEID] = &node_attr_nodeid.attr,
[NODE_ATTR_WEIGHT] = &node_attr_weight.attr,
NULL,
};
struct clusters {
struct configfs_subsystem subsys;
};
struct spaces {
struct config_group ss_group;
};
struct space {
struct config_group group;
struct list_head members;
struct mutex members_lock;
int members_count;
};
struct comms {
struct config_group cs_group;
};
struct comm {
struct config_item item;
int nodeid;
int local;
int addr_count;
struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT];
};
struct nodes {
struct config_group ns_group;
};
struct node {
struct config_item item;
struct list_head list; /* space->members */
int nodeid;
int weight;
};
static struct configfs_group_operations clusters_ops = {
.make_group = make_cluster,
.drop_item = drop_cluster,
};
static struct configfs_item_operations cluster_ops = {
.release = release_cluster,
.show_attribute = show_cluster,
.store_attribute = store_cluster,
};
static struct configfs_group_operations spaces_ops = {
.make_group = make_space,
.drop_item = drop_space,
};
static struct configfs_item_operations space_ops = {
.release = release_space,
};
static struct configfs_group_operations comms_ops = {
.make_item = make_comm,
.drop_item = drop_comm,
};
static struct configfs_item_operations comm_ops = {
.release = release_comm,
.show_attribute = show_comm,
.store_attribute = store_comm,
};
static struct configfs_group_operations nodes_ops = {
.make_item = make_node,
.drop_item = drop_node,
};
static struct configfs_item_operations node_ops = {
.release = release_node,
.show_attribute = show_node,
.store_attribute = store_node,
};
static struct config_item_type clusters_type = {
.ct_group_ops = &clusters_ops,
.ct_owner = THIS_MODULE,
};
static struct config_item_type cluster_type = {
.ct_item_ops = &cluster_ops,
.ct_attrs = cluster_attrs,
.ct_owner = THIS_MODULE,
};
static struct config_item_type spaces_type = {
.ct_group_ops = &spaces_ops,
.ct_owner = THIS_MODULE,
};
static struct config_item_type space_type = {
.ct_item_ops = &space_ops,
.ct_owner = THIS_MODULE,
};
static struct config_item_type comms_type = {
.ct_group_ops = &comms_ops,
.ct_owner = THIS_MODULE,
};
static struct config_item_type comm_type = {
.ct_item_ops = &comm_ops,
.ct_attrs = comm_attrs,
.ct_owner = THIS_MODULE,
};
static struct config_item_type nodes_type = {
.ct_group_ops = &nodes_ops,
.ct_owner = THIS_MODULE,
};
static struct config_item_type node_type = {
.ct_item_ops = &node_ops,
.ct_attrs = node_attrs,
.ct_owner = THIS_MODULE,
};
static struct cluster *to_cluster(struct config_item *i)
{
return i ? container_of(to_config_group(i), struct cluster, group):NULL;
}
static struct space *to_space(struct config_item *i)
{
return i ? container_of(to_config_group(i), struct space, group) : NULL;
}
static struct comm *to_comm(struct config_item *i)
{
return i ? container_of(i, struct comm, item) : NULL;
}
static struct node *to_node(struct config_item *i)
{
return i ? container_of(i, struct node, item) : NULL;
}
static struct config_group *make_cluster(struct config_group *g,
const char *name)
{
struct cluster *cl = NULL;
struct spaces *sps = NULL;
struct comms *cms = NULL;
void *gps = NULL;
cl = kzalloc(sizeof(struct cluster), GFP_KERNEL);
gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
sps = kzalloc(sizeof(struct spaces), GFP_KERNEL);
cms = kzalloc(sizeof(struct comms), GFP_KERNEL);
if (!cl || !gps || !sps || !cms)
goto fail;
config_group_init_type_name(&cl->group, name, &cluster_type);
config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type);
config_group_init_type_name(&cms->cs_group, "comms", &comms_type);
cl->group.default_groups = gps;
cl->group.default_groups[0] = &sps->ss_group;
cl->group.default_groups[1] = &cms->cs_group;
cl->group.default_groups[2] = NULL;
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
cl->cl_recover_timer = dlm_config.ci_recover_timer;
cl->cl_toss_secs = dlm_config.ci_toss_secs;
cl->cl_scan_secs = dlm_config.ci_scan_secs;
cl->cl_log_debug = dlm_config.ci_log_debug;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
return &cl->group;
fail:
kfree(cl);
kfree(gps);
kfree(sps);
kfree(cms);
return NULL;
}
static void drop_cluster(struct config_group *g, struct config_item *i)
{
struct cluster *cl = to_cluster(i);
struct config_item *tmp;
int j;
for (j = 0; cl->group.default_groups[j]; j++) {
tmp = &cl->group.default_groups[j]->cg_item;
cl->group.default_groups[j] = NULL;
config_item_put(tmp);
}
space_list = NULL;
comm_list = NULL;
config_item_put(i);
}
static void release_cluster(struct config_item *i)
{
struct cluster *cl = to_cluster(i);
kfree(cl->group.default_groups);
kfree(cl);
}
static struct config_group *make_space(struct config_group *g, const char *name)
{
struct space *sp = NULL;
struct nodes *nds = NULL;
void *gps = NULL;
sp = kzalloc(sizeof(struct space), GFP_KERNEL);
gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL);
nds = kzalloc(sizeof(struct nodes), GFP_KERNEL);
if (!sp || !gps || !nds)
goto fail;
config_group_init_type_name(&sp->group, name, &space_type);
config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type);
sp->group.default_groups = gps;
sp->group.default_groups[0] = &nds->ns_group;
sp->group.default_groups[1] = NULL;
INIT_LIST_HEAD(&sp->members);
mutex_init(&sp->members_lock);
sp->members_count = 0;
return &sp->group;
fail:
kfree(sp);
kfree(gps);
kfree(nds);
return NULL;
}
static void drop_space(struct config_group *g, struct config_item *i)
{
struct space *sp = to_space(i);
struct config_item *tmp;
int j;
/* assert list_empty(&sp->members) */
for (j = 0; sp->group.default_groups[j]; j++) {
tmp = &sp->group.default_groups[j]->cg_item;
sp->group.default_groups[j] = NULL;
config_item_put(tmp);
}
config_item_put(i);
}
static void release_space(struct config_item *i)
{
struct space *sp = to_space(i);
kfree(sp->group.default_groups);
kfree(sp);
}
static struct config_item *make_comm(struct config_group *g, const char *name)
{
struct comm *cm;
cm = kzalloc(sizeof(struct comm), GFP_KERNEL);
if (!cm)
return NULL;
config_item_init_type_name(&cm->item, name, &comm_type);
cm->nodeid = -1;
cm->local = 0;
cm->addr_count = 0;
return &cm->item;
}
static void drop_comm(struct config_group *g, struct config_item *i)
{
struct comm *cm = to_comm(i);
if (local_comm == cm)
local_comm = NULL;
dlm_lowcomms_close(cm->nodeid);
while (cm->addr_count--)
kfree(cm->addr[cm->addr_count]);
config_item_put(i);
}
static void release_comm(struct config_item *i)
{
struct comm *cm = to_comm(i);
kfree(cm);
}
static struct config_item *make_node(struct config_group *g, const char *name)
{
struct space *sp = to_space(g->cg_item.ci_parent);
struct node *nd;
nd = kzalloc(sizeof(struct node), GFP_KERNEL);
if (!nd)
return NULL;
config_item_init_type_name(&nd->item, name, &node_type);
nd->nodeid = -1;
nd->weight = 1; /* default weight of 1 if none is set */
mutex_lock(&sp->members_lock);
list_add(&nd->list, &sp->members);
sp->members_count++;
mutex_unlock(&sp->members_lock);
return &nd->item;
}
static void drop_node(struct config_group *g, struct config_item *i)
{
struct space *sp = to_space(g->cg_item.ci_parent);
struct node *nd = to_node(i);
mutex_lock(&sp->members_lock);
list_del(&nd->list);
sp->members_count--;
mutex_unlock(&sp->members_lock);
config_item_put(i);
}
static void release_node(struct config_item *i)
{
struct node *nd = to_node(i);
kfree(nd);
}
static struct clusters clusters_root = {
.subsys = {
.su_group = {
.cg_item = {
.ci_namebuf = "dlm",
.ci_type = &clusters_type,
},
},
},
};
int dlm_config_init(void)
{
config_group_init(&clusters_root.subsys.su_group);
init_MUTEX(&clusters_root.subsys.su_sem);
return configfs_register_subsystem(&clusters_root.subsys);
}
void dlm_config_exit(void)
{
configfs_unregister_subsystem(&clusters_root.subsys);
}
/*
* Functions for user space to read/write attributes
*/
static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
struct cluster *cl = to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->show ? cla->show(cl, buf) : 0;
}
static ssize_t store_cluster(struct config_item *i,
struct configfs_attribute *a,
const char *buf, size_t len)
{
struct cluster *cl = to_cluster(i);
struct cluster_attribute *cla =
container_of(a, struct cluster_attribute, attr);
return cla->store ? cla->store(cl, buf, len) : -EINVAL;
}
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
struct comm *cm = to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->show ? cma->show(cm, buf) : 0;
}
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
struct comm *cm = to_comm(i);
struct comm_attribute *cma =
container_of(a, struct comm_attribute, attr);
return cma->store ? cma->store(cm, buf, len) : -EINVAL;
}
static ssize_t comm_nodeid_read(struct comm *cm, char *buf)
{
return sprintf(buf, "%d\n", cm->nodeid);
}
static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len)
{
cm->nodeid = simple_strtol(buf, NULL, 0);
return len;
}
static ssize_t comm_local_read(struct comm *cm, char *buf)
{
return sprintf(buf, "%d\n", cm->local);
}
static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len)
{
cm->local= simple_strtol(buf, NULL, 0);
if (cm->local && !local_comm)
local_comm = cm;
return len;
}
static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len)
{
struct sockaddr_storage *addr;
if (len != sizeof(struct sockaddr_storage))
return -EINVAL;
if (cm->addr_count >= DLM_MAX_ADDR_COUNT)
return -ENOSPC;
addr = kzalloc(sizeof(*addr), GFP_KERNEL);
if (!addr)
return -ENOMEM;
memcpy(addr, buf, len);
cm->addr[cm->addr_count++] = addr;
return len;
}
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
struct node *nd = to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->show ? nda->show(nd, buf) : 0;
}
static ssize_t store_node(struct config_item *i, struct configfs_attribute *a,
const char *buf, size_t len)
{
struct node *nd = to_node(i);
struct node_attribute *nda =
container_of(a, struct node_attribute, attr);
return nda->store ? nda->store(nd, buf, len) : -EINVAL;
}
static ssize_t node_nodeid_read(struct node *nd, char *buf)
{
return sprintf(buf, "%d\n", nd->nodeid);
}
static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len)
{
nd->nodeid = simple_strtol(buf, NULL, 0);
return len;
}
static ssize_t node_weight_read(struct node *nd, char *buf)
{
return sprintf(buf, "%d\n", nd->weight);
}
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
{
nd->weight = simple_strtol(buf, NULL, 0);
return len;
}
/*
* Functions for the dlm to get the info that's been configured
*/
static struct space *get_space(char *name)
{
if (!space_list)
return NULL;
return to_space(config_group_find_obj(space_list, name));
}
static void put_space(struct space *sp)
{
config_item_put(&sp->group.cg_item);
}
static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
{
struct config_item *i;
struct comm *cm = NULL;
int found = 0;
if (!comm_list)
return NULL;
down(&clusters_root.subsys.su_sem);
list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
cm = to_comm(i);
if (nodeid) {
if (cm->nodeid != nodeid)
continue;
found = 1;
break;
} else {
if (!cm->addr_count ||
memcmp(cm->addr[0], addr, sizeof(*addr)))
continue;
found = 1;
break;
}
}
up(&clusters_root.subsys.su_sem);
if (found)
config_item_get(i);
else
cm = NULL;
return cm;
}
static void put_comm(struct comm *cm)
{
config_item_put(&cm->item);
}
/* caller must free mem */
int dlm_nodeid_list(char *lsname, int **ids_out)
{
struct space *sp;
struct node *nd;
int i = 0, rv = 0;
int *ids;
sp = get_space(lsname);
if (!sp)
return -EEXIST;
mutex_lock(&sp->members_lock);
if (!sp->members_count) {
rv = 0;
goto out;
}
ids = kcalloc(sp->members_count, sizeof(int), GFP_KERNEL);
if (!ids) {
rv = -ENOMEM;
goto out;
}
rv = sp->members_count;
list_for_each_entry(nd, &sp->members, list)
ids[i++] = nd->nodeid;
if (rv != i)
printk("bad nodeid count %d %d\n", rv, i);
*ids_out = ids;
out:
mutex_unlock(&sp->members_lock);
put_space(sp);
return rv;
}
int dlm_node_weight(char *lsname, int nodeid)
{
struct space *sp;
struct node *nd;
int w = -EEXIST;
sp = get_space(lsname);
if (!sp)
goto out;
mutex_lock(&sp->members_lock);
list_for_each_entry(nd, &sp->members, list) {
if (nd->nodeid != nodeid)
continue;
w = nd->weight;
break;
}
mutex_unlock(&sp->members_lock);
put_space(sp);
out:
return w;
}
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr)
{
struct comm *cm = get_comm(nodeid, NULL);
if (!cm)
return -EEXIST;
if (!cm->addr_count)
return -ENOENT;
memcpy(addr, cm->addr[0], sizeof(*addr));
put_comm(cm);
return 0;
}
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid)
{
struct comm *cm = get_comm(0, addr);
if (!cm)
return -EEXIST;
*nodeid = cm->nodeid;
put_comm(cm);
return 0;
}
int dlm_our_nodeid(void)
{
return local_comm ? local_comm->nodeid : 0;
}
/* num 0 is first addr, num 1 is second addr */
int dlm_our_addr(struct sockaddr_storage *addr, int num)
{
if (!local_comm)
return -1;
if (num + 1 > local_comm->addr_count)
return -1;
memcpy(addr, local_comm->addr[num], sizeof(*addr));
return 0;
}
/* Config file defaults */
#define DEFAULT_TCP_PORT 21064
#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 256
#define DEFAULT_LKBTBL_SIZE 1024
#define DEFAULT_DIRTBL_SIZE 512
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5
#define DEFAULT_LOG_DEBUG 0
struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
.ci_buffer_size = DEFAULT_BUFFER_SIZE,
.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
.ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
.ci_recover_timer = DEFAULT_RECOVER_TIMER,
.ci_toss_secs = DEFAULT_TOSS_SECS,
.ci_scan_secs = DEFAULT_SCAN_SECS,
.ci_log_debug = DEFAULT_LOG_DEBUG
};

43
fs/dlm/config.h Normal file
View File

@@ -0,0 +1,43 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __CONFIG_DOT_H__
#define __CONFIG_DOT_H__
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
int ci_tcp_port;
int ci_buffer_size;
int ci_rsbtbl_size;
int ci_lkbtbl_size;
int ci_dirtbl_size;
int ci_recover_timer;
int ci_toss_secs;
int ci_scan_secs;
int ci_log_debug;
};
extern struct dlm_config_info dlm_config;
int dlm_config_init(void);
void dlm_config_exit(void);
int dlm_node_weight(char *lsname, int nodeid);
int dlm_nodeid_list(char *lsname, int **ids_out);
int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr);
int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid);
int dlm_our_nodeid(void);
int dlm_our_addr(struct sockaddr_storage *addr, int num);
#endif /* __CONFIG_DOT_H__ */

387
fs/dlm/debug_fs.c Normal file
View File

@@ -0,0 +1,387 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include <linux/pagemap.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include "dlm_internal.h"
#define DLM_DEBUG_BUF_LEN 4096
static char debug_buf[DLM_DEBUG_BUF_LEN];
static struct mutex debug_buf_lock;
static struct dentry *dlm_root;
struct rsb_iter {
int entry;
struct dlm_ls *ls;
struct list_head *next;
struct dlm_rsb *rsb;
};
/*
* dump all rsb's in the lockspace hash table
*/
static char *print_lockmode(int mode)
{
switch (mode) {
case DLM_LOCK_IV:
return "--";
case DLM_LOCK_NL:
return "NL";
case DLM_LOCK_CR:
return "CR";
case DLM_LOCK_CW:
return "CW";
case DLM_LOCK_PR:
return "PR";
case DLM_LOCK_PW:
return "PW";
case DLM_LOCK_EX:
return "EX";
default:
return "??";
}
}
static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
struct dlm_rsb *res)
{
seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
if (lkb->lkb_status == DLM_LKSTS_CONVERT
|| lkb->lkb_status == DLM_LKSTS_WAITING)
seq_printf(s, " (%s)", print_lockmode(lkb->lkb_rqmode));
if (lkb->lkb_nodeid) {
if (lkb->lkb_nodeid != res->res_nodeid)
seq_printf(s, " Remote: %3d %08x", lkb->lkb_nodeid,
lkb->lkb_remid);
else
seq_printf(s, " Master: %08x", lkb->lkb_remid);
}
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
seq_printf(s, "\n");
}
static int print_resource(struct dlm_rsb *res, struct seq_file *s)
{
struct dlm_lkb *lkb;
int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
for (i = 0; i < res->res_length; i++) {
if (isprint(res->res_name[i]))
seq_printf(s, "%c", res->res_name[i]);
else
seq_printf(s, "%c", '.');
}
if (res->res_nodeid > 0)
seq_printf(s, "\" \nLocal Copy, Master is node %d\n",
res->res_nodeid);
else if (res->res_nodeid == 0)
seq_printf(s, "\" \nMaster Copy\n");
else if (res->res_nodeid == -1)
seq_printf(s, "\" \nLooking up master (lkid %x)\n",
res->res_first_lkid);
else
seq_printf(s, "\" \nInvalid master %d\n", res->res_nodeid);
/* Print the LVB: */
if (res->res_lvbptr) {
seq_printf(s, "LVB: ");
for (i = 0; i < lvblen; i++) {
if (i == lvblen / 2)
seq_printf(s, "\n ");
seq_printf(s, "%02x ",
(unsigned char) res->res_lvbptr[i]);
}
if (rsb_flag(res, RSB_VALNOTVALID))
seq_printf(s, " (INVALID)");
seq_printf(s, "\n");
}
root_list = !list_empty(&res->res_root_list);
recover_list = !list_empty(&res->res_recover_list);
if (root_list || recover_list) {
seq_printf(s, "Recovery: root %d recover %d flags %lx "
"count %d\n", root_list, recover_list,
res->res_flags, res->res_recover_locks_count);
}
/* Print the locks attached to this resource */
seq_printf(s, "Granted Queue\n");
list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
print_lock(s, lkb, res);
seq_printf(s, "Conversion Queue\n");
list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
print_lock(s, lkb, res);
seq_printf(s, "Waiting Queue\n");
list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
print_lock(s, lkb, res);
if (list_empty(&res->res_lookup))
goto out;
seq_printf(s, "Lookup Queue\n");
list_for_each_entry(lkb, &res->res_lookup, lkb_rsb_lookup) {
seq_printf(s, "%08x %s", lkb->lkb_id,
print_lockmode(lkb->lkb_rqmode));
if (lkb->lkb_wait_type)
seq_printf(s, " wait_type: %d", lkb->lkb_wait_type);
seq_printf(s, "\n");
}
out:
return 0;
}
static int rsb_iter_next(struct rsb_iter *ri)
{
struct dlm_ls *ls = ri->ls;
int i;
if (!ri->next) {
top:
/* Find the next non-empty hash bucket */
for (i = ri->entry; i < ls->ls_rsbtbl_size; i++) {
read_lock(&ls->ls_rsbtbl[i].lock);
if (!list_empty(&ls->ls_rsbtbl[i].list)) {
ri->next = ls->ls_rsbtbl[i].list.next;
read_unlock(&ls->ls_rsbtbl[i].lock);
break;
}
read_unlock(&ls->ls_rsbtbl[i].lock);
}
ri->entry = i;
if (ri->entry >= ls->ls_rsbtbl_size)
return 1;
} else {
i = ri->entry;
read_lock(&ls->ls_rsbtbl[i].lock);
ri->next = ri->next->next;
if (ri->next->next == ls->ls_rsbtbl[i].list.next) {
/* End of list - move to next bucket */
ri->next = NULL;
ri->entry++;
read_unlock(&ls->ls_rsbtbl[i].lock);
goto top;
}
read_unlock(&ls->ls_rsbtbl[i].lock);
}
ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
return 0;
}
static void rsb_iter_free(struct rsb_iter *ri)
{
kfree(ri);
}
static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
{
struct rsb_iter *ri;
ri = kmalloc(sizeof *ri, GFP_KERNEL);
if (!ri)
return NULL;
ri->ls = ls;
ri->entry = 0;
ri->next = NULL;
if (rsb_iter_next(ri)) {
rsb_iter_free(ri);
return NULL;
}
return ri;
}
static void *rsb_seq_start(struct seq_file *file, loff_t *pos)
{
struct rsb_iter *ri;
loff_t n = *pos;
ri = rsb_iter_init(file->private);
if (!ri)
return NULL;
while (n--) {
if (rsb_iter_next(ri)) {
rsb_iter_free(ri);
return NULL;
}
}
return ri;
}
static void *rsb_seq_next(struct seq_file *file, void *iter_ptr, loff_t *pos)
{
struct rsb_iter *ri = iter_ptr;
(*pos)++;
if (rsb_iter_next(ri)) {
rsb_iter_free(ri);
return NULL;
}
return ri;
}
static void rsb_seq_stop(struct seq_file *file, void *iter_ptr)
{
/* nothing for now */
}
static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
{
struct rsb_iter *ri = iter_ptr;
print_resource(ri->rsb, file);
return 0;
}
static struct seq_operations rsb_seq_ops = {
.start = rsb_seq_start,
.next = rsb_seq_next,
.stop = rsb_seq_stop,
.show = rsb_seq_show,
};
static int rsb_open(struct inode *inode, struct file *file)
{
struct seq_file *seq;
int ret;
ret = seq_open(file, &rsb_seq_ops);
if (ret)
return ret;
seq = file->private_data;
seq->private = inode->i_private;
return 0;
}
static const struct file_operations rsb_fops = {
.owner = THIS_MODULE,
.open = rsb_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
/*
* dump lkb's on the ls_waiters list
*/
static int waiters_open(struct inode *inode, struct file *file)
{
file->private_data = inode->i_private;
return 0;
}
static ssize_t waiters_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
struct dlm_ls *ls = file->private_data;
struct dlm_lkb *lkb;
size_t len = DLM_DEBUG_BUF_LEN, pos = 0, ret, rv;
mutex_lock(&debug_buf_lock);
mutex_lock(&ls->ls_waiters_mutex);
memset(debug_buf, 0, sizeof(debug_buf));
list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
ret = snprintf(debug_buf + pos, len - pos, "%x %d %d %s\n",
lkb->lkb_id, lkb->lkb_wait_type,
lkb->lkb_nodeid, lkb->lkb_resource->res_name);
if (ret >= len - pos)
break;
pos += ret;
}
mutex_unlock(&ls->ls_waiters_mutex);
rv = simple_read_from_buffer(userbuf, count, ppos, debug_buf, pos);
mutex_unlock(&debug_buf_lock);
return rv;
}
static const struct file_operations waiters_fops = {
.owner = THIS_MODULE,
.open = waiters_open,
.read = waiters_read
};
int dlm_create_debug_file(struct dlm_ls *ls)
{
char name[DLM_LOCKSPACE_LEN+8];
ls->ls_debug_rsb_dentry = debugfs_create_file(ls->ls_name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&rsb_fops);
if (!ls->ls_debug_rsb_dentry)
return -ENOMEM;
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_waiters", ls->ls_name);
ls->ls_debug_waiters_dentry = debugfs_create_file(name,
S_IFREG | S_IRUGO,
dlm_root,
ls,
&waiters_fops);
if (!ls->ls_debug_waiters_dentry) {
debugfs_remove(ls->ls_debug_rsb_dentry);
return -ENOMEM;
}
return 0;
}
void dlm_delete_debug_file(struct dlm_ls *ls)
{
if (ls->ls_debug_rsb_dentry)
debugfs_remove(ls->ls_debug_rsb_dentry);
if (ls->ls_debug_waiters_dentry)
debugfs_remove(ls->ls_debug_waiters_dentry);
}
int dlm_register_debugfs(void)
{
mutex_init(&debug_buf_lock);
dlm_root = debugfs_create_dir("dlm", NULL);
return dlm_root ? 0 : -ENOMEM;
}
void dlm_unregister_debugfs(void)
{
debugfs_remove(dlm_root);
}

423
fs/dlm/dir.c Normal file
View File

@@ -0,0 +1,423 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "lowcomms.h"
#include "rcom.h"
#include "config.h"
#include "memory.h"
#include "recover.h"
#include "util.h"
#include "lock.h"
#include "dir.h"
static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de)
{
spin_lock(&ls->ls_recover_list_lock);
list_add(&de->list, &ls->ls_recover_list);
spin_unlock(&ls->ls_recover_list_lock);
}
static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
{
int found = 0;
struct dlm_direntry *de;
spin_lock(&ls->ls_recover_list_lock);
list_for_each_entry(de, &ls->ls_recover_list, list) {
if (de->length == len) {
list_del(&de->list);
de->master_nodeid = 0;
memset(de->name, 0, len);
found = 1;
break;
}
}
spin_unlock(&ls->ls_recover_list_lock);
if (!found)
de = allocate_direntry(ls, len);
return de;
}
void dlm_clear_free_entries(struct dlm_ls *ls)
{
struct dlm_direntry *de;
spin_lock(&ls->ls_recover_list_lock);
while (!list_empty(&ls->ls_recover_list)) {
de = list_entry(ls->ls_recover_list.next, struct dlm_direntry,
list);
list_del(&de->list);
free_direntry(de);
}
spin_unlock(&ls->ls_recover_list_lock);
}
/*
* We use the upper 16 bits of the hash value to select the directory node.
* Low bits are used for distribution of rsb's among hash buckets on each node.
*
* To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
* num_nodes to the hash value. This value in the desired range is used as an
* offset into the sorted list of nodeid's to give the particular nodeid.
*/
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
{
struct list_head *tmp;
struct dlm_member *memb = NULL;
uint32_t node, n = 0;
int nodeid;
if (ls->ls_num_nodes == 1) {
nodeid = dlm_our_nodeid();
goto out;
}
if (ls->ls_node_array) {
node = (hash >> 16) % ls->ls_total_weight;
nodeid = ls->ls_node_array[node];
goto out;
}
/* make_member_array() failed to kmalloc ls_node_array... */
node = (hash >> 16) % ls->ls_num_nodes;
list_for_each(tmp, &ls->ls_nodes) {
if (n++ != node)
continue;
memb = list_entry(tmp, struct dlm_member, list);
break;
}
DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n",
ls->ls_num_nodes, n, node););
nodeid = memb->nodeid;
out:
return nodeid;
}
int dlm_dir_nodeid(struct dlm_rsb *r)
{
return dlm_hash2nodeid(r->res_ls, r->res_hash);
}
static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len)
{
uint32_t val;
val = jhash(name, len, 0);
val &= (ls->ls_dirtbl_size - 1);
return val;
}
static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de)
{
uint32_t bucket;
bucket = dir_hash(ls, de->name, de->length);
list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
}
static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name,
int namelen, uint32_t bucket)
{
struct dlm_direntry *de;
list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) {
if (de->length == namelen && !memcmp(name, de->name, namelen))
goto out;
}
de = NULL;
out:
return de;
}
void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen)
{
struct dlm_direntry *de;
uint32_t bucket;
bucket = dir_hash(ls, name, namelen);
write_lock(&ls->ls_dirtbl[bucket].lock);
de = search_bucket(ls, name, namelen, bucket);
if (!de) {
log_error(ls, "remove fr %u none", nodeid);
goto out;
}
if (de->master_nodeid != nodeid) {
log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid);
goto out;
}
list_del(&de->list);
free_direntry(de);
out:
write_unlock(&ls->ls_dirtbl[bucket].lock);
}
void dlm_dir_clear(struct dlm_ls *ls)
{
struct list_head *head;
struct dlm_direntry *de;
int i;
DLM_ASSERT(list_empty(&ls->ls_recover_list), );
for (i = 0; i < ls->ls_dirtbl_size; i++) {
write_lock(&ls->ls_dirtbl[i].lock);
head = &ls->ls_dirtbl[i].list;
while (!list_empty(head)) {
de = list_entry(head->next, struct dlm_direntry, list);
list_del(&de->list);
put_free_de(ls, de);
}
write_unlock(&ls->ls_dirtbl[i].lock);
}
}
int dlm_recover_directory(struct dlm_ls *ls)
{
struct dlm_member *memb;
struct dlm_direntry *de;
char *b, *last_name = NULL;
int error = -ENOMEM, last_len, count = 0;
uint16_t namelen;
log_debug(ls, "dlm_recover_directory");
if (dlm_no_directory(ls))
goto out_status;
dlm_dir_clear(ls);
last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
if (!last_name)
goto out;
list_for_each_entry(memb, &ls->ls_nodes, list) {
memset(last_name, 0, DLM_RESNAME_MAXLEN);
last_len = 0;
for (;;) {
error = dlm_recovery_stopped(ls);
if (error)
goto out_free;
error = dlm_rcom_names(ls, memb->nodeid,
last_name, last_len);
if (error)
goto out_free;
schedule();
/*
* pick namelen/name pairs out of received buffer
*/
b = ls->ls_recover_buf + sizeof(struct dlm_rcom);
for (;;) {
memcpy(&namelen, b, sizeof(uint16_t));
namelen = be16_to_cpu(namelen);
b += sizeof(uint16_t);
/* namelen of 0xFFFFF marks end of names for
this node; namelen of 0 marks end of the
buffer */
if (namelen == 0xFFFF)
goto done;
if (!namelen)
break;
error = -ENOMEM;
de = get_free_de(ls, namelen);
if (!de)
goto out_free;
de->master_nodeid = memb->nodeid;
de->length = namelen;
last_len = namelen;
memcpy(de->name, b, namelen);
memcpy(last_name, b, namelen);
b += namelen;
add_entry_to_hash(ls, de);
count++;
}
}
done:
;
}
out_status:
error = 0;
dlm_set_recover_status(ls, DLM_RS_DIR);
log_debug(ls, "dlm_recover_directory %d entries", count);
out_free:
kfree(last_name);
out:
dlm_clear_free_entries(ls);
return error;
}
static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
int namelen, int *r_nodeid)
{
struct dlm_direntry *de, *tmp;
uint32_t bucket;
bucket = dir_hash(ls, name, namelen);
write_lock(&ls->ls_dirtbl[bucket].lock);
de = search_bucket(ls, name, namelen, bucket);
if (de) {
*r_nodeid = de->master_nodeid;
write_unlock(&ls->ls_dirtbl[bucket].lock);
if (*r_nodeid == nodeid)
return -EEXIST;
return 0;
}
write_unlock(&ls->ls_dirtbl[bucket].lock);
de = allocate_direntry(ls, namelen);
if (!de)
return -ENOMEM;
de->master_nodeid = nodeid;
de->length = namelen;
memcpy(de->name, name, namelen);
write_lock(&ls->ls_dirtbl[bucket].lock);
tmp = search_bucket(ls, name, namelen, bucket);
if (tmp) {
free_direntry(de);
de = tmp;
} else {
list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list);
}
*r_nodeid = de->master_nodeid;
write_unlock(&ls->ls_dirtbl[bucket].lock);
return 0;
}
int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
int *r_nodeid)
{
return get_entry(ls, nodeid, name, namelen, r_nodeid);
}
/* Copy the names of master rsb's into the buffer provided.
Only select names whose dir node is the given nodeid. */
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
char *outbuf, int outlen, int nodeid)
{
struct list_head *list;
struct dlm_rsb *start_r = NULL, *r = NULL;
int offset = 0, start_namelen, error, dir_nodeid;
char *start_name;
uint16_t be_namelen;
/*
* Find the rsb where we left off (or start again)
*/
start_namelen = inlen;
start_name = inbuf;
if (start_namelen > 1) {
/*
* We could also use a find_rsb_root() function here that
* searched the ls_root_list.
*/
error = dlm_find_rsb(ls, start_name, start_namelen, R_MASTER,
&start_r);
DLM_ASSERT(!error && start_r,
printk("error %d\n", error););
DLM_ASSERT(!list_empty(&start_r->res_root_list),
dlm_print_rsb(start_r););
dlm_put_rsb(start_r);
}
/*
* Send rsb names for rsb's we're master of and whose directory node
* matches the requesting node.
*/
down_read(&ls->ls_root_sem);
if (start_r)
list = start_r->res_root_list.next;
else
list = ls->ls_root_list.next;
for (offset = 0; list != &ls->ls_root_list; list = list->next) {
r = list_entry(list, struct dlm_rsb, res_root_list);
if (r->res_nodeid)
continue;
dir_nodeid = dlm_dir_nodeid(r);
if (dir_nodeid != nodeid)
continue;
/*
* The block ends when we can't fit the following in the
* remaining buffer space:
* namelen (uint16_t) +
* name (r->res_length) +
* end-of-block record 0x0000 (uint16_t)
*/
if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
/* Write end-of-block record */
be_namelen = 0;
memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
offset += sizeof(uint16_t);
goto out;
}
be_namelen = cpu_to_be16(r->res_length);
memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
offset += sizeof(uint16_t);
memcpy(outbuf + offset, r->res_name, r->res_length);
offset += r->res_length;
}
/*
* If we've reached the end of the list (and there's room) write a
* terminating record.
*/
if ((list == &ls->ls_root_list) &&
(offset + sizeof(uint16_t) <= outlen)) {
be_namelen = 0xFFFF;
memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t));
offset += sizeof(uint16_t);
}
out:
up_read(&ls->ls_root_sem);
}

30
fs/dlm/dir.h Normal file
View File

@@ -0,0 +1,30 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__
int dlm_dir_nodeid(struct dlm_rsb *rsb);
int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash);
void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int len);
void dlm_dir_clear(struct dlm_ls *ls);
void dlm_clear_free_entries(struct dlm_ls *ls);
int dlm_recover_directory(struct dlm_ls *ls);
int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
int *r_nodeid);
void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
char *outbuf, int outlen, int nodeid);
#endif /* __DIR_DOT_H__ */

549
fs/dlm/dlm_internal.h Normal file
View File

@@ -0,0 +1,549 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __DLM_INTERNAL_DOT_H__
#define __DLM_INTERNAL_DOT_H__
/*
* This is the main header file to be included in each DLM source file.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/socket.h>
#include <linux/kthread.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include "config.h"
#define DLM_LOCKSPACE_LEN 64
/* Size of the temp buffer midcomms allocates on the stack.
We try to make this large enough so most messages fit.
FIXME: should sctp make this unnecessary? */
#define DLM_INBUF_LEN 148
struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
struct dlm_lkbtable;
struct dlm_rsbtable;
struct dlm_dirtable;
struct dlm_direntry;
struct dlm_recover;
struct dlm_header;
struct dlm_message;
struct dlm_rcom;
struct dlm_mhandle;
#define log_print(fmt, args...) \
printk(KERN_ERR "dlm: "fmt"\n" , ##args)
#define log_error(ls, fmt, args...) \
printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
#define log_debug(ls, fmt, args...) \
do { \
if (dlm_config.ci_log_debug) \
printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
(ls)->ls_name , ##args); \
} while (0)
#define DLM_ASSERT(x, do) \
{ \
if (!(x)) \
{ \
printk(KERN_ERR "\nDLM: Assertion failed on line %d of file %s\n" \
"DLM: assertion: \"%s\"\n" \
"DLM: time = %lu\n", \
__LINE__, __FILE__, #x, jiffies); \
{do} \
printk("\n"); \
BUG(); \
panic("DLM: Record message above and reboot.\n"); \
} \
}
#define DLM_FAKE_USER_AST ERR_PTR(-EINVAL)
struct dlm_direntry {
struct list_head list;
uint32_t master_nodeid;
uint16_t length;
char name[1];
};
struct dlm_dirtable {
struct list_head list;
rwlock_t lock;
};
struct dlm_rsbtable {
struct list_head list;
struct list_head toss;
rwlock_t lock;
};
struct dlm_lkbtable {
struct list_head list;
rwlock_t lock;
uint16_t counter;
};
/*
* Lockspace member (per node in a ls)
*/
struct dlm_member {
struct list_head list;
int nodeid;
int weight;
};
/*
* Save and manage recovery state for a lockspace.
*/
struct dlm_recover {
struct list_head list;
int *nodeids;
int node_count;
uint64_t seq;
};
/*
* Pass input args to second stage locking function.
*/
struct dlm_args {
uint32_t flags;
void *astaddr;
long astparam;
void *bastaddr;
int mode;
struct dlm_lksb *lksb;
};
/*
* Lock block
*
* A lock can be one of three types:
*
* local copy lock is mastered locally
* (lkb_nodeid is zero and DLM_LKF_MSTCPY is not set)
* process copy lock is mastered on a remote node
* (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is not set)
* master copy master node's copy of a lock owned by remote node
* (lkb_nodeid is non-zero and DLM_LKF_MSTCPY is set)
*
* lkb_exflags: a copy of the most recent flags arg provided to dlm_lock or
* dlm_unlock. The dlm does not modify these or use any private flags in
* this field; it only contains DLM_LKF_ flags from dlm.h. These flags
* are sent as-is to the remote master when the lock is remote.
*
* lkb_flags: internal dlm flags (DLM_IFL_ prefix) from dlm_internal.h.
* Some internal flags are shared between the master and process nodes;
* these shared flags are kept in the lower two bytes. One of these
* flags set on the master copy will be propagated to the process copy
* and v.v. Other internal flags are private to the master or process
* node (e.g. DLM_IFL_MSTCPY). These are kept in the high two bytes.
*
* lkb_sbflags: status block flags. These flags are copied directly into
* the caller's lksb.sb_flags prior to the dlm_lock/dlm_unlock completion
* ast. All defined in dlm.h with DLM_SBF_ prefix.
*
* lkb_status: the lock status indicates which rsb queue the lock is
* on, grant, convert, or wait. DLM_LKSTS_ WAITING/GRANTED/CONVERT
*
* lkb_wait_type: the dlm message type (DLM_MSG_ prefix) for which a
* reply is needed. Only set when the lkb is on the lockspace waiters
* list awaiting a reply from a remote node.
*
* lkb_nodeid: when the lkb is a local copy, nodeid is 0; when the lkb
* is a master copy, nodeid specifies the remote lock holder, when the
* lkb is a process copy, the nodeid specifies the lock master.
*/
/* lkb_ast_type */
#define AST_COMP 1
#define AST_BAST 2
/* lkb_status */
#define DLM_LKSTS_WAITING 1
#define DLM_LKSTS_GRANTED 2
#define DLM_LKSTS_CONVERT 3
/* lkb_flags */
#define DLM_IFL_MSTCPY 0x00010000
#define DLM_IFL_RESEND 0x00020000
#define DLM_IFL_DEAD 0x00040000
#define DLM_IFL_USER 0x00000001
#define DLM_IFL_ORPHAN 0x00000002
struct dlm_lkb {
struct dlm_rsb *lkb_resource; /* the rsb */
struct kref lkb_ref;
int lkb_nodeid; /* copied from rsb */
int lkb_ownpid; /* pid of lock owner */
uint32_t lkb_id; /* our lock ID */
uint32_t lkb_remid; /* lock ID on remote partner */
uint32_t lkb_exflags; /* external flags from caller */
uint32_t lkb_sbflags; /* lksb flags */
uint32_t lkb_flags; /* internal flags */
uint32_t lkb_lvbseq; /* lvb sequence number */
int8_t lkb_status; /* granted, waiting, convert */
int8_t lkb_rqmode; /* requested lock mode */
int8_t lkb_grmode; /* granted lock mode */
int8_t lkb_bastmode; /* requested mode */
int8_t lkb_highbast; /* highest mode bast sent for */
int8_t lkb_wait_type; /* type of reply waiting for */
int8_t lkb_ast_type; /* type of ast queued for */
struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
struct list_head lkb_statequeue; /* rsb g/c/w list */
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
struct list_head lkb_astqueue; /* need ast to be sent */
struct list_head lkb_ownqueue; /* list of locks for a process */
char *lkb_lvbptr;
struct dlm_lksb *lkb_lksb; /* caller's status block */
void *lkb_astaddr; /* caller's ast function */
void *lkb_bastaddr; /* caller's bast function */
long lkb_astparam; /* caller's ast arg */
};
struct dlm_rsb {
struct dlm_ls *res_ls; /* the lockspace */
struct kref res_ref;
struct mutex res_mutex;
unsigned long res_flags;
int res_length; /* length of rsb name */
int res_nodeid;
uint32_t res_lvbseq;
uint32_t res_hash;
uint32_t res_bucket; /* rsbtbl */
unsigned long res_toss_time;
uint32_t res_first_lkid;
struct list_head res_lookup; /* lkbs waiting on first */
struct list_head res_hashchain; /* rsbtbl */
struct list_head res_grantqueue;
struct list_head res_convertqueue;
struct list_head res_waitqueue;
struct list_head res_root_list; /* used for recovery */
struct list_head res_recover_list; /* used for recovery */
int res_recover_locks_count;
char *res_lvbptr;
char res_name[1];
};
/* find_rsb() flags */
#define R_MASTER 1 /* only return rsb if it's a master */
#define R_CREATE 2 /* create/add rsb if not found */
/* rsb_flags */
enum rsb_flags {
RSB_MASTER_UNCERTAIN,
RSB_VALNOTVALID,
RSB_VALNOTVALID_PREV,
RSB_NEW_MASTER,
RSB_NEW_MASTER2,
RSB_RECOVER_CONVERT,
RSB_LOCKS_PURGED,
};
static inline void rsb_set_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
__set_bit(flag, &r->res_flags);
}
static inline void rsb_clear_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
__clear_bit(flag, &r->res_flags);
}
static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
{
return test_bit(flag, &r->res_flags);
}
/* dlm_header is first element of all structs sent between nodes */
#define DLM_HEADER_MAJOR 0x00030000
#define DLM_HEADER_MINOR 0x00000000
#define DLM_MSG 1
#define DLM_RCOM 2
struct dlm_header {
uint32_t h_version;
uint32_t h_lockspace;
uint32_t h_nodeid; /* nodeid of sender */
uint16_t h_length;
uint8_t h_cmd; /* DLM_MSG, DLM_RCOM */
uint8_t h_pad;
};
#define DLM_MSG_REQUEST 1
#define DLM_MSG_CONVERT 2
#define DLM_MSG_UNLOCK 3
#define DLM_MSG_CANCEL 4
#define DLM_MSG_REQUEST_REPLY 5
#define DLM_MSG_CONVERT_REPLY 6
#define DLM_MSG_UNLOCK_REPLY 7
#define DLM_MSG_CANCEL_REPLY 8
#define DLM_MSG_GRANT 9
#define DLM_MSG_BAST 10
#define DLM_MSG_LOOKUP 11
#define DLM_MSG_REMOVE 12
#define DLM_MSG_LOOKUP_REPLY 13
struct dlm_message {
struct dlm_header m_header;
uint32_t m_type; /* DLM_MSG_ */
uint32_t m_nodeid;
uint32_t m_pid;
uint32_t m_lkid; /* lkid on sender */
uint32_t m_remid; /* lkid on receiver */
uint32_t m_parent_lkid;
uint32_t m_parent_remid;
uint32_t m_exflags;
uint32_t m_sbflags;
uint32_t m_flags;
uint32_t m_lvbseq;
uint32_t m_hash;
int m_status;
int m_grmode;
int m_rqmode;
int m_bastmode;
int m_asts;
int m_result; /* 0 or -EXXX */
char m_extra[0]; /* name or lvb */
};
#define DLM_RS_NODES 0x00000001
#define DLM_RS_NODES_ALL 0x00000002
#define DLM_RS_DIR 0x00000004
#define DLM_RS_DIR_ALL 0x00000008
#define DLM_RS_LOCKS 0x00000010
#define DLM_RS_LOCKS_ALL 0x00000020
#define DLM_RS_DONE 0x00000040
#define DLM_RS_DONE_ALL 0x00000080
#define DLM_RCOM_STATUS 1
#define DLM_RCOM_NAMES 2
#define DLM_RCOM_LOOKUP 3
#define DLM_RCOM_LOCK 4
#define DLM_RCOM_STATUS_REPLY 5
#define DLM_RCOM_NAMES_REPLY 6
#define DLM_RCOM_LOOKUP_REPLY 7
#define DLM_RCOM_LOCK_REPLY 8
struct dlm_rcom {
struct dlm_header rc_header;
uint32_t rc_type; /* DLM_RCOM_ */
int rc_result; /* multi-purpose */
uint64_t rc_id; /* match reply with request */
uint64_t rc_seq; /* sender's ls_recover_seq */
uint64_t rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[0];
};
struct rcom_config {
uint32_t rf_lvblen;
uint32_t rf_lsflags;
uint64_t rf_unused;
};
struct rcom_lock {
uint32_t rl_ownpid;
uint32_t rl_lkid;
uint32_t rl_remid;
uint32_t rl_parent_lkid;
uint32_t rl_parent_remid;
uint32_t rl_exflags;
uint32_t rl_flags;
uint32_t rl_lvbseq;
int rl_result;
int8_t rl_rqmode;
int8_t rl_grmode;
int8_t rl_status;
int8_t rl_asts;
uint16_t rl_wait_type;
uint16_t rl_namelen;
char rl_name[DLM_RESNAME_MAXLEN];
char rl_lvb[0];
};
struct dlm_ls {
struct list_head ls_list; /* list of lockspaces */
dlm_lockspace_t *ls_local_handle;
uint32_t ls_global_id; /* global unique lockspace ID */
uint32_t ls_exflags;
int ls_lvblen;
int ls_count; /* reference count */
unsigned long ls_flags; /* LSFL_ */
struct kobject ls_kobj;
struct dlm_rsbtable *ls_rsbtbl;
uint32_t ls_rsbtbl_size;
struct dlm_lkbtable *ls_lkbtbl;
uint32_t ls_lkbtbl_size;
struct dlm_dirtable *ls_dirtbl;
uint32_t ls_dirtbl_size;
struct mutex ls_waiters_mutex;
struct list_head ls_waiters; /* lkbs needing a reply */
struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */
int ls_low_nodeid;
int ls_total_weight;
int *ls_node_array;
struct dlm_rsb ls_stub_rsb; /* for returning errors */
struct dlm_lkb ls_stub_lkb; /* for returning errors */
struct dlm_message ls_stub_ms; /* for faking a reply */
struct dentry *ls_debug_rsb_dentry; /* debugfs */
struct dentry *ls_debug_waiters_dentry; /* debugfs */
wait_queue_head_t ls_uevent_wait; /* user part of join/leave */
int ls_uevent_result;
struct miscdevice ls_device;
/* recovery related */
struct timer_list ls_timer;
struct task_struct *ls_recoverd_task;
struct mutex ls_recoverd_active;
spinlock_t ls_recover_lock;
uint32_t ls_recover_status; /* DLM_RS_ */
uint64_t ls_recover_seq;
struct dlm_recover *ls_recover_args;
struct rw_semaphore ls_in_recovery; /* block local requests */
struct list_head ls_requestqueue;/* queue remote requests */
struct mutex ls_requestqueue_mutex;
char *ls_recover_buf;
int ls_recover_nodeid; /* for debugging */
uint64_t ls_rcom_seq;
spinlock_t ls_rcom_spin;
struct list_head ls_recover_list;
spinlock_t ls_recover_list_lock;
int ls_recover_list_count;
wait_queue_head_t ls_wait_general;
struct mutex ls_clear_proc_locks;
struct list_head ls_root_list; /* root resources */
struct rw_semaphore ls_root_sem; /* protect root_list */
int ls_namelen;
char ls_name[1];
};
#define LSFL_WORK 0
#define LSFL_RUNNING 1
#define LSFL_RECOVERY_STOP 2
#define LSFL_RCOM_READY 3
#define LSFL_RCOM_WAIT 4
#define LSFL_UEVENT_WAIT 5
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
struct dlm_user_args {
struct dlm_user_proc *proc; /* each process that opens the lockspace
device has private data
(dlm_user_proc) on the struct file,
the process's locks point back to it*/
struct dlm_lksb lksb;
int old_mode;
int update_user_lvb;
struct dlm_lksb __user *user_lksb;
void __user *castparam;
void __user *castaddr;
void __user *bastparam;
void __user *bastaddr;
};
#define DLM_PROC_FLAGS_CLOSING 1
#define DLM_PROC_FLAGS_COMPAT 2
/* locks list is kept so we can remove all a process's locks when it
exits (or orphan those that are persistent) */
struct dlm_user_proc {
dlm_lockspace_t *lockspace;
unsigned long flags; /* DLM_PROC_FLAGS */
struct list_head asts;
spinlock_t asts_spin;
struct list_head locks;
spinlock_t locks_spin;
struct list_head unlocking;
wait_queue_head_t wait;
};
static inline int dlm_locking_stopped(struct dlm_ls *ls)
{
return !test_bit(LSFL_RUNNING, &ls->ls_flags);
}
static inline int dlm_recovery_stopped(struct dlm_ls *ls)
{
return test_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
}
static inline int dlm_no_directory(struct dlm_ls *ls)
{
return (ls->ls_exflags & DLM_LSFL_NODIR) ? 1 : 0;
}
#endif /* __DLM_INTERNAL_DOT_H__ */

3918
fs/dlm/lock.c Normal file

File diff suppressed because it is too large Load Diff

62
fs/dlm/lock.h Normal file
View File

@@ -0,0 +1,62 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LOCK_DOT_H__
#define __LOCK_DOT_H__
void dlm_print_rsb(struct dlm_rsb *r);
void dlm_dump_rsb(struct dlm_rsb *r);
void dlm_print_lkb(struct dlm_lkb *lkb);
int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery);
int dlm_modes_compat(int mode1, int mode2);
int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen,
unsigned int flags, struct dlm_rsb **r_ret);
void dlm_put_rsb(struct dlm_rsb *r);
void dlm_hold_rsb(struct dlm_rsb *r);
int dlm_put_lkb(struct dlm_lkb *lkb);
void dlm_scan_rsbs(struct dlm_ls *ls);
int dlm_purge_locks(struct dlm_ls *ls);
void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
void dlm_grant_after_purge(struct dlm_ls *ls);
int dlm_recover_waiters_post(struct dlm_ls *ls);
void dlm_recover_waiters_pre(struct dlm_ls *ls);
int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid, char *lvb_in);
int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
uint32_t flags, uint32_t lkid);
void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
static inline int is_master(struct dlm_rsb *r)
{
return !r->res_nodeid;
}
static inline void lock_rsb(struct dlm_rsb *r)
{
mutex_lock(&r->res_mutex);
}
static inline void unlock_rsb(struct dlm_rsb *r)
{
mutex_unlock(&r->res_mutex);
}
#endif

733
fs/dlm/lockspace.c Normal file
View File

@@ -0,0 +1,733 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "ast.h"
#include "dir.h"
#include "lowcomms.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "recover.h"
#include "requestqueue.h"
#ifdef CONFIG_DLM_DEBUG
int dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif
static int ls_count;
static struct mutex ls_lock;
static struct list_head lslist;
static spinlock_t lslist_lock;
static struct task_struct * scand_task;
static ssize_t dlm_control_store(struct dlm_ls *ls, const char *buf, size_t len)
{
ssize_t ret = len;
int n = simple_strtol(buf, NULL, 0);
ls = dlm_find_lockspace_local(ls->ls_local_handle);
if (!ls)
return -EINVAL;
switch (n) {
case 0:
dlm_ls_stop(ls);
break;
case 1:
dlm_ls_start(ls);
break;
default:
ret = -EINVAL;
}
dlm_put_lockspace(ls);
return ret;
}
static ssize_t dlm_event_store(struct dlm_ls *ls, const char *buf, size_t len)
{
ls->ls_uevent_result = simple_strtol(buf, NULL, 0);
set_bit(LSFL_UEVENT_WAIT, &ls->ls_flags);
wake_up(&ls->ls_uevent_wait);
return len;
}
static ssize_t dlm_id_show(struct dlm_ls *ls, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u\n", ls->ls_global_id);
}
static ssize_t dlm_id_store(struct dlm_ls *ls, const char *buf, size_t len)
{
ls->ls_global_id = simple_strtoul(buf, NULL, 0);
return len;
}
static ssize_t dlm_recover_status_show(struct dlm_ls *ls, char *buf)
{
uint32_t status = dlm_recover_status(ls);
return snprintf(buf, PAGE_SIZE, "%x\n", status);
}
static ssize_t dlm_recover_nodeid_show(struct dlm_ls *ls, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n", ls->ls_recover_nodeid);
}
struct dlm_attr {
struct attribute attr;
ssize_t (*show)(struct dlm_ls *, char *);
ssize_t (*store)(struct dlm_ls *, const char *, size_t);
};
static struct dlm_attr dlm_attr_control = {
.attr = {.name = "control", .mode = S_IWUSR},
.store = dlm_control_store
};
static struct dlm_attr dlm_attr_event = {
.attr = {.name = "event_done", .mode = S_IWUSR},
.store = dlm_event_store
};
static struct dlm_attr dlm_attr_id = {
.attr = {.name = "id", .mode = S_IRUGO | S_IWUSR},
.show = dlm_id_show,
.store = dlm_id_store
};
static struct dlm_attr dlm_attr_recover_status = {
.attr = {.name = "recover_status", .mode = S_IRUGO},
.show = dlm_recover_status_show
};
static struct dlm_attr dlm_attr_recover_nodeid = {
.attr = {.name = "recover_nodeid", .mode = S_IRUGO},
.show = dlm_recover_nodeid_show
};
static struct attribute *dlm_attrs[] = {
&dlm_attr_control.attr,
&dlm_attr_event.attr,
&dlm_attr_id.attr,
&dlm_attr_recover_status.attr,
&dlm_attr_recover_nodeid.attr,
NULL,
};
static ssize_t dlm_attr_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
return a->show ? a->show(ls, buf) : 0;
}
static ssize_t dlm_attr_store(struct kobject *kobj, struct attribute *attr,
const char *buf, size_t len)
{
struct dlm_ls *ls = container_of(kobj, struct dlm_ls, ls_kobj);
struct dlm_attr *a = container_of(attr, struct dlm_attr, attr);
return a->store ? a->store(ls, buf, len) : len;
}
static void lockspace_kobj_release(struct kobject *k)
{
struct dlm_ls *ls = container_of(k, struct dlm_ls, ls_kobj);
kfree(ls);
}
static struct sysfs_ops dlm_attr_ops = {
.show = dlm_attr_show,
.store = dlm_attr_store,
};
static struct kobj_type dlm_ktype = {
.default_attrs = dlm_attrs,
.sysfs_ops = &dlm_attr_ops,
.release = lockspace_kobj_release,
};
static struct kset dlm_kset = {
.subsys = &kernel_subsys,
.kobj = {.name = "dlm",},
.ktype = &dlm_ktype,
};
static int kobject_setup(struct dlm_ls *ls)
{
char lsname[DLM_LOCKSPACE_LEN];
int error;
memset(lsname, 0, DLM_LOCKSPACE_LEN);
snprintf(lsname, DLM_LOCKSPACE_LEN, "%s", ls->ls_name);
error = kobject_set_name(&ls->ls_kobj, "%s", lsname);
if (error)
return error;
ls->ls_kobj.kset = &dlm_kset;
ls->ls_kobj.ktype = &dlm_ktype;
return 0;
}
static int do_uevent(struct dlm_ls *ls, int in)
{
int error;
if (in)
kobject_uevent(&ls->ls_kobj, KOBJ_ONLINE);
else
kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
error = wait_event_interruptible(ls->ls_uevent_wait,
test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
if (error)
goto out;
error = ls->ls_uevent_result;
out:
return error;
}
int dlm_lockspace_init(void)
{
int error;
ls_count = 0;
mutex_init(&ls_lock);
INIT_LIST_HEAD(&lslist);
spin_lock_init(&lslist_lock);
error = kset_register(&dlm_kset);
if (error)
printk("dlm_lockspace_init: cannot register kset %d\n", error);
return error;
}
void dlm_lockspace_exit(void)
{
kset_unregister(&dlm_kset);
}
static int dlm_scand(void *data)
{
struct dlm_ls *ls;
while (!kthread_should_stop()) {
list_for_each_entry(ls, &lslist, ls_list)
dlm_scan_rsbs(ls);
schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
static int dlm_scand_start(void)
{
struct task_struct *p;
int error = 0;
p = kthread_run(dlm_scand, NULL, "dlm_scand");
if (IS_ERR(p))
error = PTR_ERR(p);
else
scand_task = p;
return error;
}
static void dlm_scand_stop(void)
{
kthread_stop(scand_task);
}
static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_namelen == namelen &&
memcmp(ls->ls_name, name, namelen) == 0)
goto out;
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_global_id == id) {
ls->ls_count++;
goto out;
}
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_local_handle == lockspace) {
ls->ls_count++;
goto out;
}
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
struct dlm_ls *dlm_find_lockspace_device(int minor)
{
struct dlm_ls *ls;
spin_lock(&lslist_lock);
list_for_each_entry(ls, &lslist, ls_list) {
if (ls->ls_device.minor == minor) {
ls->ls_count++;
goto out;
}
}
ls = NULL;
out:
spin_unlock(&lslist_lock);
return ls;
}
void dlm_put_lockspace(struct dlm_ls *ls)
{
spin_lock(&lslist_lock);
ls->ls_count--;
spin_unlock(&lslist_lock);
}
static void remove_lockspace(struct dlm_ls *ls)
{
for (;;) {
spin_lock(&lslist_lock);
if (ls->ls_count == 0) {
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
return;
}
spin_unlock(&lslist_lock);
ssleep(1);
}
}
static int threads_start(void)
{
int error;
/* Thread which process lock requests for all lockspace's */
error = dlm_astd_start();
if (error) {
log_print("cannot start dlm_astd thread %d", error);
goto fail;
}
error = dlm_scand_start();
if (error) {
log_print("cannot start dlm_scand thread %d", error);
goto astd_fail;
}
/* Thread for sending/receiving messages for all lockspace's */
error = dlm_lowcomms_start();
if (error) {
log_print("cannot start dlm lowcomms %d", error);
goto scand_fail;
}
return 0;
scand_fail:
dlm_scand_stop();
astd_fail:
dlm_astd_stop();
fail:
return error;
}
static void threads_stop(void)
{
dlm_scand_stop();
dlm_lowcomms_stop();
dlm_astd_stop();
}
static int new_lockspace(char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
struct dlm_ls *ls;
int i, size, error = -ENOMEM;
if (namelen > DLM_LOCKSPACE_LEN)
return -EINVAL;
if (!lvblen || (lvblen % 8))
return -EINVAL;
if (!try_module_get(THIS_MODULE))
return -EINVAL;
ls = dlm_find_lockspace_name(name, namelen);
if (ls) {
*lockspace = ls;
module_put(THIS_MODULE);
return -EEXIST;
}
ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
if (!ls)
goto out;
memcpy(ls->ls_name, name, namelen);
ls->ls_namelen = namelen;
ls->ls_exflags = flags;
ls->ls_lvblen = lvblen;
ls->ls_count = 0;
ls->ls_flags = 0;
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
if (!ls->ls_rsbtbl)
goto out_lsfree;
for (i = 0; i < size; i++) {
INIT_LIST_HEAD(&ls->ls_rsbtbl[i].list);
INIT_LIST_HEAD(&ls->ls_rsbtbl[i].toss);
rwlock_init(&ls->ls_rsbtbl[i].lock);
}
size = dlm_config.ci_lkbtbl_size;
ls->ls_lkbtbl_size = size;
ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
if (!ls->ls_lkbtbl)
goto out_rsbfree;
for (i = 0; i < size; i++) {
INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
rwlock_init(&ls->ls_lkbtbl[i].lock);
ls->ls_lkbtbl[i].counter = 1;
}
size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
if (!ls->ls_dirtbl)
goto out_lkbfree;
for (i = 0; i < size; i++) {
INIT_LIST_HEAD(&ls->ls_dirtbl[i].list);
rwlock_init(&ls->ls_dirtbl[i].lock);
}
INIT_LIST_HEAD(&ls->ls_waiters);
mutex_init(&ls->ls_waiters_mutex);
INIT_LIST_HEAD(&ls->ls_nodes);
INIT_LIST_HEAD(&ls->ls_nodes_gone);
ls->ls_num_nodes = 0;
ls->ls_low_nodeid = 0;
ls->ls_total_weight = 0;
ls->ls_node_array = NULL;
memset(&ls->ls_stub_rsb, 0, sizeof(struct dlm_rsb));
ls->ls_stub_rsb.res_ls = ls;
ls->ls_debug_rsb_dentry = NULL;
ls->ls_debug_waiters_dentry = NULL;
init_waitqueue_head(&ls->ls_uevent_wait);
ls->ls_uevent_result = 0;
ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active);
spin_lock_init(&ls->ls_recover_lock);
spin_lock_init(&ls->ls_rcom_spin);
get_random_bytes(&ls->ls_rcom_seq, sizeof(uint64_t));
ls->ls_recover_status = 0;
ls->ls_recover_seq = 0;
ls->ls_recover_args = NULL;
init_rwsem(&ls->ls_in_recovery);
INIT_LIST_HEAD(&ls->ls_requestqueue);
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (!ls->ls_recover_buf)
goto out_dirfree;
INIT_LIST_HEAD(&ls->ls_recover_list);
spin_lock_init(&ls->ls_recover_list_lock);
ls->ls_recover_list_count = 0;
ls->ls_local_handle = ls;
init_waitqueue_head(&ls->ls_wait_general);
INIT_LIST_HEAD(&ls->ls_root_list);
init_rwsem(&ls->ls_root_sem);
down_write(&ls->ls_in_recovery);
spin_lock(&lslist_lock);
list_add(&ls->ls_list, &lslist);
spin_unlock(&lslist_lock);
/* needs to find ls in lslist */
error = dlm_recoverd_start(ls);
if (error) {
log_error(ls, "can't start dlm_recoverd %d", error);
goto out_rcomfree;
}
dlm_create_debug_file(ls);
error = kobject_setup(ls);
if (error)
goto out_del;
error = kobject_register(&ls->ls_kobj);
if (error)
goto out_del;
error = do_uevent(ls, 1);
if (error)
goto out_unreg;
*lockspace = ls;
return 0;
out_unreg:
kobject_unregister(&ls->ls_kobj);
out_del:
dlm_delete_debug_file(ls);
dlm_recoverd_stop(ls);
out_rcomfree:
spin_lock(&lslist_lock);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
kfree(ls->ls_recover_buf);
out_dirfree:
kfree(ls->ls_dirtbl);
out_lkbfree:
kfree(ls->ls_lkbtbl);
out_rsbfree:
kfree(ls->ls_rsbtbl);
out_lsfree:
kfree(ls);
out:
module_put(THIS_MODULE);
return error;
}
int dlm_new_lockspace(char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
int error = 0;
mutex_lock(&ls_lock);
if (!ls_count)
error = threads_start();
if (error)
goto out;
error = new_lockspace(name, namelen, lockspace, flags, lvblen);
if (!error)
ls_count++;
out:
mutex_unlock(&ls_lock);
return error;
}
/* Return 1 if the lockspace still has active remote locks,
* 2 if the lockspace still has active local locks.
*/
static int lockspace_busy(struct dlm_ls *ls)
{
int i, lkb_found = 0;
struct dlm_lkb *lkb;
/* NOTE: We check the lockidtbl here rather than the resource table.
This is because there may be LKBs queued as ASTs that have been
unlinked from their RSBs and are pending deletion once the AST has
been delivered */
for (i = 0; i < ls->ls_lkbtbl_size; i++) {
read_lock(&ls->ls_lkbtbl[i].lock);
if (!list_empty(&ls->ls_lkbtbl[i].list)) {
lkb_found = 1;
list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
lkb_idtbl_list) {
if (!lkb->lkb_nodeid) {
read_unlock(&ls->ls_lkbtbl[i].lock);
return 2;
}
}
}
read_unlock(&ls->ls_lkbtbl[i].lock);
}
return lkb_found;
}
static int release_lockspace(struct dlm_ls *ls, int force)
{
struct dlm_lkb *lkb;
struct dlm_rsb *rsb;
struct list_head *head;
int i;
int busy = lockspace_busy(ls);
if (busy > force)
return -EBUSY;
if (force < 3)
do_uevent(ls, 0);
dlm_recoverd_stop(ls);
remove_lockspace(ls);
dlm_delete_debug_file(ls);
dlm_astd_suspend();
kfree(ls->ls_recover_buf);
/*
* Free direntry structs.
*/
dlm_dir_clear(ls);
kfree(ls->ls_dirtbl);
/*
* Free all lkb's on lkbtbl[] lists.
*/
for (i = 0; i < ls->ls_lkbtbl_size; i++) {
head = &ls->ls_lkbtbl[i].list;
while (!list_empty(head)) {
lkb = list_entry(head->next, struct dlm_lkb,
lkb_idtbl_list);
list_del(&lkb->lkb_idtbl_list);
dlm_del_ast(lkb);
if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
free_lvb(lkb->lkb_lvbptr);
free_lkb(lkb);
}
}
dlm_astd_resume();
kfree(ls->ls_lkbtbl);
/*
* Free all rsb's on rsbtbl[] lists
*/
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
head = &ls->ls_rsbtbl[i].list;
while (!list_empty(head)) {
rsb = list_entry(head->next, struct dlm_rsb,
res_hashchain);
list_del(&rsb->res_hashchain);
free_rsb(rsb);
}
head = &ls->ls_rsbtbl[i].toss;
while (!list_empty(head)) {
rsb = list_entry(head->next, struct dlm_rsb,
res_hashchain);
list_del(&rsb->res_hashchain);
free_rsb(rsb);
}
}
kfree(ls->ls_rsbtbl);
/*
* Free structures on any other lists
*/
dlm_purge_requestqueue(ls);
kfree(ls->ls_recover_args);
dlm_clear_free_entries(ls);
dlm_clear_members(ls);
dlm_clear_members_gone(ls);
kfree(ls->ls_node_array);
kobject_unregister(&ls->ls_kobj);
/* The ls structure will be freed when the kobject is done with */
mutex_lock(&ls_lock);
ls_count--;
if (!ls_count)
threads_stop();
mutex_unlock(&ls_lock);
module_put(THIS_MODULE);
return 0;
}
/*
* Called when a system has released all its locks and is not going to use the
* lockspace any longer. We free everything we're managing for this lockspace.
* Remaining nodes will go through the recovery process as if we'd died. The
* lockspace must continue to function as usual, participating in recoveries,
* until this returns.
*
* Force has 4 possible values:
* 0 - don't destroy locksapce if it has any LKBs
* 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
* 2 - destroy lockspace regardless of LKBs
* 3 - destroy lockspace as part of a forced shutdown
*/
int dlm_release_lockspace(void *lockspace, int force)
{
struct dlm_ls *ls;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -EINVAL;
dlm_put_lockspace(ls);
return release_lockspace(ls, force);
}

25
fs/dlm/lockspace.h Normal file
View File

@@ -0,0 +1,25 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LOCKSPACE_DOT_H__
#define __LOCKSPACE_DOT_H__
int dlm_lockspace_init(void);
void dlm_lockspace_exit(void);
struct dlm_ls *dlm_find_lockspace_global(uint32_t id);
struct dlm_ls *dlm_find_lockspace_local(void *id);
struct dlm_ls *dlm_find_lockspace_device(int minor);
void dlm_put_lockspace(struct dlm_ls *ls);
#endif /* __LOCKSPACE_DOT_H__ */

1210
fs/dlm/lowcomms-sctp.c Normal file

File diff suppressed because it is too large Load Diff

1007
fs/dlm/lowcomms-tcp.c Normal file

File diff suppressed because it is too large Load Diff

24
fs/dlm/lowcomms.h Normal file
View File

@@ -0,0 +1,24 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LOWCOMMS_DOT_H__
#define __LOWCOMMS_DOT_H__
int dlm_lowcomms_start(void);
void dlm_lowcomms_stop(void);
int dlm_lowcomms_close(int nodeid);
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh);
#endif /* __LOWCOMMS_DOT_H__ */

18
fs/dlm/lvb_table.h Normal file
View File

@@ -0,0 +1,18 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __LVB_TABLE_DOT_H__
#define __LVB_TABLE_DOT_H__
extern const int dlm_lvb_operations[8][8];
#endif

89
fs/dlm/main.c Normal file
View File

@@ -0,0 +1,89 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
#include "user.h"
#include "memory.h"
#include "config.h"
#ifdef CONFIG_DLM_DEBUG
int dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
#else
static inline int dlm_register_debugfs(void) { return 0; }
static inline void dlm_unregister_debugfs(void) { }
#endif
static int __init init_dlm(void)
{
int error;
error = dlm_memory_init();
if (error)
goto out;
error = dlm_lockspace_init();
if (error)
goto out_mem;
error = dlm_config_init();
if (error)
goto out_lockspace;
error = dlm_register_debugfs();
if (error)
goto out_config;
error = dlm_user_init();
if (error)
goto out_debug;
printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
out_debug:
dlm_unregister_debugfs();
out_config:
dlm_config_exit();
out_lockspace:
dlm_lockspace_exit();
out_mem:
dlm_memory_exit();
out:
return error;
}
static void __exit exit_dlm(void)
{
dlm_user_exit();
dlm_config_exit();
dlm_memory_exit();
dlm_lockspace_exit();
dlm_unregister_debugfs();
}
module_init(init_dlm);
module_exit(exit_dlm);
MODULE_DESCRIPTION("Distributed Lock Manager");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
EXPORT_SYMBOL_GPL(dlm_new_lockspace);
EXPORT_SYMBOL_GPL(dlm_release_lockspace);
EXPORT_SYMBOL_GPL(dlm_lock);
EXPORT_SYMBOL_GPL(dlm_unlock);

335
fs/dlm/member.c Normal file
View File

@@ -0,0 +1,335 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
#include "recover.h"
#include "rcom.h"
#include "config.h"
/*
* Following called by dlm_recoverd thread
*/
static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
{
struct dlm_member *memb = NULL;
struct list_head *tmp;
struct list_head *newlist = &new->list;
struct list_head *head = &ls->ls_nodes;
list_for_each(tmp, head) {
memb = list_entry(tmp, struct dlm_member, list);
if (new->nodeid < memb->nodeid)
break;
}
if (!memb)
list_add_tail(newlist, head);
else {
/* FIXME: can use list macro here */
newlist->prev = tmp->prev;
newlist->next = tmp;
tmp->prev->next = newlist;
tmp->prev = newlist;
}
}
static int dlm_add_member(struct dlm_ls *ls, int nodeid)
{
struct dlm_member *memb;
int w;
memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
if (!memb)
return -ENOMEM;
w = dlm_node_weight(ls->ls_name, nodeid);
if (w < 0)
return w;
memb->nodeid = nodeid;
memb->weight = w;
add_ordered_member(ls, memb);
ls->ls_num_nodes++;
return 0;
}
static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
{
list_move(&memb->list, &ls->ls_nodes_gone);
ls->ls_num_nodes--;
}
static int dlm_is_member(struct dlm_ls *ls, int nodeid)
{
struct dlm_member *memb;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->nodeid == nodeid)
return 1;
}
return 0;
}
int dlm_is_removed(struct dlm_ls *ls, int nodeid)
{
struct dlm_member *memb;
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
if (memb->nodeid == nodeid)
return 1;
}
return 0;
}
static void clear_memb_list(struct list_head *head)
{
struct dlm_member *memb;
while (!list_empty(head)) {
memb = list_entry(head->next, struct dlm_member, list);
list_del(&memb->list);
kfree(memb);
}
}
void dlm_clear_members(struct dlm_ls *ls)
{
clear_memb_list(&ls->ls_nodes);
ls->ls_num_nodes = 0;
}
void dlm_clear_members_gone(struct dlm_ls *ls)
{
clear_memb_list(&ls->ls_nodes_gone);
}
static void make_member_array(struct dlm_ls *ls)
{
struct dlm_member *memb;
int i, w, x = 0, total = 0, all_zero = 0, *array;
kfree(ls->ls_node_array);
ls->ls_node_array = NULL;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (memb->weight)
total += memb->weight;
}
/* all nodes revert to weight of 1 if all have weight 0 */
if (!total) {
total = ls->ls_num_nodes;
all_zero = 1;
}
ls->ls_total_weight = total;
array = kmalloc(sizeof(int) * total, GFP_KERNEL);
if (!array)
return;
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (!all_zero && !memb->weight)
continue;
if (all_zero)
w = 1;
else
w = memb->weight;
DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););
for (i = 0; i < w; i++)
array[x++] = memb->nodeid;
}
ls->ls_node_array = array;
}
/* send a status request to all members just to establish comms connections */
static int ping_members(struct dlm_ls *ls)
{
struct dlm_member *memb;
int error = 0;
list_for_each_entry(memb, &ls->ls_nodes, list) {
error = dlm_recovery_stopped(ls);
if (error)
break;
error = dlm_rcom_status(ls, memb->nodeid);
if (error)
break;
}
if (error)
log_debug(ls, "ping_members aborted %d last nodeid %d",
error, ls->ls_recover_nodeid);
return error;
}
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
{
struct dlm_member *memb, *safe;
int i, error, found, pos = 0, neg = 0, low = -1;
/* previously removed members that we've not finished removing need to
count as a negative change so the "neg" recovery steps will happen */
list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
log_debug(ls, "prev removed member %d", memb->nodeid);
neg++;
}
/* move departed members from ls_nodes to ls_nodes_gone */
list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
found = 0;
for (i = 0; i < rv->node_count; i++) {
if (memb->nodeid == rv->nodeids[i]) {
found = 1;
break;
}
}
if (!found) {
neg++;
dlm_remove_member(ls, memb);
log_debug(ls, "remove member %d", memb->nodeid);
}
}
/* add new members to ls_nodes */
for (i = 0; i < rv->node_count; i++) {
if (dlm_is_member(ls, rv->nodeids[i]))
continue;
dlm_add_member(ls, rv->nodeids[i]);
pos++;
log_debug(ls, "add member %d", rv->nodeids[i]);
}
list_for_each_entry(memb, &ls->ls_nodes, list) {
if (low == -1 || memb->nodeid < low)
low = memb->nodeid;
}
ls->ls_low_nodeid = low;
make_member_array(ls);
dlm_set_recover_status(ls, DLM_RS_NODES);
*neg_out = neg;
error = ping_members(ls);
if (error)
goto out;
error = dlm_recover_members_wait(ls);
out:
log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
return error;
}
/*
* Following called from lockspace.c
*/
int dlm_ls_stop(struct dlm_ls *ls)
{
int new;
/*
* A stop cancels any recovery that's in progress (see RECOVERY_STOP,
* dlm_recovery_stopped()) and prevents any new locks from being
* processed (see RUNNING, dlm_locking_stopped()).
*/
spin_lock(&ls->ls_recover_lock);
set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
ls->ls_recover_seq++;
spin_unlock(&ls->ls_recover_lock);
/*
* This in_recovery lock does two things:
*
* 1) Keeps this function from returning until all threads are out
* of locking routines and locking is truely stopped.
* 2) Keeps any new requests from being processed until it's unlocked
* when recovery is complete.
*/
if (new)
down_write(&ls->ls_in_recovery);
/*
* The recoverd suspend/resume makes sure that dlm_recoverd (if
* running) has noticed the clearing of RUNNING above and quit
* processing the previous recovery. This will be true for all nodes
* before any nodes start the new recovery.
*/
dlm_recoverd_suspend(ls);
ls->ls_recover_status = 0;
dlm_recoverd_resume(ls);
return 0;
}
int dlm_ls_start(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL, *rv_old;
int *ids = NULL;
int error, count;
rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
if (!rv)
return -ENOMEM;
error = count = dlm_nodeid_list(ls->ls_name, &ids);
if (error <= 0)
goto fail;
spin_lock(&ls->ls_recover_lock);
/* the lockspace needs to be stopped before it can be started */
if (!dlm_locking_stopped(ls)) {
spin_unlock(&ls->ls_recover_lock);
log_error(ls, "start ignored: lockspace running");
error = -EINVAL;
goto fail;
}
rv->nodeids = ids;
rv->node_count = count;
rv->seq = ++ls->ls_recover_seq;
rv_old = ls->ls_recover_args;
ls->ls_recover_args = rv;
spin_unlock(&ls->ls_recover_lock);
if (rv_old) {
kfree(rv_old->nodeids);
kfree(rv_old);
}
dlm_recoverd_kick(ls);
return 0;
fail:
kfree(rv);
kfree(ids);
return error;
}

24
fs/dlm/member.h Normal file
View File

@@ -0,0 +1,24 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __MEMBER_DOT_H__
#define __MEMBER_DOT_H__
int dlm_ls_stop(struct dlm_ls *ls);
int dlm_ls_start(struct dlm_ls *ls);
void dlm_clear_members(struct dlm_ls *ls);
void dlm_clear_members_gone(struct dlm_ls *ls);
int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv,int *neg_out);
int dlm_is_removed(struct dlm_ls *ls, int nodeid);
#endif /* __MEMBER_DOT_H__ */

114
fs/dlm/memory.c Normal file
View File

@@ -0,0 +1,114 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "config.h"
#include "memory.h"
static struct kmem_cache *lkb_cache;
int dlm_memory_init(void)
{
int ret = 0;
lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
__alignof__(struct dlm_lkb), 0, NULL, NULL);
if (!lkb_cache)
ret = -ENOMEM;
return ret;
}
void dlm_memory_exit(void)
{
if (lkb_cache)
kmem_cache_destroy(lkb_cache);
}
char *allocate_lvb(struct dlm_ls *ls)
{
char *p;
p = kmalloc(ls->ls_lvblen, GFP_KERNEL);
if (p)
memset(p, 0, ls->ls_lvblen);
return p;
}
void free_lvb(char *p)
{
kfree(p);
}
/* FIXME: have some minimal space built-in to rsb for the name and
kmalloc a separate name if needed, like dentries are done */
struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen)
{
struct dlm_rsb *r;
DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
r = kmalloc(sizeof(*r) + namelen, GFP_KERNEL);
if (r)
memset(r, 0, sizeof(*r) + namelen);
return r;
}
void free_rsb(struct dlm_rsb *r)
{
if (r->res_lvbptr)
free_lvb(r->res_lvbptr);
kfree(r);
}
struct dlm_lkb *allocate_lkb(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
return lkb;
}
void free_lkb(struct dlm_lkb *lkb)
{
if (lkb->lkb_flags & DLM_IFL_USER) {
struct dlm_user_args *ua;
ua = (struct dlm_user_args *)lkb->lkb_astparam;
if (ua) {
if (ua->lksb.sb_lvbptr)
kfree(ua->lksb.sb_lvbptr);
kfree(ua);
}
}
kmem_cache_free(lkb_cache, lkb);
}
struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen)
{
struct dlm_direntry *de;
DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,
printk("namelen = %d\n", namelen););
de = kmalloc(sizeof(*de) + namelen, GFP_KERNEL);
if (de)
memset(de, 0, sizeof(*de) + namelen);
return de;
}
void free_direntry(struct dlm_direntry *de)
{
kfree(de);
}

29
fs/dlm/memory.h Normal file
View File

@@ -0,0 +1,29 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __MEMORY_DOT_H__
#define __MEMORY_DOT_H__
int dlm_memory_init(void);
void dlm_memory_exit(void);
struct dlm_rsb *allocate_rsb(struct dlm_ls *ls, int namelen);
void free_rsb(struct dlm_rsb *r);
struct dlm_lkb *allocate_lkb(struct dlm_ls *ls);
void free_lkb(struct dlm_lkb *l);
struct dlm_direntry *allocate_direntry(struct dlm_ls *ls, int namelen);
void free_direntry(struct dlm_direntry *de);
char *allocate_lvb(struct dlm_ls *ls);
void free_lvb(char *l);
#endif /* __MEMORY_DOT_H__ */

140
fs/dlm/midcomms.c Normal file
View File

@@ -0,0 +1,140 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
/*
* midcomms.c
*
* This is the appallingly named "mid-level" comms layer.
*
* Its purpose is to take packets from the "real" comms layer,
* split them up into packets and pass them to the interested
* part of the locking mechanism.
*
* It also takes messages from the locking layer, formats them
* into packets and sends them to the comms layer.
*/
#include "dlm_internal.h"
#include "lowcomms.h"
#include "config.h"
#include "rcom.h"
#include "lock.h"
#include "midcomms.h"
static void copy_from_cb(void *dst, const void *base, unsigned offset,
unsigned len, unsigned limit)
{
unsigned copy = len;
if ((copy + offset) > limit)
copy = limit - offset;
memcpy(dst, base + offset, copy);
len -= copy;
if (len)
memcpy(dst + copy, base, len);
}
/*
* Called from the low-level comms layer to process a buffer of
* commands.
*
* Only complete messages are processed here, any "spare" bytes from
* the end of a buffer are saved and tacked onto the front of the next
* message that comes in. I doubt this will happen very often but we
* need to be able to cope with it and I don't want the task to be waiting
* for packets to come in when there is useful work to be done.
*/
int dlm_process_incoming_buffer(int nodeid, const void *base,
unsigned offset, unsigned len, unsigned limit)
{
unsigned char __tmp[DLM_INBUF_LEN];
struct dlm_header *msg = (struct dlm_header *) __tmp;
int ret = 0;
int err = 0;
uint16_t msglen;
uint32_t lockspace;
while (len > sizeof(struct dlm_header)) {
/* Copy just the header to check the total length. The
message may wrap around the end of the buffer back to the
start, so we need to use a temp buffer and copy_from_cb. */
copy_from_cb(msg, base, offset, sizeof(struct dlm_header),
limit);
msglen = le16_to_cpu(msg->h_length);
lockspace = msg->h_lockspace;
err = -EINVAL;
if (msglen < sizeof(struct dlm_header))
break;
err = -E2BIG;
if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
}
err = 0;
/* If only part of the full message is contained in this
buffer, then do nothing and wait for lowcomms to call
us again later with more data. We return 0 meaning
we've consumed none of the input buffer. */
if (msglen > len)
break;
/* Allocate a larger temp buffer if the full message won't fit
in the buffer on the stack (which should work for most
ordinary messages). */
if (msglen > sizeof(__tmp) &&
msg == (struct dlm_header *) __tmp) {
msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (msg == NULL)
return ret;
}
copy_from_cb(msg, base, offset, msglen, limit);
BUG_ON(lockspace != msg->h_lockspace);
ret += msglen;
offset += msglen;
offset &= (limit - 1);
len -= msglen;
switch (msg->h_cmd) {
case DLM_MSG:
dlm_receive_message(msg, nodeid, 0);
break;
case DLM_RCOM:
dlm_receive_rcom(msg, nodeid);
break;
default:
log_print("unknown msg type %x from %u: %u %u %u %u",
msg->h_cmd, nodeid, msglen, len, offset, ret);
}
}
if (msg != (struct dlm_header *) __tmp)
kfree(msg);
return err ? err : ret;
}

21
fs/dlm/midcomms.h Normal file
View File

@@ -0,0 +1,21 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __MIDCOMMS_DOT_H__
#define __MIDCOMMS_DOT_H__
int dlm_process_incoming_buffer(int nodeid, const void *base, unsigned offset,
unsigned len, unsigned limit);
#endif /* __MIDCOMMS_DOT_H__ */

525
fs/dlm/rcom.c Normal file
View File

@@ -0,0 +1,525 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "lowcomms.h"
#include "midcomms.h"
#include "rcom.h"
#include "recover.h"
#include "dir.h"
#include "config.h"
#include "memory.h"
#include "lock.h"
#include "util.h"
static int rcom_response(struct dlm_ls *ls)
{
return test_bit(LSFL_RCOM_READY, &ls->ls_flags);
}
static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
struct dlm_rcom **rc_ret, struct dlm_mhandle **mh_ret)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
char *mb;
int mb_len = sizeof(struct dlm_rcom) + len;
mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
if (!mh) {
log_print("create_rcom to %d type %d len %d ENOBUFS",
to_nodeid, type, len);
return -ENOBUFS;
}
memset(mb, 0, mb_len);
rc = (struct dlm_rcom *) mb;
rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
rc->rc_header.h_lockspace = ls->ls_global_id;
rc->rc_header.h_nodeid = dlm_our_nodeid();
rc->rc_header.h_length = mb_len;
rc->rc_header.h_cmd = DLM_RCOM;
rc->rc_type = type;
spin_lock(&ls->ls_recover_lock);
rc->rc_seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
*mh_ret = mh;
*rc_ret = rc;
return 0;
}
static void send_rcom(struct dlm_ls *ls, struct dlm_mhandle *mh,
struct dlm_rcom *rc)
{
dlm_rcom_out(rc);
dlm_lowcomms_commit_buffer(mh);
}
/* When replying to a status request, a node also sends back its
configuration values. The requesting node then checks that the remote
node is configured the same way as itself. */
static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
{
rf->rf_lvblen = ls->ls_lvblen;
rf->rf_lsflags = ls->ls_exflags;
}
static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
log_error(ls, "version mismatch: %x nodeid %d: %x",
DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
rc->rc_header.h_version);
return -EINVAL;
}
if (rf->rf_lvblen != ls->ls_lvblen ||
rf->rf_lsflags != ls->ls_exflags) {
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
ls->ls_lvblen, ls->ls_exflags,
nodeid, rf->rf_lvblen, rf->rf_lsflags);
return -EINVAL;
}
return 0;
}
static void allow_sync_reply(struct dlm_ls *ls, uint64_t *new_seq)
{
spin_lock(&ls->ls_rcom_spin);
*new_seq = ++ls->ls_rcom_seq;
set_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
spin_unlock(&ls->ls_rcom_spin);
}
static void disallow_sync_reply(struct dlm_ls *ls)
{
spin_lock(&ls->ls_rcom_spin);
clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
clear_bit(LSFL_RCOM_READY, &ls->ls_flags);
spin_unlock(&ls->ls_rcom_spin);
}
int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error = 0;
ls->ls_recover_nodeid = nodeid;
if (nodeid == dlm_our_nodeid()) {
rc = (struct dlm_rcom *) ls->ls_recover_buf;
rc->rc_result = dlm_recover_status(ls);
goto out;
}
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, 0, &rc, &mh);
if (error)
goto out;
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
if (error)
goto out;
rc = (struct dlm_rcom *) ls->ls_recover_buf;
if (rc->rc_result == -ESRCH) {
/* we pretend the remote lockspace exists with 0 status */
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
} else
error = check_config(ls, rc, nodeid);
/* the caller looks at rc_result for the remote recovery status */
out:
return error;
}
static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, nodeid = rc_in->rc_header.h_nodeid;
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS_REPLY,
sizeof(struct rcom_config), &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = dlm_recover_status(ls);
make_config(ls, (struct rcom_config *) rc->rc_buf);
send_rcom(ls, mh, rc);
}
static void receive_sync_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
spin_lock(&ls->ls_rcom_spin);
if (!test_bit(LSFL_RCOM_WAIT, &ls->ls_flags) ||
rc_in->rc_id != ls->ls_rcom_seq) {
log_debug(ls, "reject reply %d from %d seq %llx expect %llx",
rc_in->rc_type, rc_in->rc_header.h_nodeid,
(unsigned long long)rc_in->rc_id,
(unsigned long long)ls->ls_rcom_seq);
goto out;
}
memcpy(ls->ls_recover_buf, rc_in, rc_in->rc_header.h_length);
set_bit(LSFL_RCOM_READY, &ls->ls_flags);
clear_bit(LSFL_RCOM_WAIT, &ls->ls_flags);
wake_up(&ls->ls_wait_general);
out:
spin_unlock(&ls->ls_rcom_spin);
}
static void receive_rcom_status_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
receive_sync_reply(ls, rc_in);
}
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error = 0, len = sizeof(struct dlm_rcom);
ls->ls_recover_nodeid = nodeid;
if (nodeid == dlm_our_nodeid()) {
dlm_copy_master_names(ls, last_name, last_len,
ls->ls_recover_buf + len,
dlm_config.ci_buffer_size - len, nodeid);
goto out;
}
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
if (error)
goto out;
memcpy(rc->rc_buf, last_name, last_len);
allow_sync_reply(ls, &rc->rc_id);
memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
out:
return error;
}
static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, inlen, outlen, nodeid;
nodeid = rc_in->rc_header.h_nodeid;
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
send_rcom(ls, mh, rc);
}
static void receive_rcom_names_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
receive_sync_reply(ls, rc_in);
}
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct dlm_ls *ls = r->res_ls;
int error;
error = create_rcom(ls, dir_nodeid, DLM_RCOM_LOOKUP, r->res_length,
&rc, &mh);
if (error)
goto out;
memcpy(rc->rc_buf, r->res_name, r->res_length);
rc->rc_id = (unsigned long) r;
send_rcom(ls, mh, rc);
out:
return error;
}
static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, ret_nodeid, nodeid = rc_in->rc_header.h_nodeid;
int len = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_LOOKUP_REPLY, 0, &rc, &mh);
if (error)
return;
error = dlm_dir_lookup(ls, nodeid, rc_in->rc_buf, len, &ret_nodeid);
if (error)
ret_nodeid = error;
rc->rc_result = ret_nodeid;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lookup_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
dlm_recover_master_reply(ls, rc_in);
}
static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb,
struct rcom_lock *rl)
{
memset(rl, 0, sizeof(*rl));
rl->rl_ownpid = lkb->lkb_ownpid;
rl->rl_lkid = lkb->lkb_id;
rl->rl_exflags = lkb->lkb_exflags;
rl->rl_flags = lkb->lkb_flags;
rl->rl_lvbseq = lkb->lkb_lvbseq;
rl->rl_rqmode = lkb->lkb_rqmode;
rl->rl_grmode = lkb->lkb_grmode;
rl->rl_status = lkb->lkb_status;
rl->rl_wait_type = lkb->lkb_wait_type;
if (lkb->lkb_bastaddr)
rl->rl_asts |= AST_BAST;
if (lkb->lkb_astaddr)
rl->rl_asts |= AST_COMP;
rl->rl_namelen = r->res_length;
memcpy(rl->rl_name, r->res_name, r->res_length);
/* FIXME: might we have an lvb without DLM_LKF_VALBLK set ?
If so, receive_rcom_lock_args() won't take this copy. */
if (lkb->lkb_lvbptr)
memcpy(rl->rl_lvb, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
}
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
{
struct dlm_ls *ls = r->res_ls;
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
struct rcom_lock *rl;
int error, len = sizeof(struct rcom_lock);
if (lkb->lkb_lvbptr)
len += ls->ls_lvblen;
error = create_rcom(ls, r->res_nodeid, DLM_RCOM_LOCK, len, &rc, &mh);
if (error)
goto out;
rl = (struct rcom_lock *) rc->rc_buf;
pack_rcom_lock(r, lkb, rl);
rc->rc_id = (unsigned long) r;
send_rcom(ls, mh, rc);
out:
return error;
}
static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
int error, nodeid = rc_in->rc_header.h_nodeid;
dlm_recover_master_copy(ls, rc_in);
error = create_rcom(ls, nodeid, DLM_RCOM_LOCK_REPLY,
sizeof(struct rcom_lock), &rc, &mh);
if (error)
return;
/* We send back the same rcom_lock struct we received, but
dlm_recover_master_copy() has filled in rl_remid and rl_result */
memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
dlm_recover_process_copy(ls, rc_in);
}
static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct rcom_config *rf;
struct dlm_mhandle *mh;
char *mb;
int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
if (!mh)
return -ENOBUFS;
memset(mb, 0, mb_len);
rc = (struct dlm_rcom *) mb;
rc->rc_header.h_version = (DLM_HEADER_MAJOR | DLM_HEADER_MINOR);
rc->rc_header.h_lockspace = rc_in->rc_header.h_lockspace;
rc->rc_header.h_nodeid = dlm_our_nodeid();
rc->rc_header.h_length = mb_len;
rc->rc_header.h_cmd = DLM_RCOM;
rc->rc_type = DLM_RCOM_STATUS_REPLY;
rc->rc_id = rc_in->rc_id;
rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = -ESRCH;
rf = (struct rcom_config *) rc->rc_buf;
rf->rf_lvblen = -1;
dlm_rcom_out(rc);
dlm_lowcomms_commit_buffer(mh);
return 0;
}
static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
uint64_t seq;
int rv = 0;
switch (rc->rc_type) {
case DLM_RCOM_STATUS_REPLY:
case DLM_RCOM_NAMES_REPLY:
case DLM_RCOM_LOOKUP_REPLY:
case DLM_RCOM_LOCK_REPLY:
spin_lock(&ls->ls_recover_lock);
seq = ls->ls_recover_seq;
spin_unlock(&ls->ls_recover_lock);
if (rc->rc_seq_reply != seq) {
log_debug(ls, "ignoring old reply %x from %d "
"seq_reply %llx expect %llx",
rc->rc_type, rc->rc_header.h_nodeid,
(unsigned long long)rc->rc_seq_reply,
(unsigned long long)seq);
rv = 1;
}
}
return rv;
}
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
{
struct dlm_rcom *rc = (struct dlm_rcom *) hd;
struct dlm_ls *ls;
dlm_rcom_in(rc);
/* If the lockspace doesn't exist then still send a status message
back; it's possible that it just doesn't have its global_id yet. */
ls = dlm_find_lockspace_global(hd->h_lockspace);
if (!ls) {
log_print("lockspace %x from %d type %x not found",
hd->h_lockspace, nodeid, rc->rc_type);
if (rc->rc_type == DLM_RCOM_STATUS)
send_ls_not_ready(nodeid, rc);
return;
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
goto out;
}
if (is_old_reply(ls, rc))
goto out;
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
goto out;
}
switch (rc->rc_type) {
case DLM_RCOM_STATUS:
receive_rcom_status(ls, rc);
break;
case DLM_RCOM_NAMES:
receive_rcom_names(ls, rc);
break;
case DLM_RCOM_LOOKUP:
receive_rcom_lookup(ls, rc);
break;
case DLM_RCOM_LOCK:
receive_rcom_lock(ls, rc);
break;
case DLM_RCOM_STATUS_REPLY:
receive_rcom_status_reply(ls, rc);
break;
case DLM_RCOM_NAMES_REPLY:
receive_rcom_names_reply(ls, rc);
break;
case DLM_RCOM_LOOKUP_REPLY:
receive_rcom_lookup_reply(ls, rc);
break;
case DLM_RCOM_LOCK_REPLY:
receive_rcom_lock_reply(ls, rc);
break;
default:
DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
}
out:
dlm_put_lockspace(ls);
}

24
fs/dlm/rcom.h Normal file
View File

@@ -0,0 +1,24 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __RCOM_DOT_H__
#define __RCOM_DOT_H__
int dlm_rcom_status(struct dlm_ls *ls, int nodeid);
int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len);
int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid);
int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
void dlm_receive_rcom(struct dlm_header *hd, int nodeid);
#endif

768
fs/dlm/recover.c Normal file
View File

@@ -0,0 +1,768 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "dir.h"
#include "config.h"
#include "ast.h"
#include "memory.h"
#include "rcom.h"
#include "lock.h"
#include "lowcomms.h"
#include "member.h"
#include "recover.h"
/*
* Recovery waiting routines: these functions wait for a particular reply from
* a remote node, or for the remote node to report a certain status. They need
* to abort if the lockspace is stopped indicating a node has failed (perhaps
* the one being waited for).
*/
/*
* Wait until given function returns non-zero or lockspace is stopped
* (LS_RECOVERY_STOP set due to failure of a node in ls_nodes). When another
* function thinks it could have completed the waited-on task, they should wake
* up ls_wait_general to get an immediate response rather than waiting for the
* timer to detect the result. A timer wakes us up periodically while waiting
* to see if we should abort due to a node failure. This should only be called
* by the dlm_recoverd thread.
*/
static void dlm_wait_timer_fn(unsigned long data)
{
struct dlm_ls *ls = (struct dlm_ls *) data;
mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
wake_up(&ls->ls_wait_general);
}
int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
{
int error = 0;
init_timer(&ls->ls_timer);
ls->ls_timer.function = dlm_wait_timer_fn;
ls->ls_timer.data = (long) ls;
ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
add_timer(&ls->ls_timer);
wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
del_timer_sync(&ls->ls_timer);
if (dlm_recovery_stopped(ls)) {
log_debug(ls, "dlm_wait_function aborted");
error = -EINTR;
}
return error;
}
/*
* An efficient way for all nodes to wait for all others to have a certain
* status. The node with the lowest nodeid polls all the others for their
* status (wait_status_all) and all the others poll the node with the low id
* for its accumulated result (wait_status_low). When all nodes have set
* status flag X, then status flag X_ALL will be set on the low nodeid.
*/
uint32_t dlm_recover_status(struct dlm_ls *ls)
{
uint32_t status;
spin_lock(&ls->ls_recover_lock);
status = ls->ls_recover_status;
spin_unlock(&ls->ls_recover_lock);
return status;
}
void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status)
{
spin_lock(&ls->ls_recover_lock);
ls->ls_recover_status |= status;
spin_unlock(&ls->ls_recover_lock);
}
static int wait_status_all(struct dlm_ls *ls, uint32_t wait_status)
{
struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf;
struct dlm_member *memb;
int error = 0, delay;
list_for_each_entry(memb, &ls->ls_nodes, list) {
delay = 0;
for (;;) {
if (dlm_recovery_stopped(ls)) {
error = -EINTR;
goto out;
}
error = dlm_rcom_status(ls, memb->nodeid);
if (error)
goto out;
if (rc->rc_result & wait_status)
break;
if (delay < 1000)
delay += 20;
msleep(delay);
}
}
out:
return error;
}
static int wait_status_low(struct dlm_ls *ls, uint32_t wait_status)
{
struct dlm_rcom *rc = (struct dlm_rcom *) ls->ls_recover_buf;
int error = 0, delay = 0, nodeid = ls->ls_low_nodeid;
for (;;) {
if (dlm_recovery_stopped(ls)) {
error = -EINTR;
goto out;
}
error = dlm_rcom_status(ls, nodeid);
if (error)
break;
if (rc->rc_result & wait_status)
break;
if (delay < 1000)
delay += 20;
msleep(delay);
}
out:
return error;
}
static int wait_status(struct dlm_ls *ls, uint32_t status)
{
uint32_t status_all = status << 1;
int error;
if (ls->ls_low_nodeid == dlm_our_nodeid()) {
error = wait_status_all(ls, status);
if (!error)
dlm_set_recover_status(ls, status_all);
} else
error = wait_status_low(ls, status_all);
return error;
}
int dlm_recover_members_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_NODES);
}
int dlm_recover_directory_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_DIR);
}
int dlm_recover_locks_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_LOCKS);
}
int dlm_recover_done_wait(struct dlm_ls *ls)
{
return wait_status(ls, DLM_RS_DONE);
}
/*
* The recover_list contains all the rsb's for which we've requested the new
* master nodeid. As replies are returned from the resource directories the
* rsb's are removed from the list. When the list is empty we're done.
*
* The recover_list is later similarly used for all rsb's for which we've sent
* new lkb's and need to receive new corresponding lkid's.
*
* We use the address of the rsb struct as a simple local identifier for the
* rsb so we can match an rcom reply with the rsb it was sent for.
*/
static int recover_list_empty(struct dlm_ls *ls)
{
int empty;
spin_lock(&ls->ls_recover_list_lock);
empty = list_empty(&ls->ls_recover_list);
spin_unlock(&ls->ls_recover_list_lock);
return empty;
}
static void recover_list_add(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
spin_lock(&ls->ls_recover_list_lock);
if (list_empty(&r->res_recover_list)) {
list_add_tail(&r->res_recover_list, &ls->ls_recover_list);
ls->ls_recover_list_count++;
dlm_hold_rsb(r);
}
spin_unlock(&ls->ls_recover_list_lock);
}
static void recover_list_del(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
spin_lock(&ls->ls_recover_list_lock);
list_del_init(&r->res_recover_list);
ls->ls_recover_list_count--;
spin_unlock(&ls->ls_recover_list_lock);
dlm_put_rsb(r);
}
static struct dlm_rsb *recover_list_find(struct dlm_ls *ls, uint64_t id)
{
struct dlm_rsb *r = NULL;
spin_lock(&ls->ls_recover_list_lock);
list_for_each_entry(r, &ls->ls_recover_list, res_recover_list) {
if (id == (unsigned long) r)
goto out;
}
r = NULL;
out:
spin_unlock(&ls->ls_recover_list_lock);
return r;
}
static void recover_list_clear(struct dlm_ls *ls)
{
struct dlm_rsb *r, *s;
spin_lock(&ls->ls_recover_list_lock);
list_for_each_entry_safe(r, s, &ls->ls_recover_list, res_recover_list) {
list_del_init(&r->res_recover_list);
r->res_recover_locks_count = 0;
dlm_put_rsb(r);
ls->ls_recover_list_count--;
}
if (ls->ls_recover_list_count != 0) {
log_error(ls, "warning: recover_list_count %d",
ls->ls_recover_list_count);
ls->ls_recover_list_count = 0;
}
spin_unlock(&ls->ls_recover_list_lock);
}
/* Master recovery: find new master node for rsb's that were
mastered on nodes that have been removed.
dlm_recover_masters
recover_master
dlm_send_rcom_lookup -> receive_rcom_lookup
dlm_dir_lookup
receive_rcom_lookup_reply <-
dlm_recover_master_reply
set_new_master
set_master_lkbs
set_lock_master
*/
/*
* Set the lock master for all LKBs in a lock queue
* If we are the new master of the rsb, we may have received new
* MSTCPY locks from other nodes already which we need to ignore
* when setting the new nodeid.
*/
static void set_lock_master(struct list_head *queue, int nodeid)
{
struct dlm_lkb *lkb;
list_for_each_entry(lkb, queue, lkb_statequeue)
if (!(lkb->lkb_flags & DLM_IFL_MSTCPY))
lkb->lkb_nodeid = nodeid;
}
static void set_master_lkbs(struct dlm_rsb *r)
{
set_lock_master(&r->res_grantqueue, r->res_nodeid);
set_lock_master(&r->res_convertqueue, r->res_nodeid);
set_lock_master(&r->res_waitqueue, r->res_nodeid);
}
/*
* Propogate the new master nodeid to locks
* The NEW_MASTER flag tells dlm_recover_locks() which rsb's to consider.
* The NEW_MASTER2 flag tells recover_lvb() and set_locks_purged() which
* rsb's to consider.
*/
static void set_new_master(struct dlm_rsb *r, int nodeid)
{
lock_rsb(r);
r->res_nodeid = nodeid;
set_master_lkbs(r);
rsb_set_flag(r, RSB_NEW_MASTER);
rsb_set_flag(r, RSB_NEW_MASTER2);
unlock_rsb(r);
}
/*
* We do async lookups on rsb's that need new masters. The rsb's
* waiting for a lookup reply are kept on the recover_list.
*/
static int recover_master(struct dlm_rsb *r)
{
struct dlm_ls *ls = r->res_ls;
int error, dir_nodeid, ret_nodeid, our_nodeid = dlm_our_nodeid();
dir_nodeid = dlm_dir_nodeid(r);
if (dir_nodeid == our_nodeid) {
error = dlm_dir_lookup(ls, our_nodeid, r->res_name,
r->res_length, &ret_nodeid);
if (error)
log_error(ls, "recover dir lookup error %d", error);
if (ret_nodeid == our_nodeid)
ret_nodeid = 0;
set_new_master(r, ret_nodeid);
} else {
recover_list_add(r);
error = dlm_send_rcom_lookup(r, dir_nodeid);
}
return error;
}
/*
* When not using a directory, most resource names will hash to a new static
* master nodeid and the resource will need to be remastered.
*/
static int recover_master_static(struct dlm_rsb *r)
{
int master = dlm_dir_nodeid(r);
if (master == dlm_our_nodeid())
master = 0;
if (r->res_nodeid != master) {
if (is_master(r))
dlm_purge_mstcpy_locks(r);
set_new_master(r, master);
return 1;
}
return 0;
}
/*
* Go through local root resources and for each rsb which has a master which
* has departed, get the new master nodeid from the directory. The dir will
* assign mastery to the first node to look up the new master. That means
* we'll discover in this lookup if we're the new master of any rsb's.
*
* We fire off all the dir lookup requests individually and asynchronously to
* the correct dir node.
*/
int dlm_recover_masters(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int error = 0, count = 0;
log_debug(ls, "dlm_recover_masters");
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (dlm_recovery_stopped(ls)) {
up_read(&ls->ls_root_sem);
error = -EINTR;
goto out;
}
if (dlm_no_directory(ls))
count += recover_master_static(r);
else if (!is_master(r) &&
(dlm_is_removed(ls, r->res_nodeid) ||
rsb_flag(r, RSB_NEW_MASTER))) {
recover_master(r);
count++;
}
schedule();
}
up_read(&ls->ls_root_sem);
log_debug(ls, "dlm_recover_masters %d resources", count);
error = dlm_wait_function(ls, &recover_list_empty);
out:
if (error)
recover_list_clear(ls);
return error;
}
int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
{
struct dlm_rsb *r;
int nodeid;
r = recover_list_find(ls, rc->rc_id);
if (!r) {
log_error(ls, "dlm_recover_master_reply no id %llx",
(unsigned long long)rc->rc_id);
goto out;
}
nodeid = rc->rc_result;
if (nodeid == dlm_our_nodeid())
nodeid = 0;
set_new_master(r, nodeid);
recover_list_del(r);
if (recover_list_empty(ls))
wake_up(&ls->ls_wait_general);
out:
return 0;
}
/* Lock recovery: rebuild the process-copy locks we hold on a
remastered rsb on the new rsb master.
dlm_recover_locks
recover_locks
recover_locks_queue
dlm_send_rcom_lock -> receive_rcom_lock
dlm_recover_master_copy
receive_rcom_lock_reply <-
dlm_recover_process_copy
*/
/*
* keep a count of the number of lkb's we send to the new master; when we get
* an equal number of replies then recovery for the rsb is done
*/
static int recover_locks_queue(struct dlm_rsb *r, struct list_head *head)
{
struct dlm_lkb *lkb;
int error = 0;
list_for_each_entry(lkb, head, lkb_statequeue) {
error = dlm_send_rcom_lock(r, lkb);
if (error)
break;
r->res_recover_locks_count++;
}
return error;
}
static int recover_locks(struct dlm_rsb *r)
{
int error = 0;
lock_rsb(r);
DLM_ASSERT(!r->res_recover_locks_count, dlm_dump_rsb(r););
error = recover_locks_queue(r, &r->res_grantqueue);
if (error)
goto out;
error = recover_locks_queue(r, &r->res_convertqueue);
if (error)
goto out;
error = recover_locks_queue(r, &r->res_waitqueue);
if (error)
goto out;
if (r->res_recover_locks_count)
recover_list_add(r);
else
rsb_clear_flag(r, RSB_NEW_MASTER);
out:
unlock_rsb(r);
return error;
}
int dlm_recover_locks(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int error, count = 0;
log_debug(ls, "dlm_recover_locks");
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
if (is_master(r)) {
rsb_clear_flag(r, RSB_NEW_MASTER);
continue;
}
if (!rsb_flag(r, RSB_NEW_MASTER))
continue;
if (dlm_recovery_stopped(ls)) {
error = -EINTR;
up_read(&ls->ls_root_sem);
goto out;
}
error = recover_locks(r);
if (error) {
up_read(&ls->ls_root_sem);
goto out;
}
count += r->res_recover_locks_count;
}
up_read(&ls->ls_root_sem);
log_debug(ls, "dlm_recover_locks %d locks", count);
error = dlm_wait_function(ls, &recover_list_empty);
out:
if (error)
recover_list_clear(ls);
else
dlm_set_recover_status(ls, DLM_RS_LOCKS);
return error;
}
void dlm_recovered_lock(struct dlm_rsb *r)
{
DLM_ASSERT(rsb_flag(r, RSB_NEW_MASTER), dlm_dump_rsb(r););
r->res_recover_locks_count--;
if (!r->res_recover_locks_count) {
rsb_clear_flag(r, RSB_NEW_MASTER);
recover_list_del(r);
}
if (recover_list_empty(r->res_ls))
wake_up(&r->res_ls->ls_wait_general);
}
/*
* The lvb needs to be recovered on all master rsb's. This includes setting
* the VALNOTVALID flag if necessary, and determining the correct lvb contents
* based on the lvb's of the locks held on the rsb.
*
* RSB_VALNOTVALID is set if there are only NL/CR locks on the rsb. If it
* was already set prior to recovery, it's not cleared, regardless of locks.
*
* The LVB contents are only considered for changing when this is a new master
* of the rsb (NEW_MASTER2). Then, the rsb's lvb is taken from any lkb with
* mode > CR. If no lkb's exist with mode above CR, the lvb contents are taken
* from the lkb with the largest lvb sequence number.
*/
static void recover_lvb(struct dlm_rsb *r)
{
struct dlm_lkb *lkb, *high_lkb = NULL;
uint32_t high_seq = 0;
int lock_lvb_exists = 0;
int big_lock_exists = 0;
int lvblen = r->res_ls->ls_lvblen;
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
if (lkb->lkb_grmode > DLM_LOCK_CR) {
big_lock_exists = 1;
goto setflag;
}
if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
high_lkb = lkb;
high_seq = lkb->lkb_lvbseq;
}
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
if (!(lkb->lkb_exflags & DLM_LKF_VALBLK))
continue;
lock_lvb_exists = 1;
if (lkb->lkb_grmode > DLM_LOCK_CR) {
big_lock_exists = 1;
goto setflag;
}
if (((int)lkb->lkb_lvbseq - (int)high_seq) >= 0) {
high_lkb = lkb;
high_seq = lkb->lkb_lvbseq;
}
}
setflag:
if (!lock_lvb_exists)
goto out;
if (!big_lock_exists)
rsb_set_flag(r, RSB_VALNOTVALID);
/* don't mess with the lvb unless we're the new master */
if (!rsb_flag(r, RSB_NEW_MASTER2))
goto out;
if (!r->res_lvbptr) {
r->res_lvbptr = allocate_lvb(r->res_ls);
if (!r->res_lvbptr)
goto out;
}
if (big_lock_exists) {
r->res_lvbseq = lkb->lkb_lvbseq;
memcpy(r->res_lvbptr, lkb->lkb_lvbptr, lvblen);
} else if (high_lkb) {
r->res_lvbseq = high_lkb->lkb_lvbseq;
memcpy(r->res_lvbptr, high_lkb->lkb_lvbptr, lvblen);
} else {
r->res_lvbseq = 0;
memset(r->res_lvbptr, 0, lvblen);
}
out:
return;
}
/* All master rsb's flagged RECOVER_CONVERT need to be looked at. The locks
converting PR->CW or CW->PR need to have their lkb_grmode set. */
static void recover_conversion(struct dlm_rsb *r)
{
struct dlm_lkb *lkb;
int grmode = -1;
list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue) {
if (lkb->lkb_grmode == DLM_LOCK_PR ||
lkb->lkb_grmode == DLM_LOCK_CW) {
grmode = lkb->lkb_grmode;
break;
}
}
list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue) {
if (lkb->lkb_grmode != DLM_LOCK_IV)
continue;
if (grmode == -1)
lkb->lkb_grmode = lkb->lkb_rqmode;
else
lkb->lkb_grmode = grmode;
}
}
/* We've become the new master for this rsb and waiting/converting locks may
need to be granted in dlm_grant_after_purge() due to locks that may have
existed from a removed node. */
static void set_locks_purged(struct dlm_rsb *r)
{
if (!list_empty(&r->res_waitqueue) || !list_empty(&r->res_convertqueue))
rsb_set_flag(r, RSB_LOCKS_PURGED);
}
void dlm_recover_rsbs(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int count = 0;
log_debug(ls, "dlm_recover_rsbs");
down_read(&ls->ls_root_sem);
list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
lock_rsb(r);
if (is_master(r)) {
if (rsb_flag(r, RSB_RECOVER_CONVERT))
recover_conversion(r);
if (rsb_flag(r, RSB_NEW_MASTER2))
set_locks_purged(r);
recover_lvb(r);
count++;
}
rsb_clear_flag(r, RSB_RECOVER_CONVERT);
rsb_clear_flag(r, RSB_NEW_MASTER2);
unlock_rsb(r);
}
up_read(&ls->ls_root_sem);
log_debug(ls, "dlm_recover_rsbs %d rsbs", count);
}
/* Create a single list of all root rsb's to be used during recovery */
int dlm_create_root_list(struct dlm_ls *ls)
{
struct dlm_rsb *r;
int i, error = 0;
down_write(&ls->ls_root_sem);
if (!list_empty(&ls->ls_root_list)) {
log_error(ls, "root list not empty");
error = -EINVAL;
goto out;
}
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
read_lock(&ls->ls_rsbtbl[i].lock);
list_for_each_entry(r, &ls->ls_rsbtbl[i].list, res_hashchain) {
list_add(&r->res_root_list, &ls->ls_root_list);
dlm_hold_rsb(r);
}
read_unlock(&ls->ls_rsbtbl[i].lock);
}
out:
up_write(&ls->ls_root_sem);
return error;
}
void dlm_release_root_list(struct dlm_ls *ls)
{
struct dlm_rsb *r, *safe;
down_write(&ls->ls_root_sem);
list_for_each_entry_safe(r, safe, &ls->ls_root_list, res_root_list) {
list_del_init(&r->res_root_list);
dlm_put_rsb(r);
}
up_write(&ls->ls_root_sem);
}
void dlm_clear_toss_list(struct dlm_ls *ls)
{
struct dlm_rsb *r, *safe;
int i;
for (i = 0; i < ls->ls_rsbtbl_size; i++) {
write_lock(&ls->ls_rsbtbl[i].lock);
list_for_each_entry_safe(r, safe, &ls->ls_rsbtbl[i].toss,
res_hashchain) {
list_del(&r->res_hashchain);
free_rsb(r);
}
write_unlock(&ls->ls_rsbtbl[i].lock);
}
}

34
fs/dlm/recover.h Normal file
View File

@@ -0,0 +1,34 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __RECOVER_DOT_H__
#define __RECOVER_DOT_H__
int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls));
uint32_t dlm_recover_status(struct dlm_ls *ls);
void dlm_set_recover_status(struct dlm_ls *ls, uint32_t status);
int dlm_recover_members_wait(struct dlm_ls *ls);
int dlm_recover_directory_wait(struct dlm_ls *ls);
int dlm_recover_locks_wait(struct dlm_ls *ls);
int dlm_recover_done_wait(struct dlm_ls *ls);
int dlm_recover_masters(struct dlm_ls *ls);
int dlm_recover_master_reply(struct dlm_ls *ls, struct dlm_rcom *rc);
int dlm_recover_locks(struct dlm_ls *ls);
void dlm_recovered_lock(struct dlm_rsb *r);
int dlm_create_root_list(struct dlm_ls *ls);
void dlm_release_root_list(struct dlm_ls *ls);
void dlm_clear_toss_list(struct dlm_ls *ls);
void dlm_recover_rsbs(struct dlm_ls *ls);
#endif /* __RECOVER_DOT_H__ */

310
fs/dlm/recoverd.c Normal file
View File

@@ -0,0 +1,310 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "dir.h"
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"
/* If the start for which we're re-enabling locking (seq) has been superseded
by a newer stop (ls_recover_seq), we need to leave locking disabled. */
static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
int error = -EINTR;
spin_lock(&ls->ls_recover_lock);
if (ls->ls_recover_seq == seq) {
set_bit(LSFL_RUNNING, &ls->ls_flags);
up_write(&ls->ls_in_recovery);
error = 0;
}
spin_unlock(&ls->ls_recover_lock);
return error;
}
static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
unsigned long start;
int error, neg = 0;
log_debug(ls, "recover %llx", (unsigned long long)rv->seq);
mutex_lock(&ls->ls_recoverd_active);
/*
* Suspending and resuming dlm_astd ensures that no lkb's from this ls
* will be processed by dlm_astd during recovery.
*/
dlm_astd_suspend();
dlm_astd_resume();
/*
* This list of root rsb's will be the basis of most of the recovery
* routines.
*/
dlm_create_root_list(ls);
/*
* Free all the tossed rsb's so we don't have to recover them.
*/
dlm_clear_toss_list(ls);
/*
* Add or remove nodes from the lockspace's ls_nodes list.
* Also waits for all nodes to complete dlm_recover_members.
*/
error = dlm_recover_members(ls, rv, &neg);
if (error) {
log_debug(ls, "recover_members failed %d", error);
goto fail;
}
start = jiffies;
/*
* Rebuild our own share of the directory by collecting from all other
* nodes their master rsb names that hash to us.
*/
error = dlm_recover_directory(ls);
if (error) {
log_debug(ls, "recover_directory failed %d", error);
goto fail;
}
/*
* Wait for all nodes to complete directory rebuild.
*/
error = dlm_recover_directory_wait(ls);
if (error) {
log_debug(ls, "recover_directory_wait failed %d", error);
goto fail;
}
/*
* We may have outstanding operations that are waiting for a reply from
* a failed node. Mark these to be resent after recovery. Unlock and
* cancel ops can just be completed.
*/
dlm_recover_waiters_pre(ls);
error = dlm_recovery_stopped(ls);
if (error)
goto fail;
if (neg || dlm_no_directory(ls)) {
/*
* Clear lkb's for departed nodes.
*/
dlm_purge_locks(ls);
/*
* Get new master nodeid's for rsb's that were mastered on
* departed nodes.
*/
error = dlm_recover_masters(ls);
if (error) {
log_debug(ls, "recover_masters failed %d", error);
goto fail;
}
/*
* Send our locks on remastered rsb's to the new masters.
*/
error = dlm_recover_locks(ls);
if (error) {
log_debug(ls, "recover_locks failed %d", error);
goto fail;
}
error = dlm_recover_locks_wait(ls);
if (error) {
log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
/*
* Finalize state in master rsb's now that all locks can be
* checked. This includes conversion resolution and lvb
* settings.
*/
dlm_recover_rsbs(ls);
} else {
/*
* Other lockspace members may be going through the "neg" steps
* while also adding us to the lockspace, in which case they'll
* be doing the recover_locks (RS_LOCKS) barrier.
*/
dlm_set_recover_status(ls, DLM_RS_LOCKS);
error = dlm_recover_locks_wait(ls);
if (error) {
log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
}
dlm_release_root_list(ls);
/*
* Purge directory-related requests that are saved in requestqueue.
* All dir requests from before recovery are invalid now due to the dir
* rebuild and will be resent by the requesting nodes.
*/
dlm_purge_requestqueue(ls);
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
log_debug(ls, "recover_done_wait failed %d", error);
goto fail;
}
dlm_clear_members_gone(ls);
error = enable_locking(ls, rv->seq);
if (error) {
log_debug(ls, "enable_locking failed %d", error);
goto fail;
}
error = dlm_process_requestqueue(ls);
if (error) {
log_debug(ls, "process_requestqueue failed %d", error);
goto fail;
}
error = dlm_recover_waiters_post(ls);
if (error) {
log_debug(ls, "recover_waiters_post failed %d", error);
goto fail;
}
dlm_grant_after_purge(ls);
dlm_astd_wake();
log_debug(ls, "recover %llx done: %u ms",
(unsigned long long)rv->seq,
jiffies_to_msecs(jiffies - start));
mutex_unlock(&ls->ls_recoverd_active);
return 0;
fail:
dlm_release_root_list(ls);
log_debug(ls, "recover %llx error %d",
(unsigned long long)rv->seq, error);
mutex_unlock(&ls->ls_recoverd_active);
return error;
}
/* The dlm_ls_start() that created the rv we take here may already have been
stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
flag set. */
static void do_ls_recovery(struct dlm_ls *ls)
{
struct dlm_recover *rv = NULL;
spin_lock(&ls->ls_recover_lock);
rv = ls->ls_recover_args;
ls->ls_recover_args = NULL;
if (rv && ls->ls_recover_seq == rv->seq)
clear_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
spin_unlock(&ls->ls_recover_lock);
if (rv) {
ls_recover(ls, rv);
kfree(rv->nodeids);
kfree(rv);
}
}
static int dlm_recoverd(void *arg)
{
struct dlm_ls *ls;
ls = dlm_find_lockspace_local(arg);
if (!ls) {
log_print("dlm_recoverd: no lockspace %p", arg);
return -1;
}
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
if (!test_bit(LSFL_WORK, &ls->ls_flags))
schedule();
set_current_state(TASK_RUNNING);
if (test_and_clear_bit(LSFL_WORK, &ls->ls_flags))
do_ls_recovery(ls);
}
dlm_put_lockspace(ls);
return 0;
}
void dlm_recoverd_kick(struct dlm_ls *ls)
{
set_bit(LSFL_WORK, &ls->ls_flags);
wake_up_process(ls->ls_recoverd_task);
}
int dlm_recoverd_start(struct dlm_ls *ls)
{
struct task_struct *p;
int error = 0;
p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
if (IS_ERR(p))
error = PTR_ERR(p);
else
ls->ls_recoverd_task = p;
return error;
}
void dlm_recoverd_stop(struct dlm_ls *ls)
{
kthread_stop(ls->ls_recoverd_task);
}
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
wake_up(&ls->ls_wait_general);
mutex_lock(&ls->ls_recoverd_active);
}
void dlm_recoverd_resume(struct dlm_ls *ls)
{
mutex_unlock(&ls->ls_recoverd_active);
}

24
fs/dlm/recoverd.h Normal file
View File

@@ -0,0 +1,24 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __RECOVERD_DOT_H__
#define __RECOVERD_DOT_H__
void dlm_recoverd_kick(struct dlm_ls *ls);
void dlm_recoverd_stop(struct dlm_ls *ls);
int dlm_recoverd_start(struct dlm_ls *ls);
void dlm_recoverd_suspend(struct dlm_ls *ls);
void dlm_recoverd_resume(struct dlm_ls *ls);
#endif /* __RECOVERD_DOT_H__ */

198
fs/dlm/requestqueue.c Normal file
View File

@@ -0,0 +1,198 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "member.h"
#include "lock.h"
#include "dir.h"
#include "config.h"
#include "requestqueue.h"
struct rq_entry {
struct list_head list;
int nodeid;
char request[1];
};
/*
* Requests received while the lockspace is in recovery get added to the
* request queue and processed when recovery is complete. This happens when
* the lockspace is suspended on some nodes before it is on others, or the
* lockspace is enabled on some while still suspended on others.
*/
int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
{
struct rq_entry *e;
int length = hd->h_length;
int rv = 0;
e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
if (!e) {
log_print("dlm_add_requestqueue: out of memory\n");
return 0;
}
e->nodeid = nodeid;
memcpy(e->request, hd, length);
/* We need to check dlm_locking_stopped() after taking the mutex to
avoid a race where dlm_recoverd enables locking and runs
process_requestqueue between our earlier dlm_locking_stopped check
and this addition to the requestqueue. */
mutex_lock(&ls->ls_requestqueue_mutex);
if (dlm_locking_stopped(ls))
list_add_tail(&e->list, &ls->ls_requestqueue);
else {
log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
kfree(e);
rv = -EAGAIN;
}
mutex_unlock(&ls->ls_requestqueue_mutex);
return rv;
}
int dlm_process_requestqueue(struct dlm_ls *ls)
{
struct rq_entry *e;
struct dlm_header *hd;
int error = 0;
mutex_lock(&ls->ls_requestqueue_mutex);
for (;;) {
if (list_empty(&ls->ls_requestqueue)) {
mutex_unlock(&ls->ls_requestqueue_mutex);
error = 0;
break;
}
e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list);
mutex_unlock(&ls->ls_requestqueue_mutex);
hd = (struct dlm_header *) e->request;
error = dlm_receive_message(hd, e->nodeid, 1);
if (error == -EINTR) {
/* entry is left on requestqueue */
log_debug(ls, "process_requestqueue abort eintr");
break;
}
mutex_lock(&ls->ls_requestqueue_mutex);
list_del(&e->list);
kfree(e);
if (dlm_locking_stopped(ls)) {
log_debug(ls, "process_requestqueue abort running");
mutex_unlock(&ls->ls_requestqueue_mutex);
error = -EINTR;
break;
}
schedule();
}
return error;
}
/*
* After recovery is done, locking is resumed and dlm_recoverd takes all the
* saved requests and processes them as they would have been by dlm_recvd. At
* the same time, dlm_recvd will start receiving new requests from remote
* nodes. We want to delay dlm_recvd processing new requests until
* dlm_recoverd has finished processing the old saved requests.
*/
void dlm_wait_requestqueue(struct dlm_ls *ls)
{
for (;;) {
mutex_lock(&ls->ls_requestqueue_mutex);
if (list_empty(&ls->ls_requestqueue))
break;
if (dlm_locking_stopped(ls))
break;
mutex_unlock(&ls->ls_requestqueue_mutex);
schedule();
}
mutex_unlock(&ls->ls_requestqueue_mutex);
}
static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
{
uint32_t type = ms->m_type;
/* the ls is being cleaned up and freed by release_lockspace */
if (!ls->ls_count)
return 1;
if (dlm_is_removed(ls, nodeid))
return 1;
/* directory operations are always purged because the directory is
always rebuilt during recovery and the lookups resent */
if (type == DLM_MSG_REMOVE ||
type == DLM_MSG_LOOKUP ||
type == DLM_MSG_LOOKUP_REPLY)
return 1;
if (!dlm_no_directory(ls))
return 0;
/* with no directory, the master is likely to change as a part of
recovery; requests to/from the defunct master need to be purged */
switch (type) {
case DLM_MSG_REQUEST:
case DLM_MSG_CONVERT:
case DLM_MSG_UNLOCK:
case DLM_MSG_CANCEL:
/* we're no longer the master of this resource, the sender
will resend to the new master (see waiter_needs_recovery) */
if (dlm_hash2nodeid(ls, ms->m_hash) != dlm_our_nodeid())
return 1;
break;
case DLM_MSG_REQUEST_REPLY:
case DLM_MSG_CONVERT_REPLY:
case DLM_MSG_UNLOCK_REPLY:
case DLM_MSG_CANCEL_REPLY:
case DLM_MSG_GRANT:
/* this reply is from the former master of the resource,
we'll resend to the new master if needed */
if (dlm_hash2nodeid(ls, ms->m_hash) != nodeid)
return 1;
break;
}
return 0;
}
void dlm_purge_requestqueue(struct dlm_ls *ls)
{
struct dlm_message *ms;
struct rq_entry *e, *safe;
mutex_lock(&ls->ls_requestqueue_mutex);
list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) {
ms = (struct dlm_message *) e->request;
if (purge_request(ls, ms, e->nodeid)) {
list_del(&e->list);
kfree(e);
}
}
mutex_unlock(&ls->ls_requestqueue_mutex);
}

22
fs/dlm/requestqueue.h Normal file
View File

@@ -0,0 +1,22 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __REQUESTQUEUE_DOT_H__
#define __REQUESTQUEUE_DOT_H__
int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd);
int dlm_process_requestqueue(struct dlm_ls *ls);
void dlm_wait_requestqueue(struct dlm_ls *ls);
void dlm_purge_requestqueue(struct dlm_ls *ls);
#endif

798
fs/dlm/user.c Normal file
View File

@@ -0,0 +1,798 @@
/*
* Copyright (C) 2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*/
#include <linux/miscdevice.h>
#include <linux/init.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/signal.h>
#include <linux/spinlock.h>
#include <linux/dlm.h>
#include <linux/dlm_device.h>
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
#include "lvb_table.h"
#include "user.h"
static const char *name_prefix="dlm";
static struct miscdevice ctl_device;
static const struct file_operations device_fops;
#ifdef CONFIG_COMPAT
struct dlm_lock_params32 {
__u8 mode;
__u8 namelen;
__u16 flags;
__u32 lkid;
__u32 parent;
__u32 castparam;
__u32 castaddr;
__u32 bastparam;
__u32 bastaddr;
__u32 lksb;
char lvb[DLM_USER_LVB_LEN];
char name[0];
};
struct dlm_write_request32 {
__u32 version[3];
__u8 cmd;
__u8 is64bit;
__u8 unused[2];
union {
struct dlm_lock_params32 lock;
struct dlm_lspace_params lspace;
} i;
};
struct dlm_lksb32 {
__u32 sb_status;
__u32 sb_lkid;
__u8 sb_flags;
__u32 sb_lvbptr;
};
struct dlm_lock_result32 {
__u32 length;
__u32 user_astaddr;
__u32 user_astparam;
__u32 user_lksb;
struct dlm_lksb32 lksb;
__u8 bast_mode;
__u8 unused[3];
/* Offsets may be zero if no data is present */
__u32 lvb_offset;
};
static void compat_input(struct dlm_write_request *kb,
struct dlm_write_request32 *kb32)
{
kb->version[0] = kb32->version[0];
kb->version[1] = kb32->version[1];
kb->version[2] = kb32->version[2];
kb->cmd = kb32->cmd;
kb->is64bit = kb32->is64bit;
if (kb->cmd == DLM_USER_CREATE_LOCKSPACE ||
kb->cmd == DLM_USER_REMOVE_LOCKSPACE) {
kb->i.lspace.flags = kb32->i.lspace.flags;
kb->i.lspace.minor = kb32->i.lspace.minor;
strcpy(kb->i.lspace.name, kb32->i.lspace.name);
} else {
kb->i.lock.mode = kb32->i.lock.mode;
kb->i.lock.namelen = kb32->i.lock.namelen;
kb->i.lock.flags = kb32->i.lock.flags;
kb->i.lock.lkid = kb32->i.lock.lkid;
kb->i.lock.parent = kb32->i.lock.parent;
kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
kb->i.lock.bastaddr = (void *)(long)kb32->i.lock.bastaddr;
kb->i.lock.lksb = (void *)(long)kb32->i.lock.lksb;
memcpy(kb->i.lock.lvb, kb32->i.lock.lvb, DLM_USER_LVB_LEN);
memcpy(kb->i.lock.name, kb32->i.lock.name, kb->i.lock.namelen);
}
}
static void compat_output(struct dlm_lock_result *res,
struct dlm_lock_result32 *res32)
{
res32->length = res->length - (sizeof(struct dlm_lock_result) -
sizeof(struct dlm_lock_result32));
res32->user_astaddr = (__u32)(long)res->user_astaddr;
res32->user_astparam = (__u32)(long)res->user_astparam;
res32->user_lksb = (__u32)(long)res->user_lksb;
res32->bast_mode = res->bast_mode;
res32->lvb_offset = res->lvb_offset;
res32->length = res->length;
res32->lksb.sb_status = res->lksb.sb_status;
res32->lksb.sb_flags = res->lksb.sb_flags;
res32->lksb.sb_lkid = res->lksb.sb_lkid;
res32->lksb.sb_lvbptr = (__u32)(long)res->lksb.sb_lvbptr;
}
#endif
void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
{
struct dlm_ls *ls;
struct dlm_user_args *ua;
struct dlm_user_proc *proc;
int remove_ownqueue = 0;
/* dlm_clear_proc_locks() sets ORPHAN/DEAD flag on each
lkb before dealing with it. We need to check this
flag before taking ls_clear_proc_locks mutex because if
it's set, dlm_clear_proc_locks() holds the mutex. */
if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) {
/* log_print("user_add_ast skip1 %x", lkb->lkb_flags); */
return;
}
ls = lkb->lkb_resource->res_ls;
mutex_lock(&ls->ls_clear_proc_locks);
/* If ORPHAN/DEAD flag is set, it means the process is dead so an ast
can't be delivered. For ORPHAN's, dlm_clear_proc_locks() freed
lkb->ua so we can't try to use it. */
if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) {
/* log_print("user_add_ast skip2 %x", lkb->lkb_flags); */
goto out;
}
DLM_ASSERT(lkb->lkb_astparam, dlm_print_lkb(lkb););
ua = (struct dlm_user_args *)lkb->lkb_astparam;
proc = ua->proc;
if (type == AST_BAST && ua->bastaddr == NULL)
goto out;
spin_lock(&proc->asts_spin);
if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
kref_get(&lkb->lkb_ref);
list_add_tail(&lkb->lkb_astqueue, &proc->asts);
lkb->lkb_ast_type |= type;
wake_up_interruptible(&proc->wait);
}
/* noqueue requests that fail may need to be removed from the
proc's locks list, there should be a better way of detecting
this situation than checking all these things... */
if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV &&
ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
remove_ownqueue = 1;
/* unlocks or cancels of waiting requests need to be removed from the
proc's unlocking list, again there must be a better way... */
if (ua->lksb.sb_status == -DLM_EUNLOCK ||
(ua->lksb.sb_status == -DLM_ECANCEL &&
lkb->lkb_grmode == DLM_LOCK_IV))
remove_ownqueue = 1;
/* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and
lvb_ops says we should. We could probably have set_lvb_lock()
set update_user_lvb instead and not need old_mode */
if ((lkb->lkb_ast_type & AST_COMP) &&
(lkb->lkb_lksb->sb_status == 0) &&
lkb->lkb_lksb->sb_lvbptr &&
dlm_lvb_operations[ua->old_mode + 1][lkb->lkb_grmode + 1])
ua->update_user_lvb = 1;
else
ua->update_user_lvb = 0;
spin_unlock(&proc->asts_spin);
if (remove_ownqueue) {
spin_lock(&ua->proc->locks_spin);
list_del_init(&lkb->lkb_ownqueue);
spin_unlock(&ua->proc->locks_spin);
dlm_put_lkb(lkb);
}
out:
mutex_unlock(&ls->ls_clear_proc_locks);
}
static int device_user_lock(struct dlm_user_proc *proc,
struct dlm_lock_params *params)
{
struct dlm_ls *ls;
struct dlm_user_args *ua;
int error = -ENOMEM;
ls = dlm_find_lockspace_local(proc->lockspace);
if (!ls)
return -ENOENT;
if (!params->castaddr || !params->lksb) {
error = -EINVAL;
goto out;
}
ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
if (!ua)
goto out;
ua->proc = proc;
ua->user_lksb = params->lksb;
ua->castparam = params->castparam;
ua->castaddr = params->castaddr;
ua->bastparam = params->bastparam;
ua->bastaddr = params->bastaddr;
if (params->flags & DLM_LKF_CONVERT)
error = dlm_user_convert(ls, ua,
params->mode, params->flags,
params->lkid, params->lvb);
else {
error = dlm_user_request(ls, ua,
params->mode, params->flags,
params->name, params->namelen,
params->parent);
if (!error)
error = ua->lksb.sb_lkid;
}
out:
dlm_put_lockspace(ls);
return error;
}
static int device_user_unlock(struct dlm_user_proc *proc,
struct dlm_lock_params *params)
{
struct dlm_ls *ls;
struct dlm_user_args *ua;
int error = -ENOMEM;
ls = dlm_find_lockspace_local(proc->lockspace);
if (!ls)
return -ENOENT;
ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
if (!ua)
goto out;
ua->proc = proc;
ua->user_lksb = params->lksb;
ua->castparam = params->castparam;
ua->castaddr = params->castaddr;
if (params->flags & DLM_LKF_CANCEL)
error = dlm_user_cancel(ls, ua, params->flags, params->lkid);
else
error = dlm_user_unlock(ls, ua, params->flags, params->lkid,
params->lvb);
out:
dlm_put_lockspace(ls);
return error;
}
static int device_create_lockspace(struct dlm_lspace_params *params)
{
dlm_lockspace_t *lockspace;
struct dlm_ls *ls;
int error, len;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
error = dlm_new_lockspace(params->name, strlen(params->name),
&lockspace, 0, DLM_USER_LVB_LEN);
if (error)
return error;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
return -ENOENT;
error = -ENOMEM;
len = strlen(params->name) + strlen(name_prefix) + 2;
ls->ls_device.name = kzalloc(len, GFP_KERNEL);
if (!ls->ls_device.name)
goto fail;
snprintf((char *)ls->ls_device.name, len, "%s_%s", name_prefix,
params->name);
ls->ls_device.fops = &device_fops;
ls->ls_device.minor = MISC_DYNAMIC_MINOR;
error = misc_register(&ls->ls_device);
if (error) {
kfree(ls->ls_device.name);
goto fail;
}
error = ls->ls_device.minor;
dlm_put_lockspace(ls);
return error;
fail:
dlm_put_lockspace(ls);
dlm_release_lockspace(lockspace, 0);
return error;
}
static int device_remove_lockspace(struct dlm_lspace_params *params)
{
dlm_lockspace_t *lockspace;
struct dlm_ls *ls;
int error, force = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
ls = dlm_find_lockspace_device(params->minor);
if (!ls)
return -ENOENT;
error = misc_deregister(&ls->ls_device);
if (error) {
dlm_put_lockspace(ls);
goto out;
}
kfree(ls->ls_device.name);
if (params->flags & DLM_USER_LSFLG_FORCEFREE)
force = 2;
lockspace = ls->ls_local_handle;
/* dlm_release_lockspace waits for references to go to zero,
so all processes will need to close their device for the ls
before the release will procede */
dlm_put_lockspace(ls);
error = dlm_release_lockspace(lockspace, force);
out:
return error;
}
/* Check the user's version matches ours */
static int check_version(struct dlm_write_request *req)
{
if (req->version[0] != DLM_DEVICE_VERSION_MAJOR ||
(req->version[0] == DLM_DEVICE_VERSION_MAJOR &&
req->version[1] > DLM_DEVICE_VERSION_MINOR)) {
printk(KERN_DEBUG "dlm: process %s (%d) version mismatch "
"user (%d.%d.%d) kernel (%d.%d.%d)\n",
current->comm,
current->pid,
req->version[0],
req->version[1],
req->version[2],
DLM_DEVICE_VERSION_MAJOR,
DLM_DEVICE_VERSION_MINOR,
DLM_DEVICE_VERSION_PATCH);
return -EINVAL;
}
return 0;
}
/*
* device_write
*
* device_user_lock
* dlm_user_request -> request_lock
* dlm_user_convert -> convert_lock
*
* device_user_unlock
* dlm_user_unlock -> unlock_lock
* dlm_user_cancel -> cancel_lock
*
* device_create_lockspace
* dlm_new_lockspace
*
* device_remove_lockspace
* dlm_release_lockspace
*/
/* a write to a lockspace device is a lock or unlock request, a write
to the control device is to create/remove a lockspace */
static ssize_t device_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct dlm_user_proc *proc = file->private_data;
struct dlm_write_request *kbuf;
sigset_t tmpsig, allsigs;
int error;
#ifdef CONFIG_COMPAT
if (count < sizeof(struct dlm_write_request32))
#else
if (count < sizeof(struct dlm_write_request))
#endif
return -EINVAL;
kbuf = kmalloc(count, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
if (copy_from_user(kbuf, buf, count)) {
error = -EFAULT;
goto out_free;
}
if (check_version(kbuf)) {
error = -EBADE;
goto out_free;
}
#ifdef CONFIG_COMPAT
if (!kbuf->is64bit) {
struct dlm_write_request32 *k32buf;
k32buf = (struct dlm_write_request32 *)kbuf;
kbuf = kmalloc(count + (sizeof(struct dlm_write_request) -
sizeof(struct dlm_write_request32)), GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
if (proc)
set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags);
compat_input(kbuf, k32buf);
kfree(k32buf);
}
#endif
/* do we really need this? can a write happen after a close? */
if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
return -EINVAL;
sigfillset(&allsigs);
sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
error = -EINVAL;
switch (kbuf->cmd)
{
case DLM_USER_LOCK:
if (!proc) {
log_print("no locking on control device");
goto out_sig;
}
error = device_user_lock(proc, &kbuf->i.lock);
break;
case DLM_USER_UNLOCK:
if (!proc) {
log_print("no locking on control device");
goto out_sig;
}
error = device_user_unlock(proc, &kbuf->i.lock);
break;
case DLM_USER_CREATE_LOCKSPACE:
if (proc) {
log_print("create/remove only on control device");
goto out_sig;
}
error = device_create_lockspace(&kbuf->i.lspace);
break;
case DLM_USER_REMOVE_LOCKSPACE:
if (proc) {
log_print("create/remove only on control device");
goto out_sig;
}
error = device_remove_lockspace(&kbuf->i.lspace);
break;
default:
log_print("Unknown command passed to DLM device : %d\n",
kbuf->cmd);
}
out_sig:
sigprocmask(SIG_SETMASK, &tmpsig, NULL);
recalc_sigpending();
out_free:
kfree(kbuf);
return error;
}
/* Every process that opens the lockspace device has its own "proc" structure
hanging off the open file that's used to keep track of locks owned by the
process and asts that need to be delivered to the process. */
static int device_open(struct inode *inode, struct file *file)
{
struct dlm_user_proc *proc;
struct dlm_ls *ls;
ls = dlm_find_lockspace_device(iminor(inode));
if (!ls)
return -ENOENT;
proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
if (!proc) {
dlm_put_lockspace(ls);
return -ENOMEM;
}
proc->lockspace = ls->ls_local_handle;
INIT_LIST_HEAD(&proc->asts);
INIT_LIST_HEAD(&proc->locks);
INIT_LIST_HEAD(&proc->unlocking);
spin_lock_init(&proc->asts_spin);
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
file->private_data = proc;
return 0;
}
static int device_close(struct inode *inode, struct file *file)
{
struct dlm_user_proc *proc = file->private_data;
struct dlm_ls *ls;
sigset_t tmpsig, allsigs;
ls = dlm_find_lockspace_local(proc->lockspace);
if (!ls)
return -ENOENT;
sigfillset(&allsigs);
sigprocmask(SIG_BLOCK, &allsigs, &tmpsig);
set_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags);
dlm_clear_proc_locks(ls, proc);
/* at this point no more lkb's should exist for this lockspace,
so there's no chance of dlm_user_add_ast() being called and
looking for lkb->ua->proc */
kfree(proc);
file->private_data = NULL;
dlm_put_lockspace(ls);
dlm_put_lockspace(ls); /* for the find in device_open() */
/* FIXME: AUTOFREE: if this ls is no longer used do
device_remove_lockspace() */
sigprocmask(SIG_SETMASK, &tmpsig, NULL);
recalc_sigpending();
return 0;
}
static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
int bmode, char __user *buf, size_t count)
{
#ifdef CONFIG_COMPAT
struct dlm_lock_result32 result32;
#endif
struct dlm_lock_result result;
void *resultptr;
int error=0;
int len;
int struct_len;
memset(&result, 0, sizeof(struct dlm_lock_result));
memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
result.user_lksb = ua->user_lksb;
/* FIXME: dlm1 provides for the user's bastparam/addr to not be updated
in a conversion unless the conversion is successful. See code
in dlm_user_convert() for updating ua from ua_tmp. OpenVMS, though,
notes that a new blocking AST address and parameter are set even if
the conversion fails, so maybe we should just do that. */
if (type == AST_BAST) {
result.user_astaddr = ua->bastaddr;
result.user_astparam = ua->bastparam;
result.bast_mode = bmode;
} else {
result.user_astaddr = ua->castaddr;
result.user_astparam = ua->castparam;
}
#ifdef CONFIG_COMPAT
if (compat)
len = sizeof(struct dlm_lock_result32);
else
#endif
len = sizeof(struct dlm_lock_result);
struct_len = len;
/* copy lvb to userspace if there is one, it's been updated, and
the user buffer has space for it */
if (ua->update_user_lvb && ua->lksb.sb_lvbptr &&
count >= len + DLM_USER_LVB_LEN) {
if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
DLM_USER_LVB_LEN)) {
error = -EFAULT;
goto out;
}
result.lvb_offset = len;
len += DLM_USER_LVB_LEN;
}
result.length = len;
resultptr = &result;
#ifdef CONFIG_COMPAT
if (compat) {
compat_output(&result, &result32);
resultptr = &result32;
}
#endif
if (copy_to_user(buf, resultptr, struct_len))
error = -EFAULT;
else
error = len;
out:
return error;
}
/* a read returns a single ast described in a struct dlm_lock_result */
static ssize_t device_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
struct dlm_user_proc *proc = file->private_data;
struct dlm_lkb *lkb;
struct dlm_user_args *ua;
DECLARE_WAITQUEUE(wait, current);
int error, type=0, bmode=0, removed = 0;
#ifdef CONFIG_COMPAT
if (count < sizeof(struct dlm_lock_result32))
#else
if (count < sizeof(struct dlm_lock_result))
#endif
return -EINVAL;
/* do we really need this? can a read happen after a close? */
if (test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))
return -EINVAL;
spin_lock(&proc->asts_spin);
if (list_empty(&proc->asts)) {
if (file->f_flags & O_NONBLOCK) {
spin_unlock(&proc->asts_spin);
return -EAGAIN;
}
add_wait_queue(&proc->wait, &wait);
repeat:
set_current_state(TASK_INTERRUPTIBLE);
if (list_empty(&proc->asts) && !signal_pending(current)) {
spin_unlock(&proc->asts_spin);
schedule();
spin_lock(&proc->asts_spin);
goto repeat;
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&proc->wait, &wait);
if (signal_pending(current)) {
spin_unlock(&proc->asts_spin);
return -ERESTARTSYS;
}
}
if (list_empty(&proc->asts)) {
spin_unlock(&proc->asts_spin);
return -EAGAIN;
}
/* there may be both completion and blocking asts to return for
the lkb, don't remove lkb from asts list unless no asts remain */
lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
if (lkb->lkb_ast_type & AST_COMP) {
lkb->lkb_ast_type &= ~AST_COMP;
type = AST_COMP;
} else if (lkb->lkb_ast_type & AST_BAST) {
lkb->lkb_ast_type &= ~AST_BAST;
type = AST_BAST;
bmode = lkb->lkb_bastmode;
}
if (!lkb->lkb_ast_type) {
list_del(&lkb->lkb_astqueue);
removed = 1;
}
spin_unlock(&proc->asts_spin);
ua = (struct dlm_user_args *)lkb->lkb_astparam;
error = copy_result_to_user(ua,
test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
type, bmode, buf, count);
/* removes reference for the proc->asts lists added by
dlm_user_add_ast() and may result in the lkb being freed */
if (removed)
dlm_put_lkb(lkb);
return error;
}
static unsigned int device_poll(struct file *file, poll_table *wait)
{
struct dlm_user_proc *proc = file->private_data;
poll_wait(file, &proc->wait, wait);
spin_lock(&proc->asts_spin);
if (!list_empty(&proc->asts)) {
spin_unlock(&proc->asts_spin);
return POLLIN | POLLRDNORM;
}
spin_unlock(&proc->asts_spin);
return 0;
}
static int ctl_device_open(struct inode *inode, struct file *file)
{
file->private_data = NULL;
return 0;
}
static int ctl_device_close(struct inode *inode, struct file *file)
{
return 0;
}
static const struct file_operations device_fops = {
.open = device_open,
.release = device_close,
.read = device_read,
.write = device_write,
.poll = device_poll,
.owner = THIS_MODULE,
};
static const struct file_operations ctl_device_fops = {
.open = ctl_device_open,
.release = ctl_device_close,
.write = device_write,
.owner = THIS_MODULE,
};
int dlm_user_init(void)
{
int error;
ctl_device.name = "dlm-control";
ctl_device.fops = &ctl_device_fops;
ctl_device.minor = MISC_DYNAMIC_MINOR;
error = misc_register(&ctl_device);
if (error)
log_print("misc_register failed for control device");
return error;
}
void dlm_user_exit(void)
{
misc_deregister(&ctl_device);
}

16
fs/dlm/user.h Normal file
View File

@@ -0,0 +1,16 @@
/*
* Copyright (C) 2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License v.2.
*/
#ifndef __USER_DOT_H__
#define __USER_DOT_H__
void dlm_user_add_ast(struct dlm_lkb *lkb, int type);
int dlm_user_init(void);
void dlm_user_exit(void);
#endif

165
fs/dlm/util.c Normal file
View File

@@ -0,0 +1,165 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "rcom.h"
#include "util.h"
static void header_out(struct dlm_header *hd)
{
hd->h_version = cpu_to_le32(hd->h_version);
hd->h_lockspace = cpu_to_le32(hd->h_lockspace);
hd->h_nodeid = cpu_to_le32(hd->h_nodeid);
hd->h_length = cpu_to_le16(hd->h_length);
}
static void header_in(struct dlm_header *hd)
{
hd->h_version = le32_to_cpu(hd->h_version);
hd->h_lockspace = le32_to_cpu(hd->h_lockspace);
hd->h_nodeid = le32_to_cpu(hd->h_nodeid);
hd->h_length = le16_to_cpu(hd->h_length);
}
void dlm_message_out(struct dlm_message *ms)
{
struct dlm_header *hd = (struct dlm_header *) ms;
header_out(hd);
ms->m_type = cpu_to_le32(ms->m_type);
ms->m_nodeid = cpu_to_le32(ms->m_nodeid);
ms->m_pid = cpu_to_le32(ms->m_pid);
ms->m_lkid = cpu_to_le32(ms->m_lkid);
ms->m_remid = cpu_to_le32(ms->m_remid);
ms->m_parent_lkid = cpu_to_le32(ms->m_parent_lkid);
ms->m_parent_remid = cpu_to_le32(ms->m_parent_remid);
ms->m_exflags = cpu_to_le32(ms->m_exflags);
ms->m_sbflags = cpu_to_le32(ms->m_sbflags);
ms->m_flags = cpu_to_le32(ms->m_flags);
ms->m_lvbseq = cpu_to_le32(ms->m_lvbseq);
ms->m_hash = cpu_to_le32(ms->m_hash);
ms->m_status = cpu_to_le32(ms->m_status);
ms->m_grmode = cpu_to_le32(ms->m_grmode);
ms->m_rqmode = cpu_to_le32(ms->m_rqmode);
ms->m_bastmode = cpu_to_le32(ms->m_bastmode);
ms->m_asts = cpu_to_le32(ms->m_asts);
ms->m_result = cpu_to_le32(ms->m_result);
}
void dlm_message_in(struct dlm_message *ms)
{
struct dlm_header *hd = (struct dlm_header *) ms;
header_in(hd);
ms->m_type = le32_to_cpu(ms->m_type);
ms->m_nodeid = le32_to_cpu(ms->m_nodeid);
ms->m_pid = le32_to_cpu(ms->m_pid);
ms->m_lkid = le32_to_cpu(ms->m_lkid);
ms->m_remid = le32_to_cpu(ms->m_remid);
ms->m_parent_lkid = le32_to_cpu(ms->m_parent_lkid);
ms->m_parent_remid = le32_to_cpu(ms->m_parent_remid);
ms->m_exflags = le32_to_cpu(ms->m_exflags);
ms->m_sbflags = le32_to_cpu(ms->m_sbflags);
ms->m_flags = le32_to_cpu(ms->m_flags);
ms->m_lvbseq = le32_to_cpu(ms->m_lvbseq);
ms->m_hash = le32_to_cpu(ms->m_hash);
ms->m_status = le32_to_cpu(ms->m_status);
ms->m_grmode = le32_to_cpu(ms->m_grmode);
ms->m_rqmode = le32_to_cpu(ms->m_rqmode);
ms->m_bastmode = le32_to_cpu(ms->m_bastmode);
ms->m_asts = le32_to_cpu(ms->m_asts);
ms->m_result = le32_to_cpu(ms->m_result);
}
static void rcom_lock_out(struct rcom_lock *rl)
{
rl->rl_ownpid = cpu_to_le32(rl->rl_ownpid);
rl->rl_lkid = cpu_to_le32(rl->rl_lkid);
rl->rl_remid = cpu_to_le32(rl->rl_remid);
rl->rl_parent_lkid = cpu_to_le32(rl->rl_parent_lkid);
rl->rl_parent_remid = cpu_to_le32(rl->rl_parent_remid);
rl->rl_exflags = cpu_to_le32(rl->rl_exflags);
rl->rl_flags = cpu_to_le32(rl->rl_flags);
rl->rl_lvbseq = cpu_to_le32(rl->rl_lvbseq);
rl->rl_result = cpu_to_le32(rl->rl_result);
rl->rl_wait_type = cpu_to_le16(rl->rl_wait_type);
rl->rl_namelen = cpu_to_le16(rl->rl_namelen);
}
static void rcom_lock_in(struct rcom_lock *rl)
{
rl->rl_ownpid = le32_to_cpu(rl->rl_ownpid);
rl->rl_lkid = le32_to_cpu(rl->rl_lkid);
rl->rl_remid = le32_to_cpu(rl->rl_remid);
rl->rl_parent_lkid = le32_to_cpu(rl->rl_parent_lkid);
rl->rl_parent_remid = le32_to_cpu(rl->rl_parent_remid);
rl->rl_exflags = le32_to_cpu(rl->rl_exflags);
rl->rl_flags = le32_to_cpu(rl->rl_flags);
rl->rl_lvbseq = le32_to_cpu(rl->rl_lvbseq);
rl->rl_result = le32_to_cpu(rl->rl_result);
rl->rl_wait_type = le16_to_cpu(rl->rl_wait_type);
rl->rl_namelen = le16_to_cpu(rl->rl_namelen);
}
static void rcom_config_out(struct rcom_config *rf)
{
rf->rf_lvblen = cpu_to_le32(rf->rf_lvblen);
rf->rf_lsflags = cpu_to_le32(rf->rf_lsflags);
}
static void rcom_config_in(struct rcom_config *rf)
{
rf->rf_lvblen = le32_to_cpu(rf->rf_lvblen);
rf->rf_lsflags = le32_to_cpu(rf->rf_lsflags);
}
void dlm_rcom_out(struct dlm_rcom *rc)
{
struct dlm_header *hd = (struct dlm_header *) rc;
int type = rc->rc_type;
header_out(hd);
rc->rc_type = cpu_to_le32(rc->rc_type);
rc->rc_result = cpu_to_le32(rc->rc_result);
rc->rc_id = cpu_to_le64(rc->rc_id);
rc->rc_seq = cpu_to_le64(rc->rc_seq);
rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
if (type == DLM_RCOM_LOCK)
rcom_lock_out((struct rcom_lock *) rc->rc_buf);
else if (type == DLM_RCOM_STATUS_REPLY)
rcom_config_out((struct rcom_config *) rc->rc_buf);
}
void dlm_rcom_in(struct dlm_rcom *rc)
{
struct dlm_header *hd = (struct dlm_header *) rc;
header_in(hd);
rc->rc_type = le32_to_cpu(rc->rc_type);
rc->rc_result = le32_to_cpu(rc->rc_result);
rc->rc_id = le64_to_cpu(rc->rc_id);
rc->rc_seq = le64_to_cpu(rc->rc_seq);
rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
if (rc->rc_type == DLM_RCOM_LOCK)
rcom_lock_in((struct rcom_lock *) rc->rc_buf);
else if (rc->rc_type == DLM_RCOM_STATUS_REPLY)
rcom_config_in((struct rcom_config *) rc->rc_buf);
}

22
fs/dlm/util.h Normal file
View File

@@ -0,0 +1,22 @@
/******************************************************************************
*******************************************************************************
**
** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#ifndef __UTIL_DOT_H__
#define __UTIL_DOT_H__
void dlm_message_out(struct dlm_message *ms);
void dlm_message_in(struct dlm_message *ms);
void dlm_rcom_out(struct dlm_rcom *rc);
void dlm_rcom_in(struct dlm_rcom *rc);
#endif