Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

46
fs/gfs2/Kconfig Normal file
View File

@@ -0,0 +1,46 @@
config GFS2_FS
tristate "GFS2 file system support"
depends on EXPERIMENTAL
select FS_POSIX_ACL
select CRC32
help
A cluster filesystem.
Allows a cluster of computers to simultaneously use a block device
that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
and writes to the block device like a local filesystem, but also uses
a lock module to allow the computers to coordinate their I/O so
filesystem consistency is maintained. One of the nifty features of
GFS is perfect consistency -- changes made to the filesystem on one
machine show up immediately on all other machines in the cluster.
To use the GFS2 filesystem, you will need to enable one or more of
the below locking modules. Documentation and utilities for GFS2 can
be found here: http://sources.redhat.com/cluster
config GFS2_FS_LOCKING_NOLOCK
tristate "GFS2 \"nolock\" locking module"
depends on GFS2_FS
help
Single node locking module for GFS2.
Use this module if you want to use GFS2 on a single node without
its clustering features. You can still take advantage of the
large file support, and upgrade to running a full cluster later on
if required.
If you will only be using GFS2 in cluster mode, you do not need this
module.
config GFS2_FS_LOCKING_DLM
tristate "GFS2 DLM locking module"
depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
select IP_SCTP if DLM_SCTP
select CONFIGFS_FS
select DLM
help
Multiple node locking module for GFS2
Most users of GFS2 will require this module. It provides the locking
interface between GFS2 and the DLM, which is required to use GFS2
in a cluster environment.

10
fs/gfs2/Makefile Normal file
View File

@@ -0,0 +1,10 @@
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o daemon.o dir.o eaops.o eattr.o glock.o \
glops.o inode.o lm.o log.o lops.o locking.o main.o meta_io.o \
mount.o ondisk.o ops_address.o ops_dentry.o ops_export.o ops_file.o \
ops_fstype.o ops_inode.o ops_super.o ops_vm.o quota.o \
recovery.o rgrp.o super.o sys.o trans.o util.o
obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += locking/nolock/
obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += locking/dlm/

294
fs/gfs2/acl.c Normal file
View File

@@ -0,0 +1,294 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "acl.h"
#include "eaops.h"
#include "eattr.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "trans.h"
#include "util.h"
#define ACL_ACCESS 1
#define ACL_DEFAULT 0
/**
 * gfs2_acl_validate_set - validate a request to set an ACL xattr
 * @ip: the inode the ACL is being set on
 * @access: non-zero for the access ACL, zero for the default ACL
 * @er: the request carrying the raw xattr value (er_data / er_data_len)
 * @remove: set to 1 when the value parses to no ACL at all, or, for an
 *          access ACL, when posix_acl_equiv_mode() returns 0
 * @mode: handed to posix_acl_equiv_mode(); only used when @access is set
 *
 * Returns: 0 on success, or a negative errno
 */
int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
			  struct gfs2_ea_request *er,
			  int *remove, mode_t *mode)
{
	struct posix_acl *parsed;
	int rc;

	/* The permission rules for removal apply to setting as well */
	rc = gfs2_acl_validate_remove(ip, access);
	if (rc)
		return rc;

	if (er->er_data == NULL)
		return -EINVAL;

	parsed = posix_acl_from_xattr(er->er_data, er->er_data_len);
	if (IS_ERR(parsed))
		return PTR_ERR(parsed);
	if (parsed == NULL) {
		*remove = 1;
		return 0;
	}

	rc = posix_acl_valid(parsed);
	if (rc == 0 && access) {
		rc = posix_acl_equiv_mode(parsed, mode);
		if (rc == 0)
			*remove = 1;
		else if (rc > 0)
			rc = 0;	/* a positive result is not an error */
	}

	posix_acl_release(parsed);
	return rc;
}
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
{
if (!GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl)
return -EOPNOTSUPP;
if (current->fsuid != ip->i_inode.i_uid && !capable(CAP_FOWNER))
return -EPERM;
if (S_ISLNK(ip->i_inode.i_mode))
return -EOPNOTSUPP;
if (!access && !S_ISDIR(ip->i_inode.i_mode))
return -EACCES;
return 0;
}
/**
 * acl_get - look up a POSIX ACL extended attribute on an inode
 * @ip: the inode to read from
 * @access: non-zero selects GFS2_POSIX_ACL_ACCESS, zero GFS2_POSIX_ACL_DEFAULT
 * @acl: if non-NULL, receives the result of posix_acl_from_xattr(); on a
 *       parse failure this is an error pointer and the errno is returned
 * @el: if non-NULL, filled in by gfs2_ea_find(); if NULL a local location
 *      is used instead and its buffer is released before returning
 * @data: if non-NULL, receives the kmalloc()ed raw attribute value on success
 * @len: receives the length of *@data; written whenever *@data is written
 *
 * @acl, @data and @len are left untouched when ip->i_di.di_eattr is zero,
 * when gfs2_ea_find() reports no entry, or when the entry has zero data
 * length. With a caller-supplied @el and no error, el->el_bh is not
 * released here once an entry has been found.
 *
 * Returns: 0 on success (including "nothing found"), or a negative errno
 */
static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
struct gfs2_ea_location *el, char **data, unsigned int *len)
{
struct gfs2_ea_request er;
struct gfs2_ea_location el_this;
int error;
/* di_eattr is zero: presumably the inode has no extended attributes */
if (!ip->i_di.di_eattr)
return 0;
memset(&er, 0, sizeof(struct gfs2_ea_request));
if (access) {
er.er_name = GFS2_POSIX_ACL_ACCESS;
er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
} else {
er.er_name = GFS2_POSIX_ACL_DEFAULT;
er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
}
er.er_type = GFS2_EATYPE_SYS;
/* Fall back to a local location when the caller does not want one */
if (!el)
el = &el_this;
error = gfs2_ea_find(ip, &er, el);
if (error)
return error;
/* No matching entry: returns without touching el->el_bh */
if (!el->el_ea)
return 0;
/* Entry exists but carries no data: nothing to copy or parse */
if (!GFS2_EA_DATA_LEN(el->el_ea))
goto out;
er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea);
er.er_data = kmalloc(er.er_data_len, GFP_KERNEL);
error = -ENOMEM;
if (!er.er_data)
goto out;
error = gfs2_ea_get_copy(ip, el, er.er_data);
if (error)
goto out_kfree;
if (acl) {
*acl = posix_acl_from_xattr(er.er_data, er.er_data_len);
if (IS_ERR(*acl))
error = PTR_ERR(*acl);
}
out_kfree:
/* The raw copy is handed to the caller only on success and if wanted */
if (error || !data)
kfree(er.er_data);
else {
*data = er.er_data;
*len = er.er_data_len;
}
out:
/* Keep the buffer only for a caller-supplied location on success */
if (error || el == &el_this)
brelse(el->el_bh);
return error;
}
/**
 * gfs2_check_acl - Check an ACL to see if we're allowed to do something
 * @inode: the file we want to do something to
 * @mask: what we want to do
 *
 * Returns: the result of posix_acl_permission() when an access ACL exists,
 *          -EAGAIN when there is none, or the errno from reading the ACL
 */
int gfs2_check_acl(struct inode *inode, int mask)
{
	struct posix_acl *acl = NULL;
	int rc;

	rc = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL);
	if (rc)
		return rc;

	/* No access ACL stored on this inode */
	if (acl == NULL)
		return -EAGAIN;

	rc = posix_acl_permission(inode, acl, mask);
	posix_acl_release(acl);
	return rc;
}
/**
 * munge_mode - set a new mode on an inode and write it to the dinode buffer
 * @ip: the inode to update
 * @mode: the new mode; its S_IFMT bits are asserted to match the current ones
 *
 * Runs inside its own transaction (RES_DINODE blocks reserved).
 *
 * Returns: 0 on success, or the errno from gfs2_trans_begin() or
 *          gfs2_meta_inode_buffer(). The latter used to be discarded,
 *          which made a failed update look like a success to callers.
 */
static int munge_mode(struct gfs2_inode *ip, mode_t mode)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		/* Only permission bits may change, never the file type */
		gfs2_assert_withdraw(sdp,
			(ip->i_inode.i_mode & S_IFMT) == (mode & S_IFMT));
		ip->i_inode.i_mode = mode;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	/* The transaction is ended whether or not the buffer was obtained */
	gfs2_trans_end(sdp);

	return error;
}
/**
 * gfs2_acl_create - apply the parent's default ACL to a newly created inode
 * @dip: the parent directory, whose default ACL is read
 * @ip: the new inode
 *
 * Does nothing when the superblock's ar_posix_acl argument is not set or
 * when @ip is a symlink. If the parent has no default ACL, the current
 * task's umask is applied to the mode instead. Otherwise the default ACL
 * is stored on directories as their own default ACL, and
 * posix_acl_create_masq() decides whether an access ACL has to be stored
 * (positive result) or whether updating the mode is enough (zero).
 *
 * Returns: 0 on success, or a negative errno
 */
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
	struct posix_acl *acl = NULL, *clone;
	struct gfs2_ea_request er;
	mode_t mode = ip->i_inode.i_mode;
	int error;

	if (!sdp->sd_args.ar_posix_acl)
		return 0;
	if (S_ISLNK(ip->i_inode.i_mode))
		return 0;

	/* Zeroing leaves er.er_data NULL, so the kfree() below is always safe */
	memset(&er, 0, sizeof(struct gfs2_ea_request));
	er.er_type = GFS2_EATYPE_SYS;

	error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
			&er.er_data, &er.er_data_len);
	if (error)
		return error;
	if (!acl) {
		mode &= ~current->fs->umask;
		if (mode != ip->i_inode.i_mode)
			error = munge_mode(ip, mode);
		/*
		 * acl_get() may have handed back raw attribute data even
		 * though it parsed to no ACL; free it instead of leaking it.
		 */
		goto out_free;
	}

	/* Work on a private copy; the masq helper modifies the ACL */
	clone = posix_acl_clone(acl, GFP_KERNEL);
	error = -ENOMEM;
	if (!clone)
		goto out;
	posix_acl_release(acl);
	acl = clone;

	if (S_ISDIR(ip->i_inode.i_mode)) {
		er.er_name = GFS2_POSIX_ACL_DEFAULT;
		er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
		error = gfs2_system_eaops.eo_set(ip, &er);
		if (error)
			goto out;
	}

	error = posix_acl_create_masq(acl, &mode);
	if (error < 0)
		goto out;
	if (error > 0) {
		er.er_name = GFS2_POSIX_ACL_ACCESS;
		er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
		/* Reuse the buffer that held the parent's raw default ACL */
		posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
		er.er_mode = mode;
		er.er_flags = GFS2_ERF_MODE;
		error = gfs2_system_eaops.eo_set(ip, &er);
		if (error)
			goto out;
	} else
		/* Report a failed mode update rather than ignoring it */
		error = munge_mode(ip, mode);

out:
	posix_acl_release(acl);
out_free:
	kfree(er.er_data);
	return error;
}
/**
 * gfs2_acl_chmod - change the mode of an inode, keeping its access ACL in step
 * @ip: the inode
 * @attr: the attribute change; attr->ia_mode is the new mode
 *
 * Without an access ACL this is just gfs2_setattr_simple(). Otherwise the
 * ACL is cloned, adjusted by posix_acl_chmod_masq(), converted back into
 * the raw buffer and written by gfs2_ea_acl_chmod().
 *
 * NOTE(review): when acl_get() succeeds without producing an ACL, whatever
 * it may have stored in the location or data buffer is not released on the
 * early-return path - confirm that nothing can be held in that case.
 *
 * Returns: 0 on success, or a negative errno
 */
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
{
	struct posix_acl *acl = NULL, *copy;
	struct gfs2_ea_location el;
	char *data;
	unsigned int len;
	int rc;

	rc = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len);
	if (rc)
		return rc;

	if (acl == NULL)
		return gfs2_setattr_simple(ip, attr);

	copy = posix_acl_clone(acl, GFP_KERNEL);
	if (copy != NULL) {
		/* Swap the original for the private copy before editing it */
		posix_acl_release(acl);
		acl = copy;

		rc = posix_acl_chmod_masq(acl, attr->ia_mode);
		if (rc == 0) {
			posix_acl_to_xattr(acl, data, len);
			rc = gfs2_ea_acl_chmod(ip, &el, attr, data);
		}
	} else {
		rc = -ENOMEM;
	}

	posix_acl_release(acl);
	brelse(el.el_bh);
	kfree(data);
	return rc;
}

38
fs/gfs2/acl.h Normal file
View File

@@ -0,0 +1,38 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __ACL_DOT_H__
#define __ACL_DOT_H__
#include "incore.h"
/* Attribute names used for the two POSIX ACLs; each _LEN is the length of
   the corresponding name string without its terminating NUL. */
#define GFS2_POSIX_ACL_ACCESS "posix_acl_access"
#define GFS2_POSIX_ACL_ACCESS_LEN 16
#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_POSIX_ACL_DEFAULT_LEN 17
/* True when (name, len) is exactly the access ACL name. The length is
   compared first, so memcmp() only runs on a name of the right size. */
#define GFS2_ACL_IS_ACCESS(name, len) \
((len) == GFS2_POSIX_ACL_ACCESS_LEN && \
!memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len)))
/* True when (name, len) is exactly the default ACL name. */
#define GFS2_ACL_IS_DEFAULT(name, len) \
((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \
!memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len)))
struct gfs2_ea_request;
int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
struct gfs2_ea_request *er,
int *remove, mode_t *mode);
int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access);
int gfs2_check_acl(struct inode *inode, int mask);
int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip);
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
#endif /* __ACL_DOT_H__ */

1216
fs/gfs2/bmap.c Normal file

File diff suppressed because it is too large Load Diff

31
fs/gfs2/bmap.h Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __BMAP_DOT_H__
#define __BMAP_DOT_H__
struct inode;
struct gfs2_inode;
struct page;
int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page);
int gfs2_block_map(struct inode *inode, u64 lblock, int create, struct buffer_head *bh);
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen);
int gfs2_truncatei(struct gfs2_inode *ip, u64 size);
int gfs2_truncatei_resume(struct gfs2_inode *ip);
int gfs2_file_dealloc(struct gfs2_inode *ip);
void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
unsigned int *data_blocks,
unsigned int *ind_blocks);
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
unsigned int len, int *alloc_required);
#endif /* __BMAP_DOT_H__ */

199
fs/gfs2/daemon.c Normal file
View File

@@ -0,0 +1,199 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "daemon.h"
#include "glock.h"
#include "log.h"
#include "quota.h"
#include "recovery.h"
#include "super.h"
#include "util.h"
/* This uses schedule_timeout() instead of msleep() because it's good for
the daemons to wake up more often than the timeout when unmounting so
the user's unmount doesn't sit there forever.
The kthread functions used to start these daemons block and flush signals. */
/**
 * gfs2_scand - Look for cached glocks and inodes to toss from memory
 * @data: Pointer to the GFS2 superblock (struct gfs2_sbd), passed as void *
 *
 * One of these daemons runs, finding candidates to add to sd_reclaim_list.
 * See gfs2_glockd()
 *
 * Returns: 0 once kthread_should_stop() reports true
 */
int gfs2_scand(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long pause;

	for (;;) {
		if (kthread_should_stop())
			break;

		gfs2_scand_internal(sdp);

		/* Sleep for gt_scand_secs seconds (interruptibly) */
		pause = gfs2_tune_get(sdp, gt_scand_secs) * HZ;
		schedule_timeout_interruptible(pause);
	}

	return 0;
}
/**
 * gfs2_glockd - Reclaim unused glock structures
 * @data: Pointer to the GFS2 superblock (struct gfs2_sbd), passed as void *
 *
 * One or more of these daemons run, reclaiming glocks on sd_reclaim_list.
 * Number of daemons can be set by user, with num_glockd mount option.
 *
 * Returns: 0 once kthread_should_stop() reports true
 */
int gfs2_glockd(void *data)
{
	struct gfs2_sbd *sdp = data;

	for (;;) {
		if (kthread_should_stop())
			break;

		/* Reclaim for as long as the reclaim count is non-zero */
		while (atomic_read(&sdp->sd_reclaim_count) != 0)
			gfs2_reclaim_glock(sdp);

		/* Sleep until there is more to reclaim or we must stop */
		wait_event_interruptible(sdp->sd_reclaim_wq,
					 (atomic_read(&sdp->sd_reclaim_count) ||
					  kthread_should_stop()));
	}

	return 0;
}
/**
 * gfs2_recoverd - Recover dead machine's journals
 * @data: Pointer to the GFS2 superblock (struct gfs2_sbd), passed as void *
 *
 * Returns: 0 once kthread_should_stop() reports true
 */
int gfs2_recoverd(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long pause;

	for (;;) {
		if (kthread_should_stop())
			break;

		gfs2_check_journals(sdp);

		/* Sleep for gt_recoverd_secs seconds (interruptibly) */
		pause = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ;
		schedule_timeout_interruptible(pause);
	}

	return 0;
}
/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @data: Pointer to the GFS2 superblock (struct gfs2_sbd), passed as void *
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 *
 * Returns: 0 once kthread_should_stop() reports true
 */
int gfs2_logd(void *data)
{
struct gfs2_sbd *sdp = data;
struct gfs2_holder ji_gh;
unsigned long t;
int need_flush;
while (!kthread_should_stop()) {
/* Advance the log tail */
/* t = time at which the next periodic flush becomes due */
t = sdp->sd_log_flush_time +
gfs2_tune_get(sdp, gt_log_flush_secs) * HZ;
gfs2_ail1_empty(sdp, DIO_ALL);
/* sd_log_num_buf is sampled under the log lock */
gfs2_log_lock(sdp);
need_flush = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
gfs2_log_unlock(sdp);
/* Flush when too many buffers are pending or the period has elapsed */
if (need_flush || time_after_eq(jiffies, t)) {
gfs2_log_flush(sdp, NULL);
sdp->sd_log_flush_time = jiffies;
}
/* Check for latest journal index */
t = sdp->sd_jindex_refresh_time +
gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ;
if (time_after_eq(jiffies, t)) {
/* On success (0) the holder is dropped again straight away */
if (!gfs2_jindex_hold(sdp, &ji_gh))
gfs2_glock_dq_uninit(&ji_gh);
/* The refresh time is updated whether or not the hold succeeded */
sdp->sd_jindex_refresh_time = jiffies;
}
t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
schedule_timeout_interruptible(t);
}
return 0;
}
/**
* gfs2_quotad - Write cached quota changes into the quota file
* @sdp: Pointer to GFS2 superblock
*
*/
int gfs2_quotad(void *data)
{
struct gfs2_sbd *sdp = data;
unsigned long t;
int error;
while (!kthread_should_stop()) {
/* Update the master statfs file */
t = sdp->sd_statfs_sync_time +
gfs2_tune_get(sdp, gt_statfs_quantum) * HZ;
if (time_after_eq(jiffies, t)) {
error = gfs2_statfs_sync(sdp);
if (error &&
error != -EROFS &&
!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
fs_err(sdp, "quotad: (1) error=%d\n", error);
sdp->sd_statfs_sync_time = jiffies;
}
/* Update quota file */
t = sdp->sd_quota_sync_time +
gfs2_tune_get(sdp, gt_quota_quantum) * HZ;
if (time_after_eq(jiffies, t)) {
error = gfs2_quota_sync(sdp);
if (error &&
error != -EROFS &&
!test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
fs_err(sdp, "quotad: (2) error=%d\n", error);
sdp->sd_quota_sync_time = jiffies;
}
gfs2_quota_scan(sdp);
t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ;
schedule_timeout_interruptible(t);
}
return 0;
}

19
fs/gfs2/daemon.h Normal file
View File

@@ -0,0 +1,19 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __DAEMON_DOT_H__
#define __DAEMON_DOT_H__
int gfs2_scand(void *data);
int gfs2_glockd(void *data);
int gfs2_recoverd(void *data);
int gfs2_logd(void *data);
int gfs2_quotad(void *data);
#endif /* __DAEMON_DOT_H__ */

1964
fs/gfs2/dir.c Normal file

File diff suppressed because it is too large Load Diff

62
fs/gfs2/dir.h Normal file
View File

@@ -0,0 +1,62 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __DIR_DOT_H__
#define __DIR_DOT_H__
#include <linux/dcache.h>
struct inode;
struct gfs2_inode;
struct gfs2_inum;
int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
struct gfs2_inum_host *inum, unsigned int *type);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
const struct gfs2_inum_host *inum, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir);
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
struct gfs2_inum_host *new_inum, unsigned int new_type);
int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip);
int gfs2_diradd_alloc_required(struct inode *dir,
const struct qstr *filename);
int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
struct buffer_head **bhp);
static inline u32 gfs2_disk_hash(const char *data, int len)
{
return crc32_le((u32)~0, data, len) ^ (u32)~0;
}
/**
 * gfs2_str2qstr - fill in a qstr from a NUL-terminated string
 * @name: the qstr to initialise
 * @fname: the string; @name points at it, no copy is made
 *
 * The hash field is set with gfs2_disk_hash() over the string's bytes.
 */
static inline void gfs2_str2qstr(struct qstr *name, const char *fname)
{
	name->len = strlen(fname);
	name->name = fname;
	name->hash = gfs2_disk_hash(fname, name->len);
}
/*
 * Initialise a directory entry from a name: hash, record length and name
 * length are stored big-endian, the inode number fields, type and padding
 * are zeroed, and the name bytes are copied directly after the entry.
 *
 * N.B. This probably ought to take inum & type as args as well
 */
static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct gfs2_dirent *dent)
{
	/* Fields derived from the arguments */
	dent->de_rec_len = cpu_to_be16(reclen);
	dent->de_name_len = cpu_to_be16(name->len);
	dent->de_hash = cpu_to_be32(name->hash);

	/* Fields that start out as zero */
	dent->de_inum.no_formal_ino = cpu_to_be64(0);
	dent->de_inum.no_addr = cpu_to_be64(0);
	dent->de_type = cpu_to_be16(0);
	memset(dent->__pad, 0, sizeof(dent->__pad));

	/* The name is stored immediately behind the fixed-size entry */
	memcpy(dent + 1, name->name, name->len);
}
#endif /* __DIR_DOT_H__ */

229
fs/gfs2/eaops.c Normal file
View File

@@ -0,0 +1,229 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/xattr.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
#include "incore.h"
#include "acl.h"
#include "eaops.h"
#include "eattr.h"
#include "util.h"
/**
 * gfs2_ea_name2type - get the type of the ea, and strip the type prefix
 * @name: ea name, starting with "system.", "user." or "security."
 * @truncated_name: if non-NULL, receives a pointer to the part of @name
 *                  after the prefix, or NULL when no prefix matched
 *
 * Returns: GFS2_EATYPE_XXX (GFS2_EATYPE_UNUSED when no prefix matched)
 */
unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
{
	unsigned int type = GFS2_EATYPE_UNUSED;
	const char *rest = NULL;

	if (!strncmp(name, "system.", 7)) {
		type = GFS2_EATYPE_SYS;
		rest = name + sizeof("system.") - 1;
	} else if (!strncmp(name, "user.", 5)) {
		type = GFS2_EATYPE_USR;
		rest = name + sizeof("user.") - 1;
	} else if (!strncmp(name, "security.", 9)) {
		type = GFS2_EATYPE_SECURITY;
		rest = name + sizeof("security.") - 1;
	}

	if (truncated_name)
		*truncated_name = rest;

	return type;
}
/* Read a "user" attribute; the caller needs MAY_READ on the inode. */
static int user_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int rc = permission(&ip->i_inode, MAY_READ, NULL);

	return rc ? rc : gfs2_ea_get_i(ip, er);
}
/*
 * Set a "user" attribute. Only regular files and directories without the
 * sticky bit qualify (-EPERM otherwise), and the caller needs MAY_WRITE.
 */
static int user_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct inode *inode = &ip->i_inode;
	int rc;

	if (!S_ISREG(inode->i_mode) &&
	    (!S_ISDIR(inode->i_mode) || (inode->i_mode & S_ISVTX)))
		return -EPERM;

	rc = permission(inode, MAY_WRITE, NULL);
	if (rc)
		return rc;

	return gfs2_ea_set_i(ip, er);
}
/*
 * Remove a "user" attribute. Only regular files and directories without
 * the sticky bit qualify (-EPERM otherwise), and the caller needs MAY_WRITE.
 */
static int user_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	struct inode *inode = &ip->i_inode;
	int rc;

	if (!S_ISREG(inode->i_mode) &&
	    (!S_ISDIR(inode->i_mode) || (inode->i_mode & S_ISVTX)))
		return -EPERM;

	rc = permission(inode, MAY_WRITE, NULL);
	if (rc)
		return rc;

	return gfs2_ea_remove_i(ip, er);
}
/*
 * Read a "system" attribute. Anything other than the two POSIX ACL names
 * requires CAP_SYS_ADMIN (-EPERM otherwise); the ACL names are refused
 * with -EOPNOTSUPP when the superblock's ar_posix_acl argument is zero.
 */
static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	const int is_acl =
		GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
		GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len);

	if (!is_acl && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (is_acl && GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0)
		return -EOPNOTSUPP;

	return gfs2_ea_get_i(ip, er);
}
/*
 * Set a "system" attribute. Only the two POSIX ACL names are accepted;
 * anything else yields -EPERM.
 *
 * Access ACL: er_mode is seeded from the inode mode unless the caller
 * already set GFS2_ERF_MODE; the attribute is written and then, if
 * validation asked for removal, removed again (that result is ignored).
 *
 * Default ACL: the attribute is either written or removed, and a removal
 * that finds nothing (-ENODATA) counts as success.
 */
static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int drop = 0;
	int rc;

	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
		if ((er->er_flags & GFS2_ERF_MODE) == 0) {
			er->er_mode = ip->i_inode.i_mode;
			er->er_flags |= GFS2_ERF_MODE;
		}

		rc = gfs2_acl_validate_set(ip, 1, er, &drop, &er->er_mode);
		if (rc == 0)
			rc = gfs2_ea_set_i(ip, er);
		if (rc)
			return rc;

		if (drop)
			gfs2_ea_remove_i(ip, er);
		return 0;
	}

	if (!GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))
		return -EPERM;

	rc = gfs2_acl_validate_set(ip, 0, er, &drop, NULL);
	if (rc)
		return rc;

	if (!drop)
		return gfs2_ea_set_i(ip, er);

	rc = gfs2_ea_remove_i(ip, er);
	return (rc == -ENODATA) ? 0 : rc;
}
/*
 * Remove a "system" attribute. Only the two POSIX ACL names are accepted
 * (-EPERM otherwise), subject to gfs2_acl_validate_remove().
 */
static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int access;
	int rc;

	if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len))
		access = 1;
	else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len))
		access = 0;
	else
		return -EPERM;

	rc = gfs2_acl_validate_remove(ip, access);
	if (rc)
		return rc;

	return gfs2_ea_remove_i(ip, er);
}
/* Read a "security" attribute; the caller needs MAY_READ on the inode. */
static int security_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int rc = permission(&ip->i_inode, MAY_READ, NULL);

	return rc ? rc : gfs2_ea_get_i(ip, er);
}
/* Set a "security" attribute; the caller needs MAY_WRITE on the inode. */
static int security_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int rc = permission(&ip->i_inode, MAY_WRITE, NULL);

	return rc ? rc : gfs2_ea_set_i(ip, er);
}
/* Remove a "security" attribute; the caller needs MAY_WRITE on the inode. */
static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
{
	int rc = permission(&ip->i_inode, MAY_WRITE, NULL);

	return rc ? rc : gfs2_ea_remove_i(ip, er);
}
/* Handlers for attributes in the "user" namespace */
static struct gfs2_eattr_operations gfs2_user_eaops = {
.eo_get = user_eo_get,
.eo_set = user_eo_set,
.eo_remove = user_eo_remove,
.eo_name = "user",
};
/* Handlers for the "system" namespace; non-static, declared in eaops.h */
struct gfs2_eattr_operations gfs2_system_eaops = {
.eo_get = system_eo_get,
.eo_set = system_eo_set,
.eo_remove = system_eo_remove,
.eo_name = "system",
};
/* Handlers for attributes in the "security" namespace */
static struct gfs2_eattr_operations gfs2_security_eaops = {
.eo_get = security_eo_get,
.eo_set = security_eo_set,
.eo_remove = security_eo_remove,
.eo_name = "security",
};
/* Table of the handler sets above; slot 0 is deliberately empty.
   NOTE(review): presumably indexed by GFS2_EATYPE_* with slot 0 being
   GFS2_EATYPE_UNUSED - confirm against the on-disk header values. */
struct gfs2_eattr_operations *gfs2_ea_ops[] = {
NULL,
&gfs2_user_eaops,
&gfs2_system_eaops,
&gfs2_security_eaops,
};

30
fs/gfs2/eaops.h Normal file
View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __EAOPS_DOT_H__
#define __EAOPS_DOT_H__
struct gfs2_ea_request;
struct gfs2_inode;
struct gfs2_eattr_operations {
int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
char *eo_name;
};
unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
extern struct gfs2_eattr_operations gfs2_system_eaops;
extern struct gfs2_eattr_operations *gfs2_ea_ops[];
#endif /* __EAOPS_DOT_H__ */

1506
fs/gfs2/eattr.c Normal file

File diff suppressed because it is too large Load Diff

100
fs/gfs2/eattr.h Normal file
View File

@@ -0,0 +1,100 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __EATTR_DOT_H__
#define __EATTR_DOT_H__
struct gfs2_inode;
struct iattr;
#define GFS2_EA_REC_LEN(ea) be32_to_cpu((ea)->ea_rec_len)
#define GFS2_EA_DATA_LEN(ea) be32_to_cpu((ea)->ea_data_len)
#define GFS2_EA_SIZE(ea) \
ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
(sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
#define GFS2_EAREQ_SIZE_STUFFED(er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
#define GFS2_EA2DATAPTRS(ea) \
((__be64 *)(GFS2_EA2NAME(ea) + ALIGN((ea)->ea_name_len, 8)))
#define GFS2_EA2NEXT(ea) \
((struct gfs2_ea_header *)((char *)(ea) + GFS2_EA_REC_LEN(ea)))
#define GFS2_EA_BH2FIRST(bh) \
((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
#define GFS2_ERF_MODE 0x80000000
/* Describes one extended attribute operation (get, set, remove, ...) */
struct gfs2_ea_request {
/* Attribute name; its length is carried separately in er_name_len */
const char *er_name;
/* Attribute value buffer, er_data_len bytes */
char *er_data;
unsigned int er_name_len;
unsigned int er_data_len;
unsigned int er_type; /* GFS2_EATYPE_... */
/* Flag bits such as GFS2_ERF_MODE */
int er_flags;
/* Mode accompanying the request; paired with GFS2_ERF_MODE in er_flags */
mode_t er_mode;
};
/* Result of an attribute lookup, filled in by gfs2_ea_find() */
struct gfs2_ea_location {
/* Buffer associated with the lookup; users release it with brelse() */
struct buffer_head *el_bh;
/* The EA header that was found; NULL when there was no match */
struct gfs2_ea_header *el_ea;
/* NOTE(review): presumably the EA header preceding el_ea - confirm */
struct gfs2_ea_header *el_prev;
};
int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
int gfs2_ea_dealloc(struct gfs2_inode *ip);
/* Exported to acl.c */
int gfs2_ea_find(struct gfs2_inode *ip,
struct gfs2_ea_request *er,
struct gfs2_ea_location *el);
int gfs2_ea_get_copy(struct gfs2_inode *ip,
struct gfs2_ea_location *el,
char *data);
int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
struct iattr *attr, char *data);
/*
 * Length of an EA's full name including its type prefix and one extra
 * byte: 5, 7 or 9 for the prefix depending on ea_type, plus ea_name_len,
 * plus 1. Returns 0 for any other ea_type.
 */
static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
{
	unsigned int prefix_len;

	switch (ea->ea_type) {
	case GFS2_EATYPE_USR:
		prefix_len = 5;
		break;
	case GFS2_EATYPE_SYS:
		prefix_len = 7;
		break;
	case GFS2_EATYPE_SECURITY:
		prefix_len = 9;
		break;
	default:
		return 0;
	}

	return prefix_len + ea->ea_name_len + 1;
}
#endif /* __EATTR_DOT_H__ */

31
fs/gfs2/gfs2.h Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __GFS2_DOT_H__
#define __GFS2_DOT_H__
/* Named 0/1 constants, giving readable names to yes/no values */
enum {
NO_CREATE = 0,
CREATE = 1,
};
enum {
NO_WAIT = 0,
WAIT = 1,
};
enum {
NO_FORCE = 0,
FORCE = 1,
};
/* NOTE(review): use not visible in this header - confirm what this sizes */
#define GFS2_FAST_NAME_SIZE 8
#endif /* __GFS2_DOT_H__ */

2045
fs/gfs2/glock.c Normal file

File diff suppressed because it is too large Load Diff

139
fs/gfs2/glock.h Normal file
View File

@@ -0,0 +1,139 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __GLOCK_DOT_H__
#define __GLOCK_DOT_H__
#include "incore.h"
/* Flags for lock requests; used in gfs2_holder gh_flag field.
From lm_interface.h:
#define LM_FLAG_TRY 0x00000001
#define LM_FLAG_TRY_1CB 0x00000002
#define LM_FLAG_NOEXP 0x00000004
#define LM_FLAG_ANY 0x00000008
#define LM_FLAG_PRIORITY 0x00000010 */
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
#define GL_ATIME 0x00000200
#define GL_NOCACHE 0x00000400
#define GL_NOCANCEL 0x00001000
#define GLR_TRYFAILED 13
#define GLR_CANCELED 14
/*
 * Returns 1 if the glock's holder list contains a holder whose owner is
 * the current task, 0 otherwise. The list is walked under gl_spin.
 */
static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
{
	struct gfs2_holder *holder;
	int found = 0;

	spin_lock(&gl->gl_spin);
	list_for_each_entry(holder, &gl->gl_holders, gh_list) {
		if (holder->gh_owner != current)
			continue;
		found = 1;
		break;
	}
	spin_unlock(&gl->gl_spin);

	return found;
}
/* Returns 1 if the glock's state is LM_ST_EXCLUSIVE, 0 otherwise. */
static inline int gfs2_glock_is_held_excl(struct gfs2_glock *gl)
{
	if (gl->gl_state != LM_ST_EXCLUSIVE)
		return 0;
	return 1;
}
/* Returns 1 if the glock's state is LM_ST_DEFERRED, 0 otherwise. */
static inline int gfs2_glock_is_held_dfrd(struct gfs2_glock *gl)
{
	if (gl->gl_state != LM_ST_DEFERRED)
		return 0;
	return 1;
}
/* Returns 1 if the glock's state is LM_ST_SHARED, 0 otherwise. */
static inline int gfs2_glock_is_held_shrd(struct gfs2_glock *gl)
{
	if (gl->gl_state != LM_ST_SHARED)
		return 0;
	return 1;
}
/*
 * Returns 1 if either gl_waiters2 or gl_waiters3 is non-empty, 0 if both
 * are empty. The lists are inspected under gl_spin.
 */
static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl)
{
	int blocking;

	spin_lock(&gl->gl_spin);
	blocking = !(list_empty(&gl->gl_waiters2) &&
		     list_empty(&gl->gl_waiters3));
	spin_unlock(&gl->gl_spin);

	return blocking;
}
int gfs2_glock_get(struct gfs2_sbd *sdp,
u64 number, const struct gfs2_glock_operations *glops,
int create, struct gfs2_glock **glp);
void gfs2_glock_hold(struct gfs2_glock *gl);
int gfs2_glock_put(struct gfs2_glock *gl);
void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_reinit(unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq(struct gfs2_holder *gh);
int gfs2_glock_poll(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
void gfs2_glock_dq(struct gfs2_holder *gh);
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
u64 number, const struct gfs2_glock_operations *glops,
unsigned int state, int flags, struct gfs2_holder *gh);
int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
/**
 * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * If enqueueing fails, the holder is uninitialized again before returning.
 *
 * Returns: 0, GLR_*, or errno
 */
static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
				     unsigned int state, int flags,
				     struct gfs2_holder *gh)
{
	int rc;

	gfs2_holder_init(gl, state, flags, gh);

	rc = gfs2_glock_nq(gh);
	if (rc == 0)
		return 0;

	/* Any non-zero result undoes the holder initialization */
	gfs2_holder_uninit(gh);
	return rc;
}
/* Lock Value Block functions */
int gfs2_lvb_hold(struct gfs2_glock *gl);
void gfs2_lvb_unhold(struct gfs2_glock *gl);
void gfs2_glock_cb(void *cb_data, unsigned int type, void *data);
void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
void gfs2_scand_internal(struct gfs2_sbd *sdp);
void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait);
int __init gfs2_glock_init(void);
#endif /* __GLOCK_DOT_H__ */

497
fs/gfs2/glops.c Normal file
View File

@@ -0,0 +1,497 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "util.h"
#include "trans.h"
/**
 * gfs2_ail_empty_gl - remove all buffers for a given lock from the AIL
 * @gl: the glock
 *
 * Returns immediately when gl->gl_ail_count is zero.  Otherwise a
 * transaction is opened (gfs2_trans_begin() is passed 0 and the current
 * AIL count; presumably this reserves one revoke per buffer - confirm
 * against trans.c), every buffer on gl->gl_ail_list is unlinked and a
 * revoke is queued for its block number, then the transaction is ended
 * and the log is flushed.
 *
 * None of the buffers should be dirty, locked, or pinned.
 */
static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
unsigned int blocks;
struct list_head *head = &gl->gl_ail_list;
struct gfs2_bufdata *bd;
struct buffer_head *bh;
u64 blkno;
int error;
blocks = atomic_read(&gl->gl_ail_count);
if (!blocks)
return;
error = gfs2_trans_begin(sdp, 0, blocks);
/* A failure to start the transaction is treated as an assertion failure. */
if (gfs2_assert_withdraw(sdp, !error))
return;
gfs2_log_lock(sdp);
/* Always take the first entry; each pass unlinks it, so the list shrinks. */
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata,
bd_ail_gl_list);
bh = bd->bd_bh;
/* Remember the block number now: the buffer reference is dropped below. */
blkno = bh->b_blocknr;
gfs2_assert_withdraw(sdp, !buffer_busy(bh));
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&gl->gl_ail_count);
brelse(bh);
/*
 * The log lock is released around gfs2_trans_add_revoke() and retaken
 * before the list is examined again.
 */
gfs2_log_unlock(sdp);
gfs2_trans_add_revoke(sdp, blkno);
gfs2_log_lock(sdp);
}
/* Every entry was removed, so the counter must have reached zero. */
gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
gfs2_log_unlock(sdp);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
/**
 * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock
 * @gl: the glock
 *
 * Does nothing unless the glock has an inode attached (gl->gl_object),
 * that inode is a regular file and GIF_PAGED is set in its flags.
 * Otherwise the shared mappings of the inode are unmapped; if
 * GIF_SW_PAGED was set the glock is marked GLF_DIRTY, and GIF_SW_PAGED
 * is cleared in any case.
 */
static void gfs2_pte_inval(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip = gl->gl_object;
	struct inode *inode;

	/*
	 * Check for a missing inode before forming a pointer into it;
	 * taking the address of a member through a NULL pointer only works
	 * by accident of i_inode being the first member.
	 */
	if (!ip)
		return;

	inode = &ip->i_inode;
	if (!S_ISREG(inode->i_mode))
		return;

	if (!test_bit(GIF_PAGED, &ip->i_flags))
		return;

	unmap_shared_mapping_range(inode->i_mapping, 0, 0);

	if (test_bit(GIF_SW_PAGED, &ip->i_flags))
		set_bit(GLF_DIRTY, &gl->gl_flags);

	clear_bit(GIF_SW_PAGED, &ip->i_flags);
}
/**
 * meta_go_sync - sync out the metadata for this glock
 * @gl: the glock
 *
 * Called when demoting or unlocking an EX glock.  We must flush
 * to disk all dirty buffers/pages relating to this glock, and must
 * not return to caller to demote/unlock the glock until I/O is complete.
 *
 * Nothing is done unless the glock is held exclusively and GLF_DIRTY
 * was set; the dirty bit is cleared atomically as part of the test.
 */
static void meta_go_sync(struct gfs2_glock *gl)
{
	if (gl->gl_state != LM_ST_EXCLUSIVE)
		return;

	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
		return;

	gfs2_log_flush(gl->gl_sbd, gl);
	gfs2_meta_sync(gl);
	gfs2_ail_empty_gl(gl);
}
/**
 * meta_go_inval - invalidate the metadata for this glock
 * @gl: the glock
 * @flags: DIO_* flags; only DIO_METADATA is examined
 *
 * When DIO_METADATA is requested the glock's metadata is invalidated
 * and its version number (gl_vn) is bumped.  Otherwise this is a no-op.
 */
static void meta_go_inval(struct gfs2_glock *gl, int flags)
{
	if (flags & DIO_METADATA) {
		gfs2_meta_inval(gl);
		gl->gl_vn++;
	}
}
/**
* inode_go_sync - Sync the dirty data and/or metadata for an inode glock
* @gl: the glock protecting the inode
*
*/
static void inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gl->gl_object;
if (ip && !S_ISREG(ip->i_inode.i_mode))
ip = NULL;
if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
if (ip)
filemap_fdatawrite(ip->i_inode.i_mapping);
gfs2_meta_sync(gl);
if (ip) {
struct address_space *mapping = ip->i_inode.i_mapping;
int error = filemap_fdatawait(mapping);
if (error == -ENOSPC)
set_bit(AS_ENOSPC, &mapping->flags);
else if (error)
set_bit(AS_EIO, &mapping->flags);
}
clear_bit(GLF_DIRTY, &gl->gl_flags);
gfs2_ail_empty_gl(gl);
}
}
/**
 * inode_go_xmote_th - promote/demote a glock
 * @gl: the glock
 *
 * If the glock is currently held in any state, the inode's PTEs are
 * invalidated; if it is held exclusively, the inode is also synced.
 */
static void inode_go_xmote_th(struct gfs2_glock *gl)
{
	/* Nothing is cached for a glock that is not held. */
	if (gl->gl_state == LM_ST_UNLOCKED)
		return;

	gfs2_pte_inval(gl);

	if (gl->gl_state == LM_ST_EXCLUSIVE)
		inode_go_sync(gl);
}
/**
 * inode_go_xmote_bh - After promoting/demoting a glock
 * @gl: the glock
 *
 * Unless the glock ended up unlocked, or the requesting holder asked
 * for GL_SKIP, the block named by the glock number is read through
 * gfs2_meta_read() and the buffer is released again straight away.
 * A read error is ignored here.
 */
static void inode_go_xmote_bh(struct gfs2_glock *gl)
{
	struct gfs2_holder *req_gh = gl->gl_req_gh;
	struct buffer_head *bh;

	if (gl->gl_state == LM_ST_UNLOCKED)
		return;

	if (req_gh && (req_gh->gh_flags & GL_SKIP))
		return;

	if (gfs2_meta_read(gl, gl->gl_name.ln_number, 0, &bh) == 0)
		brelse(bh);
}
/**
 * inode_go_drop_th - unlock a glock
 * @gl: the glock
 *
 * Invoked from rq_demote().
 * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long)
 * is being purged from our node's glock cache; we're dropping lock.
 *
 * The inode's PTEs are always invalidated; a sync is only needed when
 * the glock is currently held exclusively.
 */
static void inode_go_drop_th(struct gfs2_glock *gl)
{
	gfs2_pte_inval(gl);

	if (gl->gl_state != LM_ST_EXCLUSIVE)
		return;

	inode_go_sync(gl);
}
/**
 * inode_go_inval - prepare an inode glock to be released
 * @gl: the glock
 * @flags: DIO_* flags; only DIO_METADATA is examined
 *
 * With DIO_METADATA the glock's metadata is invalidated and, if an
 * inode is attached, it is flagged GIF_INVALID.  Independently of
 * @flags, the page cache of an attached regular file is truncated and
 * its GIF_PAGED flag cleared.
 */
static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
	struct gfs2_inode *ip = gl->gl_object;

	if (flags & DIO_METADATA) {
		gfs2_meta_inval(gl);
		if (ip)
			set_bit(GIF_INVALID, &ip->i_flags);
	}

	if (!ip || !S_ISREG(ip->i_inode.i_mode))
		return;

	truncate_inode_pages(ip->i_inode.i_mapping, 0);
	clear_bit(GIF_PAGED, &ip->i_flags);
}
/**
 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
 * @gl: the glock
 *
 * Demotion is allowed when no inode is attached and the glock's address
 * space holds no pages, or, when local caching is off, once
 * gt_demote_secs seconds have passed since gl_stamp.
 *
 * Returns: 1 if it's ok
 */
static int inode_go_demote_ok(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (!gl->gl_object && !gl->gl_aspace->i_mapping->nrpages)
		return 1;

	/* With local caching the age of the glock is never a reason. */
	if (sdp->sd_args.ar_localcaching)
		return 0;

	if (time_after_eq(jiffies, gl->gl_stamp +
			  gfs2_tune_get(sdp, gt_demote_secs) * HZ))
		return 1;

	return 0;
}
/**
 * inode_go_lock - operation done after an inode lock is locked by a process
 * @gh: the holder that acquired the glock
 *
 * Refreshes the in-core inode if it is flagged GIF_INVALID, and resumes
 * an interrupted truncate when the dinode carries
 * GFS2_DIF_TRUNC_IN_PROG and both the glock and the holder are in the
 * exclusive state.  Glocks without an attached inode need no work.
 *
 * Returns: errno
 */
static int inode_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_inode *ip = gl->gl_object;
	int error;

	if (!ip)
		return 0;

	if (test_bit(GIF_INVALID, &ip->i_flags)) {
		error = gfs2_inode_refresh(ip);
		if (error)
			return error;
	}

	if (!(ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG))
		return 0;

	if (gl->gl_state != LM_ST_EXCLUSIVE ||
	    gh->gh_state != LM_ST_EXCLUSIVE)
		return 0;

	return gfs2_truncatei_resume(ip);
}
/**
 * inode_go_unlock - operation done before an inode lock is unlocked by a
 *		     process
 * @gh: the holder that is releasing the glock
 *
 * Flushes the inode's metadata buffer cache, if an inode is attached
 * to the glock.
 */
static void inode_go_unlock(struct gfs2_holder *gh)
{
	struct gfs2_inode *ip = gh->gh_gl->gl_object;

	if (!ip)
		return;

	gfs2_meta_cache_flush(ip);
}
/**
 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
 * @gl: the glock
 *
 * The glock may be demoted once its address space holds no pages.
 *
 * Returns: 1 if it's ok
 */
static int rgrp_go_demote_ok(struct gfs2_glock *gl)
{
	return gl->gl_aspace->i_mapping->nrpages == 0;
}
/**
* rgrp_go_lock - operation done after an rgrp lock is locked by
* a first holder on this node.
* @gl: the glock
* @flags:
*
* Returns: errno
*/
static int rgrp_go_lock(struct gfs2_holder *gh)
{
return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
}
/**
* rgrp_go_unlock - operation done before an rgrp lock is unlocked by
* a last holder on this node.
* @gl: the glock
* @flags:
*
*/
static void rgrp_go_unlock(struct gfs2_holder *gh)
{
gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
}
/**
 * trans_go_xmote_th - promote/demote the transaction glock
 * @gl: the glock
 *
 * If the glock is currently held and the journal is live, all metadata
 * is synced and the log is shut down.
 */
static void trans_go_xmote_th(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (gl->gl_state == LM_ST_UNLOCKED)
		return;

	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
		return;

	gfs2_meta_syncfs(sdp);
	gfs2_log_shutdown(sdp);
}
/**
 * trans_go_xmote_bh - After promoting/demoting the transaction glock
 * @gl: the glock
 *
 * When the glock ended up held and the journal is live, the cached
 * metadata of this node's journal inode is dropped, the journal head is
 * located again and the in-core log sequence and pointers are
 * re-initialized from it.  A journal head that cannot be found, or one
 * that is not flagged GFS2_LOG_HEAD_UNMOUNT, is reported through
 * gfs2_consist().
 */
static void trans_go_xmote_bh(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header_host head;
	int error;

	if (gl->gl_state == LM_ST_UNLOCKED ||
	    !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
		return;

	gfs2_meta_cache_flush(ip);
	j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);

	error = gfs2_find_jhead(sdp->sd_jdesc, &head);
	if (error) {
		/*
		 * Nothing visible here guarantees that head was filled in on
		 * failure, so do not inspect it or seed the log from it.
		 */
		gfs2_consist(sdp);
		return;
	}

	if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
		gfs2_consist(sdp);

	/* Initialize some head of the log stuff */
	if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) {
		sdp->sd_log_sequence = head.lh_sequence + 1;
		gfs2_log_pointers_init(sdp, head.lh_blkno);
	}
}
/**
 * trans_go_drop_th - unlock the transaction glock
 * @gl: the glock
 *
 * We want to sync the device even with localcaching.  Remember
 * that localcaching journal replay only marks buffers dirty.
 *
 * Nothing is done unless the journal is live.
 */
static void trans_go_drop_th(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;

	if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
		return;

	gfs2_meta_syncfs(sdp);
	gfs2_log_shutdown(sdp);
}
/**
 * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock
 * @gl: the glock
 *
 * The glock may be demoted while its LVB hold count is zero.
 *
 * Returns: 1 if it's ok
 */
static int quota_go_demote_ok(struct gfs2_glock *gl)
{
	return atomic_read(&gl->gl_lvb_count) == 0;
}
/*
 * Operations for LM_TYPE_META glocks: go_xmote_th and go_drop_th both
 * sync through meta_go_sync(); all other hooks are left unset.
 */
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_xmote_th = meta_go_sync,
.go_drop_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
/*
 * Operations for LM_TYPE_INODE glocks.  Every hook is provided by the
 * inode_go_*() functions in this file.
 */
const struct gfs2_glock_operations gfs2_inode_glops = {
.go_xmote_th = inode_go_xmote_th,
.go_xmote_bh = inode_go_xmote_bh,
.go_drop_th = inode_go_drop_th,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
.go_unlock = inode_go_unlock,
.go_type = LM_TYPE_INODE,
};
/*
 * Operations for LM_TYPE_RGRP (resource group) glocks.  Syncing and
 * invalidation reuse the generic meta_go_*() helpers; demote checks and
 * lock/unlock use the rgrp_go_*() functions.  go_xmote_bh is unset.
 */
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_xmote_th = meta_go_sync,
.go_drop_th = meta_go_sync,
.go_inval = meta_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
.go_unlock = rgrp_go_unlock,
.go_type = LM_TYPE_RGRP,
};
/*
 * Operations for the transaction glock.  Note that it is registered
 * with lock type LM_TYPE_NONDISK, the same type gfs2_nondisk_glops uses.
 */
const struct gfs2_glock_operations gfs2_trans_glops = {
.go_xmote_th = trans_go_xmote_th,
.go_xmote_bh = trans_go_xmote_bh,
.go_drop_th = trans_go_drop_th,
.go_type = LM_TYPE_NONDISK,
};
/* Operations for LM_TYPE_IOPEN glocks: no hooks, only the lock type. */
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_type = LM_TYPE_IOPEN,
};
/* Operations for LM_TYPE_FLOCK glocks: no hooks, only the lock type. */
const struct gfs2_glock_operations gfs2_flock_glops = {
.go_type = LM_TYPE_FLOCK,
};
/* Operations for LM_TYPE_NONDISK glocks: no hooks, only the lock type. */
const struct gfs2_glock_operations gfs2_nondisk_glops = {
.go_type = LM_TYPE_NONDISK,
};
/*
 * Operations for LM_TYPE_QUOTA glocks: only the demote check
 * (quota_go_demote_ok) is provided.
 */
const struct gfs2_glock_operations gfs2_quota_glops = {
.go_demote_ok = quota_go_demote_ok,
.go_type = LM_TYPE_QUOTA,
};
/* Operations for LM_TYPE_JOURNAL glocks: no hooks, only the lock type. */
const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
};

25
fs/gfs2/glops.h Normal file
View File

@@ -0,0 +1,25 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __GLOPS_DOT_H__
#define __GLOPS_DOT_H__
#include "incore.h"
extern const struct gfs2_glock_operations gfs2_meta_glops;
extern const struct gfs2_glock_operations gfs2_inode_glops;
extern const struct gfs2_glock_operations gfs2_rgrp_glops;
extern const struct gfs2_glock_operations gfs2_trans_glops;
extern const struct gfs2_glock_operations gfs2_iopen_glops;
extern const struct gfs2_glock_operations gfs2_flock_glops;
extern const struct gfs2_glock_operations gfs2_nondisk_glops;
extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
#endif /* __GLOPS_DOT_H__ */

617
fs/gfs2/incore.h Normal file
View File

@@ -0,0 +1,617 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __INCORE_DOT_H__
#define __INCORE_DOT_H__
#include <linux/fs.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
#define DIO_ALL 0x00000100
struct gfs2_log_operations;
struct gfs2_log_element;
struct gfs2_holder;
struct gfs2_glock;
struct gfs2_quota_data;
struct gfs2_trans;
struct gfs2_ail;
struct gfs2_jdesc;
struct gfs2_sbd;
typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret);
/*
* Structure of operations that are associated with each
* type of element in the log.
*/
struct gfs2_log_operations {
void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le);
void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr);
void (*lo_before_commit) (struct gfs2_sbd *sdp);
void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai);
void (*lo_before_scan) (struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head, int pass);
int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start,
struct gfs2_log_descriptor *ld, __be64 *ptr,
int pass);
void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass);
const char *lo_name;
};
struct gfs2_log_element {
struct list_head le_list;
const struct gfs2_log_operations *le_ops;
};
struct gfs2_bitmap {
struct buffer_head *bi_bh;
char *bi_clone;
u32 bi_offset;
u32 bi_start;
u32 bi_len;
};
struct gfs2_rgrpd {
struct list_head rd_list; /* Link with superblock */
struct list_head rd_list_mru;
struct list_head rd_recent; /* Recently used rgrps */
struct gfs2_glock *rd_gl; /* Glock for this rgrp */
struct gfs2_rindex_host rd_ri;
struct gfs2_rgrp_host rd_rg;
u64 rd_rg_vn;
struct gfs2_bitmap *rd_bits;
unsigned int rd_bh_count;
struct mutex rd_mutex;
u32 rd_free_clone;
struct gfs2_log_element rd_le;
u32 rd_last_alloc_data;
u32 rd_last_alloc_meta;
struct gfs2_sbd *rd_sbd;
};
enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
};
BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)
/*
 * Per-buffer bookkeeping that ties a buffer_head to a glock, the log
 * and the AIL lists.
 */
struct gfs2_bufdata {
struct buffer_head *bd_bh; /* the buffer this record describes */
struct gfs2_glock *bd_gl; /* presumably the glock covering the buffer - confirm */
struct list_head bd_list_tr;
struct gfs2_log_element bd_le;
struct gfs2_ail *bd_ail; /* set to NULL when the buffer leaves the AIL */
struct list_head bd_ail_st_list;
struct list_head bd_ail_gl_list; /* link on the glock's gl_ail_list */
};
/*
 * Per-lock-type hooks for a glock.  The tables in glops.c leave hooks
 * they do not need unset, so any of the function pointers may be NULL.
 */
struct gfs2_glock_operations {
/* run when a glock is about to be promoted/demoted */
void (*go_xmote_th) (struct gfs2_glock *gl);
/* run after a glock has been promoted/demoted */
void (*go_xmote_bh) (struct gfs2_glock *gl);
/* run when a glock is about to be unlocked (dropped) */
void (*go_drop_th) (struct gfs2_glock *gl);
/* invalidate cached state; flags are DIO_* values */
void (*go_inval) (struct gfs2_glock *gl, int flags);
/* returns 1 if it is ok to unlock the glock */
int (*go_demote_ok) (struct gfs2_glock *gl);
/* run after the lock has been acquired by a holder; returns errno */
int (*go_lock) (struct gfs2_holder *gh);
/* run before the lock is released by a holder */
void (*go_unlock) (struct gfs2_holder *gh);
/* lock type, one of the LM_TYPE_* values */
const int go_type;
};
enum {
/* Actions */
HIF_MUTEX = 0,
HIF_PROMOTE = 1,
HIF_DEMOTE = 2,
/* States */
HIF_ALLOCED = 4,
HIF_DEALLOC = 5,
HIF_HOLDER = 6,
HIF_FIRST = 7,
HIF_ABORTED = 9,
HIF_WAIT = 10,
};
struct gfs2_holder {
struct list_head gh_list;
struct gfs2_glock *gh_gl;
struct task_struct *gh_owner;
unsigned int gh_state;
unsigned gh_flags;
int gh_error;
unsigned long gh_iflags;
unsigned long gh_ip;
};
enum {
GLF_LOCK = 1,
GLF_STICKY = 2,
GLF_DIRTY = 5,
GLF_SKIP_WAITERS2 = 6,
};
struct gfs2_glock {
struct hlist_node gl_list;
unsigned long gl_flags; /* GLF_... */
struct lm_lockname gl_name;
atomic_t gl_ref;
spinlock_t gl_spin;
unsigned int gl_state;
unsigned int gl_hash;
struct task_struct *gl_owner;
unsigned long gl_ip;
struct list_head gl_holders;
struct list_head gl_waiters1; /* HIF_MUTEX */
struct list_head gl_waiters2; /* HIF_DEMOTE */
struct list_head gl_waiters3; /* HIF_PROMOTE */
const struct gfs2_glock_operations *gl_ops;
struct gfs2_holder *gl_req_gh;
gfs2_glop_bh_t gl_req_bh;
void *gl_lock;
char *gl_lvb;
atomic_t gl_lvb_count;
u64 gl_vn;
unsigned long gl_stamp;
void *gl_object;
struct list_head gl_reclaim;
struct gfs2_sbd *gl_sbd;
struct inode *gl_aspace;
struct gfs2_log_element gl_le;
struct list_head gl_ail_list;
atomic_t gl_ail_count;
};
struct gfs2_alloc {
/* Quota stuff */
struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
unsigned int al_qd_num;
u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
u32 al_alloced; /* Filled in by gfs2_alloc_*() */
/* Filled in by gfs2_inplace_reserve() */
unsigned int al_line;
char *al_file;
struct gfs2_holder al_ri_gh;
struct gfs2_holder al_rgd_gh;
struct gfs2_rgrpd *al_rgd;
};
/* Bit numbers for gfs2_inode.i_flags (used with test_bit/set_bit/clear_bit). */
enum {
/* set by inode_go_inval(); inode_go_lock() refreshes the inode when it sees it */
GIF_INVALID = 0,
GIF_QD_LOCKED = 1,
/* checked by gfs2_pte_inval(), cleared by inode_go_inval() */
GIF_PAGED = 2,
/* if set when PTEs are invalidated, the glock is marked GLF_DIRTY */
GIF_SW_PAGED = 3,
};
struct gfs2_inode {
struct inode i_inode;
struct gfs2_inum_host i_num;
unsigned long i_flags; /* GIF_... */
struct gfs2_dinode_host i_di; /* To be replaced by ref to block */
struct gfs2_glock *i_gl; /* Move into i_gh? */
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_alloc i_alloc;
u64 i_last_rg_alloc;
spinlock_t i_spin;
struct rw_semaphore i_rw_mutex;
unsigned long i_last_pfault;
struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
};
/*
 * Map a VFS inode to the gfs2_inode that embeds it.  i_inode is the
 * first member of struct gfs2_inode, so this is effectively a cast.
 */
static inline struct gfs2_inode *GFS2_I(struct inode *inode)
{
	struct gfs2_inode *ip;

	ip = container_of(inode, struct gfs2_inode, i_inode);
	return ip;
}
/* To be removed? */
static inline struct gfs2_sbd *GFS2_SB(struct inode *inode)
{
return inode->i_sb->s_fs_info;
}
enum {
GFF_DID_DIRECT_ALLOC = 0,
GFF_EXLOCK = 1,
};
struct gfs2_file {
unsigned long f_flags; /* GFF_... */
struct mutex f_fl_mutex;
struct gfs2_holder f_fl_gh;
};
struct gfs2_revoke {
struct gfs2_log_element rv_le;
u64 rv_blkno;
};
struct gfs2_revoke_replay {
struct list_head rr_list;
u64 rr_blkno;
unsigned int rr_where;
};
enum {
QDF_USER = 0,
QDF_CHANGE = 1,
QDF_LOCKED = 2,
};
struct gfs2_quota_lvb {
__be32 qb_magic;
u32 __pad;
__be64 qb_limit; /* Hard limit of # blocks to alloc */
__be64 qb_warn; /* Warn user when alloc is above this # */
__be64 qb_value; /* Current # blocks allocated */
};
struct gfs2_quota_data {
struct list_head qd_list;
unsigned int qd_count;
u32 qd_id;
unsigned long qd_flags; /* QDF_... */
s64 qd_change;
s64 qd_change_sync;
unsigned int qd_slot;
unsigned int qd_slot_count;
struct buffer_head *qd_bh;
struct gfs2_quota_change *qd_bh_qc;
unsigned int qd_bh_count;
struct gfs2_glock *qd_gl;
struct gfs2_quota_lvb qd_qb;
u64 qd_sync_gen;
unsigned long qd_last_warn;
unsigned long qd_last_touched;
};
struct gfs2_log_buf {
struct list_head lb_list;
struct buffer_head *lb_bh;
struct buffer_head *lb_real;
};
struct gfs2_trans {
unsigned long tr_ip;
unsigned int tr_blocks;
unsigned int tr_revokes;
unsigned int tr_reserved;
struct gfs2_holder tr_t_gh;
int tr_touched;
unsigned int tr_num_buf;
unsigned int tr_num_buf_new;
unsigned int tr_num_buf_rm;
struct list_head tr_list_buf;
unsigned int tr_num_revoke;
unsigned int tr_num_revoke_rm;
};
struct gfs2_ail {
struct list_head ai_list;
unsigned int ai_first;
struct list_head ai_ail1_list;
struct list_head ai_ail2_list;
u64 ai_sync_gen;
};
struct gfs2_jdesc {
struct list_head jd_list;
struct inode *jd_inode;
unsigned int jd_jid;
int jd_dirty;
unsigned int jd_blocks;
};
#define GFS2_GLOCKD_DEFAULT 1
#define GFS2_GLOCKD_MAX 16
#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF
#define GFS2_QUOTA_OFF 0
#define GFS2_QUOTA_ACCOUNT 1
#define GFS2_QUOTA_ON 2
#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK 1
#define GFS2_DATA_ORDERED 2
struct gfs2_args {
char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */
int ar_spectator; /* Don't get a journal because we're always RO */
int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */
int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */
int ar_localcaching; /* Local-style caching (dangerous on multihost) */
int ar_debug; /* Oops on errors instead of trying to be graceful */
int ar_upgrade; /* Upgrade ondisk/multihost format */
unsigned int ar_num_glockd; /* Number of glockd threads */
int ar_posix_acl; /* Enable posix acls */
int ar_quota; /* off/account/on */
int ar_suiddir; /* suiddir support */
int ar_data; /* ordered/writeback */
};
struct gfs2_tune {
spinlock_t gt_spin;
unsigned int gt_ilimit;
unsigned int gt_ilimit_tries;
unsigned int gt_ilimit_min;
unsigned int gt_demote_secs; /* Cache retention for unheld glock */
unsigned int gt_incore_log_blocks;
unsigned int gt_log_flush_secs;
unsigned int gt_jindex_refresh_secs; /* Check for new journal index */
unsigned int gt_scand_secs;
unsigned int gt_recoverd_secs;
unsigned int gt_logd_secs;
unsigned int gt_quotad_secs;
unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */
unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */
unsigned int gt_quota_scale_num; /* Numerator */
unsigned int gt_quota_scale_den; /* Denominator */
unsigned int gt_quota_cache_secs;
unsigned int gt_quota_quantum; /* Secs between syncs to quota file */
unsigned int gt_atime_quantum; /* Min secs between atime updates */
unsigned int gt_new_files_jdata;
unsigned int gt_new_files_directio;
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_lockdump_size;
unsigned int gt_stall_secs; /* Detects trouble! */
unsigned int gt_complain_secs;
unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
};
/* Bit numbers for gfs2_sbd.sd_flags (used with test_bit/test_and_set_bit). */
enum {
SDF_JOURNAL_CHECKED = 0,
/* the transaction glock hooks only act while this is set */
SDF_JOURNAL_LIVE = 1,
/* set by gfs2_lm_withdraw(); the gfs2_lm_*() wrappers skip the lock module once set */
SDF_SHUTDOWN = 2,
SDF_NOATIME = 3,
};
#define GFS2_FSNAME_LEN 256
struct gfs2_sbd {
struct super_block *sd_vfs;
struct super_block *sd_vfs_meta;
struct kobject sd_kobj;
unsigned long sd_flags; /* SDF_... */
struct gfs2_sb_host sd_sb;
/* Constants computed on mount */
u32 sd_fsb2bb;
u32 sd_fsb2bb_shift;
u32 sd_diptrs; /* Number of pointers in a dinode */
u32 sd_inptrs; /* Number of pointers in a indirect block */
u32 sd_jbsize; /* Size of a journaled data block */
u32 sd_hash_bsize; /* sizeof(exhash block) */
u32 sd_hash_bsize_shift;
u32 sd_hash_ptrs; /* Number of pointers in a hash block */
u32 sd_qc_per_block;
u32 sd_max_dirres; /* Max blocks needed to add a directory entry */
u32 sd_max_height; /* Max height of a file's metadata tree */
u64 sd_heightsize[GFS2_MAX_META_HEIGHT];
u32 sd_max_jheight; /* Max height of journaled file's meta tree */
u64 sd_jheightsize[GFS2_MAX_META_HEIGHT];
struct gfs2_args sd_args; /* Mount arguments */
struct gfs2_tune sd_tune; /* Filesystem tuning structure */
/* Lock Stuff */
struct lm_lockstruct sd_lockstruct;
struct list_head sd_reclaim_list;
spinlock_t sd_reclaim_lock;
wait_queue_head_t sd_reclaim_wq;
atomic_t sd_reclaim_count;
struct gfs2_holder sd_live_gh;
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_trans_gl;
/* Inode Stuff */
struct inode *sd_master_dir;
struct inode *sd_jindex;
struct inode *sd_inum_inode;
struct inode *sd_statfs_inode;
struct inode *sd_ir_inode;
struct inode *sd_sc_inode;
struct inode *sd_qc_inode;
struct inode *sd_rindex;
struct inode *sd_quota_inode;
/* Inum stuff */
struct mutex sd_inum_mutex;
/* StatFS stuff */
spinlock_t sd_statfs_spin;
struct mutex sd_statfs_mutex;
struct gfs2_statfs_change_host sd_statfs_master;
struct gfs2_statfs_change_host sd_statfs_local;
unsigned long sd_statfs_sync_time;
/* Resource group stuff */
u64 sd_rindex_vn;
spinlock_t sd_rindex_spin;
struct mutex sd_rindex_mutex;
struct list_head sd_rindex_list;
struct list_head sd_rindex_mru_list;
struct list_head sd_rindex_recent_list;
struct gfs2_rgrpd *sd_rindex_forward;
unsigned int sd_rgrps;
/* Journal index stuff */
struct list_head sd_jindex_list;
spinlock_t sd_jindex_spin;
struct mutex sd_jindex_mutex;
unsigned int sd_journals;
unsigned long sd_jindex_refresh_time;
struct gfs2_jdesc *sd_jdesc;
struct gfs2_holder sd_journal_gh;
struct gfs2_holder sd_jinode_gh;
struct gfs2_holder sd_ir_gh;
struct gfs2_holder sd_sc_gh;
struct gfs2_holder sd_qc_gh;
/* Daemon stuff */
struct task_struct *sd_scand_process;
struct task_struct *sd_recoverd_process;
struct task_struct *sd_logd_process;
struct task_struct *sd_quotad_process;
struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX];
unsigned int sd_glockd_num;
/* Quota stuff */
struct list_head sd_quota_list;
atomic_t sd_quota_count;
spinlock_t sd_quota_spin;
struct mutex sd_quota_mutex;
unsigned int sd_quota_slots;
unsigned int sd_quota_chunks;
unsigned char **sd_quota_bitmap;
u64 sd_quota_sync_gen;
unsigned long sd_quota_sync_time;
/* Log stuff */
spinlock_t sd_log_lock;
unsigned int sd_log_blks_reserved;
unsigned int sd_log_commited_buf;
unsigned int sd_log_commited_revoke;
unsigned int sd_log_num_gl;
unsigned int sd_log_num_buf;
unsigned int sd_log_num_revoke;
unsigned int sd_log_num_rg;
unsigned int sd_log_num_databuf;
unsigned int sd_log_num_jdata;
unsigned int sd_log_num_hdrs;
struct list_head sd_log_le_gl;
struct list_head sd_log_le_buf;
struct list_head sd_log_le_revoke;
struct list_head sd_log_le_rg;
struct list_head sd_log_le_databuf;
unsigned int sd_log_blks_free;
struct mutex sd_log_reserve_mutex;
u64 sd_log_sequence;
unsigned int sd_log_head;
unsigned int sd_log_tail;
int sd_log_idle;
unsigned long sd_log_flush_time;
struct rw_semaphore sd_log_flush_lock;
struct list_head sd_log_flush_list;
unsigned int sd_log_flush_head;
u64 sd_log_flush_wrapped;
struct list_head sd_ail1_list;
struct list_head sd_ail2_list;
u64 sd_ail_sync_gen;
/* Replay stuff */
struct list_head sd_revoke_list;
unsigned int sd_replay_tail;
unsigned int sd_found_blocks;
unsigned int sd_found_revokes;
unsigned int sd_replayed_blocks;
/* For quiescing the filesystem */
struct gfs2_holder sd_freeze_gh;
struct mutex sd_freeze_lock;
unsigned int sd_freeze_count;
/* Counters */
atomic_t sd_glock_count;
atomic_t sd_glock_held_count;
atomic_t sd_inode_count;
atomic_t sd_reclaimed;
char sd_fsname[GFS2_FSNAME_LEN];
char sd_table_name[GFS2_FSNAME_LEN];
char sd_proto_name[GFS2_FSNAME_LEN];
/* Debugging crud */
unsigned long sd_last_warning;
struct vfsmount *sd_gfs2mnt;
};
#endif /* __INCORE_DOT_H__ */

1254
fs/gfs2/inode.c Normal file

File diff suppressed because it is too large Load Diff

58
fs/gfs2/inode.h Normal file
View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __INODE_DOT_H__
#define __INODE_DOT_H__
/* Returns 1 when the dinode's metadata height is zero, 0 otherwise. */
static inline int gfs2_is_stuffed(struct gfs2_inode *ip)
{
	return ip->i_di.di_height == 0;
}
static inline int gfs2_is_jdata(struct gfs2_inode *ip)
{
return ip->i_di.di_flags & GFS2_DIF_JDATA;
}
static inline int gfs2_is_dir(struct gfs2_inode *ip)
{
return S_ISDIR(ip->i_inode.i_mode);
}
static inline void gfs2_set_inode_blocks(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
inode->i_blocks = ip->i_di.di_blocks <<
(GFS2_SB(inode)->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
}
void gfs2_inode_attr_in(struct gfs2_inode *ip);
struct inode *gfs2_inode_lookup(struct super_block *sb, struct gfs2_inum_host *inum, unsigned type);
struct inode *gfs2_ilookup(struct super_block *sb, struct gfs2_inum_host *inum);
int gfs2_inode_refresh(struct gfs2_inode *ip);
int gfs2_dinode_dealloc(struct gfs2_inode *inode);
int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root, struct nameidata *nd);
struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
unsigned int mode, dev_t dev);
int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip);
int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
struct gfs2_inode *ip);
int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to);
int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len);
int gfs2_glock_nq_atime(struct gfs2_holder *gh);
int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
#endif /* __INODE_DOT_H__ */

210
fs/gfs2/lm.c Normal file
View File

@@ -0,0 +1,210 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "lm.h"
#include "super.h"
#include "util.h"
/**
* gfs2_lm_mount - mount a locking protocol
* @sdp: the filesystem
* @args: mount arguements
* @silent: if 1, don't complain if the FS isn't a GFS2 fs
*
* Returns: errno
*/
int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
{
char *proto = sdp->sd_proto_name;
char *table = sdp->sd_table_name;
int flags = 0;
int error;
if (sdp->sd_args.ar_spectator)
flags |= LM_MFLAG_SPECTATOR;
fs_info(sdp, "Trying to join cluster \"%s\", \"%s\"\n", proto, table);
error = gfs2_mount_lockproto(proto, table, sdp->sd_args.ar_hostdata,
gfs2_glock_cb, sdp,
GFS2_MIN_LVB_SIZE, flags,
&sdp->sd_lockstruct, &sdp->sd_kobj);
if (error) {
fs_info(sdp, "can't mount proto=%s, table=%s, hostdata=%s\n",
proto, table, sdp->sd_args.ar_hostdata);
goto out;
}
if (gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lockspace) ||
gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_ops) ||
gfs2_assert_warn(sdp, sdp->sd_lockstruct.ls_lvb_size >=
GFS2_MIN_LVB_SIZE)) {
gfs2_unmount_lockproto(&sdp->sd_lockstruct);
goto out;
}
if (sdp->sd_args.ar_spectator)
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.s", table);
else
snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s.%u", table,
sdp->sd_lockstruct.ls_jid);
fs_info(sdp, "Joined cluster. Now mounting FS...\n");
if ((sdp->sd_lockstruct.ls_flags & LM_LSFLAG_LOCAL) &&
!sdp->sd_args.ar_ignore_local_fs) {
sdp->sd_args.ar_localflocks = 1;
sdp->sd_args.ar_localcaching = 1;
}
out:
return error;
}
void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp)
{
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
sdp->sd_lockstruct.ls_ops->lm_others_may_mount(
sdp->sd_lockstruct.ls_lockspace);
}
/*
 * Unmount the lock protocol, unless the filesystem has already been
 * shut down (SDF_SHUTDOWN).
 */
void gfs2_lm_unmount(struct gfs2_sbd *sdp)
{
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return;

	gfs2_unmount_lockproto(&sdp->sd_lockstruct);
}
/*
 * Withdraw this filesystem from the cluster.
 *
 * Only the first caller does any work: SDF_SHUTDOWN is set atomically
 * and later callers return 0 at once.  The first caller prints the
 * supplied printk-style message, hits BUG_ON() if the ar_debug mount
 * argument is set, asks the lock module to withdraw, dumps the stack
 * and returns -1.
 */
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
{
	va_list ap;

	if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
		return 0;

	va_start(ap, fmt);
	vprintk(fmt, ap);
	va_end(ap);

	fs_err(sdp, "about to withdraw this file system\n");
	BUG_ON(sdp->sd_args.ar_debug);

	fs_err(sdp, "telling LM to withdraw\n");
	gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
	fs_err(sdp, "withdrawn\n");

	dump_stack();

	return -1;
}
int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
void **lockp)
{
int error = -EIO;
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = sdp->sd_lockstruct.ls_ops->lm_get_lock(
sdp->sd_lockstruct.ls_lockspace, name, lockp);
return error;
}
/*
 * Forward to the lock module's lm_put_lock() hook; a no-op once
 * SDF_SHUTDOWN is set.
 */
void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock)
{
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return;

	sdp->sd_lockstruct.ls_ops->lm_put_lock(lock);
}
/*
 * Forward a lock request to the lock module's lm_lock() hook.
 * Returns 0 without calling the module once SDF_SHUTDOWN is set.
 */
unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
			  unsigned int cur_state, unsigned int req_state,
			  unsigned int flags)
{
	int ret;

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return 0;

	ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock, cur_state,
						 req_state, flags);
	return ret;
}
/*
 * Forward an unlock request to the lock module's lm_unlock() hook.
 * Returns 0 without calling the module once SDF_SHUTDOWN is set.
 */
unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
			    unsigned int cur_state)
{
	int ret;

	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return 0;

	ret = sdp->sd_lockstruct.ls_ops->lm_unlock(lock, cur_state);
	return ret;
}
/*
 * Forward to the lock module's lm_cancel() hook; a no-op once
 * SDF_SHUTDOWN is set.
 */
void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock)
{
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return;

	sdp->sd_lockstruct.ls_ops->lm_cancel(lock);
}
/*
 * Forward to the lock module's lm_hold_lvb() hook.
 * Returns -EIO without calling the module once SDF_SHUTDOWN is set.
 */
int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp)
{
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
		return -EIO;

	return sdp->sd_lockstruct.ls_ops->lm_hold_lvb(lock, lvbp);
}
void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb)
{
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
sdp->sd_lockstruct.ls_ops->lm_unhold_lvb(lock, lvb);
}
int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
struct file *file, struct file_lock *fl)
{
int error = -EIO;
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = sdp->sd_lockstruct.ls_ops->lm_plock_get(
sdp->sd_lockstruct.ls_lockspace, name, file, fl);
return error;
}
int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
struct file *file, int cmd, struct file_lock *fl)
{
int error = -EIO;
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = sdp->sd_lockstruct.ls_ops->lm_plock(
sdp->sd_lockstruct.ls_lockspace, name, file, cmd, fl);
return error;
}
int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
struct file *file, struct file_lock *fl)
{
int error = -EIO;
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = sdp->sd_lockstruct.ls_ops->lm_punlock(
sdp->sd_lockstruct.ls_lockspace, name, file, fl);
return error;
}
void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
unsigned int message)
{
if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
sdp->sd_lockstruct.ls_ops->lm_recovery_done(
sdp->sd_lockstruct.ls_lockspace, jid, message);
}

42
fs/gfs2/lm.h Normal file
View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __LM_DOT_H__
#define __LM_DOT_H__
/* Only pointers to the superblock are used here, so a forward declaration
   is enough. */
struct gfs2_sbd;
#define GFS2_MIN_LVB_SIZE 32
/* Mount / unmount of the lock module for one file system. */
int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent);
void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp);
void gfs2_lm_unmount(struct gfs2_sbd *sdp);
/* Prints the printf-style message, then withdraws the file system. The
   format attribute lets the compiler check fmt against its arguments. */
int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
__attribute__ ((format(printf, 2, 3)));
/* Per-lock operations, forwarded to the mounted lock module's lm_lockops. */
int gfs2_lm_get_lock(struct gfs2_sbd *sdp, struct lm_lockname *name,
void **lockp);
void gfs2_lm_put_lock(struct gfs2_sbd *sdp, void *lock);
unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
unsigned int cur_state, unsigned int req_state,
unsigned int flags);
unsigned int gfs2_lm_unlock(struct gfs2_sbd *sdp, void *lock,
unsigned int cur_state);
void gfs2_lm_cancel(struct gfs2_sbd *sdp, void *lock);
/* Lock value block (LVB) hold / release. */
int gfs2_lm_hold_lvb(struct gfs2_sbd *sdp, void *lock, char **lvbp);
void gfs2_lm_unhold_lvb(struct gfs2_sbd *sdp, void *lock, char *lvb);
/* Posix (fcntl) lock operations. */
int gfs2_lm_plock_get(struct gfs2_sbd *sdp, struct lm_lockname *name,
struct file *file, struct file_lock *fl);
int gfs2_lm_plock(struct gfs2_sbd *sdp, struct lm_lockname *name,
struct file *file, int cmd, struct file_lock *fl);
int gfs2_lm_punlock(struct gfs2_sbd *sdp, struct lm_lockname *name,
struct file *file, struct file_lock *fl);
/* Journal recovery notification. */
void gfs2_lm_recovery_done(struct gfs2_sbd *sdp, unsigned int jid,
unsigned int message);
#endif /* __LM_DOT_H__ */

184
fs/gfs2/locking.c Normal file
View File

@@ -0,0 +1,184 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/kmod.h>
#include <linux/fs.h>
#include <linux/delay.h>
#include <linux/lm_interface.h>
/* One entry in the registry of lock protocols: links a protocol's
   operations table into lmh_list. */
struct lmh_wrapper {
struct list_head lw_list; /* link in lmh_list */
const struct lm_lockops *lw_ops; /* the registered protocol */
};
/* List of registered low-level locking protocols. A file system selects one
of them by name at mount time, e.g. lock_nolock, lock_dlm. */
static LIST_HEAD(lmh_list);
/* Held around every walk or change of lmh_list, and around the calls into a
   protocol's mount/unmount/withdraw operations. */
static DEFINE_MUTEX(lmh_lock);
/**
* gfs2_register_lockproto - Register a low-level locking protocol
* @proto: the protocol definition
*
* Returns: 0 on success, -EXXX on failure
*/
int gfs2_register_lockproto(const struct lm_lockops *proto)
{
struct lmh_wrapper *lw;
mutex_lock(&lmh_lock);
list_for_each_entry(lw, &lmh_list, lw_list) {
if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
mutex_unlock(&lmh_lock);
printk(KERN_INFO "GFS2: protocol %s already exists\n",
proto->lm_proto_name);
return -EEXIST;
}
}
lw = kzalloc(sizeof(struct lmh_wrapper), GFP_KERNEL);
if (!lw) {
mutex_unlock(&lmh_lock);
return -ENOMEM;
}
lw->lw_ops = proto;
list_add(&lw->lw_list, &lmh_list);
mutex_unlock(&lmh_lock);
return 0;
}
/**
* gfs2_unregister_lockproto - Unregister a low-level locking protocol
* @proto: the protocol definition
*
*/
void gfs2_unregister_lockproto(const struct lm_lockops *proto)
{
struct lmh_wrapper *lw;
mutex_lock(&lmh_lock);
list_for_each_entry(lw, &lmh_list, lw_list) {
if (!strcmp(lw->lw_ops->lm_proto_name, proto->lm_proto_name)) {
list_del(&lw->lw_list);
mutex_unlock(&lmh_lock);
kfree(lw);
return;
}
}
mutex_unlock(&lmh_lock);
printk(KERN_WARNING "GFS2: can't unregister lock protocol %s\n",
proto->lm_proto_name);
}
/**
 * gfs2_mount_lockproto - Mount a lock protocol
 * @proto_name: the name of the protocol
 * @table_name: the name of the lock space
 * @host_data: data specific to this host
 * @cb: the callback to the code using the lock module
 * @cb_data: caller data handed on to the protocol's lm_mount()
 * @min_lvb_size: the minimum LVB size that the caller can deal with
 * @flags: LM_MFLAG_*
 * @lockstruct: a structure returned describing the mount
 * @fskobj: kobject handed on to the protocol's lm_mount()
 *
 * Looks the protocol up by name in the registry. If it is not registered
 * and the caller has CAP_SYS_MODULE, one attempt is made to load a module
 * of that name before giving up. A reference on the protocol's owning
 * module is held for as long as the mount succeeds.
 *
 * Returns: 0 on success, -EXXX on failure
 */
int gfs2_mount_lockproto(char *proto_name, char *table_name, char *host_data,
			 lm_callback_t cb, void *cb_data,
			 unsigned int min_lvb_size, int flags,
			 struct lm_lockstruct *lockstruct,
			 struct kobject *fskobj)
{
	struct lmh_wrapper *lw = NULL;
	int try = 0;
	int error, found;

retry:
	mutex_lock(&lmh_lock);

	found = 0;
	list_for_each_entry(lw, &lmh_list, lw_list) {
		if (!strcmp(lw->lw_ops->lm_proto_name, proto_name)) {
			found = 1;
			break;
		}
	}

	if (!found) {
		if (!try && capable(CAP_SYS_MODULE)) {
			try = 1;
			mutex_unlock(&lmh_lock);
			/*
			 * request_module() takes a printf-style format, so
			 * the caller-supplied name must be passed as an
			 * argument and never as the format itself.
			 */
			request_module("%s", proto_name);
			goto retry;
		}
		printk(KERN_INFO "GFS2: can't find protocol %s\n", proto_name);
		error = -ENOENT;
		goto out;
	}

	if (!try_module_get(lw->lw_ops->lm_owner)) {
		/* Could not take a module reference: drop the mutex, wait a
		   second and start over, allowing a fresh module load. */
		try = 0;
		mutex_unlock(&lmh_lock);
		msleep(1000);
		goto retry;
	}

	error = lw->lw_ops->lm_mount(table_name, host_data, cb, cb_data,
				     min_lvb_size, flags, lockstruct, fskobj);
	if (error)
		module_put(lw->lw_ops->lm_owner);
out:
	mutex_unlock(&lmh_lock);
	return error;
}
/**
 * gfs2_unmount_lockproto - unmount a lock module
 * @lockstruct: the lockstruct passed into mount
 *
 * Calls the protocol's lm_unmount() and drops the reference on its owning
 * module, if it has one.
 */
void gfs2_unmount_lockproto(struct lm_lockstruct *lockstruct)
{
	struct module *owner;

	mutex_lock(&lmh_lock);

	lockstruct->ls_ops->lm_unmount(lockstruct->ls_lockspace);

	owner = lockstruct->ls_ops->lm_owner;
	if (owner)
		module_put(owner);

	mutex_unlock(&lmh_lock);
}
/**
 * gfs2_withdraw_lockproto - abnormally unmount a lock module
 * @lockstruct: the lockstruct passed into mount
 *
 * Calls the protocol's lm_withdraw() and drops the reference on its owning
 * module, if it has one.
 */
void gfs2_withdraw_lockproto(struct lm_lockstruct *lockstruct)
{
	struct module *owner;

	mutex_lock(&lmh_lock);

	lockstruct->ls_ops->lm_withdraw(lockstruct->ls_lockspace);

	owner = lockstruct->ls_ops->lm_owner;
	if (owner)
		module_put(owner);

	mutex_unlock(&lmh_lock);
}
EXPORT_SYMBOL_GPL(gfs2_register_lockproto);
EXPORT_SYMBOL_GPL(gfs2_unregister_lockproto);

View File

@@ -0,0 +1,3 @@
obj-$(CONFIG_GFS2_FS_LOCKING_DLM) += lock_dlm.o
lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o

524
fs/gfs2/locking/dlm/lock.c Normal file
View File

@@ -0,0 +1,524 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include "lock_dlm.h"
/* Shared value block given to the NL locks created by hold_null_lock().
   It is static storage, so gdlm_release_all_locks() must not kfree() it. */
static char junk_lvb[GDLM_LVB_SIZE];
/* Mark a request as no longer active, append the lock to the lockspace's
   "complete" list and wake whoever sleeps on thread_wait. */
static void queue_complete(struct gdlm_lock *lp)
{
	struct gdlm_ls *lockspace = lp->ls;

	clear_bit(LFL_ACTIVE, &lp->flags);

	spin_lock(&lockspace->async_lock);
	list_add_tail(&lp->clist, &lockspace->complete);
	spin_unlock(&lockspace->async_lock);

	wake_up(&lockspace->thread_wait);
}
/* Completion callback handed to dlm_lock(); astarg is the gdlm_lock that
   was submitted. */
static inline void gdlm_ast(void *astarg)
{
	struct gdlm_lock *lp = astarg;

	queue_complete(lp);
}
/* Blocking callback handed to dlm_lock(). Records the blocking mode on the
   lock and, the first time, puts the lock on the lockspace's "blocking"
   list. A later callback only overrides a recorded mode with a greater one. */
static inline void gdlm_bast(void *astarg, int mode)
{
	struct gdlm_lock *lp = astarg;
	struct gdlm_ls *lockspace = lp->ls;
	int first;

	if (mode == 0) {
		printk(KERN_INFO "lock_dlm: bast mode zero %x,%llx\n",
		       lp->lockname.ln_type,
		       (unsigned long long)lp->lockname.ln_number);
		return;
	}

	spin_lock(&lockspace->async_lock);
	first = (lp->bast_mode == 0);
	if (first)
		list_add_tail(&lp->blist, &lockspace->blocking);
	if (first || lp->bast_mode < mode)
		lp->bast_mode = mode;
	spin_unlock(&lockspace->async_lock);

	wake_up(&lockspace->thread_wait);
}
/* Park a lock request on the lockspace's "delayed" list. */
void gdlm_queue_delayed(struct gdlm_lock *lp)
{
	struct gdlm_ls *lockspace = lp->ls;

	spin_lock(&lockspace->async_lock);
	list_add_tail(&lp->delay_list, &lockspace->delayed);
	spin_unlock(&lockspace->async_lock);
}
/* Translate a gfs lock-state (LM_ST_*) into the matching dlm lock-mode
   (DLM_LOCK_*). An unknown state trips gdlm_assert(). */
static s16 make_mode(s16 lmstate)
{
	if (lmstate == LM_ST_UNLOCKED)
		return DLM_LOCK_NL;
	if (lmstate == LM_ST_EXCLUSIVE)
		return DLM_LOCK_EX;
	if (lmstate == LM_ST_DEFERRED)
		return DLM_LOCK_CW;
	if (lmstate == LM_ST_SHARED)
		return DLM_LOCK_PR;

	gdlm_assert(0, "unknown LM state %d", lmstate);
	return -1;
}
/* Translate a dlm lock-mode (DLM_LOCK_*) into the matching gfs lock-state
   (LM_ST_*). Both DLM_LOCK_IV and DLM_LOCK_NL map to LM_ST_UNLOCKED; an
   unknown mode trips gdlm_assert(). */
s16 gdlm_make_lmstate(s16 dlmmode)
{
	if (dlmmode == DLM_LOCK_IV || dlmmode == DLM_LOCK_NL)
		return LM_ST_UNLOCKED;
	if (dlmmode == DLM_LOCK_EX)
		return LM_ST_EXCLUSIVE;
	if (dlmmode == DLM_LOCK_CW)
		return LM_ST_DEFERRED;
	if (dlmmode == DLM_LOCK_PR)
		return LM_ST_SHARED;

	gdlm_assert(0, "unknown DLM mode %d", dlmmode);
	return -1;
}
/* Check that GFS and lock_dlm agree on the lock's current state. A lock
   still at DLM_LOCK_IV is not compared, because GFS treats both DLM_LOCK_NL
   and DLM_LOCK_IV as LM_ST_UNLOCKED. */
static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
{
	s16 cur = make_mode(cur_state);

	if (lp->cur == DLM_LOCK_IV)
		return;

	gdlm_assert(lp->cur == cur, "%d, %d", lp->cur, cur);
}
/* Build the DLM_LKF_* flag word for a request from the gfs LM_FLAG_* bits,
   the current and requested dlm modes and the state of the lock itself. */
static inline unsigned int make_flags(struct gdlm_lock *lp,
				      unsigned int gfs_flags,
				      s16 cur, s16 req)
{
	unsigned int lkf = 0;

	if (gfs_flags & LM_FLAG_TRY)
		lkf |= DLM_LKF_NOQUEUE;

	if (gfs_flags & LM_FLAG_TRY_1CB)
		lkf |= DLM_LKF_NOQUEUE | DLM_LKF_NOQUEUEBAST;

	if (gfs_flags & LM_FLAG_PRIORITY)
		lkf |= DLM_LKF_NOORDER | DLM_LKF_HEADQUE;

	if (gfs_flags & LM_FLAG_ANY) {
		/* Allow the alternate of PR/CW to be granted instead. */
		switch (req) {
		case DLM_LOCK_PR:
			lkf |= DLM_LKF_ALTCW;
			break;
		case DLM_LOCK_CW:
			lkf |= DLM_LKF_ALTPR;
			break;
		}
	}

	/* A lock that already has a dlm lock id is converted, not created. */
	if (lp->lksb.sb_lkid != 0) {
		int queued = !(lkf & DLM_LKF_NOQUEUE);
		int forced = test_bit(LFL_FORCE_PROMOTE, &lp->flags);

		lkf |= DLM_LKF_CONVERT;

		/* Conversion deadlock avoidance by DLM */
		if (!forced && queued &&
		    cur > DLM_LOCK_NL && req > DLM_LOCK_NL && cur != req)
			lkf |= DLM_LKF_CONVDEADLK;
	}

	if (lp->lvb)
		lkf |= DLM_LKF_VALBLK;

	return lkf;
}
/* make_strname - convert GFS lock numbers to a string
 *
 * The name is 8 hex characters of lock type followed by 16 hex characters
 * of lock number: exactly GDLM_STRNAME_BYTES characters with no terminating
 * NUL stored in str->name. The text is formatted into a local buffer one
 * byte larger so the formatter's terminating NUL never lands beyond
 * str->name, which has room for the characters only.
 */
static inline void make_strname(struct lm_lockname *lockname,
				struct gdlm_strname *str)
{
	char buf[GDLM_STRNAME_BYTES + 1];

	snprintf(buf, sizeof(buf), "%8x%16llx", lockname->ln_type,
		 (unsigned long long)lockname->ln_number);
	memcpy(str->name, buf, GDLM_STRNAME_BYTES);
	str->namelen = GDLM_STRNAME_BYTES;
}
/* Allocate and initialise a gdlm_lock for the given name and link it into
   the lockspace's list of all locks.
   Returns 0 with *lpp set, or -ENOMEM with *lpp untouched. */
static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
			  struct gdlm_lock **lpp)
{
	struct gdlm_lock *new_lp = kzalloc(sizeof(struct gdlm_lock), GFP_KERNEL);

	if (!new_lp)
		return -ENOMEM;

	new_lp->ls = ls;
	new_lp->lockname = *name;
	new_lp->cur = DLM_LOCK_IV;	/* no dlm lock held yet */
	new_lp->lvb = NULL;
	new_lp->hold_null = NULL;

	init_completion(&new_lp->ast_wait);
	INIT_LIST_HEAD(&new_lp->clist);
	INIT_LIST_HEAD(&new_lp->blist);
	INIT_LIST_HEAD(&new_lp->delay_list);

	spin_lock(&ls->async_lock);
	list_add(&new_lp->all_list, &ls->all_locks);
	ls->all_locks_count++;
	spin_unlock(&ls->async_lock);

	*lpp = new_lp;
	return 0;
}
/* Unlink a gdlm_lock from every lockspace list it may be on and free it.
   The lock must still be on the all_locks list, otherwise gdlm_assert()
   fires. The value block, if any, is not freed here. */
void gdlm_delete_lp(struct gdlm_lock *lp)
{
	struct gdlm_ls *ls = lp->ls;

	spin_lock(&ls->async_lock);

	if (!list_empty(&lp->clist))
		list_del_init(&lp->clist);
	if (!list_empty(&lp->blist))
		list_del_init(&lp->blist);
	if (!list_empty(&lp->delay_list))
		list_del_init(&lp->delay_list);

	gdlm_assert(!list_empty(&lp->all_list), "%x,%llx", lp->lockname.ln_type,
		    (unsigned long long)lp->lockname.ln_number);
	list_del_init(&lp->all_list);
	ls->all_locks_count--;

	spin_unlock(&ls->async_lock);

	kfree(lp);
}
int gdlm_get_lock(void *lockspace, struct lm_lockname *name,
void **lockp)
{
struct gdlm_lock *lp;
int error;
error = gdlm_create_lp(lockspace, name, &lp);
*lockp = lp;
return error;
}
/* lm_put_lock operation: dispose of a handle created by gdlm_get_lock(). */
void gdlm_put_lock(void *lock)
{
	struct gdlm_lock *lp = lock;

	gdlm_delete_lp(lp);
}
/* Submit the request already prepared in lp->req / lp->lkf to the dlm.
   Returns LM_OUT_ASYNC when the request was submitted or parked on the
   delayed list, and LM_OUT_ERROR when dlm_lock() failed. */
unsigned int gdlm_do_lock(struct gdlm_lock *lp)
{
struct gdlm_ls *ls = lp->ls;
struct gdlm_strname str;
int error, bast = 1;
/*
* When recovery is in progress, delay lock requests for submission
* once recovery is done. Requests for recovery (NOEXP) and unlocks
* can pass.
*/
if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
!test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
gdlm_queue_delayed(lp);
return LM_OUT_ASYNC;
}
/*
* Submit the actual lock request.
*/
/* LFL_NOBAST locks are submitted without a blocking callback. */
if (test_bit(LFL_NOBAST, &lp->flags))
bast = 0;
make_strname(&lp->lockname, &str);
set_bit(LFL_ACTIVE, &lp->flags);
log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number, lp->lksb.sb_lkid,
lp->cur, lp->req, lp->lkf);
error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
str.name, str.namelen, 0, gdlm_ast, lp,
bast ? gdlm_bast : NULL);
/* An immediate -EAGAIN on a NOQUEUE request is reported through the
   normal completion path, with the status stored in the lksb. */
if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
lp->lksb.sb_status = -EAGAIN;
queue_complete(lp);
error = 0;
}
if (error) {
log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x "
"flags=%lx", ls->fsname, lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number, error,
lp->cur, lp->req, lp->lkf, lp->flags);
return LM_OUT_ERROR;
}
return LM_OUT_ASYNC;
}
/* Submit a dlm unlock for the lock. Returns LM_OUT_ASYNC when dlm_unlock()
   accepted the request and LM_OUT_ERROR otherwise. */
static unsigned int gdlm_do_unlock(struct gdlm_lock *lp)
{
struct gdlm_ls *ls = lp->ls;
unsigned int lkf = 0;
int error;
/* Flag the request as an active unlock before handing it to the dlm;
   gdlm_cancel() tests these bits. */
set_bit(LFL_DLM_UNLOCK, &lp->flags);
set_bit(LFL_ACTIVE, &lp->flags);
/* Pass the value block along when the lock has one. */
if (lp->lvb)
lkf = DLM_LKF_VALBLK;
log_debug("un %x,%llx %x %d %x", lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number,
lp->lksb.sb_lkid, lp->cur, lkf);
error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp);
if (error) {
log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x "
"flags=%lx", ls->fsname, lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number, error,
lp->cur, lp->req, lp->lkf, lp->flags);
return LM_OUT_ERROR;
}
return LM_OUT_ASYNC;
}
/* lm_lock operation: translate a gfs lock request into dlm terms and
   submit it. Returns the result of gdlm_do_lock(). */
unsigned int gdlm_lock(void *lock, unsigned int cur_state,
		       unsigned int req_state, unsigned int flags)
{
	struct gdlm_lock *lp = lock;

	clear_bit(LFL_DLM_CANCEL, &lp->flags);

	/* NOEXP requests are not held back by DFL_BLOCK_LOCKS. */
	if (flags & LM_FLAG_NOEXP)
		set_bit(LFL_NOBLOCK, &lp->flags);

	check_cur_state(lp, cur_state);

	lp->req = make_mode(req_state);
	lp->lkf = make_flags(lp, flags, lp->cur, lp->req);

	return gdlm_do_lock(lp);
}
/* lm_unlock operation. A lock that never obtained a dlm mode
   (DLM_LOCK_IV) has nothing to release and yields 0; otherwise the result
   of gdlm_do_unlock() is returned. cur_state is not used. */
unsigned int gdlm_unlock(void *lock, unsigned int cur_state)
{
	struct gdlm_lock *lp = lock;

	clear_bit(LFL_DLM_CANCEL, &lp->flags);

	return (lp->cur == DLM_LOCK_IV) ? 0 : gdlm_do_unlock(lp);
}
/* lm_cancel operation: try to cancel an outstanding request on a lock.
   Three cases are handled: the request is still parked on the delayed
   list, there is nothing cancellable, or the request is inside the dlm. */
void gdlm_cancel(void *lock)
{
struct gdlm_lock *lp = lock;
struct gdlm_ls *ls = lp->ls;
int error, delay_list = 0;
/* A dlm cancel has already been issued for this lock. */
if (test_bit(LFL_DLM_CANCEL, &lp->flags))
return;
log_info("gdlm_cancel %x,%llx flags %lx", lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number, lp->flags);
spin_lock(&ls->async_lock);
if (!list_empty(&lp->delay_list)) {
list_del_init(&lp->delay_list);
delay_list = 1;
}
spin_unlock(&ls->async_lock);
/* The request never reached the dlm: mark it cancelled and push it
   through the completion path ourselves. */
if (delay_list) {
set_bit(LFL_CANCEL, &lp->flags);
set_bit(LFL_ACTIVE, &lp->flags);
queue_complete(lp);
return;
}
/* Nothing is in flight, or what is in flight is an unlock. */
if (!test_bit(LFL_ACTIVE, &lp->flags) ||
test_bit(LFL_DLM_UNLOCK, &lp->flags)) {
log_info("gdlm_cancel skip %x,%llx flags %lx",
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number, lp->flags);
return;
}
/* the lock is blocked in the dlm */
set_bit(LFL_DLM_CANCEL, &lp->flags);
set_bit(LFL_ACTIVE, &lp->flags);
error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
NULL, lp);
log_info("gdlm_cancel rv %d %x,%llx flags %lx", error,
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number, lp->flags);
/* On -EBUSY the flag is cleared again so a later call can retry. */
if (error == -EBUSY)
clear_bit(LFL_DLM_CANCEL, &lp->flags);
}
/* Allocate a zeroed value block of GDLM_LVB_SIZE bytes and attach it to
   both the lock and its lksb. Returns 0 or -ENOMEM. */
static int gdlm_add_lvb(struct gdlm_lock *lp)
{
	char *block = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);

	if (!block)
		return -ENOMEM;

	lp->lksb.sb_lvbptr = block;
	lp->lvb = block;

	return 0;
}
static void gdlm_del_lvb(struct gdlm_lock *lp)
{
kfree(lp->lvb);
lp->lvb = NULL;
lp->lksb.sb_lvbptr = NULL;
}
/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
the completion) because gfs won't call hold_lvb() during a callback (from
the context of a lock_dlm thread). */
static int hold_null_lock(struct gdlm_lock *lp)
{
struct gdlm_lock *lpn = NULL;
int error;
if (lp->hold_null) {
printk(KERN_INFO "lock_dlm: lvb already held\n");
return 0;
}
error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
if (error)
goto out;
lpn->lksb.sb_lvbptr = junk_lvb;
lpn->lvb = junk_lvb;
lpn->req = DLM_LOCK_NL;
lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
set_bit(LFL_NOBAST, &lpn->flags);
set_bit(LFL_INLOCK, &lpn->flags);
init_completion(&lpn->ast_wait);
gdlm_do_lock(lpn);
wait_for_completion(&lpn->ast_wait);
error = lpn->lksb.sb_status;
if (error) {
printk(KERN_INFO "lock_dlm: hold_null_lock dlm error %d\n",
error);
gdlm_delete_lp(lpn);
lpn = NULL;
}
out:
lp->hold_null = lpn;
return error;
}
/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
the completion) because gfs may call unhold_lvb() during a callback (from
the context of a lock_dlm thread) which could cause a deadlock since the
other lock_dlm thread could be engaged in recovery. */
static void unhold_null_lock(struct gdlm_lock *lp)
{
struct gdlm_lock *lpn = lp->hold_null;
/* There must be a NL lock to release. */
gdlm_assert(lpn, "%x,%llx", lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number);
/* Detach the shared junk_lvb before unlocking, so the unlock is
   submitted without a value block. */
lpn->lksb.sb_lvbptr = NULL;
lpn->lvb = NULL;
/* NOTE(review): presumably tells the completion handler to free lpn once
   the unlock finishes - confirm in thread.c. */
set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
/* Fire and forget: the return value is not checked. */
gdlm_do_unlock(lpn);
lp->hold_null = NULL;
}
/* Acquire a NL lock because gfs requires the value block to remain
   intact on the resource while the lvb is "held" even if it's holding no locks
   on the resource.

   @lock: the gdlm_lock handle
   @lvbp: receives the value block pointer, on success only

   Returns 0 on success or a negative errno. On failure the value block
   allocated here is freed again and *lvbp is left untouched, so the caller
   never sees a pointer to freed memory. */
int gdlm_hold_lvb(void *lock, char **lvbp)
{
	struct gdlm_lock *lp = lock;
	int error;

	error = gdlm_add_lvb(lp);
	if (error)
		return error;

	error = hold_null_lock(lp);
	if (error) {
		gdlm_del_lvb(lp);
		return error;
	}

	/* Publish the value block only once it is really held. */
	*lvbp = lp->lvb;
	return 0;
}
/* lm_unhold_lvb operation: release the NL hold lock, then free the lock's
   value block. The lvb argument is not used; the block is found through
   the lock itself. */
void gdlm_unhold_lvb(void *lock, char *lvb)
{
	struct gdlm_lock *held = lock;

	unhold_null_lock(held);
	gdlm_del_lvb(held);
}
/* Move every request from the lockspace's "delayed" list to the tail of
   its "submit" list, then wake whoever sleeps on thread_wait. */
void gdlm_submit_delayed(struct gdlm_ls *ls)
{
	struct gdlm_lock *cur, *next;

	spin_lock(&ls->async_lock);
	list_for_each_entry_safe(cur, next, &ls->delayed, delay_list) {
		list_del_init(&cur->delay_list);
		list_add_tail(&cur->delay_list, &ls->submit);
	}
	spin_unlock(&ls->async_lock);

	wake_up(&ls->thread_wait);
}
/* Free every gdlm_lock still on the lockspace's all_locks list, together
   with its value block unless that is the static junk_lvb.
   Returns the number of locks freed. */
int gdlm_release_all_locks(struct gdlm_ls *ls)
{
	struct gdlm_lock *cur, *next;
	int freed = 0;

	spin_lock(&ls->async_lock);
	list_for_each_entry_safe(cur, next, &ls->all_locks, all_list) {
		list_del_init(&cur->all_list);

		/* kfree(NULL) is a no-op, so only junk_lvb needs excluding. */
		if (cur->lvb != junk_lvb)
			kfree(cur->lvb);
		kfree(cur);

		freed++;
	}
	spin_unlock(&ls->async_lock);

	return freed;
}

View File

@@ -0,0 +1,187 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef LOCK_DLM_DOT_H
#define LOCK_DLM_DOT_H
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/socket.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/kobject.h>
#include <linux/fcntl.h>
#include <linux/wait.h>
#include <net/sock.h>
#include <linux/dlm.h>
#include <linux/lm_interface.h>
/*
* Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
* prefix of lock_dlm_ gets awkward. Externally, GFS refers to this module
* as "lock_dlm".
*/
/* Length of a dlm resource name built from a GFS lock name. */
#define GDLM_STRNAME_BYTES 24
/* Size of a lock value block in bytes. */
#define GDLM_LVB_SIZE 32
/* Initial values for gdlm_ls.drop_locks_count and gdlm_ls.drop_locks_period.
   NOTE(review): units of the period are not visible here - confirm. */
#define GDLM_DROP_COUNT 200000
#define GDLM_DROP_PERIOD 60
/* Size of the cluster name and file system name buffers in gdlm_ls. */
#define GDLM_NAME_LEN 128
/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
We sprintf these numbers into a 24 byte string of hex values to make them
human-readable (to make debugging simpler.)
name has room for exactly those 24 characters and no terminating NUL;
namelen carries the length handed to the dlm. */
struct gdlm_strname {
unsigned char name[GDLM_STRNAME_BYTES];
unsigned short namelen;
};
/* Bit numbers for gdlm_ls.flags. */
enum {
DFL_BLOCK_LOCKS = 0, /* new lock requests are parked on the delayed list */
DFL_SPECTATOR = 1, /* NOTE(review): not used in the code visible here */
DFL_WITHDRAW = 2, /* waited for by gdlm_withdraw(); checked by gdlm_unmount() */
};
/* Per-mount state of the lock_dlm module; handed back to GFS as the opaque
   "lockspace" pointer. */
struct gdlm_ls {
u32 id; /* from the "id=" host data option */
int jid; /* from the "jid=" option, -1 until set */
int first; /* from the "first=" option */
int first_done; /* set by gdlm_others_may_mount() */
unsigned long flags; /* DFL_ bits */
struct kobject kobj; /* source of the uevents sent to userspace */
char clustername[GDLM_NAME_LEN]; /* table name part before the ':' */
char fsname[GDLM_NAME_LEN]; /* table name part after the ':' */
int fsflags; /* flags given at mount */
dlm_lockspace_t *dlm_lockspace; /* from dlm_new_lockspace() */
lm_callback_t fscb; /* callback given at mount */
struct gfs2_sbd *sdp; /* callback data given at mount */
int recover_jid;
int recover_jid_done; /* set by gdlm_recovery_done() */
int recover_jid_status; /* set by gdlm_recovery_done() */
spinlock_t async_lock; /* taken around changes to the lists below */
struct list_head complete; /* locks with a finished request */
struct list_head blocking; /* locks with a pending blocking callback */
struct list_head delayed; /* requests held back by DFL_BLOCK_LOCKS */
struct list_head submit; /* delayed requests ready for submission */
struct list_head all_locks; /* every gdlm_lock of this mount */
u32 all_locks_count; /* number of entries on all_locks */
wait_queue_head_t wait_control; /* gdlm_withdraw() waits here */
struct task_struct *thread1;
struct task_struct *thread2;
wait_queue_head_t thread_wait; /* woken when work is queued */
unsigned long drop_time; /* jiffies at initialisation */
int drop_locks_count; /* starts at GDLM_DROP_COUNT */
int drop_locks_period; /* starts at GDLM_DROP_PERIOD */
};
/* Bit numbers for gdlm_lock.flags. Only the bits whose use is visible in
   lock.c are described. */
enum {
LFL_NOBLOCK = 0, /* request passes even while DFL_BLOCK_LOCKS is set */
LFL_NOCACHE = 1,
LFL_DLM_UNLOCK = 2, /* a dlm unlock has been submitted */
LFL_DLM_CANCEL = 3, /* a dlm cancel has been submitted */
LFL_SYNC_LVB = 4,
LFL_FORCE_PROMOTE = 5, /* suppresses DLM_LKF_CONVDEADLK in make_flags() */
LFL_REREQUEST = 6,
LFL_ACTIVE = 7, /* a request is outstanding; cleared on completion */
LFL_INLOCK = 8, /* set on the internal NL lock of hold_null_lock() */
LFL_CANCEL = 9, /* request was cancelled while on the delayed list */
LFL_NOBAST = 10, /* submit without a blocking callback */
LFL_HEADQUE = 11,
LFL_UNLOCK_DELETE = 12, /* set before unlocking the internal NL lock */
};
/* The lock_dlm side of one GFS lock; handed back to GFS as the opaque
   "lock" pointer. */
struct gdlm_lock {
struct gdlm_ls *ls; /* owning lockspace */
struct lm_lockname lockname; /* GFS name of the lock */
char *lvb; /* value block, NULL if none */
struct dlm_lksb lksb; /* dlm status block: lock id, status, lvb pointer */
s16 cur; /* current dlm mode, DLM_LOCK_IV until one is held */
s16 req; /* dlm mode being requested */
s16 prev_req;
u32 lkf; /* dlm flags DLM_LKF_ */
unsigned long flags; /* lock_dlm flags LFL_ */
int bast_mode; /* protected by async_lock */
struct completion ast_wait; /* hold_null_lock() waits on this */
struct list_head clist; /* complete */
struct list_head blist; /* blocking */
struct list_head delay_list; /* delayed */
struct list_head all_list; /* all locks for the fs */
struct gdlm_lock *hold_null; /* NL lock for hold_lvb */
};
/* Fatal assertion: when the condition is false, print the stringified
   condition and a printf-style message at KERN_EMERG, then BUG(). Because
   the condition text is printed, renaming variables inside an assertion
   changes the message. */
#define gdlm_assert(assertion, fmt, args...) \
do { \
if (unlikely(!(assertion))) { \
printk(KERN_EMERG "lock_dlm: fatal assertion failed \"%s\"\n" \
"lock_dlm: " fmt "\n", \
#assertion, ##args); \
BUG(); \
} \
} while (0)
/* Logging helpers: each message gets the "lock_dlm: " prefix and a trailing
   newline, so callers pass neither. */
#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
#define log_info(fmt, arg...) log_print(KERN_INFO , fmt , ## arg)
#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
/* log_debug() expands to nothing unless LOCK_DLM_LOG_DEBUG is defined, so
   its arguments are not evaluated in a normal build. */
#ifdef LOCK_DLM_LOG_DEBUG
#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
#else
#define log_debug(fmt, arg...)
#endif
/* sysfs.c */
int gdlm_sysfs_init(void);
void gdlm_sysfs_exit(void);
int gdlm_kobject_setup(struct gdlm_ls *, struct kobject *);
void gdlm_kobject_release(struct gdlm_ls *);
/* thread.c */
int gdlm_init_threads(struct gdlm_ls *);
void gdlm_release_threads(struct gdlm_ls *);
/* lock.c */
/* Helpers shared between the lock_dlm source files. */
s16 gdlm_make_lmstate(s16);
void gdlm_queue_delayed(struct gdlm_lock *);
void gdlm_submit_delayed(struct gdlm_ls *);
int gdlm_release_all_locks(struct gdlm_ls *);
void gdlm_delete_lp(struct gdlm_lock *);
unsigned int gdlm_do_lock(struct gdlm_lock *);
/* Entry points installed in gdlm_ops. */
int gdlm_get_lock(void *, struct lm_lockname *, void **);
void gdlm_put_lock(void *);
unsigned int gdlm_lock(void *, unsigned int, unsigned int, unsigned int);
unsigned int gdlm_unlock(void *, unsigned int);
void gdlm_cancel(void *);
int gdlm_hold_lvb(void *, char **);
void gdlm_unhold_lvb(void *, char *);
/* plock.c */
int gdlm_plock_init(void);
void gdlm_plock_exit(void);
/* Posix lock entry points installed in gdlm_ops. */
int gdlm_plock(void *, struct lm_lockname *, struct file *, int,
struct file_lock *);
int gdlm_plock_get(void *, struct lm_lockname *, struct file *,
struct file_lock *);
int gdlm_punlock(void *, struct lm_lockname *, struct file *,
struct file_lock *);
#endif
#endif

View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/init.h>
#include "lock_dlm.h"
extern struct lm_lockops gdlm_ops;
/* Module init: register the "lock_dlm" protocol with GFS2, then bring up
   the sysfs and plock parts. Whatever was set up is undone in reverse order
   when a later step fails. Returns 0 or the failing step's error. */
static int __init init_lock_dlm(void)
{
	int rv;

	rv = gfs2_register_lockproto(&gdlm_ops);
	if (rv) {
		printk(KERN_WARNING "lock_dlm: can't register protocol: %d\n",
		       rv);
		return rv;
	}

	rv = gdlm_sysfs_init();
	if (rv)
		goto fail_unregister;

	rv = gdlm_plock_init();
	if (rv)
		goto fail_sysfs;

	printk(KERN_INFO
	       "Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
	return 0;

fail_sysfs:
	gdlm_sysfs_exit();
fail_unregister:
	gfs2_unregister_lockproto(&gdlm_ops);
	return rv;
}
/* Module exit: undo init_lock_dlm() in reverse order - plock, sysfs, then
   the protocol registration. */
static void __exit exit_lock_dlm(void)
{
gdlm_plock_exit();
gdlm_sysfs_exit();
gfs2_unregister_lockproto(&gdlm_ops);
}
module_init(init_lock_dlm);
module_exit(exit_lock_dlm);
MODULE_DESCRIPTION("GFS DLM Locking Module");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

253
fs/gfs2/locking/dlm/mount.c Normal file
View File

@@ -0,0 +1,253 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include "lock_dlm.h"
const struct lm_lockops gdlm_ops;
/* Allocate and initialise the per-mount lockspace structure.
 *
 * @cb: callback into the file system
 * @sdp: callback data given at mount
 * @flags: mount flags, stored unchanged
 * @table_name: "<clustername>:<fsname>"
 *
 * Returns the new structure, or NULL when allocation fails or table_name
 * contains no ':' separator.
 */
static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
				 int flags, char *table_name)
{
	struct gdlm_ls *ls;
	char buf[256], *p;

	ls = kzalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
	if (!ls)
		return NULL;

	ls->drop_locks_count = GDLM_DROP_COUNT;
	ls->drop_locks_period = GDLM_DROP_PERIOD;
	ls->fscb = cb;
	ls->sdp = sdp;
	ls->fsflags = flags;
	spin_lock_init(&ls->async_lock);
	INIT_LIST_HEAD(&ls->complete);
	INIT_LIST_HEAD(&ls->blocking);
	INIT_LIST_HEAD(&ls->delayed);
	INIT_LIST_HEAD(&ls->submit);
	INIT_LIST_HEAD(&ls->all_locks);
	init_waitqueue_head(&ls->thread_wait);
	init_waitqueue_head(&ls->wait_control);
	ls->thread1 = NULL;
	ls->thread2 = NULL;
	ls->drop_time = jiffies;
	ls->jid = -1;

	/* Work on a private, always terminated copy of the table name. */
	strncpy(buf, table_name, 256);
	buf[255] = '\0';

	p = strchr(buf, ':');
	if (!p) {
		log_info("invalid table_name \"%s\"", table_name);
		kfree(ls);
		return NULL;
	}
	*p = '\0';
	p++;

	/*
	 * Either half of the table name can be longer than GDLM_NAME_LEN.
	 * ls was zero-filled by kzalloc(), so copying at most
	 * GDLM_NAME_LEN - 1 bytes keeps the last byte as the terminator;
	 * fsname is later passed to strlen().
	 */
	strncpy(ls->clustername, buf, GDLM_NAME_LEN - 1);
	strncpy(ls->fsname, p, GDLM_NAME_LEN - 1);

	return ls;
}
/* Parse the value of one "name=value" option as an unsigned decimal.
   Returns 0 with *out set, or -EINVAL (after logging) when the value is
   missing or is not a number. */
static int parse_uint_opt(const char *opt, const char *val, unsigned int *out)
{
	if (!val) {
		log_error("need argument to %s", opt);
		return -EINVAL;
	}
	if (sscanf(val, "%u", out) != 1) {
		log_error("invalid argument to %s: %s", opt, val);
		return -EINVAL;
	}
	return 0;
}

/* Parse the host data string given at mount: a ':' separated list of
 * "name=value" options. Recognised names are jid, first, id and nodir, each
 * taking an unsigned decimal value. Empty list elements are skipped.
 *
 * @ls: receives jid, first and id
 * @data_arg: the option string; only its first 255 bytes are looked at
 * @nodir: receives the nodir value
 *
 * Returns 0 on success, -EINVAL for an unknown option, a missing value or a
 * value that is not a number. Options parsed before the error stay applied.
 */
static int make_args(struct gdlm_ls *ls, char *data_arg, int *nodir)
{
	char data[256];
	char *options, *x, *y;
	unsigned int val;
	int error = 0;

	/* strsep() modifies its input, so parse a private copy. */
	memset(data, 0, 256);
	strncpy(data, data_arg, 255);

	for (options = data; (x = strsep(&options, ":")); ) {
		if (!*x)
			continue;

		y = strchr(x, '=');
		if (y)
			*y++ = 0;

		if (strcmp(x, "jid") && strcmp(x, "first") &&
		    strcmp(x, "id") && strcmp(x, "nodir")) {
			log_error("unknown option: %s", x);
			error = -EINVAL;
			break;
		}

		/* Parse into an unsigned temporary so the "%u" conversion
		   always stores through a pointer of the matching type. */
		error = parse_uint_opt(x, y, &val);
		if (error)
			break;

		if (!strcmp(x, "jid"))
			ls->jid = val;
		else if (!strcmp(x, "first"))
			ls->first = val;
		else if (!strcmp(x, "id"))
			ls->id = val;
		else
			*nodir = val;
	}

	return error;
}
/* lm_mount operation: set up a lock_dlm lockspace for one file system.
 *
 * @table_name: "<clustername>:<fsname>"
 * @host_data: option string parsed by make_args()
 * @cb: callback into the file system
 * @cb_data: callback data, stored in the lockspace
 * @min_lvb_size: smallest value block the caller can work with
 * @flags: mount flags, stored in the lockspace
 * @lockstruct: filled in on success
 * @fskobj: kobject handed to gdlm_kobject_setup()
 *
 * Returns 0 on success. -ENOMEM is returned when min_lvb_size exceeds
 * GDLM_LVB_SIZE or when init_gdlm() fails; any other failure returns the
 * failing step's error. Everything set up before a failure is released.
 */
static int gdlm_mount(char *table_name, char *host_data,
		      lm_callback_t cb, void *cb_data,
		      unsigned int min_lvb_size, int flags,
		      struct lm_lockstruct *lockstruct,
		      struct kobject *fskobj)
{
	struct gdlm_ls *ls;
	int error = -ENOMEM, nodir = 0;

	if (min_lvb_size > GDLM_LVB_SIZE)
		goto out;

	ls = init_gdlm(cb, cb_data, flags, table_name);
	if (!ls)
		goto out;

	error = make_args(ls, host_data, &nodir);
	if (error)
		goto out_free;	/* ls is already allocated at this point */

	error = gdlm_init_threads(ls);
	if (error)
		goto out_free;

	error = gdlm_kobject_setup(ls, fskobj);
	if (error)
		goto out_thread;

	error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
				  &ls->dlm_lockspace,
				  nodir ? DLM_LSFL_NODIR : 0,
				  GDLM_LVB_SIZE);
	if (error) {
		log_error("dlm_new_lockspace error %d", error);
		goto out_kobj;
	}

	lockstruct->ls_jid = ls->jid;
	lockstruct->ls_first = ls->first;
	lockstruct->ls_lockspace = ls;
	lockstruct->ls_ops = &gdlm_ops;
	lockstruct->ls_flags = 0;
	lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
	return 0;

out_kobj:
	gdlm_kobject_release(ls);
out_thread:
	gdlm_release_threads(ls);
out_free:
	kfree(ls);
out:
	return error;
}
/* lm_unmount operation: tear down the lockspace created by gdlm_mount()
   and free it. */
static void gdlm_unmount(void *lockspace)
{
struct gdlm_ls *ls = lockspace;
int rv;
log_debug("unmount flags %lx", ls->flags);
/* FIXME: serialize unmount and withdraw in case they
happen at once. Also, if unmount follows withdraw,
wait for withdraw to finish. */
/* gdlm_withdraw() performs the same teardown after DFL_WITHDRAW is set,
   so in that case only the structure itself is freed here. */
if (test_bit(DFL_WITHDRAW, &ls->flags))
goto out;
gdlm_kobject_release(ls);
dlm_release_lockspace(ls->dlm_lockspace, 2);
gdlm_release_threads(ls);
/* Whatever is still on the all_locks list was never put by GFS. */
rv = gdlm_release_all_locks(ls);
if (rv)
log_info("gdlm_unmount: %d stray locks freed", rv);
out:
kfree(ls);
}
/* lm_recovery_done operation: record which journal was recovered and with
   what result, then emit a change uevent on the lockspace's kobject. */
static void gdlm_recovery_done(void *lockspace, unsigned int jid,
			       unsigned int message)
{
	struct gdlm_ls *ls = lockspace;

	ls->recover_jid_done = jid;
	ls->recover_jid_status = message;

	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
}
/* lm_others_may_mount operation: set first_done and emit a change uevent
   on the lockspace's kobject. */
static void gdlm_others_may_mount(void *lockspace)
{
	struct gdlm_ls *ls = lockspace;

	ls->first_done = 1;

	kobject_uevent(&ls->kobj, KOBJ_CHANGE);
}
/* Userspace gets the offline uevent, blocks new gfs locks on
other mounters, and lets us know (sets WITHDRAW flag). Then,
userspace leaves the mount group while we leave the lockspace. */
static void gdlm_withdraw(void *lockspace)
{
struct gdlm_ls *ls = lockspace;
kobject_uevent(&ls->kobj, KOBJ_OFFLINE);
/* NOTE(review): the wait is interruptible and its result is ignored, so
   a signal would let the teardown proceed before DFL_WITHDRAW is set -
   confirm that this is intended. */
wait_event_interruptible(ls->wait_control,
test_bit(DFL_WITHDRAW, &ls->flags));
dlm_release_lockspace(ls->dlm_lockspace, 2);
gdlm_release_threads(ls);
gdlm_release_all_locks(ls);
gdlm_kobject_release(ls);
/* ls itself is not freed here; gdlm_unmount() frees it. */
}
/* Operations table registered with GFS2 under the protocol name
   "lock_dlm". lm_owner lets the core take a reference on this module
   while a file system is mounted. */
const struct lm_lockops gdlm_ops = {
.lm_proto_name = "lock_dlm",
.lm_mount = gdlm_mount,
.lm_others_may_mount = gdlm_others_may_mount,
.lm_unmount = gdlm_unmount,
.lm_withdraw = gdlm_withdraw,
.lm_get_lock = gdlm_get_lock,
.lm_put_lock = gdlm_put_lock,
.lm_lock = gdlm_lock,
.lm_unlock = gdlm_unlock,
.lm_plock = gdlm_plock,
.lm_punlock = gdlm_punlock,
.lm_plock_get = gdlm_plock_get,
.lm_cancel = gdlm_cancel,
.lm_hold_lvb = gdlm_hold_lvb,
.lm_unhold_lvb = gdlm_unhold_lvb,
.lm_recovery_done = gdlm_recovery_done,
.lm_owner = THIS_MODULE,
};

302
fs/gfs2/locking/dlm/plock.c Normal file
View File

@@ -0,0 +1,302 @@
/*
* Copyright (C) 2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/miscdevice.h>
#include <linux/lock_dlm_plock.h>
#include <linux/poll.h>
#include "lock_dlm.h"
/* Taken around changes to the operation lists below. */
static spinlock_t ops_lock;
/* Operations queued by send_op(). */
static struct list_head send_list;
/* NOTE(review): presumably operations awaiting a reply - the code filling
   this list is not visible here. */
static struct list_head recv_list;
/* Woken by send_op() after an operation has been queued. */
static wait_queue_head_t send_wq;
/* gdlm_plock() and gdlm_punlock() sleep here until op->done is set. */
static wait_queue_head_t recv_wq;
/* One posix lock request. */
struct plock_op {
struct list_head list; /* link in send_list */
int done; /* non-zero once the result is available */
struct gdlm_plock_info info; /* the request; info.rv holds the result */
};
/* Stamp a request with the kernel's plock version triple
   (major, minor, patch). */
static inline void set_version(struct gdlm_plock_info *info)
{
	__typeof__(info->version[0]) *ver = info->version;

	ver[0] = GDLM_PLOCK_VERSION_MAJOR;
	ver[1] = GDLM_PLOCK_VERSION_MINOR;
	ver[2] = GDLM_PLOCK_VERSION_PATCH;
}
/* Check the version triple in info against the kernel's. The major numbers
   must be equal and info's minor number must not exceed the kernel's.
   Returns 0 when compatible, otherwise logs both versions and returns
   -EINVAL. */
static int check_version(struct gdlm_plock_info *info)
{
	int major_ok = (info->version[0] == GDLM_PLOCK_VERSION_MAJOR);
	int minor_ok = (info->version[1] <= GDLM_PLOCK_VERSION_MINOR);

	if (major_ok && minor_ok)
		return 0;

	log_error("plock device version mismatch: "
		  "kernel (%u.%u.%u), user (%u.%u.%u)",
		  GDLM_PLOCK_VERSION_MAJOR,
		  GDLM_PLOCK_VERSION_MINOR,
		  GDLM_PLOCK_VERSION_PATCH,
		  info->version[0],
		  info->version[1],
		  info->version[2]);
	return -EINVAL;
}
/* Stamp the operation with the kernel version, append it to send_list and
   wake whoever sleeps on send_wq. */
static void send_op(struct plock_op *op)
{
	struct list_head *link = &op->list;

	set_version(&op->info);
	INIT_LIST_HEAD(link);

	spin_lock(&ops_lock);
	list_add_tail(link, &send_list);
	spin_unlock(&ops_lock);

	wake_up(&send_wq);
}
/*
 * Request a posix lock through the plock device.
 *
 * Builds a GDLM_PLOCK_OP_LOCK request from @fl, queues it with send_op() and
 * sleeps (uninterruptibly) until dev_write() marks it done.  When the result
 * code is 0 the lock is also recorded locally with posix_lock_file_wait();
 * a failure there is only logged.
 *
 * Returns -ENOMEM if the request cannot be allocated, otherwise the result
 * code delivered through the device.
 */
int gdlm_plock(void *lockspace, struct lm_lockname *name,
	       struct file *file, int cmd, struct file_lock *fl)
{
	struct gdlm_ls *ls = lockspace;
	struct plock_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
	int rv;

	if (!op)
		return -ENOMEM;

	op->info.optype = GDLM_PLOCK_OP_LOCK;
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.owner = (__u64)(long) fl->fl_owner;
	op->info.pid = fl->fl_pid;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;
	op->info.ex = (fl->fl_type == F_WRLCK);
	op->info.wait = IS_SETLKW(cmd);

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() unlinks the op before setting done; complain if not. */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "plock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;
	if (rv == 0 && posix_lock_file_wait(file, fl) < 0) {
		log_error("gdlm_plock: vfs lock error %x,%llx",
			  name->ln_type,
			  (unsigned long long)name->ln_number);
	}

	kfree(op);
	return rv;
}
/*
 * Release a posix lock through the plock device.
 *
 * The local VFS record is dropped first with posix_lock_file_wait() (a
 * failure is only logged), then a GDLM_PLOCK_OP_UNLOCK request is queued and
 * the caller sleeps until dev_write() marks it done.
 *
 * Returns -ENOMEM if the request cannot be allocated, otherwise the result
 * code delivered through the device.
 */
int gdlm_punlock(void *lockspace, struct lm_lockname *name,
		 struct file *file, struct file_lock *fl)
{
	struct gdlm_ls *ls = lockspace;
	struct plock_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
	int rv;

	if (!op)
		return -ENOMEM;

	if (posix_lock_file_wait(file, fl) < 0) {
		log_error("gdlm_punlock: vfs unlock error %x,%llx",
			  name->ln_type, (unsigned long long)name->ln_number);
	}

	op->info.optype = GDLM_PLOCK_OP_UNLOCK;
	op->info.fsid = ls->id;
	op->info.number = name->ln_number;
	op->info.owner = (__u64)(long) fl->fl_owner;
	op->info.pid = fl->fl_pid;
	op->info.start = fl->fl_start;
	op->info.end = fl->fl_end;

	send_op(op);
	wait_event(recv_wq, (op->done != 0));

	/* dev_write() unlinks the op before setting done; complain if not. */
	spin_lock(&ops_lock);
	if (!list_empty(&op->list)) {
		printk(KERN_INFO "punlock op on list\n");
		list_del(&op->list);
	}
	spin_unlock(&ops_lock);

	rv = op->info.rv;
	kfree(op);
	return rv;
}
int gdlm_plock_get(void *lockspace, struct lm_lockname *name,
struct file *file, struct file_lock *fl)
{
struct gdlm_ls *ls = lockspace;
struct plock_op *op;
int rv;
op = kzalloc(sizeof(*op), GFP_KERNEL);
if (!op)
return -ENOMEM;
op->info.optype = GDLM_PLOCK_OP_GET;
op->info.pid = fl->fl_pid;
op->info.ex = (fl->fl_type == F_WRLCK);
op->info.fsid = ls->id;
op->info.number = name->ln_number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
send_op(op);
wait_event(recv_wq, (op->done != 0));
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
printk(KERN_INFO "plock_get op on list\n");
list_del(&op->list);
}
spin_unlock(&ops_lock);
rv = op->info.rv;
if (rv == 0)
fl->fl_type = F_UNLCK;
else if (rv > 0) {
fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
fl->fl_pid = op->info.pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
}
kfree(op);
return rv;
}
/*
 * A read copies out one plock request from the send list.
 *
 * The oldest request is moved to recv_list and snapshotted under ops_lock;
 * the copy to user space happens after the lock is dropped.
 *
 * Returns sizeof(struct gdlm_plock_info) on success, -EINVAL if the user
 * buffer is too small, -EAGAIN if nothing is queued, -EFAULT on a bad buffer.
 */
static ssize_t dev_read(struct file *file, char __user *u, size_t count,
			loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op;

	if (count < sizeof(info))
		return -EINVAL;

	spin_lock(&ops_lock);
	if (list_empty(&send_list)) {
		spin_unlock(&ops_lock);
		return -EAGAIN;
	}
	op = list_entry(send_list.next, struct plock_op, list);
	list_move(&op->list, &recv_list);
	memcpy(&info, &op->info, sizeof(info));
	spin_unlock(&ops_lock);

	if (copy_to_user(u, &info, sizeof(info)))
		return -EFAULT;
	return sizeof(info);
}
/*
 * A write copies in one plock result that should match a plock_op on the
 * recv list.
 *
 * Matching is by fsid, number and owner.  The matching op is unlinked, marked
 * done and overwritten with the written result, then sleepers on recv_wq are
 * woken.  An unmatched result is only logged.
 *
 * Returns @count once the result has been accepted (matched or not), -EINVAL
 * for a wrong size or version, -EFAULT on a bad user buffer.
 */
static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
			 loff_t *ppos)
{
	struct gdlm_plock_info info;
	struct plock_op *op;
	int matched = 0;

	if (count != sizeof(info))
		return -EINVAL;
	if (copy_from_user(&info, u, sizeof(info)))
		return -EFAULT;
	if (check_version(&info))
		return -EINVAL;

	spin_lock(&ops_lock);
	list_for_each_entry(op, &recv_list, list) {
		if (op->info.fsid != info.fsid ||
		    op->info.number != info.number ||
		    op->info.owner != info.owner)
			continue;

		list_del_init(&op->list);
		matched = 1;
		op->done = 1;
		memcpy(&op->info, &info, sizeof(info));
		break;
	}
	spin_unlock(&ops_lock);

	if (!matched) {
		printk(KERN_INFO "gdlm dev_write no op %x %llx\n", info.fsid,
		       (unsigned long long)info.number);
		return count;
	}

	wake_up(&recv_wq);
	return count;
}
/*
 * Poll handler for the plock device: registers on send_wq and reports the
 * device readable (POLLIN | POLLRDNORM) while send_list is non-empty.
 */
static unsigned int dev_poll(struct file *file, poll_table *wait)
{
	unsigned int mask = 0;

	poll_wait(file, &send_wq, wait);

	spin_lock(&ops_lock);
	if (!list_empty(&send_list))
		mask = POLLIN | POLLRDNORM;
	spin_unlock(&ops_lock);

	return mask;
}
/* File operations of the plock misc device: read, write and poll only. */
static const struct file_operations dev_fops = {
.read = dev_read,
.write = dev_write,
.poll = dev_poll,
.owner = THIS_MODULE
};
/* Misc device (dynamic minor) registered by gdlm_plock_init(). */
static struct miscdevice plock_dev_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = GDLM_PLOCK_MISC_NAME,
.fops = &dev_fops
};
/*
 * Initialise the plock request lists, lock and wait queues, then register
 * the plock misc device.
 *
 * Returns 0 on success or the error returned by misc_register().
 */
int gdlm_plock_init(void)
{
	int rv;

	spin_lock_init(&ops_lock);
	INIT_LIST_HEAD(&send_list);
	INIT_LIST_HEAD(&recv_list);
	init_waitqueue_head(&send_wq);
	init_waitqueue_head(&recv_wq);

	rv = misc_register(&plock_dev_misc);
	if (rv) {
		/* Terminate the record so it does not run into the next one. */
		printk(KERN_INFO "gdlm_plock_init: misc_register failed %d\n",
		       rv);
	}
	return rv;
}
/* Unregister the plock misc device; a failure is only logged. */
void gdlm_plock_exit(void)
{
	if (misc_deregister(&plock_dev_misc) < 0) {
		/* Terminate the record so it does not run into the next one. */
		printk(KERN_INFO "gdlm_plock_exit: misc_deregister failed\n");
	}
}

239
fs/gfs2/locking/dlm/sysfs.c Normal file
View File

@@ -0,0 +1,239 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/ctype.h>
#include <linux/stat.h>
#include "lock_dlm.h"
/* Defined outside this file; only lm_proto_name is read here. */
extern struct lm_lockops gdlm_ops;
/* sysfs show: writes gdlm_ops.lm_proto_name plus a newline into @buf. */
static ssize_t proto_name_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%s\n", gdlm_ops.lm_proto_name);
}
/* sysfs show: writes "1\n" when DFL_BLOCK_LOCKS is set in ls->flags, else "0\n". */
static ssize_t block_show(struct gdlm_ls *ls, char *buf)
{
	int val = test_bit(DFL_BLOCK_LOCKS, &ls->flags) ? 1 : 0;

	return sprintf(buf, "%d\n", val);
}
/*
 * sysfs store: "1" sets DFL_BLOCK_LOCKS; "0" clears it and calls
 * gdlm_submit_delayed().  Any other value is rejected.
 *
 * Returns @len on success, -EINVAL otherwise.
 */
static ssize_t block_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
	int val = simple_strtol(buf, NULL, 0);

	switch (val) {
	case 1:
		set_bit(DFL_BLOCK_LOCKS, &ls->flags);
		break;
	case 0:
		clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
		gdlm_submit_delayed(ls);
		break;
	default:
		return -EINVAL;
	}
	return len;
}
/* sysfs show: writes "1\n" when DFL_WITHDRAW is set in ls->flags, else "0\n". */
static ssize_t withdraw_show(struct gdlm_ls *ls, char *buf)
{
	int val = test_bit(DFL_WITHDRAW, &ls->flags) ? 1 : 0;

	return sprintf(buf, "%d\n", val);
}
/*
 * sysfs store: "1" sets DFL_WITHDRAW; every other value is rejected.
 * Waiters on ls->wait_control are woken in both cases.
 *
 * Returns @len on success, -EINVAL otherwise.
 */
static ssize_t withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
	int val = simple_strtol(buf, NULL, 0);
	ssize_t ret;

	if (val != 1) {
		ret = -EINVAL;
	} else {
		set_bit(DFL_WITHDRAW, &ls->flags);
		ret = len;
	}

	wake_up(&ls->wait_control);
	return ret;
}
/* sysfs show: writes ls->id as unsigned decimal plus a newline into @buf. */
static ssize_t id_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%u\n", ls->id);
}
/* sysfs show: writes ls->jid as decimal plus a newline into @buf. */
static ssize_t jid_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->jid);
}
/* sysfs show: writes ls->first as decimal plus a newline into @buf. */
static ssize_t first_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->first);
}
/* sysfs show: writes ls->first_done as decimal plus a newline into @buf. */
static ssize_t first_done_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->first_done);
}
/* sysfs show: writes ls->recover_jid as decimal plus a newline into @buf. */
static ssize_t recover_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->recover_jid);
}
/*
 * sysfs store: parses a journal id from @buf into ls->recover_jid and passes
 * it to the filesystem callback as LM_CB_NEED_RECOVERY.  The input is not
 * validated; @len is always returned.
 */
static ssize_t recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
ls->recover_jid = simple_strtol(buf, NULL, 0);
ls->fscb(ls->sdp, LM_CB_NEED_RECOVERY, &ls->recover_jid);
return len;
}
/* sysfs show: writes ls->recover_jid_done as decimal plus a newline into @buf. */
static ssize_t recover_done_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->recover_jid_done);
}
/* sysfs show: writes ls->recover_jid_status as decimal plus a newline into @buf. */
static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->recover_jid_status);
}
/* sysfs show: writes ls->drop_locks_count as decimal plus a newline into @buf. */
static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
{
return sprintf(buf, "%d\n", ls->drop_locks_count);
}
/*
 * sysfs store: parses @buf into ls->drop_locks_count.  The input is not
 * validated; @len is always returned.
 */
static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
{
ls->drop_locks_count = simple_strtol(buf, NULL, 0);
return len;
}
/*
 * A sysfs attribute together with handlers that take the owning gdlm_ls.
 * Either handler may be NULL; gdlm_attr_show() and gdlm_attr_store() check.
 */
struct gdlm_attr {
struct attribute attr;
ssize_t (*show)(struct gdlm_ls *, char *);
ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
};
/* Defines a static struct gdlm_attr named gdlm_attr_<_name>. */
#define GDLM_ATTR(_name,_mode,_show,_store) \
static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
/* Mode 0444 entries have no store handler; 0644 entries have both handlers. */
GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
GDLM_ATTR(block, 0644, block_show, block_store);
GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GDLM_ATTR(id, 0444, id_show, NULL);
GDLM_ATTR(jid, 0444, jid_show, NULL);
GDLM_ATTR(first, 0444, first_show, NULL);
GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0644, recover_show, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
/* NULL-terminated attribute list used as gdlm_ktype.default_attrs. */
static struct attribute *gdlm_attrs[] = {
&gdlm_attr_proto_name.attr,
&gdlm_attr_block.attr,
&gdlm_attr_withdraw.attr,
&gdlm_attr_id.attr,
&gdlm_attr_jid.attr,
&gdlm_attr_first.attr,
&gdlm_attr_first_done.attr,
&gdlm_attr_recover.attr,
&gdlm_attr_recover_done.attr,
&gdlm_attr_recover_status.attr,
&gdlm_attr_drop_count.attr,
NULL,
};
/*
 * sysfs_ops show dispatcher: recovers the gdlm_ls and gdlm_attr that embed
 * @kobj and @attr and forwards to the attribute's show handler.
 *
 * Returns the handler's result, or 0 when the attribute has no show handler.
 */
static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);

	if (!a->show)
		return 0;
	return a->show(ls, buf);
}
/*
 * sysfs_ops store dispatcher: recovers the gdlm_ls and gdlm_attr that embed
 * @kobj and @attr and forwards to the attribute's store handler.
 *
 * Returns the handler's result, or @len when the attribute has no store
 * handler (the write is accepted and discarded).
 */
static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t len)
{
	struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
	struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);

	if (!a->store)
		return len;
	return a->store(ls, buf, len);
}
/* Routes sysfs reads and writes through the gdlm_attr dispatchers. */
static struct sysfs_ops gdlm_attr_ops = {
.show = gdlm_attr_show,
.store = gdlm_attr_store,
};
/* kobject type for a gdlm_ls kobject; no release handler is set here. */
static struct kobj_type gdlm_ktype = {
.default_attrs = gdlm_attrs,
.sysfs_ops = &gdlm_attr_ops,
};
/* kset named "lock_dlm" under kernel_subsys; registered by gdlm_sysfs_init(). */
static struct kset gdlm_kset = {
.subsys = &kernel_subsys,
.kobj = {.name = "lock_dlm",},
.ktype = &gdlm_ktype,
};
int gdlm_kobject_setup(struct gdlm_ls *ls, struct kobject *fskobj)
{
int error;
error = kobject_set_name(&ls->kobj, "%s", "lock_module");
if (error) {
log_error("can't set kobj name %d", error);
return error;
}
ls->kobj.kset = &gdlm_kset;
ls->kobj.ktype = &gdlm_ktype;
ls->kobj.parent = fskobj;
error = kobject_register(&ls->kobj);
if (error)
log_error("can't register kobj %d", error);
return error;
}
/* Unregister the lockspace kobject set up by gdlm_kobject_setup(). */
void gdlm_kobject_release(struct gdlm_ls *ls)
{
kobject_unregister(&ls->kobj);
}
int gdlm_sysfs_init(void)
{
int error;
error = kset_register(&gdlm_kset);
if (error)
printk("lock_dlm: cannot register kset %d\n", error);
return error;
}
/* Unregister the "lock_dlm" kset registered by gdlm_sysfs_init(). */
void gdlm_sysfs_exit(void)
{
kset_unregister(&gdlm_kset);
}

View File

@@ -0,0 +1,359 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include "lock_dlm.h"
/*
 * Append @lp to the lockspace's submit list and wake the lock_dlm threads.
 * gdlm_thread() later removes it from that list and passes it to
 * gdlm_do_lock().
 */
static void queue_submit(struct gdlm_lock *lp)
{
	struct gdlm_ls *lockspace = lp->ls;

	spin_lock(&lockspace->async_lock);
	list_add_tail(&lp->delay_list, &lockspace->submit);
	spin_unlock(&lockspace->async_lock);

	wake_up(&lockspace->thread_wait);
}
/*
 * Deliver a blocking callback to the filesystem.
 *
 * @lp:        the lock another holder is waiting on
 * @bast_mode: the DLM mode carried by the blocking AST
 *
 * The mode is translated with gdlm_make_lmstate() and mapped to the matching
 * LM_CB_NEED_* callback, which is then invoked with the lock's name.
 */
static void process_blocking(struct gdlm_lock *lp, int bast_mode)
{
	struct gdlm_ls *ls = lp->ls;
	unsigned int cb = 0;

	switch (gdlm_make_lmstate(bast_mode)) {
	case LM_ST_EXCLUSIVE:
		cb = LM_CB_NEED_E;
		break;
	case LM_ST_DEFERRED:
		cb = LM_CB_NEED_D;
		break;
	case LM_ST_SHARED:
		cb = LM_CB_NEED_S;
		break;
	default:
		/*
		 * Report the mode that was actually switched on.
		 * gdlm_thread() zeroes lp->bast_mode before calling us, so
		 * that field no longer holds the offending value.
		 */
		gdlm_assert(0, "unknown bast mode %u",
			    (unsigned int)bast_mode);
	}

	ls->fscb(ls->sdp, cb, &lp->lockname);
}
/*
 * Handle a completion taken off the lockspace's "complete" list.
 *
 * Inspects the status block and flags of @lp to decide what the completion
 * means (cancel, unlock, error, LVB sync, re-request, grant during blocked
 * locks, normal grant).  Paths that reach "out" either complete
 * lp->ast_wait (internal locks) or report the new state to the filesystem
 * through the LM_CB_ASYNC callback; the other paths return without a
 * callback.
 */
static void process_complete(struct gdlm_lock *lp)
{
struct gdlm_ls *ls = lp->ls;
struct lm_async_cb acb;
/* mode held before this completion; used for the cacheable decision */
s16 prev_mode = lp->cur;
memset(&acb, 0, sizeof(acb));
/* The request was cancelled inside the DLM: stay in the current mode. */
if (lp->lksb.sb_status == -DLM_ECANCEL) {
log_info("complete dlm cancel %x,%llx flags %lx",
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number,
lp->flags);
lp->req = lp->cur;
acb.lc_ret |= LM_OUT_CANCELED;
if (lp->cur == DLM_LOCK_IV)
lp->lksb.sb_lkid = 0;
goto out;
}
/* Completion of an unlock request. */
if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
if (lp->lksb.sb_status != -DLM_EUNLOCK) {
log_info("unlock sb_status %d %x,%llx flags %lx",
lp->lksb.sb_status, lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number,
lp->flags);
return;
}
lp->cur = DLM_LOCK_IV;
lp->req = DLM_LOCK_IV;
lp->lksb.sb_lkid = 0;
if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
gdlm_delete_lp(lp);
return;
}
goto out;
}
/* The DLM flagged the value block as not valid: hand back zeroes. */
if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
/* The alternate mode was granted: swap PR and CW in the request. */
if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
if (lp->req == DLM_LOCK_PR)
lp->req = DLM_LOCK_CW;
else if (lp->req == DLM_LOCK_CW)
lp->req = DLM_LOCK_PR;
}
/*
* A canceled lock request. The lock was just taken off the delayed
* list and was never even submitted to dlm.
*/
if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
log_info("complete internal cancel %x,%llx",
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number);
lp->req = lp->cur;
acb.lc_ret |= LM_OUT_CANCELED;
goto out;
}
/*
* An error occurred.
*/
if (lp->lksb.sb_status) {
/* a "normal" error */
if ((lp->lksb.sb_status == -EAGAIN) &&
(lp->lkf & DLM_LKF_NOQUEUE)) {
lp->req = lp->cur;
if (lp->cur == DLM_LOCK_IV)
lp->lksb.sb_lkid = 0;
goto out;
}
/* this could only happen with cancels I think */
log_info("ast sb_status %d %x,%llx flags %lx",
lp->lksb.sb_status, lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number,
lp->flags);
return;
}
/*
* This is an AST for an EX->EX conversion for sync_lvb from GFS.
*/
if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
complete(&lp->ast_wait);
return;
}
/*
* A lock has been demoted to NL because it initially completed during
* BLOCK_LOCKS. Now it must be requested in the originally requested
* mode.
*/
if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
gdlm_assert(lp->req == DLM_LOCK_NL, "%x,%llx",
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number);
gdlm_assert(lp->prev_req > DLM_LOCK_NL, "%x,%llx",
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number);
lp->cur = DLM_LOCK_NL;
lp->req = lp->prev_req;
lp->prev_req = DLM_LOCK_IV;
lp->lkf &= ~DLM_LKF_CONVDEADLK;
set_bit(LFL_NOCACHE, &lp->flags);
/* still blocked: park the request, otherwise resubmit it now */
if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
!test_bit(LFL_NOBLOCK, &lp->flags))
gdlm_queue_delayed(lp);
else
queue_submit(lp);
return;
}
/*
* A request is granted during dlm recovery. It may be granted
* because the locks of a failed node were cleared. In that case,
* there may be inconsistent data beneath this lock and we must wait
* for recovery to complete to use it. When gfs recovery is done this
* granted lock will be converted to NL and then reacquired in this
* granted state.
*/
if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
!test_bit(LFL_NOBLOCK, &lp->flags) &&
lp->req != DLM_LOCK_NL) {
lp->cur = lp->req;
lp->prev_req = lp->req;
lp->req = DLM_LOCK_NL;
lp->lkf |= DLM_LKF_CONVERT;
lp->lkf &= ~DLM_LKF_CONVDEADLK;
log_debug("rereq %x,%llx id %x %d,%d",
lp->lockname.ln_type,
(unsigned long long)lp->lockname.ln_number,
lp->lksb.sb_lkid, lp->cur, lp->req);
set_bit(LFL_REREQUEST, &lp->flags);
queue_submit(lp);
return;
}
/*
* DLM demoted the lock to NL before it was granted so GFS must be
* told it cannot cache data for this lock.
*/
if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
set_bit(LFL_NOCACHE, &lp->flags);
out:
/*
* This is an internal lock_dlm lock
*/
if (test_bit(LFL_INLOCK, &lp->flags)) {
clear_bit(LFL_NOBLOCK, &lp->flags);
lp->cur = lp->req;
complete(&lp->ast_wait);
return;
}
/*
* Normal completion of a lock request. Tell GFS it now has the lock.
*/
clear_bit(LFL_NOBLOCK, &lp->flags);
lp->cur = lp->req;
acb.lc_name = lp->lockname;
acb.lc_ret |= gdlm_make_lmstate(lp->cur);
/* cacheable only if NOCACHE was not set and both old and new modes are above NL */
if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
(lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
acb.lc_ret |= LM_OUT_CACHEABLE;
ls->fscb(ls->sdp, LM_CB_ASYNC, &acb);
}
/*
 * Report whether the calling lock_dlm thread has nothing to do.
 *
 * The complete and submit lists are always considered; the blocking list is
 * considered only when @blocking is non-zero.  Returns 1 when every
 * considered list is empty, 0 otherwise.
 */
static inline int no_work(struct gdlm_ls *ls, int blocking)
{
	int idle;

	spin_lock(&ls->async_lock);
	idle = list_empty(&ls->complete) && list_empty(&ls->submit) &&
	       (!blocking || list_empty(&ls->blocking));
	spin_unlock(&ls->async_lock);

	return idle;
}
/*
 * Decide whether the filesystem should be asked to drop locks.
 *
 * Disabled when ls->drop_locks_count is 0.  Otherwise, once more than
 * ls->drop_locks_period seconds have passed since ls->drop_time, the
 * timestamp is refreshed and 1 is returned if ls->all_locks_count has
 * reached ls->drop_locks_count.  Returns 0 in every other case.
 */
static inline int check_drop(struct gdlm_ls *ls)
{
	if (!ls->drop_locks_count)
		return 0;

	if (!time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ))
		return 0;

	ls->drop_time = jiffies;
	return ls->all_locks_count >= ls->drop_locks_count;
}
/*
 * Body of the lock_dlm kernel threads.
 *
 * Each iteration sleeps on ls->thread_wait while no_work() reports nothing
 * queued, then takes at most one lock off the blocking, complete or submit
 * list (checked in that order) and handles it outside ls->async_lock.
 * Runs until kthread_should_stop() and returns 0.
 */
static int gdlm_thread(void *data)
{
struct gdlm_ls *ls = (struct gdlm_ls *) data;
struct gdlm_lock *lp = NULL;
int blist = 0;
uint8_t complete, blocking, submit, drop;
DECLARE_WAITQUEUE(wait, current);
/* Only thread1 is allowed to do blocking callbacks since gfs
may wait for a completion callback within a blocking cb. */
if (current == ls->thread1)
blist = 1;
while (!kthread_should_stop()) {
/* the task state is set before the work check, so the wait queue is joined first */
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&ls->thread_wait, &wait);
if (no_work(ls, blist))
schedule();
remove_wait_queue(&ls->thread_wait, &wait);
set_current_state(TASK_RUNNING);
complete = blocking = submit = drop = 0;
spin_lock(&ls->async_lock);
if (blist && !list_empty(&ls->blocking)) {
lp = list_entry(ls->blocking.next, struct gdlm_lock,
blist);
list_del_init(&lp->blist);
/* take the pending mode and clear it on the lock */
blocking = lp->bast_mode;
lp->bast_mode = 0;
} else if (!list_empty(&ls->complete)) {
lp = list_entry(ls->complete.next, struct gdlm_lock,
clist);
list_del_init(&lp->clist);
complete = 1;
} else if (!list_empty(&ls->submit)) {
lp = list_entry(ls->submit.next, struct gdlm_lock,
delay_list);
list_del_init(&lp->delay_list);
submit = 1;
}
drop = check_drop(ls);
spin_unlock(&ls->async_lock);
/* the selected item is processed after async_lock is released */
if (complete)
process_complete(lp);
else if (blocking)
process_blocking(lp, blocking);
else if (submit)
gdlm_do_lock(lp);
if (drop)
ls->fscb(ls->sdp, LM_CB_DROPLOCKS, NULL);
schedule();
}
return 0;
}
int gdlm_init_threads(struct gdlm_ls *ls)
{
struct task_struct *p;
int error;
p = kthread_run(gdlm_thread, ls, "lock_dlm1");
error = IS_ERR(p);
if (error) {
log_error("can't start lock_dlm1 thread %d", error);
return error;
}
ls->thread1 = p;
p = kthread_run(gdlm_thread, ls, "lock_dlm2");
error = IS_ERR(p);
if (error) {
log_error("can't start lock_dlm2 thread %d", error);
kthread_stop(ls->thread1);
return error;
}
ls->thread2 = p;
return 0;
}
/* Stop both lock_dlm threads recorded in ls->thread1 and ls->thread2. */
void gdlm_release_threads(struct gdlm_ls *ls)
{
kthread_stop(ls->thread1);
kthread_stop(ls->thread2);
}

View File

@@ -0,0 +1,3 @@
# Build lock_nolock.o when CONFIG_GFS2_FS_LOCKING_NOLOCK is set; it is made
# from main.o alone.
obj-$(CONFIG_GFS2_FS_LOCKING_NOLOCK) += lock_nolock.o
lock_nolock-y := main.o

View File

@@ -0,0 +1,246 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/smp_lock.h>
#include <linux/lm_interface.h>
/* Per-mount state of the nolock module, allocated by nolock_mount(). */
struct nolock_lockspace {
/* size in bytes of each buffer nolock_hold_lvb() allocates */
unsigned int nl_lvb_size;
};
/* Forward declaration: nolock_mount() stores a pointer to the table defined below. */
static const struct lm_lockops nolock_ops;
/**
 * nolock_mount - set up a nolock lockspace for a mount
 * @table_name: unused
 * @host_data: mount data; an optional "jid=<n>" selects the journal id
 * @cb: unused
 * @cb_data: unused
 * @min_lvb_size: size of the lock value blocks handed out later
 * @flags: unused
 * @lockstruct: filled in with the lockspace, ops, jid and LVB size
 * @fskobj: unused
 *
 * The journal id defaults to 0 when @host_data has no "jid=" or when the
 * text after it is not a number.
 *
 * Returns: 0 on success, -ENOMEM if the lockspace cannot be allocated
 */
static int nolock_mount(char *table_name, char *host_data,
			lm_callback_t cb, void *cb_data,
			unsigned int min_lvb_size, int flags,
			struct lm_lockstruct *lockstruct,
			struct kobject *fskobj)
{
	char *c;
	unsigned int jid = 0;
	struct nolock_lockspace *nl;

	c = strstr(host_data, "jid=");
	/* sscanf() leaves jid unwritten on a mismatch; never use it unset. */
	if (c && sscanf(c + 4, "%u", &jid) != 1)
		jid = 0;

	nl = kzalloc(sizeof(struct nolock_lockspace), GFP_KERNEL);
	if (!nl)
		return -ENOMEM;

	nl->nl_lvb_size = min_lvb_size;

	lockstruct->ls_jid = jid;
	lockstruct->ls_first = 1;
	lockstruct->ls_lvb_size = min_lvb_size;
	lockstruct->ls_lockspace = nl;
	lockstruct->ls_ops = &nolock_ops;
	lockstruct->ls_flags = LM_LSFLAG_LOCAL;

	return 0;
}
/* lm_others_may_mount callback: intentionally empty in the nolock module. */
static void nolock_others_may_mount(void *lockspace)
{
}
/* lm_unmount callback: frees the lockspace allocated by nolock_mount(). */
static void nolock_unmount(void *lockspace)
{
	kfree(lockspace);
}
/* lm_withdraw callback: intentionally empty in the nolock module. */
static void nolock_withdraw(void *lockspace)
{
}
/**
* nolock_get_lock - get a lm_lock_t given a description of the lock
* @lockspace: the lockspace the lock lives in
* @name: the name of the lock (unused)
* @lockp: return the lm_lock_t here
*
* No per-lock object is allocated: the lockspace pointer itself is returned
* as the lock handle.
*
* Returns: 0 (this implementation cannot fail)
*/
static int nolock_get_lock(void *lockspace, struct lm_lockname *name,
void **lockp)
{
*lockp = lockspace;
return 0;
}
/**
* nolock_put_lock - get rid of a lock structure
* @lock: the lock to throw away
*
* Intentionally empty: nolock_get_lock() allocates nothing per lock.
*/
static void nolock_put_lock(void *lock)
{
}
/**
* nolock_lock - acquire a lock
* @lock: the lock to manipulate (unused)
* @cur_state: the current state (unused)
* @req_state: the requested state
* @flags: modifier flags (unused)
*
* Every request is granted immediately.
*
* Returns: @req_state with LM_OUT_CACHEABLE set
*/
static unsigned int nolock_lock(void *lock, unsigned int cur_state,
unsigned int req_state, unsigned int flags)
{
return req_state | LM_OUT_CACHEABLE;
}
/**
* nolock_unlock - unlock a lock
* @lock: the lock to manipulate (unused)
* @cur_state: the current state (unused)
*
* Returns: 0
*/
static unsigned int nolock_unlock(void *lock, unsigned int cur_state)
{
return 0;
}
/* lm_cancel callback: intentionally empty; nolock_lock() returns its result at once. */
static void nolock_cancel(void *lock)
{
}
/**
 * nolock_hold_lvb - hold on to a lock value block
 * @lock: the lock the LVB is associated with (the lockspace, see
 *        nolock_get_lock())
 * @lvbp: return the lm_lvb_t here
 *
 * Allocates a zeroed buffer of nl_lvb_size bytes and stores it in @lvbp;
 * @lvbp is set to NULL when the allocation fails.
 *
 * Returns: 0 on success, -ENOMEM on allocation failure
 */
static int nolock_hold_lvb(void *lock, char **lvbp)
{
	struct nolock_lockspace *nl = lock;
	char *lvb = kzalloc(nl->nl_lvb_size, GFP_KERNEL);

	*lvbp = lvb;
	return lvb ? 0 : -ENOMEM;
}
/**
* nolock_unhold_lvb - release a LVB
* @lock: the lock the LVB is associated with (unused)
* @lvb: the lock value block, as allocated by nolock_hold_lvb()
*
* Frees the buffer.
*/
static void nolock_unhold_lvb(void *lock, char *lvb)
{
kfree(lvb);
}
static int nolock_plock_get(void *lockspace, struct lm_lockname *name,
struct file *file, struct file_lock *fl)
{
struct file_lock tmp;
int ret;
ret = posix_test_lock(file, fl, &tmp);
fl->fl_type = F_UNLCK;
if (ret)
memcpy(fl, &tmp, sizeof(struct file_lock));
return 0;
}
/*
 * lm_plock callback: the request is handed straight to
 * posix_lock_file_wait(), whose result is returned.
 */
static int nolock_plock(void *lockspace, struct lm_lockname *name,
			struct file *file, int cmd, struct file_lock *fl)
{
	return posix_lock_file_wait(file, fl);
}
/*
 * lm_punlock callback: the request is handed straight to
 * posix_lock_file_wait(), whose result is returned.
 */
static int nolock_punlock(void *lockspace, struct lm_lockname *name,
			  struct file *file, struct file_lock *fl)
{
	return posix_lock_file_wait(file, fl);
}
/* lm_recovery_done callback: intentionally empty in the nolock module. */
static void nolock_recovery_done(void *lockspace, unsigned int jid,
unsigned int message)
{
}
/* Operations table registered under the protocol name "lock_nolock". */
static const struct lm_lockops nolock_ops = {
.lm_proto_name = "lock_nolock",
.lm_mount = nolock_mount,
.lm_others_may_mount = nolock_others_may_mount,
.lm_unmount = nolock_unmount,
.lm_withdraw = nolock_withdraw,
.lm_get_lock = nolock_get_lock,
.lm_put_lock = nolock_put_lock,
.lm_lock = nolock_lock,
.lm_unlock = nolock_unlock,
.lm_cancel = nolock_cancel,
.lm_hold_lvb = nolock_hold_lvb,
.lm_unhold_lvb = nolock_unhold_lvb,
.lm_plock_get = nolock_plock_get,
.lm_plock = nolock_plock,
.lm_punlock = nolock_punlock,
.lm_recovery_done = nolock_recovery_done,
.lm_owner = THIS_MODULE,
};
/*
 * Module init: register the nolock protocol with GFS2.
 *
 * Returns 0 on success or the error from gfs2_register_lockproto(), which
 * is logged as a warning.
 */
static int __init init_nolock(void)
{
	int error = gfs2_register_lockproto(&nolock_ops);

	if (!error) {
		printk(KERN_INFO
		       "Lock_Nolock (built %s %s) installed\n", __DATE__, __TIME__);
		return 0;
	}

	printk(KERN_WARNING
	       "lock_nolock: can't register protocol: %d\n", error);
	return error;
}
/* Module exit: unregister the protocol registered by init_nolock(). */
static void __exit exit_nolock(void)
{
gfs2_unregister_lockproto(&nolock_ops);
}
/* Module entry/exit hooks and metadata. */
module_init(init_nolock);
module_exit(exit_nolock);
MODULE_DESCRIPTION("GFS Nolock Locking Module");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

711
fs/gfs2/log.c Normal file
View File

@@ -0,0 +1,711 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include <linux/delay.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#define PULL 1
/**
 * gfs2_struct2blk - count the log blocks needed for a set of structures
 * @sdp: the filesystem
 * @nstruct: the number of structures
 * @ssize: the size of the structures
 *
 * Compute the number of log descriptor blocks needed to hold a certain number
 * of structures of a certain size.  The first block loses room to a
 * struct gfs2_log_descriptor, every following block to a
 * struct gfs2_meta_header.
 *
 * Returns: the number of blocks needed (minimum is always 1)
 */
unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
			     unsigned int ssize)
{
	unsigned int per_first, per_next;

	per_first = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / ssize;
	if (nstruct <= per_first)
		return 1;

	per_next = (sdp->sd_sb.sb_bsize -
		    sizeof(struct gfs2_meta_header)) / ssize;
	return 1 + DIV_ROUND_UP(nstruct - per_first, per_next);
}
/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @ai: the part of the AIL
 *
 * Must be called with the log lock held.  The lock is dropped and retaken
 * around I/O submission and error reporting, so the list walk restarts
 * from the beginning after each submitted buffer.
 */
static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int retry;

	/*
	 * assert_spin_locked() compiles away on uniprocessor builds, where
	 * spin_is_locked() is constant 0 and a BUG_ON(!spin_is_locked())
	 * would fire unconditionally.
	 */
	assert_spin_locked(&sdp->sd_log_lock);

	do {
		retry = 0;

		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list,
						 bd_ail_st_list) {
			bh = bd->bd_bh;

			gfs2_assert(sdp, bd->bd_ail == ai);

			/* I/O finished: report errors, move to the ail2 list */
			if (!buffer_busy(bh)) {
				if (!buffer_uptodate(bh)) {
					gfs2_log_unlock(sdp);
					gfs2_io_error_bh(sdp, bh);
					gfs2_log_lock(sdp);
				}
				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
				continue;
			}

			/* busy but not dirty: nothing to submit for it */
			if (!buffer_dirty(bh))
				continue;

			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);

			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			ll_rw_block(WRITE, 1, &bh);
			gfs2_log_lock(sdp);

			/* the list may have changed while unlocked */
			retry = 1;
			break;
		}
	} while (retry);
}
/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @ai: the AIL entry
 * @flags: with DIO_ALL set, busy buffers are skipped rather than ending
 *         the scan
 *
 * Buffers that are no longer busy are moved from the ail1 list to the ail2
 * list; an I/O error is reported for any that is not up to date.
 *
 * Returns: non-zero if the entry's ail1 list is empty afterwards
 */
static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags)
{
	struct gfs2_bufdata *bd, *next;
	struct buffer_head *bh;

	list_for_each_entry_safe_reverse(bd, next, &ai->ai_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;

		gfs2_assert(sdp, bd->bd_ail == ai);

		if (buffer_busy(bh)) {
			if (!(flags & DIO_ALL))
				break;
			continue;
		}

		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);

		list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list);
	}

	return list_empty(&ai->ai_ail1_list);
}
/*
 * Start I/O on the entries of the filesystem's ail1 list.
 *
 * The entry at the tail of the list (head->prev) is started first.  Unless
 * DIO_ALL is set in @flags, the loop below stops as soon as that entry is
 * no longer the tail or its ail1 list has become empty.  Entries are stamped
 * with a sync generation so that each one is started once per call even
 * though the walk restarts after every gfs2_ail1_start_one().
 */
static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags)
{
struct list_head *head = &sdp->sd_ail1_list;
u64 sync_gen;
struct list_head *first;
struct gfs2_ail *first_ai, *ai, *tmp;
int done = 0;
gfs2_log_lock(sdp);
if (list_empty(head)) {
gfs2_log_unlock(sdp);
return;
}
sync_gen = sdp->sd_ail_sync_gen++;
first = head->prev;
first_ai = list_entry(first, struct gfs2_ail, ai_list);
first_ai->ai_sync_gen = sync_gen;
gfs2_ail1_start_one(sdp, first_ai); /* This may drop log lock */
/* with DIO_ALL the early-exit test on the first entry is disabled */
if (flags & DIO_ALL)
first = NULL;
while(!done) {
if (first && (head->prev != first ||
gfs2_ail1_empty_one(sdp, first_ai, 0)))
break;
done = 1;
list_for_each_entry_safe_reverse(ai, tmp, head, ai_list) {
/* already started during this call */
if (ai->ai_sync_gen >= sync_gen)
continue;
ai->ai_sync_gen = sync_gen;
gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */
/* restart the walk: the list may have changed while unlocked */
done = 0;
break;
}
}
gfs2_log_unlock(sdp);
}
/*
 * Move fully synced entries from the filesystem's ail1 list to its ail2
 * list, scanning from the tail of the list under the log lock.
 *
 * Without DIO_ALL in @flags the scan stops at the first entry that still
 * has buffers on its ail1 list; with DIO_ALL every entry is examined.
 *
 * Returns non-zero if the filesystem's ail1 list is empty afterwards.
 */
int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
{
	struct gfs2_ail *ai, *next;
	int empty;

	gfs2_log_lock(sdp);

	list_for_each_entry_safe_reverse(ai, next, &sdp->sd_ail1_list, ai_list) {
		if (gfs2_ail1_empty_one(sdp, ai, flags)) {
			list_move(&ai->ai_list, &sdp->sd_ail2_list);
			continue;
		}
		if (!(flags & DIO_ALL))
			break;
	}

	empty = list_empty(&sdp->sd_ail1_list);

	gfs2_log_unlock(sdp);

	return empty;
}
/**
 * gfs2_ail2_empty_one - Release every buffer on an AIL entry's ail2 list
 * @sdp: the filesystem
 * @ai: the AIL entry
 *
 * Each buffer is detached from the entry, removed from both AIL lists it is
 * linked on, has its glock's AIL count decremented and its buffer head
 * released.
 */
static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *ail2 = &ai->ai_ail2_list;
	struct gfs2_bufdata *bd;

	while (!list_empty(ail2)) {
		bd = list_entry(ail2->prev, struct gfs2_bufdata,
				bd_ail_st_list);

		gfs2_assert(sdp, bd->bd_ail == ai);

		bd->bd_ail = NULL;
		list_del(&bd->bd_ail_st_list);
		list_del(&bd->bd_ail_gl_list);
		atomic_dec(&bd->bd_gl->gl_ail_count);
		brelse(bd->bd_bh);
	}
}
/*
 * Free every ail2 entry whose first block lies in the part of the journal
 * between the current log tail and @new_tail.
 *
 * When @new_tail is numerically below the current tail, the range wraps
 * around the end of the journal, so an entry qualifies if it is at or after
 * the old tail OR before the new one; otherwise both conditions must hold.
 */
static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	struct gfs2_ail *ai, *next;
	unsigned int old_tail = sdp->sd_log_tail;
	int wrapped = (new_tail < old_tail);
	int after_old, before_new, in_range;

	gfs2_log_lock(sdp);

	list_for_each_entry_safe(ai, next, &sdp->sd_ail2_list, ai_list) {
		after_old = (old_tail <= ai->ai_first);
		before_new = (ai->ai_first < new_tail);

		if (wrapped)
			in_range = (after_old || before_new);
		else
			in_range = (after_old && before_new);

		if (in_range) {
			gfs2_ail2_empty_one(sdp, ai);
			list_del(&ai->ai_list);
			gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list));
			gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list));
			kfree(ai);
		}
	}

	gfs2_log_unlock(sdp);
}
/**
* gfs2_log_reserve - Make a log reservation
* @sdp: The GFS2 superblock
* @blks: The number of blocks to reserve
*
* Note that we never give out the last 6 blocks of the journal. That's
* due to the fact that there are a small number of header blocks
* associated with each log flush. The exact number can't be known until
* flush time, so we ensure that we have just enough free blocks at all
* times to avoid running out during a log flush.
*
* On success the caller holds sd_log_flush_lock for reading; it is released
* again by gfs2_log_release().
*
* Returns: 0 on success, -EINVAL if @blks is 0 or exceeds the journal size
*/
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
unsigned int try = 0;
if (gfs2_assert_warn(sdp, blks) ||
gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
return -EINVAL;
mutex_lock(&sdp->sd_log_reserve_mutex);
gfs2_log_lock(sdp);
/* loop until enough blocks are free, dropping the log lock while making room */
while(sdp->sd_log_blks_free <= (blks + 6)) {
gfs2_log_unlock(sdp);
gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL);
/* from the second pass on, also start AIL write-back */
if (try++)
gfs2_ail1_start(sdp, 0);
gfs2_log_lock(sdp);
}
sdp->sd_log_blks_free -= blks;
gfs2_log_unlock(sdp);
mutex_unlock(&sdp->sd_log_reserve_mutex);
down_read(&sdp->sd_log_flush_lock);
return 0;
}
/**
* gfs2_log_release - Release a given number of log blocks
* @sdp: The GFS2 superblock
* @blks: The number of blocks
*
* Adds @blks back to the free count under the log lock, asserts that the
* count does not exceed the journal size, and drops the read hold on
* sd_log_flush_lock taken by gfs2_log_reserve().
*/
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
gfs2_log_lock(sdp);
sdp->sd_log_blks_free += blks;
gfs2_assert_withdraw(sdp,
sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
up_read(&sdp->sd_log_flush_lock);
}
static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn)
{
struct inode *inode = sdp->sd_jdesc->jd_inode;
int error;
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
bh_map.b_size = 1 << inode->i_blkbits;
error = gfs2_block_map(inode, lbn, 0, &bh_map);
if (error || !bh_map.b_blocknr)
printk(KERN_INFO "error=%d, dbn=%llu lbn=%u", error,
(unsigned long long)bh_map.b_blocknr, lbn);
gfs2_assert_withdraw(sdp, !error && bh_map.b_blocknr);
return bh_map.b_blocknr;
}
/**
 * log_distance - Compute distance between two journal blocks
 * @sdp: The GFS2 superblock
 * @newer: The most recent journal block of the pair
 * @older: The older journal block of the pair
 *
 * Compute the distance (in the journal direction) between two
 * blocks in the journal.  When @newer is numerically below @older the
 * journal has wrapped, so the journal size is added to the difference.
 *
 * Returns: the distance in blocks
 */
static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
					unsigned int older)
{
	int delta = newer - older;

	if (delta >= 0)
		return delta;

	delta += sdp->sd_jdesc->jd_blocks;
	return delta;
}
/*
 * Return the current tail of the log: the first block of the entry at the
 * tail of the ail1 list (sd_ail1_list.prev), or the log head when that list
 * is empty.  Sampled under the log lock.
 */
static unsigned int current_tail(struct gfs2_sbd *sdp)
{
	struct gfs2_ail *last;
	unsigned int tail;

	gfs2_log_lock(sdp);

	if (!list_empty(&sdp->sd_ail1_list)) {
		last = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list);
		tail = last->ai_first;
	} else {
		tail = sdp->sd_log_head;
	}

	gfs2_log_unlock(sdp);

	return tail;
}
/*
 * Advance the flush head by one block, wrapping to block 0 (and recording
 * the wrap in sd_log_flush_wrapped) at the end of the journal.
 *
 * If the flush head is sitting on the log tail before the move, it is
 * asserted to equal the log head as well.
 */
static inline void log_incr_head(struct gfs2_sbd *sdp)
{
	if (sdp->sd_log_flush_head == sdp->sd_log_tail)
		gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head);

	sdp->sd_log_flush_head++;
	if (sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) {
		sdp->sd_log_flush_head = 0;
		sdp->sd_log_flush_wrapped = 1;
	}
}
/**
 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
 * @sdp: The GFS2 superblock
 *
 * Takes the buffer for the journal block at the current flush head, zeroes
 * it, marks it up to date and clean, records it on sd_log_flush_list and
 * advances the flush head.
 *
 * Returns: the buffer_head
 */
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
{
	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
	struct gfs2_log_buf *logbuf;
	struct buffer_head *bh;

	logbuf = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL);
	list_add(&logbuf->lb_list, &sdp->sd_log_flush_list);

	bh = sb_getblk(sdp->sd_vfs, blkno);
	logbuf->lb_bh = bh;

	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
	unlock_buffer(bh);

	log_incr_head(sdp);

	return bh;
}
/**
 * gfs2_log_fake_buf - Build a fake buffer head to write a buffer to the log
 * @sdp: the filesystem
 * @real: the buffer_head whose page and offset the fake one shares
 *
 * The fake buffer_head points at the same page data as @real but carries
 * the disk block number that the current flush head maps to. A tracking
 * entry recording both buffers is queued on sd_log_flush_list and the flush
 * head is advanced.
 *
 * Returns: the fake buffer_head
 */
struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
				      struct buffer_head *real)
{
	struct gfs2_log_buf *entry;
	struct buffer_head *fake;
	u64 dblock;

	dblock = log_bmap(sdp, sdp->sd_log_flush_head);

	entry = kzalloc(sizeof(*entry), GFP_NOFS | __GFP_NOFAIL);
	list_add(&entry->lb_list, &sdp->sd_log_flush_list);
	entry->lb_real = real;

	fake = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
	entry->lb_bh = fake;

	atomic_set(&fake->b_count, 1);
	fake->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
	set_bh_page(fake, real->b_page, bh_offset(real));
	fake->b_blocknr = dblock;
	fake->b_size = sdp->sd_sb.sb_bsize;
	fake->b_bdev = sdp->sd_vfs->s_bdev;

	log_incr_head(sdp);

	return fake;
}
/**
 * log_pull_tail - Move the log tail forward and credit the freed blocks
 * @sdp: The GFS2 superblock
 * @new_tail: The journal block the tail moves to
 * @pull: non-zero to credit one block fewer than the distance moved
 *
 * The distance is measured from the old tail before ail2_empty() runs.
 */
static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail, int pull)
{
	unsigned int freed = log_distance(sdp, new_tail, sdp->sd_log_tail);

	if (pull)
		freed--;

	ail2_empty(sdp, new_tail);

	gfs2_log_lock(sdp);
	sdp->sd_log_blks_free += freed;
	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks);
	gfs2_log_unlock(sdp);

	sdp->sd_log_tail = new_tail;
}
/**
 * log_write_header - Build a journal header and write it out synchronously
 * @sdp: The GFS2 superblock
 * @flags: value stored in the header's lh_flags field
 * @pull: passed on to log_pull_tail() when the tail has moved; must be zero
 *        when it has not
 *
 * The header goes into the block the current flush head maps to. After the
 * write the incore tail is updated if it changed, sd_log_idle is recomputed
 * and the flush head is advanced by one block.
 */
static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
{
u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head);
struct buffer_head *bh;
struct gfs2_log_header *lh;
unsigned int tail;
u32 hash;
/* Get the buffer for the header block and start from all zeroes. */
bh = sb_getblk(sdp->sd_vfs, blkno);
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
unlock_buffer(bh);
/* Sample the tail only after gfs2_ail1_empty() has run. */
gfs2_ail1_empty(sdp, 0);
tail = current_tail(sdp);
lh = (struct gfs2_log_header *)bh->b_data;
memset(lh, 0, sizeof(struct gfs2_log_header));
lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
/* The sequence number is consumed here (post-increment). */
lh->lh_sequence = cpu_to_be64(sdp->sd_log_sequence++);
lh->lh_flags = cpu_to_be32(flags);
lh->lh_tail = cpu_to_be32(tail);
lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head);
/* Hash is computed while lh_hash is still zero, then stored. */
hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
/* Write the header and wait for it; a failure is reported as an I/O error. */
set_buffer_dirty(bh);
if (sync_dirty_buffer(bh))
gfs2_io_error_bh(sdp, bh);
brelse(bh);
/* Tail moved: pull it forward. Tail unchanged: @pull must not be set. */
if (sdp->sd_log_tail != tail)
log_pull_tail(sdp, tail, pull);
else
gfs2_assert_withdraw(sdp, !pull);
/* Idle is recorded when the tail equals the block just used for the header. */
sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
log_incr_head(sdp);
}
/**
 * log_flush_commit - Wait for the queued log buffers, then write a header
 * @sdp: The GFS2 superblock
 *
 * Every entry on sd_log_flush_list is removed, its buffer waited on and
 * checked for I/O errors, and the buffer released. Fake buffers (those with
 * lb_real set) are freed once their reference count has dropped to one;
 * other buffers are released with brelse(). A log header with no flags and
 * no pull is written at the end.
 */
static void log_flush_commit(struct gfs2_sbd *sdp)
{
	struct list_head *pending = &sdp->sd_log_flush_list;
	struct gfs2_log_buf *entry;
	struct buffer_head *bh;

	for (;;) {
		if (list_empty(pending))
			break;

		entry = list_entry(pending->next, struct gfs2_log_buf, lb_list);
		list_del(&entry->lb_list);
		bh = entry->lb_bh;

		wait_on_buffer(bh);
		if (!buffer_uptodate(bh))
			gfs2_io_error_bh(sdp, bh);

		if (!entry->lb_real) {
			brelse(bh);
		} else {
			/* Grrrr... spin until we hold the only reference */
			while (atomic_read(&bh->b_count) != 1)
				schedule();
			free_buffer_head(bh);
		}

		kfree(entry);
	}

	log_write_header(sdp, 0, 0);
}
/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: the filesystem
 * @gl: The glock structure to flush. If NULL, flush the whole incore log
 *
 * The whole flush runs with sd_log_flush_lock held for writing. When @gl is
 * given and its log element is not on any list, the function returns without
 * flushing anything.
 */
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
{
struct gfs2_ail *ai;
down_write(&sdp->sd_log_flush_lock);
if (gl) {
/* Nothing queued for this glock: drop both locks and leave. */
gfs2_log_lock(sdp);
if (list_empty(&gl->gl_le.le_list)) {
gfs2_log_unlock(sdp);
up_write(&sdp->sd_log_flush_lock);
return;
}
gfs2_log_unlock(sdp);
}
/* AIL entry handed to lops_after_commit() below. */
ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
INIT_LIST_HEAD(&ai->ai_ail1_list);
INIT_LIST_HEAD(&ai->ai_ail2_list);
/* Counted and committed buffers/revokes must agree before flushing. */
gfs2_assert_withdraw(sdp, sdp->sd_log_num_buf == sdp->sd_log_commited_buf);
gfs2_assert_withdraw(sdp,
sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke);
/* Start writing at the current log head. */
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
ai->ai_first = sdp->sd_log_flush_head;
lops_before_commit(sdp);
/*
 * If the before-commit hooks queued log buffers, wait for them and write
 * a header. Otherwise a header is written only when the tail has moved
 * and the log is not marked idle.
 */
if (!list_empty(&sdp->sd_log_flush_list))
log_flush_commit(sdp);
else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle)
log_write_header(sdp, 0, PULL);
lops_after_commit(sdp, ai);
/* Publish the new head and reset the per-flush accounting. */
gfs2_log_lock(sdp);
sdp->sd_log_head = sdp->sd_log_flush_head;
sdp->sd_log_blks_free -= sdp->sd_log_num_hdrs;
sdp->sd_log_blks_reserved = 0;
sdp->sd_log_commited_buf = 0;
sdp->sd_log_num_hdrs = 0;
sdp->sd_log_commited_revoke = 0;
/* Keep the AIL entry only if something was put on its ail1 list. */
if (!list_empty(&ai->ai_ail1_list)) {
list_add(&ai->ai_list, &sdp->sd_ail1_list);
ai = NULL;
}
gfs2_log_unlock(sdp);
sdp->sd_vfs->s_dirt = 0;
up_write(&sdp->sd_log_flush_lock);
/* ai is NULL here when it was queued above; kfree(NULL) is a no-op. */
kfree(ai);
}
/**
 * log_refund - Fold a transaction into the log reservation accounting
 * @sdp: the filesystem
 * @tr: the transaction being committed
 *
 * Updates the committed buffer and revoke counts from @tr, recomputes how
 * many log blocks the committed state needs (plus one extra block whenever
 * that number is non-zero) and returns the rest of the transaction's
 * reservation to sd_log_blks_free. All of this happens under the log lock.
 */
static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int needed;
	unsigned int free_before;

	gfs2_log_lock(sdp);

	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm;
	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_buf) >= 0);
	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
	gfs2_assert_withdraw(sdp, ((int)sdp->sd_log_commited_revoke) >= 0);

	/* One block per committed buffer, plus blocks for the revokes. */
	needed = sdp->sd_log_commited_buf;
	if (sdp->sd_log_commited_revoke)
		needed += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,
					  sizeof(u64));
	if (needed)
		needed++;

	free_before = sdp->sd_log_blks_free;
	sdp->sd_log_blks_free += tr->tr_reserved -
				 (needed - sdp->sd_log_blks_reserved);

	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_free >= free_before);
	gfs2_assert_withdraw(sdp,
			     sdp->sd_log_blks_free <= sdp->sd_jdesc->jd_blocks +
			     sdp->sd_log_num_hdrs);

	sdp->sd_log_blks_reserved = needed;

	gfs2_log_unlock(sdp);
}
/**
 * gfs2_log_commit - Commit a transaction to the incore log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * Refunds the unused part of the reservation, runs the incore-commit hooks,
 * marks the superblock dirty and drops the read side of sd_log_flush_lock.
 * If the number of logged buffers then exceeds the gt_incore_log_blocks
 * tunable, the process in sd_logd_process is woken.
 */
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	int over_limit;

	log_refund(sdp, tr);
	lops_incore_commit(sdp, tr);

	sdp->sd_vfs->s_dirt = 1;
	up_read(&sdp->sd_log_flush_lock);

	gfs2_log_lock(sdp);
	over_limit = sdp->sd_log_num_buf > gfs2_tune_get(sdp, gt_incore_log_blocks);
	if (over_limit)
		wake_up_process(sdp->sd_logd_process);
	gfs2_log_unlock(sdp);
}
/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 *
 * Runs with sd_log_flush_lock held for writing. The incore log is expected
 * to be completely empty on entry; each counter that is not zero triggers a
 * withdraw assertion. A header flagged GFS2_LOG_HEAD_UNMOUNT is then written
 * at the current log head.
 */
void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
down_write(&sdp->sd_log_flush_lock);
/* Nothing may be reserved, logged or waiting on the AIL1 list. */
gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_hdrs);
gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
/* Write the unmount header at the log head, without pulling the tail. */
sdp->sd_log_flush_head = sdp->sd_log_head;
sdp->sd_log_flush_wrapped = 0;
log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, 0);
/* These checks only warn; they do not withdraw. */
gfs2_assert_warn(sdp, sdp->sd_log_blks_free == sdp->sd_jdesc->jd_blocks);
gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
/* Head and tail both end up just past the header that was written. */
sdp->sd_log_head = sdp->sd_log_flush_head;
sdp->sd_log_tail = sdp->sd_log_head;
up_write(&sdp->sd_log_flush_lock);
}
/**
 * gfs2_meta_syncfs - sync all the buffers in a filesystem
 * @sdp: the filesystem
 *
 * Flushes the whole incore log, then repeatedly starts AIL1 writeback and
 * sleeps 10ms until gfs2_ail1_empty() reports that it is done.
 */
void gfs2_meta_syncfs(struct gfs2_sbd *sdp)
{
	gfs2_log_flush(sdp, NULL);

	gfs2_ail1_start(sdp, DIO_ALL);
	while (!gfs2_ail1_empty(sdp, DIO_ALL)) {
		msleep(10);
		gfs2_ail1_start(sdp, DIO_ALL);
	}
}

65
fs/gfs2/log.h Normal file
View File

@@ -0,0 +1,65 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __LOG_DOT_H__
#define __LOG_DOT_H__
#include <linux/list.h>
#include <linux/spinlock.h>
#include "incore.h"
/**
* gfs2_log_lock - acquire the right to mess with the log manager
* @sdp: the filesystem
*
*/
static inline void gfs2_log_lock(struct gfs2_sbd *sdp)
{
spin_lock(&sdp->sd_log_lock);
}
/**
* gfs2_log_unlock - release the right to mess with the log manager
* @sdp: the filesystem
*
*/
static inline void gfs2_log_unlock(struct gfs2_sbd *sdp)
{
spin_unlock(&sdp->sd_log_lock);
}
/**
 * gfs2_log_pointers_init - set the log head and tail to the block after @value
 * @sdp: the filesystem
 * @value: a journal block number
 *
 * The successor of @value wraps to 0 at the end of the journal.
 */
static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
					  unsigned int value)
{
	unsigned int next = value + 1;

	if (next == sdp->sd_jdesc->jd_blocks)
		next = 0;

	sdp->sd_log_tail = next;
	sdp->sd_log_head = next;
}
/*
 * Log manager interface. Only prototypes are given here; the function
 * bodies are defined outside this header.
 */
unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
unsigned int ssize);
int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags);
int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp);
struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
struct buffer_head *real);
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl);
void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
void gfs2_log_shutdown(struct gfs2_sbd *sdp);
void gfs2_meta_syncfs(struct gfs2_sbd *sdp);
#endif /* __LOG_DOT_H__ */

823
fs/gfs2/lops.c Normal file
View File

@@ -0,0 +1,823 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
/**
 * glock_lo_add - Add a glock's log element to the incore log
 * @sdp: the filesystem
 * @le: the log element embedded in the glock
 *
 * The current transaction is always marked as touched. If the element is
 * not yet on a list and the glock is held exclusively, the glock gets an
 * extra reference, is flagged GLF_DIRTY and is queued on sd_log_le_gl.
 */
static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_glock *gl;

	tr->tr_touched = 1;

	if (list_empty(&le->le_list)) {
		gl = container_of(le, struct gfs2_glock, gl_le);
		if (!gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) {
			gfs2_glock_hold(gl);
			set_bit(GLF_DIRTY, &gl->gl_flags);

			gfs2_log_lock(sdp);
			sdp->sd_log_num_gl++;
			list_add(&le->le_list, &sdp->sd_log_le_gl);
			gfs2_log_unlock(sdp);
		}
	}
}
/**
 * glock_lo_after_commit - Release the glocks that were part of the flush
 * @sdp: the filesystem
 * @ai: the AIL entry for this flush (not used here)
 *
 * Empties sd_log_le_gl, dropping the reference held on each glock.
 */
static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *logged = &sdp->sd_log_le_gl;
	struct gfs2_glock *gl;

	for (;;) {
		if (list_empty(logged))
			break;

		gl = list_entry(logged->next, struct gfs2_glock, gl_le.le_list);
		list_del_init(&gl->gl_le.le_list);
		sdp->sd_log_num_gl--;

		gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl));
		gfs2_glock_put(gl);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
}
/**
 * buf_lo_add - Add a metadata buffer to the current transaction and the log
 * @sdp: the filesystem
 * @le: the log element embedded in the buffer's gfs2_bufdata
 *
 * A buffer that is already on a transaction list is left alone. Otherwise
 * it joins the current transaction; if it is not yet on the log list
 * either, its glock is added to the transaction, the buffer is checked and
 * pinned, and it is queued on sd_log_le_buf.
 */
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr = current->journal_info;
	int in_trans;

	gfs2_log_lock(sdp);
	in_trans = !list_empty(&bd->bd_list_tr);
	if (!in_trans) {
		tr->tr_touched = 1;
		tr->tr_num_buf++;
		list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	}
	gfs2_log_unlock(sdp);

	/* Done if it was already in a transaction or is already logged. */
	if (in_trans || !list_empty(&le->le_list))
		return;

	gfs2_trans_add_gl(bd->bd_gl);
	gfs2_meta_check(sdp, bd->bd_bh);
	gfs2_pin(sdp, bd->bd_bh);

	gfs2_log_lock(sdp);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	gfs2_log_unlock(sdp);

	tr->tr_num_buf_new++;
}
/**
 * buf_lo_incore_commit - Detach every buffer from a transaction's list
 * @sdp: the filesystem
 * @tr: the transaction being committed
 *
 * Under the log lock, empties tr_list_buf and counts tr_num_buf down once
 * per buffer; the count is expected to be zero afterwards.
 */
static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *bufs = &tr->tr_list_buf;
	struct gfs2_bufdata *bd;

	gfs2_log_lock(sdp);
	for (; !list_empty(bufs); tr->tr_num_buf--) {
		bd = list_entry(bufs->next, struct gfs2_bufdata, bd_list_tr);
		list_del_init(&bd->bd_list_tr);
	}
	gfs2_log_unlock(sdp);

	gfs2_assert_warn(sdp, !tr->tr_num_buf);
}
/**
 * buf_lo_before_commit - Write the logged metadata buffers into the journal
 * @sdp: the filesystem
 *
 * The buffers on sd_log_le_buf are written in runs. Each run starts with a
 * GFS2_LOG_DESC_METADATA descriptor block listing the block numbers of the
 * run, followed by one fake buffer per listed block.
 */
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
struct gfs2_bufdata *bd1 = NULL, *bd2;
unsigned int total = sdp->sd_log_num_buf;
unsigned int offset = sizeof(struct gfs2_log_descriptor);
unsigned int limit;
unsigned int num;
unsigned n;
__be64 *ptr;
/* Round the descriptor size up to a multiple of sizeof(__be64). */
offset += sizeof(__be64) - 1;
offset &= ~(sizeof(__be64) - 1);
/* Number of block numbers that fit after the descriptor in one block. */
limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
/* for 4k blocks, limit = 503 */
/*
 * Two cursors over the same list, both starting at the list head: bd1
 * walks ahead filling descriptors, bd2 follows writing the buffers.
 */
bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
while(total) {
/* This run covers at most limit buffers. */
num = total;
if (total > limit)
num = limit;
bh = gfs2_log_get_buf(sdp);
sdp->sd_log_num_hdrs++;
ld = (struct gfs2_log_descriptor *)bh->b_data;
ptr = (__be64 *)(bh->b_data + offset);
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
/* Length counts the descriptor block itself plus the num buffers. */
ld->ld_length = cpu_to_be32(num + 1);
ld->ld_data1 = cpu_to_be32(num);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
/* Pass 1 (bd1): record the block number of each buffer in the run. */
n = 0;
list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
bd_le.le_list) {
*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
if (++n >= num)
break;
}
/* Submit the descriptor block. */
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
/* Pass 2 (bd2): submit the same buffers through fake buffer heads. */
n = 0;
list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
bd_le.le_list) {
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
if (++n >= num)
break;
}
total -= num;
}
}
/**
 * buf_lo_after_commit - Unpin the metadata buffers that were just logged
 * @sdp: the filesystem
 * @ai: the AIL entry handed on to gfs2_unpin()
 *
 * Empties sd_log_le_buf; sd_log_num_buf is expected to reach zero.
 */
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *logged = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	for (;;) {
		if (list_empty(logged))
			break;

		bd = list_entry(logged->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}
/**
 * buf_lo_before_scan - Reset the replay block counters
 * @jd: the journal about to be scanned
 * @head: the journal's log header (not used here)
 * @pass: the scan pass; only pass 0 does anything
 */
static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass == 0) {
		sdp->sd_found_blocks = 0;
		sdp->sd_replayed_blocks = 0;
	}
}
/**
 * buf_lo_scan_elements - Replay the metadata blocks listed by a descriptor
 * @jd: the journal being replayed
 * @start: journal block number of the descriptor
 * @ld: the log descriptor
 * @ptr: the array of big-endian block numbers that follows the descriptor
 * @pass: the scan pass; only pass 1 with a METADATA descriptor is handled
 *
 * Each listed block that is not revoked is read from the journal, copied
 * into a new buffer for its real location and marked dirty.
 *
 * Returns: 0 on success, the journal read error, or -EIO when a replayed
 * block fails gfs2_meta_check()
 */
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1)
		return 0;
	if (be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	/* Step over the descriptor block itself. */
	gfs2_replay_incr_blk(sdp, &start);

	while (blks) {
		blkno = be64_to_cpu(*ptr);
		ptr++;
		sdp->sd_found_blocks++;

		if (!gfs2_revoke_check(sdp, blkno, start)) {
			error = gfs2_replay_read_block(jd, start, &bh_log);
			if (error)
				return error;

			bh_ip = gfs2_meta_new(gl, blkno);
			memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

			if (gfs2_meta_check(sdp, bh_ip))
				error = -EIO;
			else
				mark_buffer_dirty(bh_ip);

			brelse(bh_log);
			brelse(bh_ip);

			if (error)
				break;

			sdp->sd_replayed_blocks++;
		}

		gfs2_replay_incr_blk(sdp, &start);
		blks--;
	}

	return error;
}
/**
 * buf_lo_after_scan - Sync replayed metadata and report the totals
 * @jd: the journal that was scanned
 * @error: non-zero if the scan failed
 * @pass: the scan pass
 *
 * On error the journal glock's metadata is synced and nothing is printed.
 * Without an error, only pass 1 syncs and prints the replay statistics.
 */
static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (!error && pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	if (!error)
		fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
			jd->jd_jid, sdp->sd_replayed_blocks,
			sdp->sd_found_blocks);
}
/**
 * revoke_lo_add - Queue a revoke on the incore log
 * @sdp: the filesystem
 * @le: the log element embedded in the revoke
 *
 * Counts the revoke in the current transaction and adds the element to
 * sd_log_le_revoke under the log lock.
 */
static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_trans *tr = current->journal_info;

	tr->tr_num_revoke++;
	tr->tr_touched = 1;

	gfs2_log_lock(sdp);
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
	sdp->sd_log_num_revoke++;
	gfs2_log_unlock(sdp);
}
/**
 * revoke_lo_before_commit - Write the pending revokes into the journal
 * @sdp: the filesystem
 *
 * Does nothing when no revokes are pending. Otherwise the revoked block
 * numbers are packed as big-endian 64-bit values, first after a
 * GFS2_LOG_DESC_REVOKE descriptor and then, when a block fills up, into
 * continuation blocks that start with a GFS2_METATYPE_LB meta header. Each
 * revoke is freed once its block number has been stored.
 */
static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
struct gfs2_log_descriptor *ld;
struct gfs2_meta_header *mh;
struct buffer_head *bh;
unsigned int offset;
struct list_head *head = &sdp->sd_log_le_revoke;
struct gfs2_revoke *rv;
if (!sdp->sd_log_num_revoke)
return;
/* First block: the log descriptor. */
bh = gfs2_log_get_buf(sdp);
ld = (struct gfs2_log_descriptor *)bh->b_data;
ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
/* presumably the number of blocks needed for this many u64 entries -- confirm against gfs2_struct2blk() */
ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
sizeof(u64)));
ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
/* Entries in the first block start right after the descriptor. */
offset = sizeof(struct gfs2_log_descriptor);
while (!list_empty(head)) {
rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
list_del_init(&rv->rv_le.le_list);
sdp->sd_log_num_revoke--;
/* No room for another entry: submit this block and start a new one. */
if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
bh = gfs2_log_get_buf(sdp);
mh = (struct gfs2_meta_header *)bh->b_data;
mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
/* Entries in continuation blocks start after the meta header. */
offset = sizeof(struct gfs2_meta_header);
}
*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
kfree(rv);
offset += sizeof(u64);
}
gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
/* Submit the last (possibly partly filled) block. */
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
}
/**
 * revoke_lo_before_scan - Prepare the revoke state for a journal scan
 * @jd: the journal about to be scanned
 * @head: the journal's log header; its lh_tail becomes the replay tail
 * @pass: the scan pass; only pass 0 does anything
 */
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass == 0) {
		sdp->sd_found_revokes = 0;
		sdp->sd_replay_tail = head->lh_tail;
	}
}
/**
 * revoke_lo_scan_elements - Collect the revoke tags of a revoke descriptor
 * @jd: the journal being replayed
 * @start: journal block number of the descriptor
 * @ld: the log descriptor
 * @ptr: not used here; the entries are read from the journal blocks directly
 * @pass: the scan pass; only pass 0 with a REVOKE descriptor is handled
 *
 * Walks the ld_length blocks that make up the descriptor. In the first
 * block the entries follow the log descriptor; in every later block, which
 * must be of type GFS2_METATYPE_LB, they follow the meta header. Each entry
 * is passed to gfs2_revoke_add().
 *
 * Returns: 0 on success, or the negative error from reading a journal block
 * or from gfs2_revoke_add()
 */
static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(sdp, blkno, start);
			if (error < 0) {
				/* Drop the journal buffer before bailing out. */
				brelse(bh);
				return error;
			} else if (error) {
				sdp->sd_found_revokes++;
			}

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		/* Continuation blocks carry only a meta header before the data. */
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}
/**
 * revoke_lo_after_scan - Report and discard the collected revoke tags
 * @jd: the journal that was scanned
 * @error: non-zero if the scan failed
 * @pass: the scan pass
 *
 * On error the revoke state is cleaned up straight away. Without an error
 * only pass 1 prints the number of revokes found and cleans up.
 */
static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (!error) {
		if (pass != 1)
			return;
		fs_info(sdp, "jid=%u: Found %u revoke tags\n",
			jd->jd_jid, sdp->sd_found_revokes);
	}

	gfs2_revoke_clean(sdp);
}
/**
 * rg_lo_add - Add a resource group's log element to the incore log
 * @sdp: the filesystem
 * @le: the log element embedded in the resource group descriptor
 *
 * The current transaction is always marked as touched. An element that is
 * not yet on a list takes a hold via gfs2_rgrp_bh_hold() and is queued on
 * sd_log_le_rg.
 */
static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_trans *tr = current->journal_info;
	struct gfs2_rgrpd *rgd;

	tr->tr_touched = 1;

	if (list_empty(&le->le_list)) {
		rgd = container_of(le, struct gfs2_rgrpd, rd_le);
		gfs2_rgrp_bh_hold(rgd);

		gfs2_log_lock(sdp);
		sdp->sd_log_num_rg++;
		list_add(&le->le_list, &sdp->sd_log_le_rg);
		gfs2_log_unlock(sdp);
	}
}
/**
 * rg_lo_after_commit - Release the resource groups that were part of the flush
 * @sdp: the filesystem
 * @ai: the AIL entry for this flush (not used here)
 *
 * Empties sd_log_le_rg; each resource group has gfs2_rgrp_repolish_clones()
 * called on it and its hold dropped with gfs2_rgrp_bh_put().
 */
static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *logged = &sdp->sd_log_le_rg;
	struct gfs2_rgrpd *rgd;

	for (;;) {
		if (list_empty(logged))
			break;

		rgd = list_entry(logged->next, struct gfs2_rgrpd, rd_le.le_list);
		list_del_init(&rgd->rd_le.le_list);
		sdp->sd_log_num_rg--;

		gfs2_rgrp_repolish_clones(rgd);
		gfs2_rgrp_bh_put(rgd);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
}
/**
 * databuf_lo_add - Add a databuf to the transaction.
 * @sdp: the filesystem
 * @le: the log element embedded in the buffer's gfs2_bufdata
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that it is
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's being the block number (as per meta data)
 *    and a flag which says whether the data block needs escaping or
 *    not. This means we need a new log entry for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr = current->journal_info;
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
gfs2_log_lock(sdp);
tr->tr_touched = 1;
/*
 * Journaled-data inode and buffer not yet in a transaction: add it to
 * this transaction and pin it. The log lock is dropped before pinning.
 */
if (list_empty(&bd->bd_list_tr) &&
(ip->i_di.di_flags & GFS2_DIF_JDATA)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
gfs2_log_unlock(sdp);
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_buf_new++;
} else {
gfs2_log_unlock(sdp);
}
/* Called without the log lock held. */
gfs2_trans_add_gl(bd->bd_gl);
/* Queue on the databuf log list once; jdata buffers are counted twice. */
gfs2_log_lock(sdp);
if (list_empty(&le->le_list)) {
if (ip->i_di.di_flags & GFS2_DIF_JDATA)
sdp->sd_log_num_jdata++;
sdp->sd_log_num_databuf++;
list_add(&le->le_list, &sdp->sd_log_le_databuf);
}
gfs2_log_unlock(sdp);
}
/**
 * gfs2_check_magic - Test whether a buffer begins with the GFS2 magic number
 * @bh: the buffer to look at
 *
 * The buffer's page is mapped atomically for the duration of the check.
 *
 * Returns: 1 if the first 32-bit word equals GFS2_MAGIC (big-endian), else 0
 */
static int gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr = kmap_atomic(bh->b_page, KM_USER0);
	__be32 *first_word = kaddr + bh_offset(bh);
	int found = 0;

	if (*first_word == cpu_to_be32(GFS2_MAGIC))
		found = 1;

	kunmap_atomic(kaddr, KM_USER0);

	return found;
}
/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: the filesystem
 *
 * Here we scan through the lists of buffers and make the assumption
 * that any buffer that's been pinned is being journaled, and that
 * any unpinned buffer is an ordered write data buffer and therefore
 * will be written back rather than journaled.
 *
 * Ordered buffers are moved to a private list, submitted if dirty, and
 * waited on at the end. Journaled buffers are described in a
 * GFS2_LOG_DESC_JDATA descriptor block (block number plus escape flag per
 * buffer) and then written to the log, via a copy with the first word
 * zeroed when the data starts with the GFS2 magic number.
 */
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
LIST_HEAD(started);
struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
struct buffer_head *bh = NULL,*bh1 = NULL;
unsigned int offset = sizeof(struct gfs2_log_descriptor);
struct gfs2_log_descriptor *ld;
unsigned int limit;
unsigned int total_dbuf = sdp->sd_log_num_databuf;
unsigned int total_jdata = sdp->sd_log_num_jdata;
unsigned int num, n;
__be64 *ptr = NULL;
/* Round the descriptor size up to a multiple of two __be64s. */
offset += 2*sizeof(__be64) - 1;
offset &= ~(2*sizeof(__be64) - 1);
/*
 * NOTE(review): every journaled entry written below stores two __be64
 * values, yet limit is computed by dividing by sizeof(__be64) only --
 * confirm that a full descriptor block cannot be overrun.
 */
limit = (sdp->sd_sb.sb_bsize - offset)/sizeof(__be64);
/*
 * Start writing ordered buffers, write journaled buffers
 * into the log along with a header
 */
gfs2_log_lock(sdp);
/* Two cursors over sd_log_le_databuf, both starting at the list head. */
bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
bd_le.le_list);
while(total_dbuf) {
num = total_jdata;
if (num > limit)
num = limit;
n = 0;
/* First walk (bd1): sort buffers and fill in the descriptor. */
list_for_each_entry_safe_continue(bd1, bdt,
&sdp->sd_log_le_databuf,
bd_le.le_list) {
/* store off the buffer head in a local ptr since
* gfs2_bufdata might change when we drop the log lock
*/
bh1 = bd1->bd_bh;
/* An ordered write buffer */
if (bh1 && !buffer_pinned(bh1)) {
list_move(&bd1->bd_le.le_list, &started);
/* The second cursor pointed at the entry just moved: restart it at the list head. */
if (bd1 == bd2) {
bd2 = NULL;
bd2 = list_prepare_entry(bd2,
&sdp->sd_log_le_databuf,
bd_le.le_list);
}
total_dbuf--;
if (bh1) {
/* Submit dirty ordered data with the log lock dropped. */
if (buffer_dirty(bh1)) {
get_bh(bh1);
gfs2_log_unlock(sdp);
ll_rw_block(SWRITE, 1, &bh1);
brelse(bh1);
gfs2_log_lock(sdp);
}
continue;
}
continue;
} else if (bh1) { /* A journaled buffer */
int magic;
gfs2_log_unlock(sdp);
/* First journaled buffer of this run: set up the descriptor block. */
if (!bh) {
bh = gfs2_log_get_buf(sdp);
sdp->sd_log_num_hdrs++;
ld = (struct gfs2_log_descriptor *)
bh->b_data;
ptr = (__be64 *)(bh->b_data + offset);
ld->ld_header.mh_magic =
cpu_to_be32(GFS2_MAGIC);
ld->ld_header.mh_type =
cpu_to_be32(GFS2_METATYPE_LD);
ld->ld_header.mh_format =
cpu_to_be32(GFS2_FORMAT_LD);
ld->ld_type =
cpu_to_be32(GFS2_LOG_DESC_JDATA);
ld->ld_length = cpu_to_be32(num + 1);
ld->ld_data1 = cpu_to_be32(num);
ld->ld_data2 = cpu_to_be32(0);
memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
}
/* Record the block number and whether the data needs escaping. */
magic = gfs2_check_magic(bh1);
*ptr++ = cpu_to_be64(bh1->b_blocknr);
*ptr++ = cpu_to_be64((__u64)magic);
clear_buffer_escaped(bh1);
if (unlikely(magic != 0))
set_buffer_escaped(bh1);
gfs2_log_lock(sdp);
/*
 * NOTE(review): post-increment with '>' lets this walk go past num
 * entries, unlike the '++n >= num' test in the second walk -- confirm
 * this is intended.
 */
if (n++ > num)
break;
} else if (!bh1) {
/* No buffer attached any more: drop the entry. */
total_dbuf--;
sdp->sd_log_num_databuf--;
list_del_init(&bd1->bd_le.le_list);
if (bd1 == bd2) {
bd2 = NULL;
bd2 = list_prepare_entry(bd2,
&sdp->sd_log_le_databuf,
bd_le.le_list);
}
kmem_cache_free(gfs2_bufdata_cachep, bd1);
}
}
gfs2_log_unlock(sdp);
/* Submit the descriptor block, if one was built. */
if (bh) {
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
bh = NULL;
}
n = 0;
gfs2_log_lock(sdp);
/* Second walk (bd2): write the journaled data blocks themselves. */
list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
bd_le.le_list) {
if (!bd2->bd_bh)
continue;
/* copy buffer if it needs escaping */
gfs2_log_unlock(sdp);
if (unlikely(buffer_escaped(bd2->bd_bh))) {
void *kaddr;
struct page *page = bd2->bd_bh->b_page;
bh = gfs2_log_get_buf(sdp);
kaddr = kmap_atomic(page, KM_USER0);
memcpy(bh->b_data,
kaddr + bh_offset(bd2->bd_bh),
sdp->sd_sb.sb_bsize);
kunmap_atomic(kaddr, KM_USER0);
/* Escape: zero the first 32-bit word of the journal copy. */
*(__be32 *)bh->b_data = 0;
} else {
bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
}
set_buffer_dirty(bh);
ll_rw_block(WRITE, 1, &bh);
gfs2_log_lock(sdp);
if (++n >= num)
break;
}
bh = NULL;
total_dbuf -= num;
total_jdata -= num;
}
gfs2_log_unlock(sdp);
/* Wait on all ordered buffers */
while (!list_empty(&started)) {
gfs2_log_lock(sdp);
bd1 = list_entry(started.next, struct gfs2_bufdata,
bd_le.le_list);
list_del_init(&bd1->bd_le.le_list);
sdp->sd_log_num_databuf--;
bh = bd1->bd_bh;
if (bh) {
/* Detach the bufdata from the buffer, then wait with the lock dropped. */
bh->b_private = NULL;
get_bh(bh);
gfs2_log_unlock(sdp);
wait_on_buffer(bh);
brelse(bh);
} else
gfs2_log_unlock(sdp);
kmem_cache_free(gfs2_bufdata_cachep, bd1);
}
/* We've removed all the ordered write bufs here, so only jdata left */
gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
}
/**
 * databuf_lo_scan_elements - Replay the journaled data blocks of a descriptor
 * @jd: the journal being replayed
 * @start: journal block number of the descriptor
 * @ld: the log descriptor
 * @ptr: array of (block number, escape flag) pairs, both big-endian 64-bit
 * @pass: the scan pass; only pass 1 with a JDATA descriptor is handled
 *
 * Each listed block that is not revoked is read from the journal and copied
 * into a new buffer for its real location. When the escape flag is set the
 * first 32-bit word is restored to GFS2_MAGIC. The buffer is then marked
 * dirty.
 *
 * Returns: 0 on success or the error from reading a journal block
 */
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1)
		return 0;
	if (be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	/* Step over the descriptor block itself. */
	gfs2_replay_incr_blk(sdp, &start);

	while (blks) {
		blkno = be64_to_cpu(*ptr);
		ptr++;
		esc = be64_to_cpu(*ptr);
		ptr++;
		sdp->sd_found_blocks++;

		if (!gfs2_revoke_check(sdp, blkno, start)) {
			error = gfs2_replay_read_block(jd, start, &bh_log);
			if (error)
				return error;

			bh_ip = gfs2_meta_new(gl, blkno);
			memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

			/* Unescape */
			if (esc) {
				__be32 *eptr = (__be32 *)bh_ip->b_data;
				*eptr = cpu_to_be32(GFS2_MAGIC);
			}
			mark_buffer_dirty(bh_ip);

			brelse(bh_log);
			brelse(bh_ip);

			/* error is still 0 here: the read above succeeded. */
			sdp->sd_replayed_blocks++;
		}

		gfs2_replay_incr_blk(sdp, &start);
		blks--;
	}

	return error;
}
/* FIXME: sort out accounting for log blocks etc. */
/**
 * databuf_lo_after_scan - Sync replayed data and report the totals
 * @jd: the journal that was scanned
 * @error: non-zero if the scan failed
 * @pass: the scan pass
 *
 * On error the journal glock's metadata is synced and nothing is printed.
 * Without an error, only pass 1 syncs and prints the replay statistics.
 */
static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (!error && pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	if (!error)
		fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
			jd->jd_jid, sdp->sd_replayed_blocks,
			sdp->sd_found_blocks);
}
/**
 * databuf_lo_after_commit - Unpin the data buffers that were just logged
 * @sdp: the filesystem
 * @ai: the AIL entry handed on to gfs2_unpin()
 *
 * Empties sd_log_le_databuf, counting both sd_log_num_databuf and
 * sd_log_num_jdata down once per entry; both are expected to reach zero.
 */
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *logged = &sdp->sd_log_le_databuf;
	struct gfs2_bufdata *bd;

	for (;;) {
		if (list_empty(logged))
			break;

		bd = list_entry(logged->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		sdp->sd_log_num_jdata--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
	gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}
/*
 * Per-type log operation tables. A hook that is not listed in a table is
 * left NULL by the initializer.
 */

/* Glocks: only added to the log and released after commit. */
const struct gfs2_log_operations gfs2_glock_lops = {
.lo_add = glock_lo_add,
.lo_after_commit = glock_lo_after_commit,
.lo_name = "glock",
};
/* Metadata buffers: full set of commit and journal-scan hooks. */
const struct gfs2_log_operations gfs2_buf_lops = {
.lo_add = buf_lo_add,
.lo_incore_commit = buf_lo_incore_commit,
.lo_before_commit = buf_lo_before_commit,
.lo_after_commit = buf_lo_after_commit,
.lo_before_scan = buf_lo_before_scan,
.lo_scan_elements = buf_lo_scan_elements,
.lo_after_scan = buf_lo_after_scan,
.lo_name = "buf",
};
/* Revokes: written before commit; no incore-commit or after-commit hook. */
const struct gfs2_log_operations gfs2_revoke_lops = {
.lo_add = revoke_lo_add,
.lo_before_commit = revoke_lo_before_commit,
.lo_before_scan = revoke_lo_before_scan,
.lo_scan_elements = revoke_lo_scan_elements,
.lo_after_scan = revoke_lo_after_scan,
.lo_name = "revoke",
};
/* Resource groups: only added to the log and released after commit. */
const struct gfs2_log_operations gfs2_rg_lops = {
.lo_add = rg_lo_add,
.lo_after_commit = rg_lo_after_commit,
.lo_name = "rg",
};
/*
 * Data buffers: shares buf_lo_incore_commit with the metadata table and
 * sets no lo_before_scan hook.
 */
const struct gfs2_log_operations gfs2_databuf_lops = {
.lo_add = databuf_lo_add,
.lo_incore_commit = buf_lo_incore_commit,
.lo_before_commit = databuf_lo_before_commit,
.lo_after_commit = databuf_lo_after_commit,
.lo_scan_elements = databuf_lo_scan_elements,
.lo_after_scan = databuf_lo_after_scan,
.lo_name = "databuf",
};
/* NULL-terminated list of all tables, in the order their hooks are run. */
const struct gfs2_log_operations *gfs2_log_ops[] = {
&gfs2_glock_lops,
&gfs2_buf_lops,
&gfs2_revoke_lops,
&gfs2_rg_lops,
&gfs2_databuf_lops,
NULL,
};

99
fs/gfs2/lops.h Normal file
View File

@@ -0,0 +1,99 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __LOPS_DOT_H__
#define __LOPS_DOT_H__
#include <linux/list.h>
#include "incore.h"
extern const struct gfs2_log_operations gfs2_glock_lops;
extern const struct gfs2_log_operations gfs2_buf_lops;
extern const struct gfs2_log_operations gfs2_revoke_lops;
extern const struct gfs2_log_operations gfs2_rg_lops;
extern const struct gfs2_log_operations gfs2_databuf_lops;
extern const struct gfs2_log_operations *gfs2_log_ops[];
/**
 * lops_init_le - Initialise a log element
 * @le: the log element
 * @lops: the operations table the element is bound to
 *
 * The element starts out on no list.
 */
static inline void lops_init_le(struct gfs2_log_element *le,
				const struct gfs2_log_operations *lops)
{
	le->le_ops = lops;
	INIT_LIST_HEAD(&le->le_list);
}
static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
if (le->le_ops->lo_add)
le->le_ops->lo_add(sdp, le);
}
static inline void lops_incore_commit(struct gfs2_sbd *sdp,
struct gfs2_trans *tr)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_incore_commit)
gfs2_log_ops[x]->lo_incore_commit(sdp, tr);
}
static inline void lops_before_commit(struct gfs2_sbd *sdp)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_before_commit)
gfs2_log_ops[x]->lo_before_commit(sdp);
}
static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_after_commit)
gfs2_log_ops[x]->lo_after_commit(sdp, ai);
}
static inline void lops_before_scan(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head,
unsigned int pass)
{
int x;
for (x = 0; gfs2_log_ops[x]; x++)
if (gfs2_log_ops[x]->lo_before_scan)
gfs2_log_ops[x]->lo_before_scan(jd, head, pass);
}
/**
 * lops_scan_elements - Offer a log descriptor to every table's scan hook
 * @jd: the journal being scanned
 * @start: journal block number of the descriptor
 * @ld: the log descriptor
 * @ptr: the data that follows the descriptor
 * @pass: the scan pass
 *
 * Tables without the hook are skipped; the walk stops at the first hook
 * that reports an error.
 *
 * Returns: 0 if every hook succeeded, otherwise the first non-zero result
 */
static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				     struct gfs2_log_descriptor *ld,
				     __be64 *ptr,
				     unsigned int pass)
{
	const struct gfs2_log_operations **ops;
	int error;

	for (ops = gfs2_log_ops; *ops; ops++) {
		if (!(*ops)->lo_scan_elements)
			continue;

		error = (*ops)->lo_scan_elements(jd, start, ld, ptr, pass);
		if (error)
			return error;
	}

	return 0;
}
/**
 * lops_after_scan - Run every table's lo_after_scan hook
 * @jd: the journal that was scanned
 * @error: non-zero if the scan failed
 * @pass: the scan pass
 *
 * Tables without an lo_after_scan hook are skipped. The guard tests the
 * hook that is actually called: a table may provide lo_after_scan without
 * lo_before_scan (or the reverse), so testing lo_before_scan here would
 * either skip a hook that exists or call through a NULL pointer.
 */
static inline void lops_after_scan(struct gfs2_jdesc *jd, int error,
				   unsigned int pass)
{
	int x;

	for (x = 0; gfs2_log_ops[x]; x++)
		if (gfs2_log_ops[x]->lo_after_scan)
			gfs2_log_ops[x]->lo_after_scan(jd, error, pass);
}
#endif /* __LOPS_DOT_H__ */

149
fs/gfs2/main.c Normal file
View File

@@ -0,0 +1,149 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include <asm/atomic.h>
#include "gfs2.h"
#include "incore.h"
#include "ops_fstype.h"
#include "sys.h"
#include "util.h"
#include "glock.h"
/*
 * Slab constructor for gfs2_inode objects.  Only acts when the flags say
 * this is a real construction call (CONSTRUCTOR set, VERIFY clear).
 */
static void gfs2_init_inode_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
{
	struct gfs2_inode *ip = foo;
	unsigned long ctor_bits = flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR);

	if (ctor_bits != SLAB_CTOR_CONSTRUCTOR)
		return;

	inode_init_once(&ip->i_inode);
	spin_lock_init(&ip->i_spin);
	init_rwsem(&ip->i_rw_mutex);
	memset(ip->i_cache, 0, sizeof(ip->i_cache));
}
/*
 * Slab constructor for gfs2_glock objects.  Only acts when the flags say
 * this is a real construction call (CONSTRUCTOR set, VERIFY clear).
 */
static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
{
	struct gfs2_glock *gl = foo;
	unsigned long ctor_bits = flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR);

	if (ctor_bits != SLAB_CTOR_CONSTRUCTOR)
		return;

	/* Hash linkage and lock */
	INIT_HLIST_NODE(&gl->gl_list);
	spin_lock_init(&gl->gl_spin);

	/* Holder and waiter queues */
	INIT_LIST_HEAD(&gl->gl_holders);
	INIT_LIST_HEAD(&gl->gl_waiters1);
	INIT_LIST_HEAD(&gl->gl_waiters2);
	INIT_LIST_HEAD(&gl->gl_waiters3);

	/* Lock value block starts out absent and unreferenced */
	gl->gl_lvb = NULL;
	atomic_set(&gl->gl_lvb_count, 0);

	/* Reclaim and AIL lists */
	INIT_LIST_HEAD(&gl->gl_reclaim);
	INIT_LIST_HEAD(&gl->gl_ail_list);
	atomic_set(&gl->gl_ail_count, 0);
}
/**
 * init_gfs2_fs - Register GFS2 as a filesystem
 *
 * Module entry point.  Runs gfs2_sys_init() and gfs2_glock_init(), creates
 * the glock, inode and bufdata slab caches, then registers the "gfs2" and
 * "gfs2meta" filesystem types.  On any failure the caches created so far
 * are destroyed and gfs2_sys_uninit() is called before returning.
 *
 * Returns: 0 on success, error code on failure
 */
static int __init init_gfs2_fs(void)
{
int error;
error = gfs2_sys_init();
if (error)
return error;
error = gfs2_glock_init();
if (error)
goto fail;
/* Every cache-creation failure below reports -ENOMEM */
error = -ENOMEM;
gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
sizeof(struct gfs2_glock),
0, 0,
gfs2_init_glock_once, NULL);
if (!gfs2_glock_cachep)
goto fail;
gfs2_inode_cachep = kmem_cache_create("gfs2_inode",
sizeof(struct gfs2_inode),
0, SLAB_RECLAIM_ACCOUNT|
SLAB_MEM_SPREAD,
gfs2_init_inode_once, NULL);
if (!gfs2_inode_cachep)
goto fail;
/* The bufdata cache has no constructor; objects are zeroed at allocation */
gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata",
sizeof(struct gfs2_bufdata),
0, 0, NULL, NULL);
if (!gfs2_bufdata_cachep)
goto fail;
error = register_filesystem(&gfs2_fs_type);
if (error)
goto fail;
error = register_filesystem(&gfs2meta_fs_type);
if (error)
goto fail_unregister;
printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
fail_unregister:
/* Second registration failed: undo the first one */
unregister_filesystem(&gfs2_fs_type);
fail:
/*
 * Caches are torn down in reverse order of creation.  A cache pointer
 * that was never assigned is skipped by the NULL checks.
 * NOTE(review): nothing on this path undoes gfs2_glock_init() -- confirm
 * that it needs no teardown.
 */
if (gfs2_bufdata_cachep)
kmem_cache_destroy(gfs2_bufdata_cachep);
if (gfs2_inode_cachep)
kmem_cache_destroy(gfs2_inode_cachep);
if (gfs2_glock_cachep)
kmem_cache_destroy(gfs2_glock_cachep);
gfs2_sys_uninit();
return error;
}
/**
 * exit_gfs2_fs - Unregister the file system
 *
 * Module exit point.  Unregisters both filesystem types first, then
 * destroys the three slab caches and calls gfs2_sys_uninit().
 */
static void __exit exit_gfs2_fs(void)
{
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
/* Caches are destroyed in reverse order of their creation in init_gfs2_fs() */
kmem_cache_destroy(gfs2_bufdata_cachep);
kmem_cache_destroy(gfs2_inode_cachep);
kmem_cache_destroy(gfs2_glock_cachep);
gfs2_sys_uninit();
}
/* Module metadata and the init/exit entry points defined above */
MODULE_DESCRIPTION("Global File System");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");
module_init(init_gfs2_fs);
module_exit(exit_gfs2_fs);

573
fs/gfs2/meta_io.c Normal file
View File

@@ -0,0 +1,573 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/delay.h>
#include <linux/bio.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "ops_address.h"
/*
 * get_block callback handed to block_write_full_page() for metadata
 * address spaces.  It always raises an assertion warning (the asserted
 * condition is the constant 0) and returns -EOPNOTSUPP; it never maps
 * a block.
 */
static int aspace_get_block(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create)
{
gfs2_assert_warn(inode->i_sb->s_fs_info, 0);
return -EOPNOTSUPP;
}
/*
 * writepage operation for metadata address spaces: delegates to
 * block_write_full_page() with aspace_get_block as the mapping callback.
 */
static int gfs2_aspace_writepage(struct page *page,
struct writeback_control *wbc)
{
return block_write_full_page(page, aspace_get_block, wbc);
}
/* Address space operations installed on inodes created by gfs2_aspace_get() */
static const struct address_space_operations aspace_aops = {
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
/**
 * gfs2_aspace_get - Create and initialize a struct inode structure
 * @sdp: the filesystem the aspace is in
 *
 * A full struct inode is allocated purely to act as an address space.
 * Its mapping uses GFP_NOFS allocations and the aspace_aops operations,
 * its size is set to the maximum value, and it is inserted into the
 * inode hash.
 *
 * Returns: the aspace, or NULL if new_inode() failed
 */
struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
{
	struct inode *aspace = new_inode(sdp->sd_vfs);

	if (!aspace)
		return NULL;

	mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
	aspace->i_mapping->a_ops = &aspace_aops;
	aspace->i_size = ~0ULL;
	aspace->i_private = NULL;
	insert_inode_hash(aspace);

	return aspace;
}
/*
 * Release an aspace obtained from gfs2_aspace_get(): remove it from the
 * inode hash, then drop the reference with iput().
 */
void gfs2_aspace_put(struct inode *aspace)
{
remove_inode_hash(aspace);
iput(aspace);
}
/**
 * gfs2_meta_inval - Invalidate all buffers associated with a glock
 * @gl: the glock
 *
 * Asserts that the glock has nothing left on its AIL, truncates every
 * page out of the glock's address space, and asserts that the mapping is
 * empty afterwards.  i_writecount is raised for the duration of the
 * truncate.
 */
void gfs2_meta_inval(struct gfs2_glock *gl)
{
	struct inode *aspace = gl->gl_aspace;
	struct address_space *mapping = aspace->i_mapping;
	struct gfs2_sbd *sdp = gl->gl_sbd;

	gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));

	atomic_inc(&aspace->i_writecount);
	truncate_inode_pages(mapping, 0);
	atomic_dec(&aspace->i_writecount);

	gfs2_assert_withdraw(sdp, !mapping->nrpages);
}
/**
 * gfs2_meta_sync - Sync all buffers associated with a glock
 * @gl: The glock
 *
 * Starts writeback on the glock's address space and waits for it to
 * finish.  A failure reported by the wait is flagged through
 * gfs2_io_error(); the result of starting the writeback is not examined.
 */
void gfs2_meta_sync(struct gfs2_glock *gl)
{
	struct address_space *mapping = gl->gl_aspace->i_mapping;

	filemap_fdatawrite(mapping);

	if (filemap_fdatawait(mapping))
		gfs2_io_error(gl->gl_sbd);
}
/**
 * getbuf - Get a buffer with a given address space
 * @gl: the glock
 * @blkno: the block number (filesystem scope)
 * @create: 1 if the buffer should be created
 *
 * Looks up (or, with @create, instantiates) the page of the glock's
 * address space that holds @blkno and returns the buffer head for that
 * block with an extra reference taken.
 *
 * Returns: the buffer; NULL only when @create is zero and the page is
 * not present in the page cache
 */
static struct buffer_head *getbuf(struct gfs2_glock *gl, u64 blkno, int create)
{
struct address_space *mapping = gl->gl_aspace->i_mapping;
struct gfs2_sbd *sdp = gl->gl_sbd;
struct page *page;
struct buffer_head *bh;
unsigned int shift;
unsigned long index;
unsigned int bufnum;
/* shift is log2 of the number of filesystem blocks per page */
shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
bufnum = blkno - (index << shift); /* block buf index within page */
if (create) {
/* Retry until a page is obtained, so this path never returns NULL */
for (;;) {
page = grab_cache_page(mapping, index);
if (page)
break;
yield();
}
} else {
page = find_lock_page(mapping, index);
if (!page)
return NULL;
}
/* Both lookups return the page locked; attach buffers if it has none yet */
if (!page_has_buffers(page))
create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0);
/* Locate header for our buffer within our page */
for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page)
/* Do nothing */;
/* Reference handed to the caller */
get_bh(bh);
if (!buffer_mapped(bh))
map_bh(bh, sdp->sd_vfs, blkno);
/* Drop the page lock and page reference; the buffer reference remains */
unlock_page(page);
mark_page_accessed(page);
page_cache_release(page);
return bh;
}
/*
 * Prepare a buffer that is about to hold a brand-new metadata block:
 * under the buffer lock, clear its dirty bit and mark it up to date,
 * then stamp the GFS2 magic number into its metadata header.
 */
static void meta_prep_new(struct buffer_head *bh)
{
struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
lock_buffer(bh);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
unlock_buffer(bh);
/* The magic is written after the buffer lock has been dropped */
mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
}
/**
 * gfs2_meta_new - Get a block
 * @gl: The glock associated with this block
 * @blkno: The block number
 *
 * Obtains the buffer for @blkno (creating it if needed) and prepares it
 * as a new metadata block via meta_prep_new(), without reading from disk.
 *
 * Returns: The buffer
 */
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno)
{
	struct buffer_head *new_bh = getbuf(gl, blkno, CREATE);

	meta_prep_new(new_bh);

	return new_bh;
}
/**
 * gfs2_meta_read - Read a block from disk
 * @gl: The glock covering the block
 * @blkno: The block number
 * @flags: flags; with DIO_WAIT set the call waits for the read to finish
 * @bhp: the place where the buffer is returned (NULL on failure)
 *
 * Gets the buffer for @blkno and, if it is not already up to date,
 * submits a metadata read for it.  Without DIO_WAIT the function returns
 * as soon as the read has been submitted and always reports success.
 *
 * Returns: errno
 */
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
		   struct buffer_head **bhp)
{
	*bhp = getbuf(gl, blkno, CREATE);
	if (!buffer_uptodate(*bhp))
		ll_rw_block(READ_META, 1, bhp);

	if (flags & DIO_WAIT) {
		int error = gfs2_meta_wait(gl->gl_sbd, *bhp);

		if (error) {
			brelse(*bhp);
			/*
			 * Honour the documented contract: never hand back a
			 * pointer to a buffer whose reference was just dropped.
			 */
			*bhp = NULL;
			return error;
		}
	}

	return 0;
}
/**
 * gfs2_meta_wait - Wait for I/O on a block to complete
 * @sdp: the filesystem
 * @bh: The block to wait for
 *
 * Does not start any I/O itself; it only waits on @bh and checks the
 * result.  The shutdown flag is tested both before and after the wait.
 *
 * Returns: 0 if the buffer is up to date, -EIO if the filesystem is shut
 * down or the buffer is not up to date after the wait
 */
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
struct gfs2_trans *tr = current->journal_info;
/* Only raise the I/O error when the current transaction is marked touched */
if (tr && tr->tr_touched)
gfs2_io_error_bh(sdp, bh);
return -EIO;
}
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
return -EIO;
return 0;
}
/**
 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer
 * @gl: the glock the buffer belongs to
 * @bh: The buffer to be attached to
 * @meta: Flag to indicate whether its metadata or not
 *
 * Does nothing if @bh already has private data attached.  For metadata
 * buffers the check and the attachment are done under the page lock.
 * The allocation uses __GFP_NOFAIL, so its result is not checked.
 */
void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
			 int meta)
{
	struct gfs2_bufdata *bd;

	if (meta)
		lock_page(bh->b_page);

	if (bh->b_private) {
		if (meta)
			unlock_page(bh->b_page);
		return;
	}

	/*
	 * This statement used to end in ',' rather than ';', chaining the
	 * allocation and the next assignment with the comma operator.
	 */
	bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL);
	bd->bd_bh = bh;
	bd->bd_gl = gl;

	INIT_LIST_HEAD(&bd->bd_list_tr);
	/* Metadata and data buffers use different log operations */
	if (meta)
		lops_init_le(&bd->bd_le, &gfs2_buf_lops);
	else
		lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
	bh->b_private = bd;

	if (meta)
		unlock_page(bh->b_page);
}
/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to be pinned
 *
 * Sets the pinned bit (asserting it was not already set), clears the
 * dirty bit and takes an extra reference on @bh.  The buffer's private
 * data is dereferenced unconditionally.
 * NOTE(review): presumably gfs2_attach_bufdata() must have run on @bh
 * first -- confirm against callers.
 */
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
/* Pinning an already-pinned buffer is a fatal inconsistency */
if (test_set_buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
wait_on_buffer(bh);
/* If this buffer is in the AIL and it has already been written
to in-place disk block, move it to that AIL entry's ai_ail2_list. */
gfs2_log_lock(sdp);
if (bd->bd_ail && !buffer_in_io(bh))
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
gfs2_log_unlock(sdp);
clear_buffer_dirty(bh);
/* Wait again before checking the buffer contents are valid */
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
gfs2_io_error_bh(sdp, bh);
/* Reference held for as long as the buffer stays pinned */
get_bh(bh);
}
/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: the AIL entry whose ai_ail1_list the buffer is added to
 *
 * Marks the buffer dirty, clears the pinned bit and, under the log lock,
 * records @ai as the buffer's AIL entry.
 */
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai)
{
struct gfs2_bufdata *bd = bh->b_private;
gfs2_assert_withdraw(sdp, buffer_uptodate(bh));
if (!buffer_pinned(bh))
gfs2_assert_withdraw(sdp, 0);
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
gfs2_log_lock(sdp);
if (bd->bd_ail) {
/* Already on an AIL: unlink from the old one and drop one reference */
list_del(&bd->bd_ail_st_list);
brelse(bh);
} else {
/* First time on an AIL: also account it against the glock */
struct gfs2_glock *gl = bd->bd_gl;
list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
atomic_inc(&gl->gl_ail_count);
}
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
gfs2_log_unlock(sdp);
}
/**
 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore
 * @ip: the inode who owns the buffers
 * @bstart: the first buffer in the run
 * @blen: the number of buffers in the run
 *
 * For each block in the run that has a buffer in the page cache: unpin
 * it, remove it from the AIL (adding a revoke for the block), and clear
 * its dirty and uptodate bits.  Blocks with no cached buffer are skipped.
 */
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct buffer_head *bh;
while (blen) {
/* NO_CREATE: only act on buffers already present in the cache */
bh = getbuf(ip->i_gl, bstart, NO_CREATE);
if (bh) {
struct gfs2_bufdata *bd = bh->b_private;
if (test_clear_buffer_pinned(bh)) {
/*
 * NOTE(review): bd and tr are dereferenced here without a NULL check;
 * presumably a pinned buffer always has bufdata and is only wiped
 * inside a transaction -- confirm.
 */
struct gfs2_trans *tr = current->journal_info;
gfs2_log_lock(sdp);
list_del_init(&bd->bd_le.le_list);
gfs2_assert_warn(sdp, sdp->sd_log_num_buf);
sdp->sd_log_num_buf--;
gfs2_log_unlock(sdp);
tr->tr_num_buf_rm++;
/* Drop one reference now that the buffer is no longer pinned */
brelse(bh);
}
if (bd) {
gfs2_log_lock(sdp);
if (bd->bd_ail) {
/* Save the block number before the AIL reference is dropped */
u64 blkno = bh->b_blocknr;
bd->bd_ail = NULL;
list_del(&bd->bd_ail_st_list);
list_del(&bd->bd_ail_gl_list);
atomic_dec(&bd->bd_gl->gl_ail_count);
brelse(bh);
gfs2_log_unlock(sdp);
/* The revoke is added after the log lock has been released */
gfs2_trans_add_revoke(sdp, blkno);
} else
gfs2_log_unlock(sdp);
}
lock_buffer(bh);
clear_buffer_dirty(bh);
clear_buffer_uptodate(bh);
unlock_buffer(bh);
/* Drop the reference taken by getbuf() */
brelse(bh);
}
bstart++;
blen--;
}
}
/**
 * gfs2_meta_cache_flush - get rid of any references on buffers for this inode
 * @ip: The GFS2 inode
 *
 * Under ip->i_spin, walks the inode's i_cache[] array from slot 0,
 * releasing and clearing each buffer, and stops at the first empty slot
 * or after GFS2_MAX_META_HEIGHT slots.
 */
void gfs2_meta_cache_flush(struct gfs2_inode *ip)
{
	unsigned int slot;

	spin_lock(&ip->i_spin);
	for (slot = 0; slot < GFS2_MAX_META_HEIGHT && ip->i_cache[slot]; slot++) {
		brelse(ip->i_cache[slot]);
		ip->i_cache[slot] = NULL;
	}
	spin_unlock(&ip->i_spin);
}
/**
 * gfs2_meta_indirect_buffer - Get a metadata buffer
 * @ip: The GFS2 inode
 * @height: The level of this buf in the metadata (indir addr) tree (if any)
 * @num: The block number (device relative) of the buffer
 * @new: Non-zero if we may create a new buffer
 * @bhp: the buffer is returned here
 *
 * Try to use the gfs2_inode's MRU metadata tree cache.  On success the
 * buffer is returned with a reference for the caller, and ip->i_cache[]
 * holds its own separate reference for the @height slot.
 *
 * Returns: 0 on success, -EIO on a failed assertion, read or metadata
 * type check, -ENOBUFS if no buffer could be obtained
 */
int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
int new, struct buffer_head **bhp)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_glock *gl = ip->i_gl;
struct buffer_head *bh = NULL, **bh_slot = ip->i_cache + height;
int in_cache = 0;
BUG_ON(!gl);
BUG_ON(!sdp);
/* Cache hit only if the slot holds exactly the requested block */
spin_lock(&ip->i_spin);
if (*bh_slot && (*bh_slot)->b_blocknr == num) {
bh = *bh_slot;
get_bh(bh);
in_cache = 1;
}
spin_unlock(&ip->i_spin);
if (!bh)
bh = getbuf(gl, num, CREATE);
if (!bh)
return -ENOBUFS;
if (new) {
/* A new buffer at height 0 trips the assertion and is rejected */
if (gfs2_assert_warn(sdp, height))
goto err;
meta_prep_new(bh);
gfs2_trans_add_bh(ip->i_gl, bh, 1);
gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN);
gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
} else {
/* Height 0 is expected to be the dinode block, anything above an indirect block */
u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
if (!buffer_uptodate(bh)) {
ll_rw_block(READ_META, 1, &bh);
if (gfs2_meta_wait(sdp, bh))
goto err;
}
if (gfs2_metatype_check(sdp, bh, mtype))
goto err;
}
if (!in_cache) {
/* Replace whatever the slot held with this buffer, taking a reference for the cache */
spin_lock(&ip->i_spin);
if (*bh_slot)
brelse(*bh_slot);
*bh_slot = bh;
get_bh(bh);
spin_unlock(&ip->i_spin);
}
*bhp = bh;
return 0;
err:
/* *bhp is left untouched on failure */
brelse(bh);
return -EIO;
}
/**
 * gfs2_meta_ra - start readahead on an extent of a file
 * @gl: the glock the blocks belong to
 * @dblock: the starting disk block
 * @extlen: the number of blocks in the extent
 *
 * Reads the first block and issues read-ahead for the following blocks,
 * up to the gt_max_readahead tunable converted to blocks (minimum one
 * block).  Read-ahead stops early once the first block is up to date.
 *
 * returns: the first buffer in the extent
 */
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct buffer_head *first_bh, *bh;
/* Tunable is shifted down by the block-size shift to get a block count */
u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >>
sdp->sd_sb.sb_bsize_shift;
BUG_ON(!extlen);
if (max_ra < 1)
max_ra = 1;
if (extlen > max_ra)
extlen = max_ra;
first_bh = getbuf(gl, dblock, CREATE);
if (buffer_uptodate(first_bh))
goto out;
/* Skip submission if the buffer is already locked */
if (!buffer_locked(first_bh))
ll_rw_block(READ_META, 1, &first_bh);
dblock++;
extlen--;
while (extlen) {
bh = getbuf(gl, dblock, CREATE);
if (!buffer_uptodate(bh) && !buffer_locked(bh))
ll_rw_block(READA, 1, &bh);
/* Read-ahead buffers are not kept; only first_bh is returned */
brelse(bh);
dblock++;
extlen--;
/* Stop issuing read-ahead as soon as the block we actually need has arrived */
if (!buffer_locked(first_bh) && buffer_uptodate(first_bh))
goto out;
}
wait_on_buffer(first_bh);
out:
return first_bh;
}

77
fs/gfs2/meta_io.h Normal file
View File

@@ -0,0 +1,77 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __DIO_DOT_H__
#define __DIO_DOT_H__
#include <linux/buffer_head.h>
#include <linux/string.h>
#include "incore.h"
/* Zero the whole data area (b_size bytes) of a buffer. */
static inline void gfs2_buffer_clear(struct buffer_head *bh)
{
memset(bh->b_data, 0, bh->b_size);
}
/*
 * Zero a buffer's data from byte offset @head to the end, leaving the
 * first @head bytes intact.  BUGs if @head is larger than the buffer.
 */
static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head)
{
BUG_ON(head > bh->b_size);
memset(bh->b_data + head, 0, bh->b_size - head);
}
/*
 * Copy everything after the first @from_head bytes of @from_bh to offset
 * @to_head in @to_bh, then zero the last (@from_head - @to_head) bytes of
 * @to_bh.  BUGs if @from_head is smaller than @to_head.
 * NOTE(review): the zeroed region only starts where the copy ended if both
 * buffers have the same b_size -- confirm callers guarantee that.
 */
static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh,
int to_head,
struct buffer_head *from_bh,
int from_head)
{
BUG_ON(from_head < to_head);
memcpy(to_bh->b_data + to_head, from_bh->b_data + from_head,
from_bh->b_size - from_head);
memset(to_bh->b_data + to_bh->b_size + to_head - from_head,
0, from_head - to_head);
}
struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp);
void gfs2_aspace_put(struct inode *aspace);
void gfs2_meta_inval(struct gfs2_glock *gl);
void gfs2_meta_sync(struct gfs2_glock *gl);
struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno);
int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno,
int flags, struct buffer_head **bhp);
int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh);
void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
int meta);
void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
struct gfs2_ail *ai);
void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen);
void gfs2_meta_cache_flush(struct gfs2_inode *ip);
int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num,
int new, struct buffer_head **bhp);
/*
 * Get the buffer holding the inode's own block: a height-0, non-new
 * lookup of block ip->i_num.no_addr through gfs2_meta_indirect_buffer().
 * Returns that function's result.
 */
static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip,
struct buffer_head **bhp)
{
return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp);
}
struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen);
/* Non-zero if the buffer is dirty, locked or pinned */
#define buffer_busy(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned)))
/* Non-zero if the buffer is dirty or locked (pinned state is ignored) */
#define buffer_in_io(bh) \
((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock)))
#endif /* __DIO_DOT_H__ */

213
fs/gfs2/mount.c Normal file
View File

@@ -0,0 +1,213 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "mount.h"
#include "sys.h"
#include "util.h"
/**
 * gfs2_mount_args - Parse mount options
 * @sdp: the filesystem whose sd_args (and some superblock flags) are set
 * @data_arg: comma-separated option string; modified in place by strsep()
 * @remount: non-zero when parsing for a remount, in which case options
 *           that cannot change on a live mount are rejected
 *
 * On a first mount, an option string preloaded in gfs2_sys_margs replaces
 * @data_arg; ownership of that string is taken here and it is freed on
 * every exit path.
 *
 * Return: 0 on success, -EINVAL on an unknown option, a missing or
 * invalid value, or an option that may not be changed on remount
 */
int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount)
{
	struct gfs2_args *args = &sdp->sd_args;
	char *data = data_arg;
	char *options, *o, *v;
	int error = 0;

	if (!remount) {
		/* If someone preloaded options, use those instead */
		spin_lock(&gfs2_sys_margs_lock);
		if (gfs2_sys_margs) {
			data = gfs2_sys_margs;
			gfs2_sys_margs = NULL;
		}
		spin_unlock(&gfs2_sys_margs_lock);

		/* Set some defaults */
		args->ar_num_glockd = GFS2_GLOCKD_DEFAULT;
		args->ar_quota = GFS2_QUOTA_DEFAULT;
		args->ar_data = GFS2_DATA_DEFAULT;
	}

	/* Split the options into tokens with the "," character and
	   process them */

	for (options = data; (o = strsep(&options, ",")); ) {
		if (!*o)
			continue;

		/* Split "name=value" in place; v stays NULL for bare flags */
		v = strchr(o, '=');
		if (v)
			*v++ = 0;

		if (!strcmp(o, "lockproto")) {
			if (!v)
				goto need_value;
			if (remount && strcmp(v, args->ar_lockproto))
				goto cant_remount;
			strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN);
			args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0;
		}

		else if (!strcmp(o, "locktable")) {
			if (!v)
				goto need_value;
			if (remount && strcmp(v, args->ar_locktable))
				goto cant_remount;
			strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN);
			args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0;
		}

		else if (!strcmp(o, "hostdata")) {
			if (!v)
				goto need_value;
			if (remount && strcmp(v, args->ar_hostdata))
				goto cant_remount;
			strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN);
			args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0;
		}

		else if (!strcmp(o, "spectator")) {
			if (remount && !args->ar_spectator)
				goto cant_remount;
			args->ar_spectator = 1;
			sdp->sd_vfs->s_flags |= MS_RDONLY;
		}

		else if (!strcmp(o, "ignore_local_fs")) {
			if (remount && !args->ar_ignore_local_fs)
				goto cant_remount;
			args->ar_ignore_local_fs = 1;
		}

		else if (!strcmp(o, "localflocks")) {
			if (remount && !args->ar_localflocks)
				goto cant_remount;
			args->ar_localflocks = 1;
		}

		else if (!strcmp(o, "localcaching")) {
			if (remount && !args->ar_localcaching)
				goto cant_remount;
			args->ar_localcaching = 1;
		}

		else if (!strcmp(o, "debug"))
			args->ar_debug = 1;

		else if (!strcmp(o, "nodebug"))
			args->ar_debug = 0;

		else if (!strcmp(o, "upgrade")) {
			if (remount && !args->ar_upgrade)
				goto cant_remount;
			args->ar_upgrade = 1;
		}

		else if (!strcmp(o, "num_glockd")) {
			/*
			 * Start from 0 so that a value sscanf() cannot parse
			 * is rejected below instead of reading an
			 * uninitialised variable.
			 */
			unsigned int x = 0;

			if (!v)
				goto need_value;
			sscanf(v, "%u", &x);
			if (remount && x != args->ar_num_glockd)
				goto cant_remount;
			if (!x || x > GFS2_GLOCKD_MAX) {
				fs_info(sdp, "0 < num_glockd <= %u  (not %u)\n",
					GFS2_GLOCKD_MAX, x);
				error = -EINVAL;
				break;
			}
			args->ar_num_glockd = x;
		}

		else if (!strcmp(o, "acl")) {
			args->ar_posix_acl = 1;
			sdp->sd_vfs->s_flags |= MS_POSIXACL;
		}

		else if (!strcmp(o, "noacl")) {
			args->ar_posix_acl = 0;
			sdp->sd_vfs->s_flags &= ~MS_POSIXACL;
		}

		else if (!strcmp(o, "quota")) {
			if (!v)
				goto need_value;
			if (!strcmp(v, "off"))
				args->ar_quota = GFS2_QUOTA_OFF;
			else if (!strcmp(v, "account"))
				args->ar_quota = GFS2_QUOTA_ACCOUNT;
			else if (!strcmp(v, "on"))
				args->ar_quota = GFS2_QUOTA_ON;
			else {
				fs_info(sdp, "invalid value for quota\n");
				error = -EINVAL;
				break;
			}
		}

		else if (!strcmp(o, "suiddir"))
			args->ar_suiddir = 1;

		else if (!strcmp(o, "nosuiddir"))
			args->ar_suiddir = 0;

		else if (!strcmp(o, "data")) {
			if (!v)
				goto need_value;
			if (!strcmp(v, "writeback"))
				args->ar_data = GFS2_DATA_WRITEBACK;
			else if (!strcmp(v, "ordered"))
				args->ar_data = GFS2_DATA_ORDERED;
			else {
				fs_info(sdp, "invalid value for data\n");
				error = -EINVAL;
				break;
			}
		}

		else {
			fs_info(sdp, "unknown option: %s\n", o);
			error = -EINVAL;
			break;
		}
	}

	if (error)
		fs_info(sdp, "invalid mount option(s)\n");
	goto out;

need_value:
	fs_info(sdp, "need value for option %s\n", o);
	error = -EINVAL;
	goto out;

cant_remount:
	fs_info(sdp, "can't remount with option %s\n", o);
	error = -EINVAL;

out:
	/*
	 * data differs from data_arg only when the preloaded string was
	 * taken over above.  Every exit funnels through here so that the
	 * string is not leaked on the need_value/cant_remount paths; o
	 * points into it, so it is freed only after the messages above.
	 */
	if (data != data_arg)
		kfree(data);

	return error;
}

17
fs/gfs2/mount.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __MOUNT_DOT_H__
#define __MOUNT_DOT_H__
struct gfs2_sbd;
int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount);
#endif /* __MOUNT_DOT_H__ */

251
fs/gfs2/ondisk.c Normal file
View File

@@ -0,0 +1,251 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include "gfs2.h"
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "incore.h"
/*
 * pv - print one member of a structure as " <member> = <value>"
 * @ptr: pointer to the structure
 * @member: member name (also used, stringified, as the label)
 * @fmt: printk conversion for the member, e.g. "%u"
 *
 * The expansion deliberately has no trailing semicolon so that
 * "pv(...);" is exactly one statement and is safe in an unbraced
 * if/else.  The caller supplies the semicolon.
 */
#define pv(ptr, member, fmt) \
	printk(KERN_INFO " "#member" = "fmt"\n", (ptr)->member)
/*
* gfs2_xxx_in - read in an xxx struct
* first arg: the cpu-order structure
* buf: the disk-order buffer
*
* gfs2_xxx_out - write out an xxx struct
* first arg: the cpu-order structure
* buf: the disk-order buffer
*
* gfs2_xxx_print - print out an xxx struct
* first arg: the cpu-order structure
*/
/* Read a big-endian on-disk inode number pair from @buf into host order in @no. */
void gfs2_inum_in(struct gfs2_inum_host *no, const void *buf)
{
	const struct gfs2_inum *disk = buf;

	no->no_addr = be64_to_cpu(disk->no_addr);
	no->no_formal_ino = be64_to_cpu(disk->no_formal_ino);
}
/* Write the host-order inode number pair @no to @buf in big-endian on-disk form. */
void gfs2_inum_out(const struct gfs2_inum_host *no, void *buf)
{
	struct gfs2_inum *disk = buf;

	disk->no_addr = cpu_to_be64(no->no_addr);
	disk->no_formal_ino = cpu_to_be64(no->no_formal_ino);
}
/* Log both fields of a host-order inode number pair at KERN_INFO level. */
static void gfs2_inum_print(const struct gfs2_inum_host *no)
{
printk(KERN_INFO " no_formal_ino = %llu\n", (unsigned long long)no->no_formal_ino);
printk(KERN_INFO " no_addr = %llu\n", (unsigned long long)no->no_addr);
}
/* Read the big-endian metadata header at @buf into host order in @mh. */
static void gfs2_meta_header_in(struct gfs2_meta_header_host *mh, const void *buf)
{
	const struct gfs2_meta_header *disk = buf;

	mh->mh_format = be32_to_cpu(disk->mh_format);
	mh->mh_type = be32_to_cpu(disk->mh_type);
	mh->mh_magic = be32_to_cpu(disk->mh_magic);
}
/*
 * Read the on-disk superblock at @buf into the host structure @sb:
 * the metadata header, the format and block-size fields, the master and
 * root directory inode numbers, and the lock protocol and lock table
 * names (copied as GFS2_LOCKNAME_LEN raw bytes each).
 */
void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf)
{
	const struct gfs2_sb *disk = buf;

	gfs2_meta_header_in(&sb->sb_header, buf);

	sb->sb_fs_format = be32_to_cpu(disk->sb_fs_format);
	sb->sb_multihost_format = be32_to_cpu(disk->sb_multihost_format);

	sb->sb_bsize = be32_to_cpu(disk->sb_bsize);
	sb->sb_bsize_shift = be32_to_cpu(disk->sb_bsize_shift);

	gfs2_inum_in(&sb->sb_master_dir, &disk->sb_master_dir);
	gfs2_inum_in(&sb->sb_root_dir, &disk->sb_root_dir);

	memcpy(sb->sb_lockproto, disk->sb_lockproto, GFS2_LOCKNAME_LEN);
	memcpy(sb->sb_locktable, disk->sb_locktable, GFS2_LOCKNAME_LEN);
}
/* Read a big-endian on-disk resource index entry from @buf into host order in @ri. */
void gfs2_rindex_in(struct gfs2_rindex_host *ri, const void *buf)
{
	const struct gfs2_rindex *disk = buf;

	/* 64-bit block addresses */
	ri->ri_addr = be64_to_cpu(disk->ri_addr);
	ri->ri_data0 = be64_to_cpu(disk->ri_data0);

	/* 32-bit counts */
	ri->ri_length = be32_to_cpu(disk->ri_length);
	ri->ri_data = be32_to_cpu(disk->ri_data);
	ri->ri_bitbytes = be32_to_cpu(disk->ri_bitbytes);
}
/*
 * Log every field of a host-order resource index entry at KERN_INFO
 * level.  The 64-bit fields are printed with explicit casts; the 32-bit
 * fields go through the pv() helper macro.
 */
void gfs2_rindex_print(const struct gfs2_rindex_host *ri)
{
printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)ri->ri_addr);
pv(ri, ri_length, "%u");
printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)ri->ri_data0);
pv(ri, ri_data, "%u");
pv(ri, ri_bitbytes, "%u");
}
/* Read a big-endian on-disk resource group header from @buf into host order in @rg. */
void gfs2_rgrp_in(struct gfs2_rgrp_host *rg, const void *buf)
{
	const struct gfs2_rgrp *disk = buf;

	rg->rg_igeneration = be64_to_cpu(disk->rg_igeneration);
	rg->rg_dinodes = be32_to_cpu(disk->rg_dinodes);
	rg->rg_free = be32_to_cpu(disk->rg_free);
	rg->rg_flags = be32_to_cpu(disk->rg_flags);
}
/*
 * Write the host-order resource group header @rg to @buf in big-endian
 * on-disk form.  The padding word and the reserved area are zeroed.
 */
void gfs2_rgrp_out(const struct gfs2_rgrp_host *rg, void *buf)
{
	struct gfs2_rgrp *disk = buf;

	/* Fields that carry no data on disk */
	disk->__pad = cpu_to_be32(0);
	memset(&disk->rg_reserved, 0, sizeof(disk->rg_reserved));

	disk->rg_flags = cpu_to_be32(rg->rg_flags);
	disk->rg_free = cpu_to_be32(rg->rg_free);
	disk->rg_dinodes = cpu_to_be32(rg->rg_dinodes);
	disk->rg_igeneration = cpu_to_be64(rg->rg_igeneration);
}
/* Read a big-endian on-disk quota record from @buf into host order in @qu. */
void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf)
{
	const struct gfs2_quota *disk = buf;

	qu->qu_value = be64_to_cpu(disk->qu_value);
	qu->qu_warn = be64_to_cpu(disk->qu_warn);
	qu->qu_limit = be64_to_cpu(disk->qu_limit);
}
void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
{
const struct gfs2_dinode_host *di = &ip->i_di;
struct gfs2_dinode *str = buf;
str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
str->di_header.mh_type = cpu_to_be32(GFS2_METATYPE_DI);
str->di_header.__pad0 = 0;
str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
str->di_header.__pad1 = 0;
gfs2_inum_out(&ip->i_num, &str->di_num);
str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
str->di_uid = cpu_to_be32(ip->i_inode.i_uid);
str->di_gid = cpu_to_be32(ip->i_inode.i_gid);
str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
str->di_size = cpu_to_be64(di->di_size);
str->di_blocks = cpu_to_be64(di->di_blocks);
str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
str->di_goal_meta = cpu_to_be64(di->di_goal_meta);
str->di_goal_data = cpu_to_be64(di->di_goal_data);
str->di_generation = cpu_to_be64(di->di_generation);
str->di_flags = cpu_to_be32(di->di_flags);
str->di_height = cpu_to_be16(di->di_height);
str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
!(ip->i_di.di_flags & GFS2_DIF_EXHASH) ?
GFS2_FORMAT_DE : 0);
str->di_depth = cpu_to_be16(di->di_depth);
str->di_entries = cpu_to_be32(di->di_entries);
str->di_eattr = cpu_to_be64(di->di_eattr);
}
/*
 * Log an inode's number pair and selected host dinode fields at
 * KERN_INFO level.  64-bit fields are printed with explicit casts;
 * the narrower fields go through the pv() helper macro.
 */
void gfs2_dinode_print(const struct gfs2_inode *ip)
{
const struct gfs2_dinode_host *di = &ip->i_di;
gfs2_inum_print(&ip->i_num);
printk(KERN_INFO " di_size = %llu\n", (unsigned long long)di->di_size);
printk(KERN_INFO " di_blocks = %llu\n", (unsigned long long)di->di_blocks);
printk(KERN_INFO " di_goal_meta = %llu\n", (unsigned long long)di->di_goal_meta);
printk(KERN_INFO " di_goal_data = %llu\n", (unsigned long long)di->di_goal_data);
pv(di, di_flags, "0x%.8X");
pv(di, di_height, "%u");
pv(di, di_depth, "%u");
pv(di, di_entries, "%u");
printk(KERN_INFO " di_eattr = %llu\n", (unsigned long long)di->di_eattr);
}
/*
 * Read a big-endian on-disk log header from @buf into host order in @lh,
 * including its embedded metadata header.
 */
void gfs2_log_header_in(struct gfs2_log_header_host *lh, const void *buf)
{
	const struct gfs2_log_header *disk = buf;

	gfs2_meta_header_in(&lh->lh_header, buf);

	lh->lh_sequence = be64_to_cpu(disk->lh_sequence);

	lh->lh_hash = be32_to_cpu(disk->lh_hash);
	lh->lh_blkno = be32_to_cpu(disk->lh_blkno);
	lh->lh_tail = be32_to_cpu(disk->lh_tail);
	lh->lh_flags = be32_to_cpu(disk->lh_flags);
}
/* Read a big-endian on-disk inode number range from @buf into host order in @ir. */
void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
{
	const struct gfs2_inum_range *disk = buf;

	ir->ir_length = be64_to_cpu(disk->ir_length);
	ir->ir_start = be64_to_cpu(disk->ir_start);
}
/* Write the host-order inode number range @ir to @buf in big-endian on-disk form. */
void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
{
	struct gfs2_inum_range *disk = buf;

	disk->ir_length = cpu_to_be64(ir->ir_length);
	disk->ir_start = cpu_to_be64(ir->ir_start);
}
/* Read a big-endian on-disk statfs change record from @buf into host order in @sc. */
void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf)
{
	const struct gfs2_statfs_change *disk = buf;

	sc->sc_dinodes = be64_to_cpu(disk->sc_dinodes);
	sc->sc_free = be64_to_cpu(disk->sc_free);
	sc->sc_total = be64_to_cpu(disk->sc_total);
}
/* Write the host-order statfs change record @sc to @buf in big-endian on-disk form. */
void gfs2_statfs_change_out(const struct gfs2_statfs_change_host *sc, void *buf)
{
	struct gfs2_statfs_change *disk = buf;

	disk->sc_dinodes = cpu_to_be64(sc->sc_dinodes);
	disk->sc_free = cpu_to_be64(sc->sc_free);
	disk->sc_total = cpu_to_be64(sc->sc_total);
}
/* Read a big-endian on-disk quota change record from @buf into host order in @qc. */
void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void *buf)
{
	const struct gfs2_quota_change *disk = buf;

	qc->qc_id = be32_to_cpu(disk->qc_id);
	qc->qc_flags = be32_to_cpu(disk->qc_flags);
	qc->qc_change = be64_to_cpu(disk->qc_change);
}

803
fs/gfs2/ops_address.c Normal file
View File

@@ -0,0 +1,803 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/writeback.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "ops_address.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "ops_file.h"
#include "util.h"
#include "glops.h"
/*
 * Add to the current transaction (as data, not metadata) every buffer of
 * @page whose byte range overlaps [@from, @to).  The page must already
 * have buffers attached.
 */
static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
unsigned int from, unsigned int to)
{
struct buffer_head *head = page_buffers(page);
unsigned int bsize = head->b_size;
struct buffer_head *bh;
unsigned int start, end;
/*
 * The page's buffers form a circular list.  "!start" lets the loop
 * enter on the first pass, when bh == head and start == 0; on the
 * return to head start is non-zero, so the loop ends.
 */
for (bh = head, start = 0; bh != head || !start;
bh = bh->b_this_page, start = end) {
end = start + bsize;
/* Skip buffers that lie wholly outside the requested range */
if (end <= from || start >= to)
continue;
gfs2_trans_add_bh(ip->i_gl, bh, 0);
}
}
/**
 * gfs2_get_block - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add block to the file
 *
 * Thin adapter that forwards to gfs2_block_map(), passing @create through.
 *
 * Returns: errno
 */
int gfs2_get_block(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create)
{
return gfs2_block_map(inode, lblock, create, bh_result);
}
/**
 * gfs2_get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Ignored; the lookup is always done without allocation
 *
 * Maps @lblock without allocating.  A lookup that succeeds but leaves
 * b_blocknr at 0 is reported as an I/O error.
 *
 * Returns: errno
 */
static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock,
				  struct buffer_head *bh_result, int create)
{
	int rc = gfs2_block_map(inode, lblock, 0, bh_result);

	if (!rc && bh_result->b_blocknr == 0)
		rc = -EIO;

	return rc;
}
/**
 * gfs2_get_block_direct - Block lookup callback used by gfs2_direct_IO()
 * @inode: inode owning the block
 * @lblock: logical block number to look up
 * @bh_result: buffer head that receives the mapping
 * @create: ignored; gfs2_block_map() is always called with create == 0
 *
 * Returns: whatever gfs2_block_map() returns
 */
static int gfs2_get_block_direct(struct inode *inode, sector_t lblock,
				 struct buffer_head *bh_result, int create)
{
	int ret;

	ret = gfs2_block_map(inode, lblock, 0, bh_result);
	return ret;
}
/**
 * gfs2_writepage - Write complete page
 * @page: Page to write
 * @wbc: Write-back control, passed on to block_write_full_page()
 *
 * Returns: errno
 *
 * Some of this is copied from block_write_full_page() although we still
 * call it to do most of the work.
 *
 * The page is redirtied and unlocked (returning 0) when the caller is
 * already inside a transaction or when a new transaction cannot be started.
 */
static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
int error;
int done_trans = 0;
/* The inode glock must be held exclusively; otherwise fail with -EIO */
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
unlock_page(page);
return -EIO;
}
/* Already inside a transaction: do not write now, just redirty the page */
if (current->journal_info)
goto out_ignore;
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_CACHE_SIZE-1);
if (page->index > end_index || (page->index == end_index && !offset)) {
page->mapping->a_ops->invalidatepage(page, 0);
unlock_page(page);
return 0; /* don't care */
}
/* Ordered-data mounts and jdata files add the page's buffers to a transaction */
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
if (error)
goto out_ignore;
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
}
/*
 * NOTE(review): the range end is s_blocksize-1 rather than the page size;
 * if the block size is smaller than the page size this appears to cover
 * only the first buffer of the page -- confirm this is intended.
 */
gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
done_trans = 1;
}
error = block_write_full_page(page, gfs2_get_block_noalloc, wbc);
if (done_trans)
gfs2_trans_end(sdp);
gfs2_meta_cache_flush(ip);
return error;
out_ignore:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
/**
 * gfs2_writepages - Write a bunch of dirty pages back to disk
 * @mapping: The mapping to write
 * @wbc: Write-back control
 *
 * Journaled files and anything not mounted data=writeback go through the
 * kernel's generic_writepages() path for now (this will probably change
 * when allocate on flush is looked at).
 *
 * In the data=writeback, non-jdata case buffer heads can be ignored and
 * whole extents written at once via mpage_writepages(), which greatly
 * reduces the number of I/O requests and bmap calls.
 *
 * Returns: the result of the chosen writepages helper
 */
static int gfs2_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);

	if (sdp->sd_args.ar_data != GFS2_DATA_WRITEBACK || gfs2_is_jdata(ip))
		return generic_writepages(mapping, wbc);

	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
}
/**
 * stuffed_readpage - Fill a page from the data stored inside the dinode
 * @ip: the inode
 * @page: the page to fill; must be page index 0
 *
 * Copies ip->i_di.di_size bytes that follow the on-disk dinode header
 * into the page, zeroes the rest of the page and marks it up to date.
 *
 * Returns: errno
 */
static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
	struct buffer_head *di_bh;
	void *dst;
	int ret;

	BUG_ON(page->index);

	ret = gfs2_meta_inode_buffer(ip, &di_bh);
	if (ret == 0) {
		dst = kmap_atomic(page, KM_USER0);
		memcpy(dst, di_bh->b_data + sizeof(struct gfs2_dinode),
		       ip->i_di.di_size);
		memset(dst + ip->i_di.di_size, 0,
		       PAGE_CACHE_SIZE - ip->i_di.di_size);
		kunmap_atomic(dst, KM_USER0);

		brelse(di_bh);
		SetPageUptodate(page);
	}

	return ret;
}
/**
 * gfs2_readpage - readpage with locking
 * @file: The file to read a page for. N.B. This may be NULL if we are
 * reading an internal file.
 * @page: The page to read
 *
 * No glock is taken when @file is the internal-read sentinel or when the
 * file has GFF_EXLOCK set. A failed try-lock unlocks the page and returns
 * AOP_TRUNCATED_PAGE.
 *
 * Returns: errno
 */
static int gfs2_readpage(struct file *file, struct page *page)
{
struct gfs2_inode *ip = GFS2_I(page->mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
struct gfs2_file *gf = NULL;
struct gfs2_holder gh;
int error;
/* Set once gh has been initialised, so the exit paths know to release it */
int do_unlock = 0;
if (likely(file != &gfs2_internal_file_sentinel)) {
if (file) {
gf = file->private_data;
if (test_bit(GFF_EXLOCK, &gf->f_flags))
/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
goto skip_lock;
}
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
do_unlock = 1;
error = gfs2_glock_nq_atime(&gh);
if (unlikely(error))
goto out_unlock;
}
skip_lock:
if (gfs2_is_stuffed(ip)) {
/* Stuffed data is copied straight from the dinode; unlock the page here */
error = stuffed_readpage(ip, page);
unlock_page(page);
} else
error = mpage_readpage(page, gfs2_get_block);
/* A shut-down filesystem overrides whatever the read returned */
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
error = -EIO;
if (do_unlock) {
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
}
out:
return error;
out_unlock:
/* Lock acquisition failed: the glock is not held, only the holder needs cleanup */
unlock_page(page);
if (error == GLR_TRYFAILED) {
error = AOP_TRUNCATED_PAGE;
yield();
}
if (do_unlock)
gfs2_holder_uninit(&gh);
goto out;
}
/**
 * gfs2_readpages - Read a bunch of pages at once
 * @file: The file being read (may be the internal-read sentinel or NULL)
 * @mapping: The address space the pages belong to
 * @pages: List of pages, handed to mpage_readpages()
 * @nr_pages: Number of pages on @pages
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore any things
 * which are slightly inconvenient (such as locking conflicts between
 * the page lock and the glock) and return having done no I/O. Its
 * obviously not something we'd want to do on too regular a basis.
 * Any I/O we ignore at this time will be done via readpage later.
 * 2. We don't handle stuffed files here we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
 * well as read-ahead.
 *
 * Returns: 0 when the try-lock fails, -EIO when the filesystem is shut
 * down, otherwise the lock error or the mpage_readpages() result
 */
static int gfs2_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
struct inode *inode = mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder gh;
int ret = 0;
/* Set once gh has been initialised, so the exit paths know to release it */
int do_unlock = 0;
if (likely(file != &gfs2_internal_file_sentinel)) {
if (file) {
struct gfs2_file *gf = file->private_data;
/* GFF_EXLOCK set: skip taking the glock here */
if (test_bit(GFF_EXLOCK, &gf->f_flags))
goto skip_lock;
}
gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_TRY_1CB|GL_ATIME, &gh);
do_unlock = 1;
ret = gfs2_glock_nq_atime(&gh);
/* A failed try-lock is not an error for readahead: do nothing */
if (ret == GLR_TRYFAILED)
goto out_noerror;
if (unlikely(ret))
goto out_unlock;
}
skip_lock:
if (!gfs2_is_stuffed(ip))
ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
if (do_unlock) {
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
}
out:
if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
ret = -EIO;
return ret;
out_noerror:
ret = 0;
out_unlock:
/* The glock was never acquired on these paths; only the holder is released */
if (do_unlock)
gfs2_holder_uninit(&gh);
goto out;
}
/**
 * gfs2_prepare_write - Prepare to write a page to a file
 * @file: The file to write to
 * @page: The page which is to be prepared for writing
 * @from: From (byte range within page)
 * @to: To (byte range within page)
 *
 * Takes the inode glock exclusively (holder ip->i_gh), reserves quota and
 * blocks if the write needs allocation, and starts a transaction. On
 * success all of these stay held. On failure everything acquired so far
 * is released again.
 *
 * If the glock try-lock fails the page is unlocked and
 * AOP_TRUNCATED_PAGE is returned.
 *
 * Returns: errno
 */
static int gfs2_prepare_write(struct file *file, struct page *page,
			      unsigned from, unsigned to)
{
	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	unsigned int data_blocks, ind_blocks, rblocks;
	int alloc_required;
	int error = 0;
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
	loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
	struct gfs2_alloc *al;
	unsigned int write_len = to - from;

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
	error = gfs2_glock_nq_atime(&ip->i_gh);
	if (unlikely(error)) {
		if (error == GLR_TRYFAILED) {
			unlock_page(page);
			error = AOP_TRUNCATED_PAGE;
			yield();
		}
		goto out_uninit;
	}

	gfs2_write_calc_reserv(ip, write_len, &data_blocks, &ind_blocks);

	error = gfs2_write_alloc_required(ip, pos, write_len, &alloc_required);
	if (error)
		goto out_unlock;

	/* gfs2_commit_write() uses al_requested to tell whether we reserved */
	ip->i_alloc.al_requested = 0;
	if (alloc_required) {
		al = gfs2_alloc_get(ip);

		error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
		if (error)
			goto out_alloc_put;

		error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
		if (error)
			goto out_qunlock;

		al->al_requested = data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip);
		if (error)
			goto out_qunlock;
	}

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks)
		rblocks += RES_STATFS + RES_QUOTA;

	error = gfs2_trans_begin(sdp, rblocks, 0);
	if (error)
		/* No transaction was started, so skip gfs2_trans_end() */
		goto out_trans_fail;

	if (gfs2_is_stuffed(ip)) {
		if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
			/* The write no longer fits in the dinode block */
			error = gfs2_unstuff_dinode(ip, page);
			if (error == 0)
				goto prepare_write;
		} else if (!PageUptodate(page))
			error = stuffed_readpage(ip, page);
		goto out;
	}

prepare_write:
	error = block_prepare_write(page, from, to, gfs2_get_block);

out:
	if (error) {
		gfs2_trans_end(sdp);
out_trans_fail:
		if (alloc_required) {
			gfs2_inplace_release(ip);
out_qunlock:
			gfs2_quota_unlock(ip);
out_alloc_put:
			gfs2_alloc_put(ip);
		}
out_unlock:
		gfs2_glock_dq_m(1, &ip->i_gh);
out_uninit:
		gfs2_holder_uninit(&ip->i_gh);
	}

	return error;
}
/**
 * gfs2_commit_write - Commit write to a file
 * @file: The file to write to
 * @page: The page containing the data
 * @from: From (byte range within page)
 * @to: To (byte range within page)
 *
 * Ends the transaction and releases the reservation (when
 * ip->i_alloc.al_requested is non-zero) and the glock held through
 * ip->i_gh. Both the success path and the failure paths after the
 * initial glock assertion do this.
 *
 * Returns: errno
 */
static int gfs2_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
/* Returned unchanged if the glock assertion below fails */
int error = -EOPNOTSUPP;
struct buffer_head *dibh;
struct gfs2_alloc *al = &ip->i_alloc;
struct gfs2_dinode *di;
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
goto fail_nounlock;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto fail_endtrans;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
di = (struct gfs2_dinode *)dibh->b_data;
if (gfs2_is_stuffed(ip)) {
/* Stuffed file: copy the written range from the page into the dinode block */
u64 file_size;
void *kaddr;
file_size = ((u64)page->index << PAGE_CACHE_SHIFT) + to;
kaddr = kmap_atomic(page, KM_USER0);
memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
kaddr + from, to - from);
kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page);
if (inode->i_size < file_size) {
i_size_write(inode, file_size);
mark_inode_dirty(inode);
}
} else {
if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
gfs2_is_jdata(ip))
gfs2_page_add_databufs(ip, page, from, to);
error = generic_commit_write(file, page, from, to);
if (error)
goto fail;
}
/* Propagate a grown i_size to the in-core and on-disk dinode */
if (ip->i_di.di_size < inode->i_size) {
ip->i_di.di_size = inode->i_size;
di->di_size = cpu_to_be64(inode->i_size);
}
brelse(dibh);
gfs2_trans_end(sdp);
if (al->al_requested) {
gfs2_inplace_release(ip);
gfs2_quota_unlock(ip);
gfs2_alloc_put(ip);
}
gfs2_glock_dq_m(1, &ip->i_gh);
gfs2_holder_uninit(&ip->i_gh);
return 0;
fail:
brelse(dibh);
fail_endtrans:
gfs2_trans_end(sdp);
if (al->al_requested) {
gfs2_inplace_release(ip);
gfs2_quota_unlock(ip);
gfs2_alloc_put(ip);
}
gfs2_glock_dq_m(1, &ip->i_gh);
gfs2_holder_uninit(&ip->i_gh);
fail_nounlock:
ClearPageUptodate(page);
return error;
}
/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * The lookup is done under a shared glock. Stuffed files are never
 * mapped, so 0 is reported for them.
 *
 * Returns: The disk address for the block or 0 on hole or error
 */
static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
	struct gfs2_inode *ip = GFS2_I(mapping->host);
	struct gfs2_holder gh;
	sector_t phys = 0;

	if (gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh))
		return 0;

	if (!gfs2_is_stuffed(ip))
		phys = generic_block_bmap(mapping, lblock, gfs2_get_block);

	gfs2_glock_dq_uninit(&gh);
	return phys;
}
/**
 * discard_buffer - Detach a buffer from its bufdata and reset its state
 * @sdp: the filesystem
 * @bh: the buffer being discarded
 *
 * Breaks the link between @bh and its gfs2_bufdata (if any) under the
 * log lock, then clears the dirty, mapped, req, new and delay bits and
 * the block device pointer under the buffer lock.
 */
static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bufdata;

	gfs2_log_lock(sdp);
	bufdata = bh->b_private;
	if (bufdata != NULL) {
		bufdata->bd_bh = NULL;
		bh->b_private = NULL;
	}
	gfs2_log_unlock(sdp);

	lock_buffer(bh);
	clear_buffer_dirty(bh);
	bh->b_bdev = NULL;
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	clear_buffer_delay(bh);
	unlock_buffer(bh);
}
/**
 * gfs2_invalidatepage - Discard buffers of a (partially) invalidated page
 * @page: the locked page
 * @offset: byte offset within the page from which data is invalid
 *
 * Every buffer that starts at or after @offset is discarded. When the
 * whole page is invalidated (@offset == 0) the page is also offered to
 * try_to_release_page().
 */
static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
	struct buffer_head *first, *cur;
	unsigned int pos = 0;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	cur = first = page_buffers(page);
	do {
		/* Read the link and size before the buffer is discarded */
		struct buffer_head *following = cur->b_this_page;
		unsigned int len = cur->b_size;

		if (pos >= offset)
			discard_buffer(sdp, cur);

		pos += len;
		cur = following;
	} while (cur != first);

	if (offset == 0)
		try_to_release_page(page, 0);
}
/**
 * gfs2_ok_for_dio - check that dio is valid on this file
 * @ip: The inode
 * @rw: READ or WRITE (not examined)
 * @offset: The offset at which we are reading or writing
 *
 * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
 *          1 (to accept the i/o request)
 */
static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
{
	/*
	 * Should we return an error here? I can't see that O_DIRECT for
	 * a journaled file makes any sense. For now we'll silently fall
	 * back to buffered I/O, likewise we do the same for stuffed
	 * files since they are (a) small and (b) unaligned. Offsets
	 * beyond the current file size are refused as well.
	 */
	return !gfs2_is_jdata(ip) &&
	       !gfs2_is_stuffed(ip) &&
	       offset <= i_size_read(&ip->i_inode);
}
/**
 * gfs2_direct_IO - O_DIRECT read/write entry point
 * @rw: READ or WRITE
 * @iocb: the kiocb describing the request
 * @iov: the user buffers
 * @offset: file offset of the I/O
 * @nr_segs: number of segments in @iov
 *
 * Returns: the glock error, 0 when direct I/O is not valid for this file
 *          (the caller then falls back to buffered I/O), otherwise the
 *          result of blockdev_direct_IO_no_locking()
 */
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
			      const struct iovec *iov, loff_t offset,
			      unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	/* ssize_t so the direct I/O byte count is not narrowed to int */
	ssize_t rv;

	/*
	 * Deferred lock, even if its a write, since we do no allocation
	 * on this path. All we need change is atime, and this lock mode
	 * ensures that other nodes have flushed their buffered read caches
	 * (i.e. their page cache entries for this inode). We do not,
	 * unfortunately have the option of only flushing a range like
	 * the VFS does.
	 */
	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
	rv = gfs2_glock_nq_atime(&gh);
	if (rv)
		/* Not enqueued, but the initialised holder must still be released */
		goto out_uninit;

	rv = gfs2_ok_for_dio(ip, rw, offset);
	if (rv != 1)
		goto out; /* dio not valid, fall back to buffered i/o */

	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
					   iov, offset, nr_segs,
					   gfs2_get_block_direct, NULL);
out:
	gfs2_glock_dq_m(1, &gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	return rv;
}
/**
 * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out.
 * @bh: the buffer we're stuck on
 *
 * Emits diagnostics about @bh, its bufdata and its glock via fs_warn().
 * Only the first few calls print anything; later calls return at once.
 */
static void stuck_releasepage(struct buffer_head *bh)
{
struct inode *inode = bh->b_page->mapping->host;
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_bufdata *bd = bh->b_private;
struct gfs2_glock *gl;
/* Counts calls across all invocations so the log is not flooded */
static unsigned limit = 0;
if (limit > 3)
return;
limit++;
fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode);
fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n",
(unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count));
fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh));
fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL");
/* Nothing more to report without a bufdata */
if (!bd)
return;
gl = bd->bd_gl;
fs_warn(sdp, "gl = (%u, %llu)\n",
gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number);
/* "yes" means the entry is on the corresponding list (list not empty) */
fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n",
(list_empty(&bd->bd_list_tr)) ? "no" : "yes",
(list_empty(&bd->bd_le.le_list)) ? "no" : "yes");
/* For inode glocks also dump the inode number and its i_cache slots */
if (gl->gl_ops == &gfs2_inode_glops) {
struct gfs2_inode *ip = gl->gl_object;
unsigned int x;
if (!ip)
return;
fs_warn(sdp, "ip = %llu %llu\n",
(unsigned long long)ip->i_num.no_formal_ino,
(unsigned long long)ip->i_num.no_addr);
for (x = 0; x < GFS2_MAX_META_HEIGHT; x++)
fs_warn(sdp, "ip->i_cache[%u] = %s\n",
x, (ip->i_cache[x]) ? "!NULL" : "NULL");
}
}
/**
 * gfs2_releasepage - free the metadata associated with a page
 * @page: the page that's being released
 * @gfp_mask: passed from Linux VFS; only __GFP_WAIT is examined, to decide
 * whether we may wait for a busy buffer
 *
 * Call try_to_free_buffers() if the buffers in this page can be
 * released.
 *
 * Returns: 0 if a buffer is still in use and we cannot (or will no longer)
 * wait for it, otherwise the result of try_to_free_buffers()
 */
int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
{
struct inode *aspace = page->mapping->host;
struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info;
struct buffer_head *bh, *head;
struct gfs2_bufdata *bd;
/* Deadline after which we stop waiting and report the stuck buffer */
unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ;
if (!page_has_buffers(page))
goto out;
head = bh = page_buffers(page);
do {
/* Wait for the buffer's reference count to drop to zero */
while (atomic_read(&bh->b_count)) {
if (!atomic_read(&aspace->i_writecount))
return 0;
if (!(gfp_mask & __GFP_WAIT))
return 0;
if (time_after_eq(jiffies, t)) {
stuck_releasepage(bh);
/* should we withdraw here? */
return 0;
}
yield();
}
gfs2_assert_warn(sdp, !buffer_pinned(bh));
gfs2_assert_warn(sdp, !buffer_dirty(bh));
gfs2_log_lock(sdp);
bd = bh->b_private;
if (bd) {
gfs2_assert_warn(sdp, bd->bd_bh == bh);
gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr));
gfs2_assert_warn(sdp, !bd->bd_ail);
bd->bd_bh = NULL;
/* Still on bd_le.le_list: detach it from the buffer but do not free it here */
if (!list_empty(&bd->bd_le.le_list))
bd = NULL;
bh->b_private = NULL;
}
gfs2_log_unlock(sdp);
if (bd)
kmem_cache_free(gfs2_bufdata_cachep, bd);
bh = bh->b_this_page;
} while (bh != head);
out:
return try_to_free_buffers(page);
}
/*
 * Address space operations table wiring up the GFS2 page-cache callbacks
 * defined in this file. sync_page uses the generic block_sync_page().
 */
const struct address_space_operations gfs2_file_aops = {
.writepage = gfs2_writepage,
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.sync_page = block_sync_page,
.prepare_write = gfs2_prepare_write,
.commit_write = gfs2_commit_write,
.bmap = gfs2_bmap,
.invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage,
.direct_IO = gfs2_direct_IO,
};

22
fs/gfs2/ops_address.h Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_ADDRESS_DOT_H__
#define __OPS_ADDRESS_DOT_H__
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
/* Address space operations table for GFS2 files */
extern const struct address_space_operations gfs2_file_aops;
/* get_block style callback; forwards @create to gfs2_block_map() */
extern int gfs2_get_block(struct inode *inode, sector_t lblock,
struct buffer_head *bh_result, int create);
/* releasepage callback; also usable outside gfs2_file_aops */
extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
#endif /* __OPS_ADDRESS_DOT_H__ */

124
fs/gfs2/ops_dentry.c Normal file
View File

@@ -0,0 +1,124 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/smp_lock.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "dir.h"
#include "glock.h"
#include "ops_dentry.h"
#include "util.h"
/**
 * gfs2_drevalidate - Check directory lookup consistency
 * @dentry: the mapping to check
 * @nd: lookup context (unused)
 *
 * Check to make sure the lookup necessary to arrive at this inode from its
 * parent is still good. The parent directory's glock is taken in shared
 * mode unless the caller already holds it.
 *
 * Returns: 1 if the dentry is ok, 0 if it isn't
 */
static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
{
	struct dentry *parent = dget_parent(dentry);
	struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode);
	struct gfs2_inode *dip = GFS2_I(parent->d_inode);
	struct inode *inode = dentry->d_inode;
	struct gfs2_holder d_gh;
	struct gfs2_inode *ip;
	struct gfs2_inum_host inum;
	unsigned int type;
	int error;
	int had_lock = 0;

	if (inode && is_bad_inode(inode))
		goto invalid;

	/* With local caching the dcache is trusted without a directory search */
	if (sdp->sd_args.ar_localcaching)
		goto valid;

	/* d_gh is only initialised (and must only be released) if !had_lock */
	had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
	if (!had_lock) {
		error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
		if (error)
			goto fail;
	}

	error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
	switch (error) {
	case 0:
		/* Name exists on disk but the dentry is negative */
		if (!inode)
			goto invalid_gunlock;
		break;
	case -ENOENT:
		/* Name is gone: only a negative dentry is still correct */
		if (!inode)
			goto valid_gunlock;
		goto invalid_gunlock;
	default:
		goto fail_gunlock;
	}

	ip = GFS2_I(inode);

	if (!gfs2_inum_equal(&ip->i_num, &inum))
		goto invalid_gunlock;

	if (IF2DT(ip->i_inode.i_mode) != type) {
		gfs2_consist_inode(dip);
		goto fail_gunlock;
	}

valid_gunlock:
	if (!had_lock)
		gfs2_glock_dq_uninit(&d_gh);
valid:
	dput(parent);
	return 1;

invalid_gunlock:
	if (!had_lock)
		gfs2_glock_dq_uninit(&d_gh);
invalid:
	if (inode && S_ISDIR(inode->i_mode)) {
		if (have_submounts(dentry))
			goto valid;
		shrink_dcache_parent(dentry);
	}
	d_drop(dentry);
	dput(parent);
	return 0;

fail_gunlock:
	/* Same guard as the other unlock paths: d_gh is unset if had_lock */
	if (!had_lock)
		gfs2_glock_dq_uninit(&d_gh);
fail:
	dput(parent);
	return 0;
}
/**
 * gfs2_dhash - d_hash callback: hash a name with the GFS2 disk hash
 * @dentry: the directory the name is looked up in (unused)
 * @str: the name; its hash field is overwritten
 *
 * Returns: 0
 */
static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
{
	unsigned int hash = gfs2_disk_hash(str->name, str->len);

	str->hash = hash;
	return 0;
}
/* Dentry operations for GFS2: revalidation and on-disk compatible hashing */
struct dentry_operations gfs2_dops = {
.d_revalidate = gfs2_drevalidate,
.d_hash = gfs2_dhash,
};

17
fs/gfs2/ops_dentry.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_DENTRY_DOT_H__
#define __OPS_DENTRY_DOT_H__
#include <linux/dcache.h>
/* GFS2 dentry operations table (d_revalidate and d_hash) */
extern struct dentry_operations gfs2_dops;
#endif /* __OPS_DENTRY_DOT_H__ */

289
fs/gfs2/ops_export.c Normal file
View File

@@ -0,0 +1,289 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "ops_dentry.h"
#include "ops_export.h"
#include "rgrp.h"
#include "util.h"
/**
 * gfs2_decode_fh - Turn a file handle back into a dentry
 * @sb: the superblock
 * @p: the file handle, an array of big-endian 32-bit words
 * @fh_len: handle length in 32-bit words
 * @fh_type: handle type (not examined)
 * @acceptable: callback passed on to find_exported_dentry()
 * @context: opaque argument for @acceptable
 *
 * A small handle carries only the inode's own number. A large handle
 * additionally carries the parent's inode number and a mode word.
 *
 * Returns: the result of find_exported_dentry(), or NULL if @fh_len is
 *          neither GFS2_SMALL_FH_SIZE nor GFS2_LARGE_FH_SIZE
 */
static struct dentry *gfs2_decode_fh(struct super_block *sb,
				     __u32 *p,
				     int fh_len,
				     int fh_type,
				     int (*acceptable)(void *context,
						       struct dentry *dentry),
				     void *context)
{
	__be32 *fh = (__force __be32 *)p;
	struct gfs2_fh_obj fh_obj;
	struct gfs2_inum_host *this, parent;

	this = &fh_obj.this;
	fh_obj.imode = DT_UNKNOWN;
	/* Size taken from the object itself, not from a different struct type */
	memset(&parent, 0, sizeof(parent));

	switch (fh_len) {
	case GFS2_LARGE_FH_SIZE:
		parent.no_formal_ino = ((u64)be32_to_cpu(fh[4])) << 32;
		parent.no_formal_ino |= be32_to_cpu(fh[5]);
		parent.no_addr = ((u64)be32_to_cpu(fh[6])) << 32;
		parent.no_addr |= be32_to_cpu(fh[7]);
		fh_obj.imode = be32_to_cpu(fh[8]);
		/* fall through - a large handle begins with the small one */
	case GFS2_SMALL_FH_SIZE:
		this->no_formal_ino = ((u64)be32_to_cpu(fh[0])) << 32;
		this->no_formal_ino |= be32_to_cpu(fh[1]);
		this->no_addr = ((u64)be32_to_cpu(fh[2])) << 32;
		this->no_addr |= be32_to_cpu(fh[3]);
		break;
	default:
		return NULL;
	}

	return gfs2_export_ops.find_exported_dentry(sb, &fh_obj, &parent,
						    acceptable, context);
}
static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
int connectable)
{
__be32 *fh = (__force __be32 *)p;
struct inode *inode = dentry->d_inode;
struct super_block *sb = inode->i_sb;
struct gfs2_inode *ip = GFS2_I(inode);
if (*len < GFS2_SMALL_FH_SIZE ||
(connectable && *len < GFS2_LARGE_FH_SIZE))
return 255;
fh[0] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
fh[1] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
fh[2] = cpu_to_be32(ip->i_num.no_addr >> 32);
fh[3] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
*len = GFS2_SMALL_FH_SIZE;
if (!connectable || inode == sb->s_root->d_inode)
return *len;
spin_lock(&dentry->d_lock);
inode = dentry->d_parent->d_inode;
ip = GFS2_I(inode);
igrab(inode);
spin_unlock(&dentry->d_lock);
fh[4] = cpu_to_be32(ip->i_num.no_formal_ino >> 32);
fh[5] = cpu_to_be32(ip->i_num.no_formal_ino & 0xFFFFFFFF);
fh[6] = cpu_to_be32(ip->i_num.no_addr >> 32);
fh[7] = cpu_to_be32(ip->i_num.no_addr & 0xFFFFFFFF);
fh[8] = cpu_to_be32(inode->i_mode);
fh[9] = 0; /* pad to double word */
*len = GFS2_LARGE_FH_SIZE;
iput(inode);
return *len;
}
/* Context handed to the get_name_filldir() callback by gfs2_get_name() */
struct get_name_filldir {
/* Inode number being searched for; only no_addr is compared */
struct gfs2_inum_host inum;
/* Caller-supplied buffer that receives the NUL-terminated name */
char *name;
};
/**
 * get_name_filldir - filldir callback that looks for one inode number
 * @opaque: the struct get_name_filldir search context
 * @name: name of the current directory entry
 * @length: length of @name
 * @offset: directory offset of the entry (unused)
 * @inum: inode number of the entry
 * @type: entry type (unused)
 *
 * Returns: 1 after copying the name of the matching entry into the
 *          context buffer, 0 for any other entry
 */
static int get_name_filldir(void *opaque, const char *name, int length,
			    loff_t offset, u64 inum, unsigned int type)
{
	struct get_name_filldir *ctx = opaque;

	if (ctx->inum.no_addr == inum) {
		memcpy(ctx->name, name, length);
		ctx->name[length] = 0;
		return 1;
	}

	return 0;
}
static int gfs2_get_name(struct dentry *parent, char *name,
struct dentry *child)
{
struct inode *dir = parent->d_inode;
struct inode *inode = child->d_inode;
struct gfs2_inode *dip, *ip;
struct get_name_filldir gnfd;
struct gfs2_holder gh;
u64 offset = 0;
int error;
if (!dir)
return -EINVAL;
if (!S_ISDIR(dir->i_mode) || !inode)
return -EINVAL;
dip = GFS2_I(dir);
ip = GFS2_I(inode);
*name = 0;
gnfd.inum = ip->i_num;
gnfd.name = name;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh);
if (error)
return error;
error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir);
gfs2_glock_dq_uninit(&gh);
if (!error && !*name)
error = -ENOENT;
return error;
}
/**
 * gfs2_get_parent - Find the parent directory of a dentry
 * @child: the dentry whose ".." entry is looked up
 *
 * Returns: an anonymous dentry for the parent, or an ERR_PTR() carrying
 *          -ENOENT, -ENOMEM or the error reported by gfs2_lookupi()
 */
static struct dentry *gfs2_get_parent(struct dentry *child)
{
	struct qstr dotdot;
	struct inode *parent;
	struct dentry *anon;

	gfs2_str2qstr(&dotdot, "..");
	parent = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL);

	/*
	 * On failure the inode pointer carries the error value; hand the
	 * same value back as a(n invalid) dentry pointer.
	 */
	if (IS_ERR(parent))
		return ERR_PTR(PTR_ERR(parent));
	if (parent == NULL)
		return ERR_PTR(-ENOENT);

	anon = d_alloc_anon(parent);
	if (anon == NULL) {
		iput(parent);
		return ERR_PTR(-ENOMEM);
	}

	anon->d_op = &gfs2_dops;
	return anon;
}
/**
 * gfs2_get_dentry - Obtain a dentry for the inode named by a file handle
 * @sb: the superblock
 * @inum_obj: pointer to a struct gfs2_fh_obj (inode number plus mode)
 *
 * Uses an already cached inode if there is one. Otherwise checks under
 * the inode, rindex and resource group glocks that the block is marked
 * as a dinode before looking the inode up.
 *
 * Returns: an anonymous dentry, or an ERR_PTR() on failure
 */
static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_fh_obj *fh_obj = (struct gfs2_fh_obj *)inum_obj;
struct gfs2_inum_host *inum = &fh_obj->this;
struct gfs2_holder i_gh, ri_gh, rgd_gh;
struct gfs2_rgrpd *rgd;
struct inode *inode;
struct dentry *dentry;
int error;
/* System files? */
inode = gfs2_ilookup(sb, inum);
if (inode) {
/* Cached inode with a different formal number: the handle is stale */
if (GFS2_I(inode)->i_num.no_formal_ino != inum->no_formal_ino) {
iput(inode);
return ERR_PTR(-ESTALE);
}
goto out_inode;
}
error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return ERR_PTR(error);
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
goto fail;
error = -EINVAL;
rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
if (!rgd)
goto fail_rindex;
error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
if (error)
goto fail_rindex;
/* The block must currently be allocated as a dinode */
error = -ESTALE;
if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
goto fail_rgd;
gfs2_glock_dq_uninit(&rgd_gh);
gfs2_glock_dq_uninit(&ri_gh);
/* error is still -ESTALE here, which is what a NULL inode reports */
inode = gfs2_inode_lookup(sb, inum, fh_obj->imode);
if (!inode)
goto fail;
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
goto fail;
}
error = gfs2_inode_refresh(GFS2_I(inode));
if (error) {
iput(inode);
goto fail;
}
/* Inodes flagged GFS2_DIF_SYSTEM are refused with -EIO */
error = -EIO;
if (GFS2_I(inode)->i_di.di_flags & GFS2_DIF_SYSTEM) {
iput(inode);
goto fail;
}
gfs2_glock_dq_uninit(&i_gh);
out_inode:
dentry = d_alloc_anon(inode);
if (!dentry) {
iput(inode);
return ERR_PTR(-ENOMEM);
}
dentry->d_op = &gfs2_dops;
return dentry;
fail_rgd:
gfs2_glock_dq_uninit(&rgd_gh);
fail_rindex:
gfs2_glock_dq_uninit(&ri_gh);
fail:
gfs2_glock_dq_uninit(&i_gh);
return ERR_PTR(error);
}
/*
 * Export operations table for GFS2. gfs2_decode_fh() calls back through
 * this table's find_exported_dentry member, which is not set here.
 */
struct export_operations gfs2_export_ops = {
.decode_fh = gfs2_decode_fh,
.encode_fh = gfs2_encode_fh,
.get_name = gfs2_get_name,
.get_parent = gfs2_get_parent,
.get_dentry = gfs2_get_dentry,
};

22
fs/gfs2/ops_export.h Normal file
View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_EXPORT_DOT_H__
#define __OPS_EXPORT_DOT_H__
/* File handle lengths, counted in 32-bit words */
/* Small handle: formal inode number and address of the inode itself */
#define GFS2_SMALL_FH_SIZE 4
/* Large handle: small handle plus the parent's numbers, a mode word and padding */
#define GFS2_LARGE_FH_SIZE 10
extern struct export_operations gfs2_export_ops;
/* Decoded file handle passed to the get_dentry export operation */
struct gfs2_fh_obj {
/* Inode number of the object the handle refers to */
struct gfs2_inum_host this;
/* Mode word from a large handle, or DT_UNKNOWN for a small one */
__u32 imode;
};
#endif /* __OPS_EXPORT_DOT_H__ */

654
fs/gfs2/ops_file.c Normal file
View File

@@ -0,0 +1,654 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>
#include <linux/ext2_fs.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include <linux/writeback.h>
#include <asm/uaccess.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lm.h"
#include "log.h"
#include "meta_io.h"
#include "ops_file.h"
#include "ops_vm.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "eaops.h"
/*
 * Most fields left uninitialised to catch anybody who tries to
 * use them. f_flags set to prevent file_accessed() from touching
 * any other part of this. Its use is purely as a flag so that we
 * know (in readpage()) whether or not to do locking.
 */
struct file gfs2_internal_file_sentinel = {
.f_flags = O_NOATIME|O_RDONLY,
};
/**
 * gfs2_read_actor - Copy page data into a kernel buffer
 * @desc: read descriptor tracking the destination and remaining count
 * @page: source page
 * @offset: offset of the data within @page
 * @size: number of bytes available at @offset
 *
 * Copies at most desc->count bytes and advances the descriptor.
 *
 * Returns: the number of bytes copied
 */
static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
			   unsigned long offset, unsigned long size)
{
	unsigned long remaining = desc->count;
	char *src;

	/* Never copy more than the caller still wants */
	if (size > remaining)
		size = remaining;

	src = kmap(page);
	memcpy(desc->arg.data, src + offset, size);
	kunmap(page);

	desc->count = remaining - size;
	desc->written += size;
	desc->arg.buf += size;

	return size;
}
/**
 * gfs2_internal_read - Read from an inode into a kernel buffer
 * @ip: the inode to read from
 * @ra_state: readahead state passed to the generic read path
 * @buf: destination kernel buffer
 * @pos: file position; updated by the generic read path
 * @size: number of bytes to read
 *
 * Uses gfs2_internal_file_sentinel as the file so that readpage can
 * recognise an internal read.
 *
 * Returns: the number of bytes read if any, otherwise the error recorded
 *          in the read descriptor
 */
int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
		       char *buf, loff_t *pos, unsigned size)
{
	struct inode *inode = &ip->i_inode;
	read_descriptor_t desc;

	desc.arg.data = buf;
	desc.count = size;
	desc.written = 0;
	desc.error = 0;

	do_generic_mapping_read(inode->i_mapping, ra_state,
				&gfs2_internal_file_sentinel, pos, &desc,
				gfs2_read_actor);

	if (desc.written)
		return desc.written;
	return desc.error;
}
/**
 * gfs2_llseek - seek to a location in a file
 * @file: the file
 * @offset: the offset
 * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END)
 *
 * SEEK_END (origin 2) requires the glock for the file because it
 * references the file's size; the other origins seek without it.
 *
 * Returns: The new offset, or errno
 */
static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_holder i_gh;
	loff_t ret;

	if (origin != 2)
		return remote_llseek(file, offset, origin);

	ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
	if (ret)
		return ret;

	ret = remote_llseek(file, offset, origin);
	gfs2_glock_dq_uninit(&i_gh);

	return ret;
}
/**
 * gfs2_readdir - Read directory entries from a directory
 * @file: The directory to read from
 * @dirent: Buffer for dirents
 * @filldir: Function used to do the copying
 *
 * The directory is read under a shared glock and the file position is
 * updated to wherever gfs2_dir_read() stopped.
 *
 * Returns: errno
 */
static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
{
	struct inode *dir = file->f_mapping->host;
	struct gfs2_holder d_gh;
	u64 pos = file->f_pos;
	int ret;

	gfs2_holder_init(GFS2_I(dir)->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
	ret = gfs2_glock_nq_atime(&d_gh);
	if (ret) {
		/* Not enqueued, but the holder still has to be released */
		gfs2_holder_uninit(&d_gh);
		return ret;
	}

	ret = gfs2_dir_read(dir, &pos, dirent, filldir);

	gfs2_glock_dq_uninit(&d_gh);

	file->f_pos = pos;

	return ret;
}
/**
 * fsflags_cvt - translate a flag word through a per-bit lookup table
 * @table: A table of 32 u32 flags
 * @val: a 32 bit value to convert
 *
 * For every bit N set in @val, table[N] is OR-ed into the result. This
 * function can be used to convert between fsflags values and GFS2's own
 * flags values.
 *
 * Returns: the converted flags
 */
static u32 fsflags_cvt(const u32 *table, u32 val)
{
	u32 converted = 0;
	unsigned int bit;

	for (bit = 0; val != 0; bit++, val >>= 1) {
		if (val & 1)
			converted |= table[bit];
	}

	return converted;
}
/*
 * Lookup table for fsflags_cvt(): the array index is a bit position in the
 * generic (FS_*_FL style) flag word supplied by user space, the value is the
 * GFS2_DIF_* flag that bit translates to.  Positions not listed are zero, so
 * those bits are silently dropped by the conversion.
 * NOTE(review): the numeric indices are presumably the bit numbers of the
 * FS_*_FL constants named in gfs2_to_fsflags -- confirm against <linux/fs.h>.
 */
static const u32 fsflags_to_gfs2[32] = {
[3] = GFS2_DIF_SYNC,
[4] = GFS2_DIF_IMMUTABLE,
[5] = GFS2_DIF_APPENDONLY,
[7] = GFS2_DIF_NOATIME,
[12] = GFS2_DIF_EXHASH,
[14] = GFS2_DIF_JDATA,
[20] = GFS2_DIF_DIRECTIO,
};
/*
 * Lookup table for fsflags_cvt() in the opposite direction: the array index
 * is a GFS2 flag bit number (gfs2fl_*), the value is the generic FS_*_FL flag
 * reported to user space.  Both the plain and the "inherit" variants of the
 * directio and jdata flags map onto the same generic flag.
 */
static const u32 gfs2_to_fsflags[32] = {
[gfs2fl_Sync] = FS_SYNC_FL,
[gfs2fl_Immutable] = FS_IMMUTABLE_FL,
[gfs2fl_AppendOnly] = FS_APPEND_FL,
[gfs2fl_NoAtime] = FS_NOATIME_FL,
[gfs2fl_ExHash] = FS_INDEX_FL,
[gfs2fl_Jdata] = FS_JOURNAL_DATA_FL,
[gfs2fl_Directio] = FS_DIRECTIO_FL,
[gfs2fl_InheritDirectio] = FS_DIRECTIO_FL,
[gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
};
/**
 * gfs2_get_flags - report an inode's flags to user space
 * @filp: the open file whose inode is queried
 * @ptr: user-space location that receives the converted flag word
 *
 * The inode glock is held shared while di_flags is read and translated
 * through the gfs2_to_fsflags table.
 *
 * Returns: 0 on success, -EFAULT if @ptr cannot be written, or the error
 *          returned while acquiring the glock
 */
static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder gh;
	int error;
	u32 fsflags;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
	error = gfs2_glock_nq_atime(&gh);
	if (error)
		goto out_uninit;

	fsflags = fsflags_cvt(gfs2_to_fsflags, ip->i_di.di_flags);
	if (put_user(fsflags, ptr))
		error = -EFAULT;

	gfs2_glock_dq_m(1, &gh);
out_uninit:
	/*
	 * The holder was initialised above, so it must be released on the
	 * failure path too (as gfs2_readdir() and gfs2_mmap() do).
	 */
	gfs2_holder_uninit(&gh);
	return error;
}
void gfs2_set_inode_flags(struct inode *inode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_dinode_host *di = &ip->i_di;
unsigned int flags = inode->i_flags;
flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
if (di->di_flags & GFS2_DIF_IMMUTABLE)
flags |= S_IMMUTABLE;
if (di->di_flags & GFS2_DIF_APPENDONLY)
flags |= S_APPEND;
if (di->di_flags & GFS2_DIF_NOATIME)
flags |= S_NOATIME;
if (di->di_flags & GFS2_DIF_SYNC)
flags |= S_SYNC;
inode->i_flags = flags;
}
/*
 * Flags that can be set by user space.  do_gfs2_set_flags() fails with
 * -EINVAL when a request would change any di_flags bit outside this mask.
 */
#define GFS2_FLAGS_USER_SET (GFS2_DIF_JDATA| \
GFS2_DIF_DIRECTIO| \
GFS2_DIF_IMMUTABLE| \
GFS2_DIF_APPENDONLY| \
GFS2_DIF_NOATIME| \
GFS2_DIF_SYNC| \
GFS2_DIF_SYSTEM| \
GFS2_DIF_INHERIT_DIRECTIO| \
GFS2_DIF_INHERIT_JDATA)
/**
 * do_gfs2_set_flags - set flags on an inode
 * @filp: the open file whose inode is modified
 * @reqflags: the requested GFS2_DIF_* flag values
 * @mask: which bits of @reqflags are valid; other bits keep their
 *        current value
 *
 * Takes the inode glock exclusively, validates the requested change and,
 * if anything actually changes, writes the new flags to the dinode inside
 * a transaction and refreshes the VFS inode flags.
 *
 * Returns: 0 on success (including "nothing to change"), -EINVAL if a flag
 *          outside GFS2_FLAGS_USER_SET would change, -EPERM if the change
 *          is not permitted, or another errno
 */
static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
{
struct inode *inode = filp->f_path.dentry->d_inode;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct buffer_head *bh;
struct gfs2_holder gh;
int error;
u32 new_flags, flags;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
if (error)
return error;
flags = ip->i_di.di_flags;
/* Take masked bits from the request, keep everything else as it is */
new_flags = (flags & ~mask) | (reqflags & mask);
/* Nothing changes: leave with error still 0 from the lock acquisition */
if ((new_flags ^ flags) == 0)
goto out;
/*
 * On a directory a requested change of JDATA/DIRECTIO is redirected to the
 * corresponding INHERIT_ flag: the double XOR undoes the change to the
 * plain bit and toggles the inherit bit instead.
 */
if (S_ISDIR(inode->i_mode)) {
if ((new_flags ^ flags) & GFS2_DIF_JDATA)
new_flags ^= (GFS2_DIF_JDATA|GFS2_DIF_INHERIT_JDATA);
if ((new_flags ^ flags) & GFS2_DIF_DIRECTIO)
new_flags ^= (GFS2_DIF_DIRECTIO|GFS2_DIF_INHERIT_DIRECTIO);
}
/* Reject changes to any flag user space is not allowed to touch */
error = -EINVAL;
if ((new_flags ^ flags) & ~GFS2_FLAGS_USER_SET)
goto out;
error = -EPERM;
/* Other flags cannot change while immutable/append-only stays set */
if (IS_IMMUTABLE(inode) && (new_flags & GFS2_DIF_IMMUTABLE))
goto out;
if (IS_APPEND(inode) && (new_flags & GFS2_DIF_APPENDONLY))
goto out;
/* Toggling the immutable flag needs CAP_LINUX_IMMUTABLE */
if (((new_flags ^ flags) & GFS2_DIF_IMMUTABLE) &&
!capable(CAP_LINUX_IMMUTABLE))
goto out;
/* The write-permission check is skipped for a currently immutable inode */
if (!IS_IMMUTABLE(inode)) {
error = permission(inode, MAY_WRITE, NULL);
if (error)
goto out;
}
error = gfs2_trans_begin(sdp, RES_DINODE, 0);
if (error)
goto out;
error = gfs2_meta_inode_buffer(ip, &bh);
if (error)
goto out_trans_end;
/* Add the dinode buffer to the transaction, then write the new flags out */
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_flags = new_flags;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
/* Keep the VFS view (inode->i_flags) in step with the dinode */
gfs2_set_inode_flags(inode);
out_trans_end:
gfs2_trans_end(sdp);
out:
gfs2_glock_dq_uninit(&gh);
return error;
}
/**
 * gfs2_set_flags - set inode flags from a user-space flag word
 * @filp: the open file whose inode is modified
 * @ptr: user-space location holding the requested flag word
 *
 * The word is translated with the fsflags_to_gfs2 table and applied with
 * every bit of the mask enabled.
 *
 * Returns: -EFAULT if @ptr cannot be read, otherwise the result of
 *          do_gfs2_set_flags()
 */
static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
{
	u32 requested;

	if (get_user(requested, ptr))
		return -EFAULT;

	return do_gfs2_set_flags(filp,
				 fsflags_cvt(fsflags_to_gfs2, requested), ~0);
}
static long gfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch(cmd) {
case FS_IOC_GETFLAGS:
return gfs2_get_flags(filp, (u32 __user *)arg);
case FS_IOC_SETFLAGS:
return gfs2_set_flags(filp, (u32 __user *)arg);
}
return -ENOTTY;
}
/**
 * gfs2_mmap - select the VM operations for a new mapping
 * @file: The file to map
 * @vma: The VMA which describes the mapping
 *
 * The inode glock is taken shared (with atime handling) while the
 * vm_ops are chosen, and dropped again before returning.
 *
 * Returns: 0 or error code
 */
static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	const unsigned long shared_write = VM_MAYSHARE | VM_MAYWRITE;
	struct gfs2_holder i_gh;
	int error;

	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
	error = gfs2_glock_nq_atime(&i_gh);
	if (error) {
		gfs2_holder_uninit(&i_gh);
		return error;
	}

	/*
	 * Test VM_MAYWRITE rather than VM_WRITE: a later mprotect() call
	 * can turn VM_WRITE on.
	 */
	vma->vm_ops = ((vma->vm_flags & shared_write) == shared_write) ?
			&gfs2_vm_ops_sharewrite : &gfs2_vm_ops_private;

	gfs2_glock_dq_uninit(&i_gh);
	return error;
}
/**
 * gfs2_open - set up per-open state for a file
 * @inode: the inode being opened
 * @file: the struct file for this opening
 *
 * Allocates the struct gfs2_file hung off file->private_data.  For
 * regular files the inode glock is taken shared to check the size
 * against the non-LFS limit and to honour the dinode's direct I/O flag.
 *
 * Returns: errno
 */
static int gfs2_open(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	struct gfs2_file *fp;
	int error;

	fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	mutex_init(&fp->f_fl_mutex);

	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
	file->private_data = fp;

	/* Only regular files need the size and direct I/O checks */
	if (!S_ISREG(ip->i_inode.i_mode))
		return 0;

	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
				   &i_gh);
	if (error)
		goto fail;

	/* Files beyond MAX_NON_LFS may only be opened with O_LARGEFILE */
	if (!(file->f_flags & O_LARGEFILE) &&
	    ip->i_di.di_size > MAX_NON_LFS) {
		error = -EFBIG;
		goto fail_gunlock;
	}

	/* Listen to the Direct I/O flag */
	if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
		file->f_flags |= O_DIRECT;

	gfs2_glock_dq_uninit(&i_gh);
	return 0;

fail_gunlock:
	gfs2_glock_dq_uninit(&i_gh);
fail:
	file->private_data = NULL;
	kfree(fp);
	return error;
}
/**
 * gfs2_close - release the per-open state of a struct file
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Detaches and frees the struct gfs2_file that gfs2_open() stored in
 * file->private_data.
 *
 * Returns: 0, or -EIO if no private data was attached
 */
static int gfs2_close(struct inode *inode, struct file *file)
{
	struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
	struct gfs2_file *fp = file->private_data;

	file->private_data = NULL;

	if (gfs2_assert_warn(sdp, fp))
		return -EIO;

	kfree(fp);
	return 0;
}
/**
* gfs2_fsync - sync the dirty data for a file (across the cluster)
* @file: the file that points to the dentry (we ignore this)
* @dentry: the dentry that points to the inode to sync
*
* The VFS will flush "normal" data for us. We only need to worry
* about metadata here. For journaled data, we just do a log flush
* as we can't avoid it. Otherwise we can just bale out if datasync
* is set. For stuffed inodes we must flush the log in order to
* ensure that all data is on disk.
*
* The call to write_inode_now() is there to write back metadata and
* the inode itself. It does also try and write the data, but thats
* (hopefully) a no-op due to the VFS having already called filemap_fdatawrite()
* for us.
*
* Returns: errno
*/
static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
int ret = 0;
if (gfs2_is_jdata(GFS2_I(inode))) {
gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
return 0;
}
if (sync_state != 0) {
if (!datasync)
ret = write_inode_now(inode, 0);
if (gfs2_is_stuffed(GFS2_I(inode)))
gfs2_log_flush(GFS2_SB(inode), GFS2_I(inode)->i_gl);
}
return ret;
}
/**
* gfs2_lock - acquire/release a posix lock on a file
* @file: the file pointer
* @cmd: either modify or retrieve lock state, possibly wait
* @fl: type and range of lock
*
* Returns: errno
*/
static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
struct lm_lockname name =
{ .ln_number = ip->i_num.no_addr,
.ln_type = LM_TYPE_PLOCK };
if (!(fl->fl_flags & FL_POSIX))
return -ENOLCK;
if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK;
if (sdp->sd_args.ar_localflocks) {
if (IS_GETLK(cmd)) {
struct file_lock tmp;
int ret;
ret = posix_test_lock(file, fl, &tmp);
fl->fl_type = F_UNLCK;
if (ret)
memcpy(fl, &tmp, sizeof(struct file_lock));
return 0;
} else {
return posix_lock_file_wait(file, fl);
}
}
if (IS_GETLK(cmd))
return gfs2_lm_plock_get(sdp, &name, file, fl);
else if (fl->fl_type == F_UNLCK)
return gfs2_lm_punlock(sdp, &name, file, fl);
else
return gfs2_lm_plock(sdp, &name, file, cmd, fl);
}
/*
 * do_flock - acquire or convert a cluster-wide flock for an open file.
 * @file: the file being locked
 * @cmd: lock command; only a SETLKW-style command waits for the lock
 * @fl: the requested lock (F_WRLCK maps to an exclusive glock, anything
 *      else to a shared one)
 *
 * The per-open holder fp->f_fl_gh tracks the flock glock; f_fl_mutex
 * serialises all changes to it.
 *
 * Returns 0 on success, -EAGAIN if a non-waiting request could not be
 * granted, or another errno.
 */
static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
struct gfs2_file *fp = file->private_data;
struct gfs2_holder *fl_gh = &fp->f_fl_gh;
struct gfs2_inode *ip = GFS2_I(file->f_path.dentry->d_inode);
struct gfs2_glock *gl;
unsigned int state;
int flags;
int error = 0;
state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
/* Only try once (LM_FLAG_TRY) unless the caller asked to wait */
flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
mutex_lock(&fp->f_fl_mutex);
gl = fl_gh->gh_gl;
if (gl) {
/* Already holding a flock: nothing to do if the state matches */
if (fl_gh->gh_state == state)
goto out;
/*
 * Converting: keep a reference on the glock, drop the local VFS flock
 * and the old holder, then re-acquire in the new state below.
 */
gfs2_glock_hold(gl);
flock_lock_file_wait(file,
&(struct file_lock){.fl_type = F_UNLCK});
gfs2_glock_dq_uninit(fl_gh);
} else {
/* First flock on this open file: look up (or create) the flock glock */
error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
ip->i_num.no_addr, &gfs2_flock_glops,
CREATE, &gl);
if (error)
goto out;
}
gfs2_holder_init(gl, state, flags, fl_gh);
/* NOTE(review): presumably the holder now owns its own reference, so the
   one obtained above can be dropped -- confirm in glock.c */
gfs2_glock_put(gl);
error = gfs2_glock_nq(fl_gh);
if (error) {
gfs2_holder_uninit(fl_gh);
/* A failed try-lock is reported to the caller as -EAGAIN */
if (error == GLR_TRYFAILED)
error = -EAGAIN;
} else {
/* Record the lock with the local VFS as well; this is expected to succeed */
error = flock_lock_file_wait(file, fl);
gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
}
out:
mutex_unlock(&fp->f_fl_mutex);
return error;
}
/*
 * do_unflock - release the flock held through an open file.
 * @file: the file being unlocked
 * @fl: the unlock request, passed on to the local VFS flock code
 *
 * Under f_fl_mutex, drops the local VFS flock and then, if the per-open
 * holder is attached to a glock, dequeues and uninitialises that holder.
 */
static void do_unflock(struct file *file, struct file_lock *fl)
{
struct gfs2_file *fp = file->private_data;
struct gfs2_holder *fl_gh = &fp->f_fl_gh;
mutex_lock(&fp->f_fl_mutex);
flock_lock_file_wait(file, fl);
if (fl_gh->gh_gl)
gfs2_glock_dq_uninit(fl_gh);
mutex_unlock(&fp->f_fl_mutex);
}
/**
 * gfs2_flock - acquire/release a flock lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * With the localflocks mount argument the request is served purely by the
 * local VFS flock code; otherwise it goes through the flock glock.
 *
 * Returns: errno
 */
static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);

	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;
	/* Refuse inodes that have setgid set without group execute */
	if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
		return -ENOLCK;

	if (sdp->sd_args.ar_localflocks)
		return flock_lock_file_wait(file, fl);

	if (fl->fl_type != F_UNLCK)
		return do_flock(file, cmd, fl);

	do_unflock(file, fl);
	return 0;
}
/*
 * File operations table (declared in ops_file.h as gfs2_file_fops).  Read,
 * write, sendfile and splice use the generic VFS helpers; the remaining
 * entries are the GFS2 handlers defined in this file.
 */
const struct file_operations gfs2_file_fops = {
.llseek = gfs2_llseek,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
.release = gfs2_close,
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.sendfile = generic_file_sendfile,
.flock = gfs2_flock,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
/*
 * File operations table (declared in ops_file.h as gfs2_dir_fops).  It
 * shares the ioctl, open, release, fsync and locking handlers with
 * gfs2_file_fops and adds readdir; no read/write/mmap entries are provided.
 */
const struct file_operations gfs2_dir_fops = {
.readdir = gfs2_readdir,
.unlocked_ioctl = gfs2_ioctl,
.open = gfs2_open,
.release = gfs2_close,
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
};

24
fs/gfs2/ops_file.h Normal file
View File

@@ -0,0 +1,24 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_FILE_DOT_H__
#define __OPS_FILE_DOT_H__
#include <linux/fs.h>
struct gfs2_inode;
/* struct file instance handed to do_generic_mapping_read() for internal reads */
extern struct file gfs2_internal_file_sentinel;
/* Read into the buffer @buf; returns the byte count written, or an error
   when nothing was written */
extern int gfs2_internal_read(struct gfs2_inode *ip,
struct file_ra_state *ra_state,
char *buf, loff_t *pos, unsigned size);
/* Refresh inode->i_flags from the GFS2 dinode flags */
extern void gfs2_set_inode_flags(struct inode *inode);
/* File operations tables */
extern const struct file_operations gfs2_file_fops;
extern const struct file_operations gfs2_dir_fops;
#endif /* __OPS_FILE_DOT_H__ */

925
fs/gfs2/ops_fstype.c Normal file
View File

@@ -0,0 +1,925 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "daemon.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lm.h"
#include "mount.h"
#include "ops_export.h"
#include "ops_fstype.h"
#include "ops_super.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "sys.h"
#include "util.h"
#define DO 0
#define UNDO 1
extern struct dentry_operations gfs2_dops;
/*
 * init_sbd - allocate and initialise the in-core GFS2 superblock.
 * @sb: the VFS superblock this GFS2 superblock belongs to
 *
 * Allocates a zeroed struct gfs2_sbd, links it with @sb in both directions,
 * loads the default tunables and initialises every lock, list head and wait
 * queue embedded in the structure.
 *
 * Returns the new structure, or NULL if the allocation failed.
 */
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
if (!sdp)
return NULL;
/* Cross-link the VFS and GFS2 superblocks */
sb->s_fs_info = sdp;
sdp->sd_vfs = sb;
gfs2_tune_init(&sdp->sd_tune);
INIT_LIST_HEAD(&sdp->sd_reclaim_list);
spin_lock_init(&sdp->sd_reclaim_lock);
init_waitqueue_head(&sdp->sd_reclaim_wq);
mutex_init(&sdp->sd_inum_mutex);
spin_lock_init(&sdp->sd_statfs_spin);
mutex_init(&sdp->sd_statfs_mutex);
/* Resource group index */
spin_lock_init(&sdp->sd_rindex_spin);
mutex_init(&sdp->sd_rindex_mutex);
INIT_LIST_HEAD(&sdp->sd_rindex_list);
INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
INIT_LIST_HEAD(&sdp->sd_rindex_recent_list);
/* Journal index */
INIT_LIST_HEAD(&sdp->sd_jindex_list);
spin_lock_init(&sdp->sd_jindex_spin);
mutex_init(&sdp->sd_jindex_mutex);
/* Quota */
INIT_LIST_HEAD(&sdp->sd_quota_list);
spin_lock_init(&sdp->sd_quota_spin);
mutex_init(&sdp->sd_quota_mutex);
/* Log */
spin_lock_init(&sdp->sd_log_lock);
INIT_LIST_HEAD(&sdp->sd_log_le_gl);
INIT_LIST_HEAD(&sdp->sd_log_le_buf);
INIT_LIST_HEAD(&sdp->sd_log_le_revoke);
INIT_LIST_HEAD(&sdp->sd_log_le_rg);
INIT_LIST_HEAD(&sdp->sd_log_le_databuf);
mutex_init(&sdp->sd_log_reserve_mutex);
INIT_LIST_HEAD(&sdp->sd_ail1_list);
INIT_LIST_HEAD(&sdp->sd_ail2_list);
init_rwsem(&sdp->sd_log_flush_lock);
INIT_LIST_HEAD(&sdp->sd_log_flush_list);
INIT_LIST_HEAD(&sdp->sd_revoke_list);
mutex_init(&sdp->sd_freeze_lock);
return sdp;
}
/*
 * init_vfs - fill in the GFS2-specific fields of a VFS superblock.
 * @sb: the VFS superblock; sb->s_fs_info must already point at the gfs2_sbd
 * @noatime: bit number to set in sdp->sd_flags when the mount asked for
 *           noatime or nodiratime (callers in this file pass SDF_NOATIME)
 */
static void init_vfs(struct super_block *sb, unsigned noatime)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_export_op = &gfs2_export_ops;
sb->s_maxbytes = MAX_LFS_FILESIZE;
/* Remember the user's request before the flags are forced on below */
if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME))
set_bit(noatime, &sdp->sd_flags);
/* Don't let the VFS update atimes. GFS2 handles this itself. */
sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
}
/*
 * init_names - determine the lock protocol and lock table names.
 * @sdp: the GFS2 superblock
 * @silent: passed on to gfs2_check_sb()
 *
 * Names given as mount arguments take precedence.  If either is missing,
 * the on-disk superblock is read and checked, and its values fill the gaps.
 * A table name that is still empty falls back to the VFS superblock's s_id.
 * The results are stored in sdp->sd_proto_name and sdp->sd_table_name.
 *
 * Returns 0 on success, -ENOBUFS if the superblock could not be read, or
 * the error from gfs2_check_sb().
 */
static int init_names(struct gfs2_sbd *sdp, int silent)
{
struct page *page;
char *proto, *table;
int error = 0;
proto = sdp->sd_args.ar_lockproto;
table = sdp->sd_args.ar_locktable;
/* Try to autodetect */
if (!proto[0] || !table[0]) {
struct gfs2_sb *sb;
page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
if (!page)
return -ENOBUFS;
/* Copy the on-disk superblock into sdp->sd_sb, then release the page */
sb = kmap(page);
gfs2_sb_in(&sdp->sd_sb, sb);
kunmap(page);
__free_page(page);
error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
if (error)
goto out;
if (!proto[0])
proto = sdp->sd_sb.sb_lockproto;
if (!table[0])
table = sdp->sd_sb.sb_locktable;
}
if (!table[0])
table = sdp->sd_vfs->s_id;
snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto);
snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table);
out:
return error;
}
static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh,
int undo)
{
struct task_struct *p;
int error = 0;
if (undo)
goto fail_trans;
p = kthread_run(gfs2_scand, sdp, "gfs2_scand");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start scand thread: %d\n", error);
return error;
}
sdp->sd_scand_process = p;
for (sdp->sd_glockd_num = 0;
sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd;
sdp->sd_glockd_num++) {
p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start glockd thread: %d\n", error);
goto fail;
}
sdp->sd_glockd_process[sdp->sd_glockd_num] = p;
}
error = gfs2_glock_nq_num(sdp,
GFS2_MOUNT_LOCK, &gfs2_nondisk_glops,
LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE,
mount_gh);
if (error) {
fs_err(sdp, "can't acquire mount glock: %d\n", error);
goto fail;
}
error = gfs2_glock_nq_num(sdp,
GFS2_LIVE_LOCK, &gfs2_nondisk_glops,
LM_ST_SHARED,
LM_FLAG_NOEXP | GL_EXACT,
&sdp->sd_live_gh);
if (error) {
fs_err(sdp, "can't acquire live glock: %d\n", error);
goto fail_mount;
}
error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops,
CREATE, &sdp->sd_rename_gl);
if (error) {
fs_err(sdp, "can't create rename glock: %d\n", error);
goto fail_live;
}
error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops,
CREATE, &sdp->sd_trans_gl);
if (error) {
fs_err(sdp, "can't create transaction glock: %d\n", error);
goto fail_rename;
}
set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags);
return 0;
fail_trans:
gfs2_glock_put(sdp->sd_trans_gl);
fail_rename:
gfs2_glock_put(sdp->sd_rename_gl);
fail_live:
gfs2_glock_dq_uninit(&sdp->sd_live_gh);
fail_mount:
gfs2_glock_dq_uninit(mount_gh);
fail:
while (sdp->sd_glockd_num--)
kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
kthread_stop(sdp->sd_scand_process);
return error;
}
/*
 * gfs2_lookup_root - look up a directory inode by inode number.
 * @sb: the VFS superblock
 * @inum: the inode number to look up
 *
 * Thin wrapper around gfs2_inode_lookup() with the type fixed to DT_DIR.
 * Callers in this file test the result with IS_ERR().
 */
static struct inode *gfs2_lookup_root(struct super_block *sb,
struct gfs2_inum_host *inum)
{
return gfs2_inode_lookup(sb, inum, DT_DIR);
}
/*
 * init_sb - read the on-disk superblock and set up the root dentry.
 * @sdp: the GFS2 superblock
 * @silent: passed on to gfs2_read_sb()
 * @undo: DO to set up, UNDO to drop the root dentry again
 *
 * Under a shared superblock glock: reads the superblock, validates the
 * filesystem block size against the device sector size and the page size,
 * sets the VFS block size, and looks up the root directory (or the master
 * directory for a gfs2meta superblock) to create sb->s_root.
 *
 * Returns 0 on success (and always for UNDO), or a negative errno.
 */
static int init_sb(struct gfs2_sbd *sdp, int silent, int undo)
{
	struct super_block *sb = sdp->sd_vfs;
	struct gfs2_holder sb_gh;
	struct gfs2_inum_host *inum;
	struct inode *inode;
	int error = 0;

	if (undo) {
		if (sb->s_root) {
			dput(sb->s_root);
			sb->s_root = NULL;
		}
		return 0;
	}

	error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops,
				  LM_ST_SHARED, 0, &sb_gh);
	if (error) {
		fs_err(sdp, "can't acquire superblock glock: %d\n", error);
		return error;
	}

	error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent);
	if (error) {
		fs_err(sdp, "can't read superblock: %d\n", error);
		goto out;
	}

	/* Set up the buffer cache and SB for real */
	if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) {
		error = -EINVAL;
		fs_err(sdp, "FS block size (%u) is too small for device "
		       "block size (%u)\n",
		       sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev));
		goto out;
	}
	if (sdp->sd_sb.sb_bsize > PAGE_SIZE) {
		error = -EINVAL;
		fs_err(sdp, "FS block size (%u) is too big for machine "
		       "page size (%u)\n",
		       sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE);
		goto out;
	}
	sb_set_blocksize(sb, sdp->sd_sb.sb_bsize);

	/* Get the root inode */
	inum = &sdp->sd_sb.sb_root_dir;
	if (sb->s_type == &gfs2meta_fs_type)
		inum = &sdp->sd_sb.sb_master_dir;
	inode = gfs2_lookup_root(sb, inum);
	if (IS_ERR(inode)) {
		error = PTR_ERR(inode);
		fs_err(sdp, "can't read in root inode: %d\n", error);
		goto out;
	}

	sb->s_root = d_alloc_root(inode);
	if (!sb->s_root) {
		fs_err(sdp, "can't get root dentry\n");
		error = -ENOMEM;
		iput(inode);
		/* Must not fall through: s_root is NULL and is dereferenced below */
		goto out;
	}
	sb->s_root->d_op = &gfs2_dops;
out:
	gfs2_glock_dq_uninit(&sb_gh);
	return error;
}
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
struct gfs2_holder ji_gh;
struct task_struct *p;
struct gfs2_inode *ip;
int jindex = 1;
int error = 0;
if (undo) {
jindex = 0;
goto fail_recoverd;
}
sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex");
if (IS_ERR(sdp->sd_jindex)) {
fs_err(sdp, "can't lookup journal index: %d\n", error);
return PTR_ERR(sdp->sd_jindex);
}
ip = GFS2_I(sdp->sd_jindex);
set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
/* Load in the journal index special file */
error = gfs2_jindex_hold(sdp, &ji_gh);
if (error) {
fs_err(sdp, "can't read journal index: %d\n", error);
goto fail;
}
error = -EINVAL;
if (!gfs2_jindex_size(sdp)) {
fs_err(sdp, "no journals!\n");
goto fail_jindex;
}
if (sdp->sd_args.ar_spectator) {
sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0);
sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
} else {
if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) {
fs_err(sdp, "can't mount journal #%u\n",
sdp->sd_lockstruct.ls_jid);
fs_err(sdp, "there are only %u journals (0 - %u)\n",
gfs2_jindex_size(sdp),
gfs2_jindex_size(sdp) - 1);
goto fail_jindex;
}
sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid);
error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
&gfs2_journal_glops,
LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
&sdp->sd_journal_gh);
if (error) {
fs_err(sdp, "can't acquire journal glock: %d\n", error);
goto fail_jindex;
}
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | GL_EXACT,
&sdp->sd_jinode_gh);
if (error) {
fs_err(sdp, "can't acquire journal inode glock: %d\n",
error);
goto fail_journal_gh;
}
error = gfs2_jdesc_check(sdp->sd_jdesc);
if (error) {
fs_err(sdp, "my journal (%u) is bad: %d\n",
sdp->sd_jdesc->jd_jid, error);
goto fail_jinode_gh;
}
sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks;
}
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
for (x = 0; x < sdp->sd_journals; x++) {
error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x));
if (error) {
fs_err(sdp, "error recovering journal %u: %d\n",
x, error);
goto fail_jinode_gh;
}
}
gfs2_lm_others_may_mount(sdp);
} else if (!sdp->sd_args.ar_spectator) {
error = gfs2_recover_journal(sdp->sd_jdesc);
if (error) {
fs_err(sdp, "error recovering my journal: %d\n", error);
goto fail_jinode_gh;
}
}
set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags);
gfs2_glock_dq_uninit(&ji_gh);
jindex = 0;
p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start recoverd thread: %d\n", error);
goto fail_jinode_gh;
}
sdp->sd_recoverd_process = p;
return 0;
fail_recoverd:
kthread_stop(sdp->sd_recoverd_process);
fail_jinode_gh:
if (!sdp->sd_args.ar_spectator)
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
fail_journal_gh:
if (!sdp->sd_args.ar_spectator)
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
fail_jindex:
gfs2_jindex_free(sdp);
if (jindex)
gfs2_glock_dq_uninit(&ji_gh);
fail:
iput(sdp->sd_jindex);
return error;
}
/*
 * init_inodes - look up the system inodes held under the master directory.
 * @sdp: the GFS2 superblock
 * @undo: DO to set up, UNDO to release everything a completed setup took
 *
 * Setup order: master directory, journals (init_journal), then the "inum",
 * "statfs", "rindex" and "quota" inodes.  A failure releases whatever was
 * already obtained, in reverse order.  In the UNDO case error stays 0 and
 * the whole unwind chain runs.
 *
 * Returns 0 on success (and for UNDO), or a negative errno.
 */
static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
int error = 0;
struct gfs2_inode *ip;
struct inode *inode;
if (undo)
goto fail_qinode;
inode = gfs2_lookup_root(sdp->sd_vfs, &sdp->sd_sb.sb_master_dir);
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
fs_err(sdp, "can't read in master directory: %d\n", error);
goto fail;
}
sdp->sd_master_dir = inode;
/* undo is known to be 0 (DO) here */
error = init_journal(sdp, undo);
if (error)
goto fail_master;
/* Read in the master inode number inode */
sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum");
if (IS_ERR(sdp->sd_inum_inode)) {
error = PTR_ERR(sdp->sd_inum_inode);
fs_err(sdp, "can't read in inum inode: %d\n", error);
goto fail_journal;
}
/* Read in the master statfs inode */
sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs");
if (IS_ERR(sdp->sd_statfs_inode)) {
error = PTR_ERR(sdp->sd_statfs_inode);
fs_err(sdp, "can't read in statfs inode: %d\n", error);
goto fail_inum;
}
/* Read in the resource index inode */
sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex");
if (IS_ERR(sdp->sd_rindex)) {
error = PTR_ERR(sdp->sd_rindex);
fs_err(sdp, "can't get resource index inode: %d\n", error);
goto fail_statfs;
}
ip = GFS2_I(sdp->sd_rindex);
set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
/* NOTE(review): presumably one less than the glock version number so the
   cached rindex is treated as stale on first use -- confirm in rgrp.c */
sdp->sd_rindex_vn = ip->i_gl->gl_vn - 1;
/* Read in the quota inode */
sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota");
if (IS_ERR(sdp->sd_quota_inode)) {
error = PTR_ERR(sdp->sd_quota_inode);
fs_err(sdp, "can't get quota file inode: %d\n", error);
goto fail_rindex;
}
return 0;
fail_qinode:
iput(sdp->sd_quota_inode);
fail_rindex:
gfs2_clear_rgrpd(sdp);
iput(sdp->sd_rindex);
fail_statfs:
iput(sdp->sd_statfs_inode);
fail_inum:
iput(sdp->sd_inum_inode);
fail_journal:
init_journal(sdp, UNDO);
fail_master:
iput(sdp->sd_master_dir);
fail:
return error;
}
/*
 * init_per_node - look up and lock this node's private system files.
 * @sdp: the GFS2 superblock
 * @undo: DO to set up, UNDO to release everything a completed setup took
 *
 * Does nothing for a spectator mount.  Otherwise looks up, in the master
 * directory's "per_node" directory, the inum_range ("ir"), statfs_change
 * ("sc") and quota_change ("qc") files whose name suffix is this node's
 * journal id, and then takes each file's glock exclusively.
 *
 * Returns 0 on success (and for UNDO / spectator), or a negative errno.
 */
static int init_per_node(struct gfs2_sbd *sdp, int undo)
{
struct inode *pn = NULL;
char buf[30];
int error = 0;
struct gfs2_inode *ip;
if (sdp->sd_args.ar_spectator)
return 0;
if (undo)
goto fail_qc_gh;
pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node");
if (IS_ERR(pn)) {
error = PTR_ERR(pn);
fs_err(sdp, "can't find per_node directory: %d\n", error);
return error;
}
sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
if (IS_ERR(sdp->sd_ir_inode)) {
error = PTR_ERR(sdp->sd_ir_inode);
fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
goto fail;
}
sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
if (IS_ERR(sdp->sd_sc_inode)) {
error = PTR_ERR(sdp->sd_sc_inode);
fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
goto fail_ir_i;
}
sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
sdp->sd_qc_inode = gfs2_lookup_simple(pn, buf);
if (IS_ERR(sdp->sd_qc_inode)) {
error = PTR_ERR(sdp->sd_qc_inode);
fs_err(sdp, "can't find local \"qc\" file: %d\n", error);
/* Despite its name, this label releases the "sc" inode */
goto fail_ut_i;
}
/* All lookups done: the per_node directory itself is no longer needed */
iput(pn);
pn = NULL;
ip = GFS2_I(sdp->sd_ir_inode);
error = gfs2_glock_nq_init(ip->i_gl,
LM_ST_EXCLUSIVE, 0,
&sdp->sd_ir_gh);
if (error) {
fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
goto fail_qc_i;
}
ip = GFS2_I(sdp->sd_sc_inode);
error = gfs2_glock_nq_init(ip->i_gl,
LM_ST_EXCLUSIVE, 0,
&sdp->sd_sc_gh);
if (error) {
fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
goto fail_ir_gh;
}
ip = GFS2_I(sdp->sd_qc_inode);
error = gfs2_glock_nq_init(ip->i_gl,
LM_ST_EXCLUSIVE, 0,
&sdp->sd_qc_gh);
if (error) {
fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
/* Despite its name, this label releases the "sc" glock holder */
goto fail_ut_gh;
}
return 0;
fail_qc_gh:
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
fail_ut_gh:
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
fail_ir_gh:
gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
fail_qc_i:
iput(sdp->sd_qc_inode);
fail_ut_i:
iput(sdp->sd_sc_inode);
fail_ir_i:
iput(sdp->sd_ir_inode);
fail:
/* pn is only still set when a lookup failed before the iput above */
if (pn)
iput(pn);
return error;
}
static int init_threads(struct gfs2_sbd *sdp, int undo)
{
struct task_struct *p;
int error = 0;
if (undo)
goto fail_quotad;
sdp->sd_log_flush_time = jiffies;
sdp->sd_jindex_refresh_time = jiffies;
p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start logd thread: %d\n", error);
return error;
}
sdp->sd_logd_process = p;
sdp->sd_statfs_sync_time = jiffies;
sdp->sd_quota_sync_time = jiffies;
p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
error = IS_ERR(p);
if (error) {
fs_err(sdp, "can't start quotad thread: %d\n", error);
goto fail;
}
sdp->sd_quotad_process = p;
return 0;
fail_quotad:
kthread_stop(sdp->sd_quotad_process);
fail:
kthread_stop(sdp->sd_logd_process);
return error;
}
/**
 * fill_super - Read in superblock
 * @sb: The VFS superblock
 * @data: Mount options
 * @silent: Don't complain if it's not a GFS2 filesystem
 *
 * Runs the mount-time initialisation steps in order: in-core superblock,
 * mount arguments, VFS fields, lock names, sysfs entry, lock module,
 * glocks, on-disk superblock, system inodes, per-node files, statfs,
 * daemon threads and finally (unless mounted read-only) the switch to
 * read-write.  Each failure unwinds the steps already completed, in
 * reverse order, through the matching *(…, UNDO) call.
 *
 * Returns: errno
 */
static int fill_super(struct super_block *sb, void *data, int silent)
{
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
int error;
sdp = init_sbd(sb);
if (!sdp) {
printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n");
return -ENOMEM;
}
error = gfs2_mount_args(sdp, (char *)data, 0);
if (error) {
printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
goto fail;
}
init_vfs(sb, SDF_NOATIME);
/* Set up the buffer cache and fill in some fake block size values
to allow us to read-in the on-disk superblock. */
sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK);
sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
error = init_names(sdp, silent);
if (error)
goto fail;
error = gfs2_sys_fs_add(sdp);
if (error)
goto fail;
error = gfs2_lm_mount(sdp, silent);
if (error)
goto fail_sys;
error = init_locking(sdp, &mount_gh, DO);
if (error)
goto fail_lm;
error = init_sb(sdp, silent, DO);
if (error)
goto fail_locking;
error = init_inodes(sdp, DO);
if (error)
goto fail_sb;
error = init_per_node(sdp, DO);
if (error)
goto fail_inodes;
error = gfs2_statfs_init(sdp);
if (error) {
fs_err(sdp, "can't initialize statfs subsystem: %d\n", error);
goto fail_per_node;
}
error = init_threads(sdp, DO);
if (error)
goto fail_per_node;
if (!(sb->s_flags & MS_RDONLY)) {
error = gfs2_make_fs_rw(sdp);
if (error) {
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_threads;
}
}
/* Mount complete: the exclusive mount glock is no longer needed */
gfs2_glock_dq_uninit(&mount_gh);
return 0;
fail_threads:
init_threads(sdp, UNDO);
fail_per_node:
init_per_node(sdp, UNDO);
fail_inodes:
init_inodes(sdp, UNDO);
fail_sb:
init_sb(sdp, 0, UNDO);
fail_locking:
init_locking(sdp, &mount_gh, UNDO);
fail_lm:
gfs2_gl_hash_clear(sdp, WAIT);
gfs2_lm_unmount(sdp);
/* Keep retrying until invalidate_inodes() reports nothing left busy */
while (invalidate_inodes(sb))
yield();
fail_sys:
gfs2_sys_fs_del(sdp);
fail:
kfree(sdp);
sb->s_fs_info = NULL;
return error;
}
static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt)
{
struct super_block *sb;
struct gfs2_sbd *sdp;
int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt);
if (error)
goto out;
sb = mnt->mnt_sb;
sdp = sb->s_fs_info;
sdp->sd_gfs2mnt = mnt;
out:
return error;
}
/*
 * fill_super_meta - set up a gfs2meta superblock on top of a mounted gfs2.
 * @sb: the superblock of the existing gfs2 mount
 * @new: the gfs2meta superblock being filled in
 * @data: mount options (unused)
 * @silent: unused
 *
 * @new shares the gfs2_sbd of @sb and gets the master directory as its
 * root dentry.
 *
 * Returns 0 on success, or -ENOMEM if no root dentry could be allocated.
 */
static int fill_super_meta(struct super_block *sb, struct super_block *new,
			   void *data, int silent)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	struct inode *master;

	new->s_fs_info = sdp;
	sdp->sd_vfs_meta = sb;

	init_vfs(new, SDF_NOATIME);

	/* Get the master inode */
	master = igrab(sdp->sd_master_dir);

	new->s_root = d_alloc_root(master);
	if (new->s_root) {
		new->s_root->d_op = &gfs2_dops;
		return 0;
	}

	fs_err(sdp, "can't get root dentry\n");
	iput(master);
	return -ENOMEM;
}
/*
 * set_bdev_super - "set" callback passed to sget() by gfs2_get_sb_meta().
 * @s: the new superblock
 * @data: the block device (struct block_device *) to attach
 *
 * Stores the block device and its device number in @s.  Always returns 0.
 */
static int set_bdev_super(struct super_block *s, void *data)
{
s->s_bdev = data;
s->s_dev = s->s_bdev->bd_dev;
return 0;
}
/*
 * test_bdev_super - "test" callback passed to sget() by gfs2_get_sb_meta().
 * @s: an existing superblock
 * @data: the block device being looked for
 *
 * Returns non-zero when @s is backed by that block device.
 */
static int test_bdev_super(struct super_block *s, void *data)
{
return s->s_bdev == data;
}
static struct super_block* get_gfs2_sb(const char *dev_name)
{
struct kstat stat;
struct nameidata nd;
struct file_system_type *fstype;
struct super_block *sb = NULL, *s;
struct list_head *l;
int error;
error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
if (error) {
printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n",
dev_name);
goto out;
}
error = vfs_getattr(nd.mnt, nd.dentry, &stat);
fstype = get_fs_type("gfs2");
list_for_each(l, &fstype->fs_supers) {
s = list_entry(l, struct super_block, s_instances);
if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) ||
(S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) {
sb = s;
goto free_nd;
}
}
printk(KERN_WARNING "GFS2: Unrecognized block device or "
"mount point %s\n", dev_name);
free_nd:
path_release(&nd);
out:
return sb;
}
/*
 * gfs2_get_sb_meta - get_sb method of the "gfs2meta" filesystem type.
 * @fs_type: the gfs2meta filesystem type
 * @flags: mount flags
 * @dev_name: block device or mount point of an existing gfs2 mount
 * @data: mount options, passed on to fill_super_meta()
 * @mnt: the vfsmount being set up
 *
 * Finds the already mounted gfs2 superblock for @dev_name, creates a second
 * superblock on the same block device and fills it in so that its root is
 * the master directory.  Only one gfs2meta mount per gfs2 mount is allowed.
 *
 * Returns 0 on success, -ENOENT if there is no such gfs2 mount, -EBUSY if a
 * gfs2meta mount already exists, or another errno.
 */
static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt)
{
int error = 0;
struct super_block *sb = NULL, *new;
struct gfs2_sbd *sdp;
sb = get_gfs2_sb(dev_name);
if (!sb) {
printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
error = -ENOENT;
goto error;
}
sdp = (struct gfs2_sbd*) sb->s_fs_info;
if (sdp->sd_vfs_meta) {
printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n");
error = -EBUSY;
goto error;
}
down(&sb->s_bdev->bd_mount_sem);
new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev);
up(&sb->s_bdev->bd_mount_sem);
if (IS_ERR(new)) {
error = PTR_ERR(new);
goto error;
}
/* NOTE(review): presumably drops the module reference taken by
   get_fs_type() inside get_gfs2_sb() -- confirm */
module_put(fs_type->owner);
new->s_flags = flags;
strlcpy(new->s_id, sb->s_id, sizeof(new->s_id));
sb_set_blocksize(new, sb->s_blocksize);
error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0);
if (error) {
/* NOTE(review): deactivate_super() may end up in gfs2_kill_sb_meta(), which
   decrements sd_gfs2mnt->mnt_count although it has not been incremented yet
   on this path -- verify */
up_write(&new->s_umount);
deactivate_super(new);
goto error;
}
new->s_flags |= MS_ACTIVE;
/* Grab a reference to the gfs2 mount point */
atomic_inc(&sdp->sd_gfs2mnt->mnt_count);
return simple_set_mnt(mnt, new);
error:
return error;
}
static void gfs2_kill_sb(struct super_block *sb)
{
kill_block_super(sb);
}
/*
 * kill_sb method of the "gfs2meta" filesystem type.  Shuts the superblock
 * down, clears sd_vfs_meta so that a new gfs2meta mount becomes possible,
 * and drops the reference on the gfs2 mount that gfs2_get_sb_meta() took.
 */
static void gfs2_kill_sb_meta(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
generic_shutdown_super(sb);
sdp->sd_vfs_meta = NULL;
atomic_dec(&sdp->sd_gfs2mnt->mnt_count);
}
/*
* Filesystem type descriptor for "gfs2". It requires a block device
* (FS_REQUIRES_DEV); superblocks are obtained through gfs2_get_sb() and
* released through gfs2_kill_sb().
*/
struct file_system_type gfs2_fs_type = {
.name = "gfs2",
.fs_flags = FS_REQUIRES_DEV,
.get_sb = gfs2_get_sb,
.kill_sb = gfs2_kill_sb,
.owner = THIS_MODULE,
};
/*
* Filesystem type descriptor for "gfs2meta". Superblocks are obtained
* through gfs2_get_sb_meta(), which attaches to an existing gfs2 mount, and
* released through gfs2_kill_sb_meta().
*/
struct file_system_type gfs2meta_fs_type = {
.name = "gfs2meta",
.fs_flags = FS_REQUIRES_DEV,
.get_sb = gfs2_get_sb_meta,
.kill_sb = gfs2_kill_sb_meta,
.owner = THIS_MODULE,
};

18
fs/gfs2/ops_fstype.h Normal file
View File

@@ -0,0 +1,18 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_FSTYPE_DOT_H__
#define __OPS_FSTYPE_DOT_H__
#include <linux/fs.h>
extern struct file_system_type gfs2_fs_type;
extern struct file_system_type gfs2meta_fs_type;
#endif /* __OPS_FSTYPE_DOT_H__ */

1167
fs/gfs2/ops_inode.c Normal file

File diff suppressed because it is too large Load Diff

20
fs/gfs2/ops_inode.h Normal file
View File

@@ -0,0 +1,20 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_INODE_DOT_H__
#define __OPS_INODE_DOT_H__
#include <linux/fs.h>
/*
* Inode operation tables exported by GFS2.
* NOTE(review): presumably one table per inode kind (regular file,
* directory, symlink, device node) -- the definitions are not in this header.
*/
extern const struct inode_operations gfs2_file_iops;
extern const struct inode_operations gfs2_dir_iops;
extern const struct inode_operations gfs2_symlink_iops;
extern const struct inode_operations gfs2_dev_iops;
#endif /* __OPS_INODE_DOT_H__ */

486
fs/gfs2/ops_super.c Normal file
View File

@@ -0,0 +1,486 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/statfs.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "inode.h"
#include "lm.h"
#include "log.h"
#include "mount.h"
#include "ops_super.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "sys.h"
#include "util.h"
#include "trans.h"
#include "dir.h"
#include "eattr.h"
#include "bmap.h"
/**
 * gfs2_write_inode - Make sure the inode is stable on the disk
 * @inode: The inode
 * @sync: synchronous write flag
 *
 * The log is flushed for the inode's glock only when the inode has
 * i_private set, the current task does not have PF_MEMALLOC set and
 * @sync is non-zero. In every other case nothing is done.
 *
 * Returns: errno (always 0)
 */
static int gfs2_write_inode(struct inode *inode, int sync)
{
	struct gfs2_inode *ip;

	/* Only "normal" inodes (i_private set) are of interest */
	if (!inode->i_private)
		return 0;

	/* Do not flush while the task has PF_MEMALLOC set */
	if (current->flags & PF_MEMALLOC)
		return 0;

	if (sync) {
		ip = GFS2_I(inode);
		gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
	}

	return 0;
}
/**
* gfs2_put_super - Unmount the filesystem
* @sb: The VFS superblock
*
* Does nothing when the superblock has no s_fs_info or belongs to the
* "gfs2meta" type. Otherwise it undoes a pending freeze, stops the daemon
* threads, makes the filesystem read-only if it is not already, releases
* the system inodes and glocks, leaves the lockspace and frees the
* gfs2_sbd.
*/
static void gfs2_put_super(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
if (!sdp)
return;
/* A gfs2meta superblock shares sdp with its gfs2 superblock */
if (!strncmp(sb->s_type->name, "gfs2meta", 8))
return; /* Nothing to do */
/* Unfreeze the filesystem, if we need to */
mutex_lock(&sdp->sd_freeze_lock);
if (sdp->sd_freeze_count)
gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
mutex_unlock(&sdp->sd_freeze_lock);
/* Stop the daemon threads; glockd threads are stopped from the highest index down */
kthread_stop(sdp->sd_quotad_process);
kthread_stop(sdp->sd_logd_process);
kthread_stop(sdp->sd_recoverd_process);
while (sdp->sd_glockd_num--)
kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]);
kthread_stop(sdp->sd_scand_process);
/* A failure to go read-only is only reported through gfs2_io_error() */
if (!(sb->s_flags & MS_RDONLY)) {
error = gfs2_make_fs_ro(sdp);
if (error)
gfs2_io_error(sdp);
}
/* At this point, we're through modifying the disk */
/* Release stuff */
iput(sdp->sd_master_dir);
iput(sdp->sd_jindex);
iput(sdp->sd_inum_inode);
iput(sdp->sd_statfs_inode);
iput(sdp->sd_rindex);
iput(sdp->sd_quota_inode);
gfs2_glock_put(sdp->sd_rename_gl);
gfs2_glock_put(sdp->sd_trans_gl);
/* The per-node journal and change-file holders and inodes are released only when not a spectator */
if (!sdp->sd_args.ar_spectator) {
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
iput(sdp->sd_ir_inode);
iput(sdp->sd_sc_inode);
iput(sdp->sd_qc_inode);
}
gfs2_glock_dq_uninit(&sdp->sd_live_gh);
gfs2_clear_rgrpd(sdp);
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp, WAIT);
/* Unmount the locking protocol */
gfs2_lm_unmount(sdp);
/* At this point, we're through participating in the lockspace */
gfs2_sys_fs_del(sdp);
kfree(sdp);
}
/**
* gfs2_write_super - write_super handler
* @sb: the superblock
*
* Only clears the superblock's s_dirt flag; nothing is written here.
*/
static void gfs2_write_super(struct super_block *sb)
{
sb->s_dirt = 0;
}
/**
* gfs2_sync_fs - sync the filesystem
* @sb: the superblock
* @wait: if non-zero, flush the log before returning
*
* Clears s_dirt and, when @wait is set, flushes the log to disk.
*
* Returns: 0
*/
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
sb->s_dirt = 0;
/* A NULL glock is passed to gfs2_log_flush() */
if (wait)
gfs2_log_flush(sb->s_fs_info, NULL);
return 0;
}
/**
* gfs2_write_super_lockfs - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
*
*/
static void gfs2_write_super_lockfs(struct super_block *sb)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return;
for (;;) {
error = gfs2_freeze_fs(sdp);
if (!error)
break;
switch (error) {
case -EBUSY:
fs_err(sdp, "waiting for recovery before freeze\n");
break;
default:
fs_err(sdp, "error freezing FS: %d\n", error);
break;
}
fs_err(sdp, "retrying...\n");
msleep(1000);
}
}
/**
* gfs2_unlockfs - reallow writes to the filesystem
* @sb: the VFS structure for the filesystem
*
* Passes the superblock's s_fs_info to gfs2_unfreeze_fs().
*/
static void gfs2_unlockfs(struct super_block *sb)
{
gfs2_unfreeze_fs(sb->s_fs_info);
}
/**
 * gfs2_statfs - Gather and return stats about the filesystem
 * @dentry: a dentry on the filesystem; its inode's superblock is used
 * @buf: the kstatfs buffer to fill in
 *
 * The statistics come from gfs2_statfs_slow() when the gt_statfs_slow
 * tunable is set and from gfs2_statfs_i() otherwise.
 *
 * Returns: 0 on success or error code
 */
static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct gfs2_sbd *sdp = dentry->d_inode->i_sb->s_fs_info;
	struct gfs2_statfs_change_host sc;
	int error;

	if (gfs2_tune_get(sdp, gt_statfs_slow)) {
		error = gfs2_statfs_slow(sdp, &sc);
	} else {
		error = gfs2_statfs_i(sdp, &sc);
	}
	if (error != 0)
		return error;

	/* Identification and geometry */
	buf->f_type = GFS2_MAGIC;
	buf->f_bsize = sdp->sd_sb.sb_bsize;
	buf->f_namelen = GFS2_FNAMESIZE;

	/* Block counts */
	buf->f_blocks = sc.sc_total;
	buf->f_bfree = sc.sc_free;
	buf->f_bavail = sc.sc_free;

	/* Inode counts: the free-block count is also reported as free inodes */
	buf->f_files = sc.sc_dinodes + sc.sc_free;
	buf->f_ffree = sc.sc_free;

	return 0;
}
/**
 * gfs2_remount_fs - called when the FS is remounted
 * @sb: the filesystem
 * @flags: the remount flags; updated in place
 * @data: mount option string, parsed by gfs2_mount_args()
 *
 * A spectator mount is forced read-only. Otherwise the filesystem is
 * switched between read-only and read-write when the requested state
 * differs from the current one. The noatime request is recorded in
 * SDF_NOATIME and the VFS is always told not to update atimes.
 *
 * Returns: errno
 */
static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct gfs2_sbd *sdp = sb->s_fs_info;
	int error;

	error = gfs2_mount_args(sdp, data, 1);
	if (error)
		return error;

	if (sdp->sd_args.ar_spectator) {
		*flags |= MS_RDONLY;
	} else {
		int want_ro = (*flags & MS_RDONLY) != 0;
		int is_ro = (sb->s_flags & MS_RDONLY) != 0;

		if (want_ro && !is_ro)
			error = gfs2_make_fs_ro(sdp);
		else if (!want_ro && is_ro)
			error = gfs2_make_fs_rw(sdp);
	}

	/* Remember whether the caller asked for any form of noatime */
	if ((*flags & (MS_NOATIME | MS_NODIRATIME)) == 0)
		clear_bit(SDF_NOATIME, &sdp->sd_flags);
	else
		set_bit(SDF_NOATIME, &sdp->sd_flags);

	/* Don't let the VFS update atimes. GFS2 handles this itself. */
	*flags |= MS_NOATIME | MS_NODIRATIME;

	return error;
}
/**
 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
 * @inode: The VFS inode
 *
 * Inodes without i_private only serve to contain an address space
 * (see rgrp.c, meta_io.c) and have no glocks of their own, so they are
 * left alone. For a "real" inode the inode glock is detached, scheduled
 * for reclaim and released, and the iopen holder is dropped if it is
 * still attached to a glock.
 */
static void gfs2_clear_inode(struct inode *inode)
{
	struct gfs2_inode *ip;

	if (!inode->i_private)
		return;

	ip = GFS2_I(inode);

	/* Break the glock -> inode link before letting go of the glock */
	ip->i_gl->gl_object = NULL;
	gfs2_glock_schedule_for_reclaim(ip->i_gl);
	gfs2_glock_put(ip->i_gl);
	ip->i_gl = NULL;

	if (ip->i_iopen_gh.gh_gl != NULL)
		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
}
/**
 * gfs2_show_options - Show mount options for /proc/mounts
 * @s: seq_file structure
 * @mnt: vfsmount
 *
 * Emits one ",name" or ",name=value" entry for every mount argument that
 * is set or differs from its default value.
 *
 * Returns: 0 on success or error code
 */
static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
{
	struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
	struct gfs2_args *args = &sdp->sd_args;
	char *state;

	/* String-valued options are shown only when non-empty */
	if (args->ar_lockproto[0] != '\0')
		seq_printf(s, ",lockproto=%s", args->ar_lockproto);
	if (args->ar_locktable[0] != '\0')
		seq_printf(s, ",locktable=%s", args->ar_locktable);
	if (args->ar_hostdata[0] != '\0')
		seq_printf(s, ",hostdata=%s", args->ar_hostdata);

	/* Boolean options */
	if (args->ar_spectator)
		seq_printf(s, ",spectator");
	if (args->ar_ignore_local_fs)
		seq_printf(s, ",ignore_local_fs");
	if (args->ar_localflocks)
		seq_printf(s, ",localflocks");
	if (args->ar_localcaching)
		seq_printf(s, ",localcaching");
	if (args->ar_debug)
		seq_printf(s, ",debug");
	if (args->ar_upgrade)
		seq_printf(s, ",upgrade");

	if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT)
		seq_printf(s, ",num_glockd=%u", args->ar_num_glockd);

	if (args->ar_posix_acl)
		seq_printf(s, ",acl");

	if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
		if (args->ar_quota == GFS2_QUOTA_OFF)
			state = "off";
		else if (args->ar_quota == GFS2_QUOTA_ACCOUNT)
			state = "account";
		else if (args->ar_quota == GFS2_QUOTA_ON)
			state = "on";
		else
			state = "unknown";
		seq_printf(s, ",quota=%s", state);
	}

	if (args->ar_suiddir)
		seq_printf(s, ",suiddir");

	if (args->ar_data != GFS2_DATA_DEFAULT) {
		if (args->ar_data == GFS2_DATA_WRITEBACK)
			state = "writeback";
		else if (args->ar_data == GFS2_DATA_ORDERED)
			state = "ordered";
		else
			state = "unknown";
		seq_printf(s, ",data=%s", state);
	}

	return 0;
}
/*
* gfs2_delete_inode - delete_inode handler: free an inode's on-disk resources
* @inode: the inode being deleted
*
* We have to (at the moment) hold the inodes main lock to cover
* the gap between unlocking the shared lock on the iopen lock and
* taking the exclusive lock. I'd rather do a shared -> exclusive
* conversion on the iopen lock, but we can change that later. This
* is safe, just less efficient.
*
* Whatever happens, the inode's pages are truncated and clear_inode() is
* called before returning. Deallocation errors are only logged.
*/
static void gfs2_delete_inode(struct inode *inode)
{
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int error;
/* Inodes without i_private have nothing to deallocate */
if (!inode->i_private)
goto out;
/* Try-lock the inode glock exclusively; on failure drop the iopen holder and give up */
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &gh);
if (unlikely(error)) {
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
}
/* Drop the iopen lock and try to re-acquire it exclusively, uncached */
gfs2_glock_dq(&ip->i_iopen_gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
error = gfs2_glock_nq(&ip->i_iopen_gh);
if (error)
goto out_uninit;
/* Deallocate in stages: exhash directory data, extended attributes, file data, then the dinode */
if (S_ISDIR(inode->i_mode) &&
(ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
error = gfs2_dir_exhash_dealloc(ip);
if (error)
goto out_unlock;
}
if (ip->i_di.di_eattr) {
error = gfs2_ea_dealloc(ip);
if (error)
goto out_unlock;
}
if (!gfs2_is_stuffed(ip)) {
error = gfs2_file_dealloc(ip);
if (error)
goto out_unlock;
}
error = gfs2_dinode_dealloc(ip);
/*
* Must do this before unlock to avoid trying to write back
* potentially dirty data now that inode no longer exists
* on disk.
*/
truncate_inode_pages(&inode->i_data, 0);
out_unlock:
gfs2_glock_dq(&ip->i_iopen_gh);
out_uninit:
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&gh);
/* The error is reported but not propagated: this function returns void */
if (error)
fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
out:
/* Reached on every path, so pages are truncated again after a successful deallocation */
truncate_inode_pages(&inode->i_data, 0);
clear_inode(inode);
}
/**
 * gfs2_alloc_inode - alloc_inode handler: allocate a GFS2 inode
 * @sb: the superblock (unused)
 *
 * Allocates a struct gfs2_inode from gfs2_inode_cachep and initialises
 * its i_flags, i_gl and i_last_pfault fields.
 *
 * Returns: the embedded VFS inode, or NULL if the allocation failed
 */
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
	struct gfs2_inode *ip;

	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
	/*
	 * Return NULL explicitly rather than &ip->i_inode computed from a
	 * NULL ip, which is only NULL if i_inode sits at offset 0.
	 */
	if (!ip)
		return NULL;

	ip->i_flags = 0;
	ip->i_gl = NULL;
	ip->i_last_pfault = jiffies;

	return &ip->i_inode;
}
/**
 * gfs2_destroy_inode - destroy_inode handler: free a GFS2 inode
 * @inode: the VFS inode embedded in the gfs2_inode to free
 *
 * Objects in gfs2_inode_cachep are struct gfs2_inode (see
 * gfs2_alloc_inode()), so the containing gfs2_inode is what must be handed
 * back to the cache, not the embedded struct inode.
 */
static void gfs2_destroy_inode(struct inode *inode)
{
	kmem_cache_free(gfs2_inode_cachep, GFS2_I(inode));
}
/* Superblock operations table for GFS2; every handler is defined in this file */
const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.destroy_inode = gfs2_destroy_inode,
.write_inode = gfs2_write_inode,
.delete_inode = gfs2_delete_inode,
.put_super = gfs2_put_super,
.write_super = gfs2_write_super,
.sync_fs = gfs2_sync_fs,
.write_super_lockfs = gfs2_write_super_lockfs,
.unlockfs = gfs2_unlockfs,
.statfs = gfs2_statfs,
.remount_fs = gfs2_remount_fs,
.clear_inode = gfs2_clear_inode,
.show_options = gfs2_show_options,
};

17
fs/gfs2/ops_super.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_SUPER_DOT_H__
#define __OPS_SUPER_DOT_H__
#include <linux/fs.h>
/* Superblock operations table for GFS2 */
extern const struct super_operations gfs2_super_ops;
#endif /* __OPS_SUPER_DOT_H__ */

161
fs/gfs2/ops_vm.c Normal file
View File

@@ -0,0 +1,161 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "ops_vm.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
static struct page *gfs2_private_nopage(struct vm_area_struct *area,
unsigned long address, int *type)
{
struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
set_bit(GIF_PAGED, &ip->i_flags);
return filemap_nopage(area, address, type);
}
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
unsigned long index = page->index;
u64 lblock = index << (PAGE_CACHE_SHIFT -
sdp->sd_sb.sb_bsize_shift);
unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift;
struct gfs2_alloc *al;
unsigned int data_blocks, ind_blocks;
unsigned int x;
int error;
al = gfs2_alloc_get(ip);
error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
if (error)
goto out;
error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
if (error)
goto out_gunlock_q;
gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
al->al_requested = data_blocks + ind_blocks;
error = gfs2_inplace_reserve(ip);
if (error)
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, al->al_rgd->rd_ri.ri_length +
ind_blocks + RES_DINODE +
RES_STATFS + RES_QUOTA, 0);
if (error)
goto out_ipres;
if (gfs2_is_stuffed(ip)) {
error = gfs2_unstuff_dinode(ip, NULL);
if (error)
goto out_trans;
}
for (x = 0; x < blocks; ) {
u64 dblock;
unsigned int extlen;
int new = 1;
error = gfs2_extent_map(&ip->i_inode, lblock, &new, &dblock, &extlen);
if (error)
goto out_trans;
lblock += extlen;
x += extlen;
}
gfs2_assert_warn(sdp, al->al_alloced);
out_trans:
gfs2_trans_end(sdp);
out_ipres:
gfs2_inplace_release(ip);
out_gunlock_q:
gfs2_quota_unlock(ip);
out:
gfs2_alloc_put(ip);
return error;
}
/**
* gfs2_sharewrite_nopage - nopage handler of gfs2_vm_ops_sharewrite
* @area: the VMA that faulted
* @address: the faulting address
* @type: passed through to filemap_nopage()
*
* Holds the inode glock exclusively while the page is brought in through
* filemap_nopage() and, if the page's byte range needs allocation, while
* disk blocks are allocated behind it.
*
* Returns: the page, NULL if locking, the allocation check or the
* allocation failed, or whatever failure value filemap_nopage() returned
*/
static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
unsigned long address, int *type)
{
struct file *file = area->vm_file;
struct gfs2_file *gf = file->private_data;
struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
struct gfs2_holder i_gh;
struct page *result = NULL;
/* Page index within the file: offset into the VMA plus the VMA's starting page offset */
unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
area->vm_pgoff;
int alloc_required;
int error;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
if (error)
return NULL;
set_bit(GIF_PAGED, &ip->i_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
/* Does writing this page's byte range require new blocks? */
error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE, &alloc_required);
if (error)
goto out;
/*
* GFF_EXLOCK is set only for the duration of filemap_nopage().
* NOTE(review): presumably it tells the read path that the glock is
* already held exclusively -- confirm where GFF_EXLOCK is tested.
*/
set_bit(GFF_EXLOCK, &gf->f_flags);
result = filemap_nopage(area, address, type);
clear_bit(GFF_EXLOCK, &gf->f_flags);
if (!result || result == NOPAGE_OOM)
goto out;
if (alloc_required) {
error = alloc_page_backing(ip, result);
if (error) {
/* Give the page back and fail the fault */
page_cache_release(result);
result = NULL;
goto out;
}
set_page_dirty(result);
}
out:
gfs2_glock_dq_uninit(&i_gh);
return result;
}
/* VM operations whose nopage handler only marks the inode GIF_PAGED */
struct vm_operations_struct gfs2_vm_ops_private = {
.nopage = gfs2_private_nopage,
};
/* VM operations whose nopage handler takes the inode glock and allocates backing blocks */
struct vm_operations_struct gfs2_vm_ops_sharewrite = {
.nopage = gfs2_sharewrite_nopage,
};

18
fs/gfs2/ops_vm.h Normal file
View File

@@ -0,0 +1,18 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __OPS_VM_DOT_H__
#define __OPS_VM_DOT_H__
#include <linux/mm.h>
/*
* VM operation tables exported by GFS2.
* NOTE(review): the names suggest one table for private mappings and one
* for shared writable mappings -- confirm where they are installed.
*/
extern struct vm_operations_struct gfs2_vm_ops_private;
extern struct vm_operations_struct gfs2_vm_ops_sharewrite;
#endif /* __OPS_VM_DOT_H__ */

1227
fs/gfs2/quota.c Normal file

File diff suppressed because it is too large Load Diff

35
fs/gfs2/quota.h Normal file
View File

@@ -0,0 +1,35 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __QUOTA_DOT_H__
#define __QUOTA_DOT_H__
struct gfs2_inode;
struct gfs2_sbd;
/*
* Sentinel id value with all bits set in a u32. It is passed in place of a
* uid or gid, e.g. to gfs2_quota_lock().
* NOTE(review): presumably means "this id is not being changed" -- confirm
* against quota.c.
*/
#define NO_QUOTA_CHANGE ((u32)-1)
int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid);
void gfs2_quota_unhold(struct gfs2_inode *ip);
int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid);
void gfs2_quota_unlock(struct gfs2_inode *ip);
int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
u32 uid, u32 gid);
int gfs2_quota_sync(struct gfs2_sbd *sdp);
int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
int gfs2_quota_init(struct gfs2_sbd *sdp);
void gfs2_quota_scan(struct gfs2_sbd *sdp);
void gfs2_quota_cleanup(struct gfs2_sbd *sdp);
#endif /* __QUOTA_DOT_H__ */

571
fs/gfs2/recovery.c Normal file
View File

@@ -0,0 +1,571 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "lm.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "super.h"
#include "util.h"
#include "dir.h"
/**
 * gfs2_replay_read_block - read one block of a journal
 * @jd: the journal
 * @blk: the logical block number within the journal
 * @bh: on success, receives the buffer returned by gfs2_meta_ra()
 *
 * Maps @blk of the journal inode to a disk block without allocating and
 * reads it. A journal block that maps to disk block 0 is treated as an
 * inconsistency.
 *
 * Returns: 0 on success, -EIO if the block is not mapped, or the error
 * from gfs2_extent_map()
 */
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
			   struct buffer_head **bh)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	u64 dblock;
	u32 extlen;
	int new = 0;
	int error;

	error = gfs2_extent_map(&ip->i_inode, blk, &new, &dblock, &extlen);
	if (error)
		return error;

	if (dblock == 0) {
		gfs2_consist_inode(ip);
		return -EIO;
	}

	*bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);

	return 0;
}
/**
 * gfs2_revoke_add - record a revoke for a block during replay
 * @sdp: the filesystem
 * @blkno: the revoked block number
 * @where: the log position of the revoke
 *
 * If @blkno is already on sd_revoke_list its position is overwritten with
 * @where; otherwise a new entry is added at the front of the list.
 *
 * Returns: 0 if an existing entry was updated, 1 if a new entry was added,
 * -ENOMEM if the new entry could not be allocated
 */
int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
{
	struct gfs2_revoke_replay *rr;

	list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
		if (rr->rr_blkno != blkno)
			continue;

		/* Already known: just remember the newest position */
		rr->rr_where = where;
		return 0;
	}

	rr = kmalloc(sizeof(*rr), GFP_KERNEL);
	if (rr == NULL)
		return -ENOMEM;

	rr->rr_blkno = blkno;
	rr->rr_where = where;
	list_add(&rr->rr_list, &sdp->sd_revoke_list);

	return 1;
}
/**
 * gfs2_revoke_check - test whether a block at a log position is revoked
 * @sdp: the filesystem
 * @blkno: the block number to look up
 * @where: the log position at which the block was found
 *
 * A block is revoked when it has an entry on sd_revoke_list and @where
 * lies strictly between sd_replay_tail and the entry's position, taking
 * into account that the entry's position may have wrapped around to
 * below the tail.
 *
 * Returns: non-zero if the block is revoked, 0 otherwise
 */
int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where)
{
	struct gfs2_revoke_replay *rr;
	struct gfs2_revoke_replay *match = NULL;
	int wrapped, after_tail, before_revoke;

	list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) {
		if (rr->rr_blkno == blkno) {
			match = rr;
			break;
		}
	}

	if (match == NULL)
		return 0;

	wrapped = (match->rr_where < sdp->sd_replay_tail);
	after_tail = (sdp->sd_replay_tail < where);
	before_revoke = (where < match->rr_where);

	if (wrapped)
		return after_tail || before_revoke;

	return after_tail && before_revoke;
}
void gfs2_revoke_clean(struct gfs2_sbd *sdp)
{
struct list_head *head = &sdp->sd_revoke_list;
struct gfs2_revoke_replay *rr;
while (!list_empty(head)) {
rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
list_del(&rr->rr_list);
kfree(rr);
}
}
/**
 * get_log_header - read the log header for a given segment
 * @jd: the journal
 * @blk: the block to look at
 * @head: the log header to return
 *
 * Read the log header for a given segment in a given journal. Do a few
 * sanity checks on it: magic number, metadata type, block number and
 * hash. The hash is computed over the on-disk header with its final u32
 * replaced by zero. @head is written only when all checks pass.
 *
 * Returns: 0 on success,
 *          1 if the header was invalid or incomplete,
 *          errno on error
 */
static int get_log_header(struct gfs2_jdesc *jd, unsigned int blk,
			  struct gfs2_log_header_host *head)
{
	const u32 zero = 0;
	struct gfs2_log_header_host lh;
	struct buffer_head *bh;
	u32 crc;
	int error;

	error = gfs2_replay_read_block(jd, blk, &bh);
	if (error)
		return error;

	crc = crc32_le((u32)~0, bh->b_data,
		       sizeof(struct gfs2_log_header) - sizeof(u32));
	crc = crc32_le(crc, (unsigned char const *)&zero, sizeof(zero));
	crc ^= (u32)~0;

	gfs2_log_header_in(&lh, bh->b_data);
	brelse(bh);

	if (lh.lh_header.mh_magic != GFS2_MAGIC)
		return 1;
	if (lh.lh_header.mh_type != GFS2_METATYPE_LH)
		return 1;
	if (lh.lh_blkno != blk)
		return 1;
	if (lh.lh_hash != crc)
		return 1;

	*head = lh;

	return 0;
}
/**
* find_good_lh - find a good log header
* @jd: the journal
* @blk: the segment to start searching from
* @lh: the log header to fill in
* @forward: if true search forward in the log, else search backward
*
* Call get_log_header() to get a log header for a segment, but if the
* segment is bad, either scan forward or backward until we find a good one.
*
* Returns: errno
*/
static int find_good_lh(struct gfs2_jdesc *jd, unsigned int *blk,
struct gfs2_log_header_host *head)
{
unsigned int orig_blk = *blk;
int error;
for (;;) {
error = get_log_header(jd, *blk, head);
if (error <= 0)
return error;
if (++*blk == jd->jd_blocks)
*blk = 0;
if (*blk == orig_blk) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
return -EIO;
}
}
}
/**
 * jhead_scan - make sure we've found the head of the log
 * @jd: the journal
 * @head: on entry a log header at or just before the head of the log; on
 *        success it is replaced by the log header of the head
 *
 * Scan forward from @head, skipping invalid headers, for as long as the
 * sequence numbers keep increasing. Two valid headers carrying the same
 * sequence number are treated as an inconsistency.
 *
 * Returns: errno
 */
static int jhead_scan(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
	struct gfs2_log_header_host lh;
	unsigned int blk = head->lh_blkno;
	int error;

	for (;;) {
		blk++;
		if (blk == jd->jd_blocks)
			blk = 0;

		error = get_log_header(jd, blk, &lh);
		if (error < 0)
			return error;
		if (error == 1)
			continue;

		/* An older header: the previous one was the head */
		if (lh.lh_sequence < head->lh_sequence)
			return 0;

		if (lh.lh_sequence == head->lh_sequence) {
			gfs2_consist_inode(GFS2_I(jd->jd_inode));
			return -EIO;
		}

		*head = lh;
	}
}
/**
* gfs2_find_jhead - find the head of a log
* @jd: the journal
* @head: the log descriptor for the head of the log is returned here
*
* Do a binary search of a journal and find the valid log entry with the
* highest sequence number. (i.e. the log head)
*
* The binary search only narrows the position down; jhead_scan() then
* walks forward from the lower bound to the actual head. @head is written
* only on success.
*
* Returns: errno
*/
int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_log_header_host lh_1, lh_m;
u32 blk_1, blk_2, blk_m;
int error;
/* blk_1 and blk_2 are the lower and upper bounds of the search window */
blk_1 = 0;
blk_2 = jd->jd_blocks - 1;
for (;;) {
blk_m = (blk_1 + blk_2) / 2;
/* find_good_lh() may move blk_1 and blk_m forward past invalid headers */
error = find_good_lh(jd, &blk_1, &lh_1);
if (error)
return error;
error = find_good_lh(jd, &blk_m, &lh_m);
if (error)
return error;
/* Stop once the midpoint coincides with one of the bounds */
if (blk_1 == blk_m || blk_m == blk_2)
break;
/* Sequence still rising up to the midpoint: move the lower bound up to it */
if (lh_1.lh_sequence <= lh_m.lh_sequence)
blk_1 = blk_m;
else
blk_2 = blk_m;
}
/* lh_1 is at or just before the head; scan forward to the head itself */
error = jhead_scan(jd, &lh_1);
if (error)
return error;
*head = lh_1;
return error;
}
/**
* foreach_descriptor - go through the active part of the log
* @jd: the journal
* @start: the first log header in the active region
* @end: the last log header (don't process the contents of this entry)
* @pass: the pass number handed to lops_scan_elements()
*
* Call lops_scan_elements() once for every log descriptor in the active
* portion of the log. Log headers met along the way are validated and
* skipped.
*
* Returns: errno
*/
static int foreach_descriptor(struct gfs2_jdesc *jd, unsigned int start,
unsigned int end, int pass)
{
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct buffer_head *bh;
struct gfs2_log_descriptor *ld;
int error = 0;
u32 length;
__be64 *ptr;
/* Offset of the data following the descriptor, rounded up to a multiple of sizeof(__be64) */
unsigned int offset = sizeof(struct gfs2_log_descriptor);
offset += sizeof(__be64) - 1;
offset &= ~(sizeof(__be64) - 1);
while (start != end) {
error = gfs2_replay_read_block(jd, start, &bh);
if (error)
return error;
if (gfs2_meta_check(sdp, bh)) {
brelse(bh);
return -EIO;
}
ld = (struct gfs2_log_descriptor *)bh->b_data;
length = be32_to_cpu(ld->ld_length);
/* A log header inside the active region: validate it and step over it */
if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
struct gfs2_log_header_host lh;
error = get_log_header(jd, start, &lh);
if (!error) {
gfs2_replay_incr_blk(sdp, &start);
brelse(bh);
continue;
}
/* An invalid header (1) is turned into -EIO; other errors are passed on */
if (error == 1) {
gfs2_consist_inode(GFS2_I(jd->jd_inode));
error = -EIO;
}
brelse(bh);
return error;
} else if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LD)) {
brelse(bh);
return -EIO;
}
ptr = (__be64 *)(bh->b_data + offset);
error = lops_scan_elements(jd, start, ld, ptr, pass);
if (error) {
brelse(bh);
return error;
}
/*
* Skip the ld_length blocks this descriptor covers.
* NOTE(review): if ld_length is 0, start does not advance and the
* same block is read again -- confirm that a valid descriptor always
* has a non-zero length.
*/
while (length--)
gfs2_replay_incr_blk(sdp, &start);
brelse(bh);
}
return 0;
}
/**
* clean_journal - mark a dirty journal as being clean
* @jd: the journal
* @head: the log header of the current head of the journal
*
* Writes a new log header, flagged GFS2_LOG_HEAD_UNMOUNT and carrying
* sequence number head->lh_sequence + 1, into the journal block that
* follows head->lh_blkno, and syncs it to disk.
*
* Returns: errno
*/
static int clean_journal(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
unsigned int lblock;
struct gfs2_log_header *lh;
u32 hash;
struct buffer_head *bh;
int error;
struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
/* The new header goes into the block after the current head */
lblock = head->lh_blkno;
gfs2_replay_incr_blk(sdp, &lblock);
/* Map that journal block to its disk block; create argument is 0 */
bh_map.b_size = 1 << ip->i_inode.i_blkbits;
error = gfs2_block_map(&ip->i_inode, lblock, 0, &bh_map);
if (error)
return error;
if (!bh_map.b_blocknr) {
gfs2_consist_inode(ip);
return -EIO;
}
/* NOTE(review): the result of sb_getblk() is used without a NULL check -- confirm it cannot fail here */
bh = sb_getblk(sdp->sd_vfs, bh_map.b_blocknr);
lock_buffer(bh);
memset(bh->b_data, 0, bh->b_size);
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
unlock_buffer(bh);
/* Build the on-disk (big-endian) log header in the zeroed buffer */
lh = (struct gfs2_log_header *)bh->b_data;
memset(lh, 0, sizeof(struct gfs2_log_header));
lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
lh->lh_sequence = cpu_to_be64(head->lh_sequence + 1);
lh->lh_flags = cpu_to_be32(GFS2_LOG_HEAD_UNMOUNT);
lh->lh_blkno = cpu_to_be32(lblock);
/* The hash is computed while lh_hash is still zero, then stored */
hash = gfs2_disk_hash((const char *)lh, sizeof(struct gfs2_log_header));
lh->lh_hash = cpu_to_be32(hash);
set_buffer_dirty(bh);
/* A failed write is reported through gfs2_io_error_bh(); error is still 0 here */
if (sync_dirty_buffer(bh))
gfs2_io_error_bh(sdp, bh);
brelse(bh);
return error;
}
/**
* gfs2_recover_journal - recover a given journal
* @jd: the struct gfs2_jdesc describing the journal
*
* Acquire the journal's lock, check to see if the journal is clean, and
* do recovery if necessary.
*
* The lock module is told the outcome through gfs2_lm_recovery_done():
* LM_RD_SUCCESS on the success path, LM_RD_GAVEUP on every failure path.
*
* Returns: errno; 0 is also returned when the journal lock is busy
* (GLR_TRYFAILED), in which case no recovery is attempted
*/
int gfs2_recover_journal(struct gfs2_jdesc *jd)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
struct gfs2_log_header_host head;
struct gfs2_holder j_gh, ji_gh, t_gh;
unsigned long t;
int ro = 0;
unsigned int pass;
int error;
/* j_gh and ji_gh are only taken (and later released) for a journal other than our own */
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n",
jd->jd_jid);
/* Acquire the journal lock so we can do recovery */
error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops,
LM_ST_EXCLUSIVE,
LM_FLAG_NOEXP | LM_FLAG_TRY | GL_NOCACHE,
&j_gh);
switch (error) {
case 0:
break;
case GLR_TRYFAILED:
fs_info(sdp, "jid=%u: Busy\n", jd->jd_jid);
error = 0;
/* deliberate fall through: a busy journal fails with error 0 */
default:
goto fail;
};
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP, &ji_gh);
if (error)
goto fail_gunlock_j;
} else {
fs_info(sdp, "jid=%u, already locked for use\n", jd->jd_jid);
}
fs_info(sdp, "jid=%u: Looking at journal...\n", jd->jd_jid);
error = gfs2_jdesc_check(jd);
if (error)
goto fail_gunlock_ji;
error = gfs2_find_jhead(jd, &head);
if (error)
goto fail_gunlock_ji;
/* A head flagged GFS2_LOG_HEAD_UNMOUNT means the journal is clean: no replay */
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
fs_info(sdp, "jid=%u: Acquiring the transaction lock...\n",
jd->jd_jid);
/* Start time, used for the "replayed in" message below */
t = jiffies;
/* Acquire a shared hold on the transaction lock */
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
GL_NOCANCEL | GL_NOCACHE, &t_gh);
if (error)
goto fail_gunlock_ji;
/*
* Read-only test: once SDF_JOURNAL_CHECKED is set, SDF_JOURNAL_LIVE
* decides; before that, the superblock's MS_RDONLY flag decides.
*/
if (test_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags)) {
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
ro = 1;
} else {
if (sdp->sd_vfs->s_flags & MS_RDONLY)
ro = 1;
}
if (ro) {
fs_warn(sdp, "jid=%u: Can't replay: read-only FS\n",
jd->jd_jid);
error = -EROFS;
goto fail_gunlock_tr;
}
fs_info(sdp, "jid=%u: Replaying journal...\n", jd->jd_jid);
/* Two passes over the active region, from the tail up to the head */
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
error = foreach_descriptor(jd, head.lh_tail,
head.lh_blkno, pass);
lops_after_scan(jd, error, pass);
if (error)
goto fail_gunlock_tr;
}
error = clean_journal(jd, &head);
if (error)
goto fail_gunlock_tr;
gfs2_glock_dq_uninit(&t_gh);
/* Elapsed time in seconds, rounded up */
t = DIV_ROUND_UP(jiffies - t, HZ);
fs_info(sdp, "jid=%u: Journal replayed in %lus\n",
jd->jd_jid, t);
}
/* The journal inode lock is dropped before, the journal lock after, reporting success */
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
gfs2_glock_dq_uninit(&ji_gh);
gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS);
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid)
gfs2_glock_dq_uninit(&j_gh);
fs_info(sdp, "jid=%u: Done\n", jd->jd_jid);
return 0;
fail_gunlock_tr:
gfs2_glock_dq_uninit(&t_gh);
fail_gunlock_ji:
if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) {
gfs2_glock_dq_uninit(&ji_gh);
/* Jumped to from inside the matching "if" above, so j_gh is held here */
fail_gunlock_j:
gfs2_glock_dq_uninit(&j_gh);
}
fs_info(sdp, "jid=%u: %s\n", jd->jd_jid, (error) ? "Failed" : "Done");
fail:
gfs2_lm_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
return error;
}
/**
* gfs2_check_journals - Recover any dirty journals
* @sdp: the filesystem
*
*/
void gfs2_check_journals(struct gfs2_sbd *sdp)
{
struct gfs2_jdesc *jd;
for (;;) {
jd = gfs2_jdesc_find_dirty(sdp);
if (!jd)
break;
if (jd != sdp->sd_jdesc)
gfs2_recover_journal(jd);
}
}

34
fs/gfs2/recovery.h Normal file
View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __RECOVERY_DOT_H__
#define __RECOVERY_DOT_H__
#include "incore.h"
/**
* gfs2_replay_incr_blk - advance a journal block number, wrapping at the end
* @sdp: the filesystem
* @blk: the block number to advance in place
*
* NOTE(review): the wrap point is the size of sdp->sd_jdesc, not of the
* journal being replayed; recovery.c calls this while working on other
* journals -- confirm that all journals have the same number of blocks.
*/
static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk)
{
if (++*blk == sdp->sd_jdesc->jd_blocks)
*blk = 0;
}
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh);
int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where);
void gfs2_revoke_clean(struct gfs2_sbd *sdp);
int gfs2_find_jhead(struct gfs2_jdesc *jd,
struct gfs2_log_header_host *head);
int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd);
void gfs2_check_journals(struct gfs2_sbd *sdp);
#endif /* __RECOVERY_DOT_H__ */

1511
fs/gfs2/rgrp.c Normal file

File diff suppressed because it is too large Load Diff

69
fs/gfs2/rgrp.h Normal file
View File

@@ -0,0 +1,69 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __RGRP_DOT_H__
#define __RGRP_DOT_H__
struct gfs2_rgrpd;
struct gfs2_sbd;
struct gfs2_holder;
void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
/*
 * Deliberate no-op counterpart to gfs2_alloc_get(); it exists only as a
 * marker in callers.
 */
static inline void gfs2_alloc_put(struct gfs2_inode *ip)
{
	/* So we can see where ip->i_alloc is used */
}
int gfs2_inplace_reserve_i(struct gfs2_inode *ip,
char *file, unsigned int line);
/*
 * Wrapper around gfs2_inplace_reserve_i() that supplies the caller's
 * source file name and line number (__FILE__, __LINE__).
 */
#define gfs2_inplace_reserve(ip) \
gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
void gfs2_inplace_release(struct gfs2_inode *ip);
unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
u64 gfs2_alloc_data(struct gfs2_inode *ip);
u64 gfs2_alloc_meta(struct gfs2_inode *ip);
u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation);
void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
void gfs2_unlink_di(struct inode *inode);
/*
 * Argument block shared by gfs2_rlist_add(), gfs2_rlist_alloc() and
 * gfs2_rlist_free().
 * NOTE(review): field meanings below are inferred from names only; the
 * implementation (rgrp.c) is not visible here - confirm before relying on them.
 */
struct gfs2_rgrp_list {
unsigned int rl_rgrps; /* presumably number of entries in use in rl_rgd */
unsigned int rl_space; /* presumably allocated capacity of rl_rgd */
struct gfs2_rgrpd **rl_rgd; /* array of resource group pointers */
struct gfs2_holder *rl_ghs; /* array of glock holders */
};
void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
u64 block);
void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state,
int flags);
void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
#endif /* __RGRP_DOT_H__ */

985
fs/gfs2/super.c Normal file
View File

@@ -0,0 +1,985 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "quota.h"
#include "recovery.h"
#include "rgrp.h"
#include "super.h"
#include "trans.h"
#include "util.h"
/*
 * Older on-disk filesystem format numbers accepted by gfs2_check_sb() in
 * addition to GFS2_FORMAT_FS.  The list is terminated by a 0 entry, so it
 * is currently empty.
 */
static const u32 gfs2_old_fs_formats[] = {
0
};
/*
 * Older multihost format numbers accepted by gfs2_check_sb() in addition
 * to GFS2_FORMAT_MULTI.  The list is terminated by a 0 entry, so it is
 * currently empty.
 */
static const u32 gfs2_old_multihost_formats[] = {
0
};
/**
 * gfs2_tune_init - Fill a gfs2_tune structure with default values
 * @gt: tune
 *
 * Initialises the structure's spinlock and stores the built-in default
 * for every tunable.
 */
void gfs2_tune_init(struct gfs2_tune *gt)
{
	spin_lock_init(&gt->gt_spin);

	/* inode limits */
	gt->gt_ilimit = 100;
	gt->gt_ilimit_tries = 3;
	gt->gt_ilimit_min = 1;

	/* glock handling */
	gt->gt_demote_secs = 300;
	gt->gt_reclaim_limit = 5000;
	gt->gt_lockdump_size = 131072;
	gt->gt_stall_secs = 600;
	gt->gt_complain_secs = 10;

	/* log and journal index */
	gt->gt_incore_log_blocks = 1024;
	gt->gt_log_flush_secs = 60;
	gt->gt_jindex_refresh_secs = 60;

	/* daemon intervals */
	gt->gt_scand_secs = 15;
	gt->gt_recoverd_secs = 60;
	gt->gt_logd_secs = 1;
	gt->gt_quotad_secs = 5;

	/* quota */
	gt->gt_quota_simul_sync = 64;
	gt->gt_quota_warn_period = 10;
	gt->gt_quota_scale_num = 1;
	gt->gt_quota_scale_den = 1;
	gt->gt_quota_cache_secs = 300;
	gt->gt_quota_quantum = 60;

	/* file behaviour */
	gt->gt_atime_quantum = 3600;
	gt->gt_new_files_jdata = 0;
	gt->gt_new_files_directio = 0;
	gt->gt_max_readahead = 1 << 18;

	/* statfs */
	gt->gt_statfs_quantum = 30;
	gt->gt_statfs_slow = 0;
}
/**
* gfs2_check_sb - Check superblock
* @sdp: the filesystem
* @sb: The superblock
* @silent: Don't print a message if the check fails
*
* Checks the version code of the FS is one that we understand how to
* read and that the sizes of the various on-disk structures have not
* changed.
*/
int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent)
{
unsigned int x;
if (sb->sb_header.mh_magic != GFS2_MAGIC ||
sb->sb_header.mh_type != GFS2_METATYPE_SB) {
if (!silent)
printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n");
return -EINVAL;
}
/* If format numbers match exactly, we're done. */
if (sb->sb_fs_format == GFS2_FORMAT_FS &&
sb->sb_multihost_format == GFS2_FORMAT_MULTI)
return 0;
if (sb->sb_fs_format != GFS2_FORMAT_FS) {
for (x = 0; gfs2_old_fs_formats[x]; x++)
if (gfs2_old_fs_formats[x] == sb->sb_fs_format)
break;
if (!gfs2_old_fs_formats[x]) {
printk(KERN_WARNING
"GFS2: code version (%u, %u) is incompatible "
"with ondisk format (%u, %u)\n",
GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
sb->sb_fs_format, sb->sb_multihost_format);
printk(KERN_WARNING
"GFS2: I don't know how to upgrade this FS\n");
return -EINVAL;
}
}
if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
for (x = 0; gfs2_old_multihost_formats[x]; x++)
if (gfs2_old_multihost_formats[x] ==
sb->sb_multihost_format)
break;
if (!gfs2_old_multihost_formats[x]) {
printk(KERN_WARNING
"GFS2: code version (%u, %u) is incompatible "
"with ondisk format (%u, %u)\n",
GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
sb->sb_fs_format, sb->sb_multihost_format);
printk(KERN_WARNING
"GFS2: I don't know how to upgrade this FS\n");
return -EINVAL;
}
}
if (!sdp->sd_args.ar_upgrade) {
printk(KERN_WARNING
"GFS2: code version (%u, %u) is incompatible "
"with ondisk format (%u, %u)\n",
GFS2_FORMAT_FS, GFS2_FORMAT_MULTI,
sb->sb_fs_format, sb->sb_multihost_format);
printk(KERN_INFO
"GFS2: Use the \"upgrade\" mount option to upgrade "
"the FS\n");
printk(KERN_INFO "GFS2: See the manual for more details\n");
return -EINVAL;
}
return 0;
}
/*
 * Completion handler for the superblock read issued by gfs2_read_super().
 * Returns 1 while the bio still has bytes outstanding; once it is complete
 * the page is marked up to date on success (or a warning is logged on
 * failure), the page is unlocked and 0 is returned.
 */
static int end_bio_io_page(struct bio *bio, unsigned int bytes_done, int error)
{
	struct page *sb_page;

	if (bio->bi_size)
		return 1;

	sb_page = bio->bi_private;
	if (error)
		printk(KERN_WARNING "gfs2: error %d reading superblock\n", error);
	else
		SetPageUptodate(sb_page);

	unlock_page(sb_page);
	return 0;
}
/**
 * gfs2_read_super - Read the gfs2 super block from disk
 * @sb: The VFS super block
 * @sector: The location of the super block, in units of sb->s_blocksize
 *
 * This uses the bio functions to read the super block from disk
 * because we want to be 100% sure that we never read cached data.
 * A super block is read only twice during each GFS2 mount and is
 * never written to by the filesystem. The first time it is read no
 * locks are held, and the only details which are looked at are those
 * relating to the locking protocol. Once locking is up and working,
 * the sb is read again under the lock to establish the location of
 * the master directory (contains pointers to journals etc) and the
 * root directory.
 *
 * On success the caller owns the returned page (gfs2_read_sb() releases
 * it with __free_page()).
 *
 * Returns: A page containing the sb or NULL
 */
struct page *gfs2_read_super(struct super_block *sb, sector_t sector)
{
struct page *page;
struct bio *bio;
page = alloc_page(GFP_KERNEL);
if (unlikely(!page))
return NULL;
ClearPageUptodate(page);
ClearPageDirty(page);
/* Locked here; end_bio_io_page() unlocks it when the read completes */
lock_page(page);
bio = bio_alloc(GFP_KERNEL, 1);
if (unlikely(!bio)) {
__free_page(page);
return NULL;
}
/* Convert from filesystem blocks to 512-byte sectors */
bio->bi_sector = sector * (sb->s_blocksize >> 9);
bio->bi_bdev = sb->s_bdev;
bio_add_page(bio, page, PAGE_SIZE, 0);
bio->bi_end_io = end_bio_io_page;
bio->bi_private = page;
submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
/* Sleep until the completion handler has unlocked the page */
wait_on_page_locked(page);
bio_put(bio);
/* The completion handler only sets Uptodate when the read succeeded */
if (!PageUptodate(page)) {
__free_page(page);
return NULL;
}
return page;
}
/**
 * gfs2_read_sb - Read super block
 * @sdp: The GFS2 superblock
 * @gl: the glock for the superblock (assumed to be held; not referenced
 *      by this function itself)
 * @silent: Don't print message if mount fails
 *
 * Reads the on-disk superblock into sdp->sd_sb, validates it with
 * gfs2_check_sb() and then derives the per-filesystem geometry constants
 * (pointers per block, hash table sizes, maximum metadata tree heights,
 * worst-case directory reservation) from the block size.
 *
 * Returns: 0 on success, -EIO if the superblock cannot be read or the
 *          error from gfs2_check_sb()
 */
int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent)
{
u32 hash_blocks, ind_blocks, leaf_blocks;
u32 tmp_blocks;
unsigned int x;
int error;
struct page *page;
char *sb;
/*
 * Note: this uses the value of sd_fsb2bb_shift that was set before this
 * call; the field is only recomputed from the superblock further down.
 */
page = gfs2_read_super(sdp->sd_vfs, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift);
if (!page) {
if (!silent)
fs_err(sdp, "can't read superblock\n");
return -EIO;
}
/* Convert the on-disk superblock into sdp->sd_sb, then drop the page */
sb = kmap(page);
gfs2_sb_in(&sdp->sd_sb, sb);
kunmap(page);
__free_page(page);
error = gfs2_check_sb(sdp, &sdp->sd_sb, silent);
if (error)
return error;
/* Shift/ratio between filesystem blocks and basic blocks */
sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
GFS2_BASIC_BLOCK_SHIFT;
sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
/* Number of u64 pointers that fit after a dinode / a metadata header */
sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode)) / sizeof(u64);
sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_meta_header)) / sizeof(u64);
/* Payload bytes of a block that carries a metadata header */
sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header);
sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2;
sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1;
sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64);
sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_meta_header)) /
sizeof(struct gfs2_quota_change);
/* Compute maximum reservation required to add an entry to a directory */
hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
sdp->sd_jbsize);
/* Add one level of indirect blocks at a time until the pointers fit in the dinode */
ind_blocks = 0;
for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) {
tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs);
ind_blocks += tmp_blocks;
}
leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH;
sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks;
/*
 * sd_heightsize[h]: bytes addressable by a metadata tree of height h.
 * Height 0 is the data stuffed in the dinode block itself.
 */
sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode);
sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs;
for (x = 2;; x++) {
u64 space, d;
u32 m;
space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs;
/*
 * Divide back: if the quotient is not the previous size (or there is
 * a remainder) the 64-bit multiplication wrapped, so stop here.
 */
d = space;
m = do_div(d, sdp->sd_inptrs);
if (d != sdp->sd_heightsize[x - 1] || m)
break;
sdp->sd_heightsize[x] = space;
}
sdp->sd_max_height = x;
gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT);
/* Same table again, but with sd_jbsize payload bytes per block at height 1 */
sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize -
sizeof(struct gfs2_dinode);
sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs;
for (x = 2;; x++) {
u64 space, d;
u32 m;
space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs;
d = space;
m = do_div(d, sdp->sd_inptrs);
if (d != sdp->sd_jheightsize[x - 1] || m)
break;
sdp->sd_jheightsize[x] = space;
}
sdp->sd_max_jheight = x;
gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT);
return 0;
}
/**
 * gfs2_jindex_hold - Grab a lock on the jindex
 * @sdp: The GFS2 superblock
 * @ji_gh: the holder for the jindex glock
 *
 * This is very similar to the gfs2_rindex_hold() function, except that
 * in general we hold the jindex lock for longer periods of time and
 * we grab it far less frequently (in general) than the rgrp lock.
 *
 * While looking for "journal<N>" entries that are not yet on
 * sdp->sd_jindex_list, the shared jindex glock is taken and dropped once
 * per journal.  The loop ends successfully when the next journal name is
 * not found; on that path the glock is still held via @ji_gh.  On every
 * error path the glock has already been released (or was never acquired).
 *
 * Returns: errno
 */
int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
{
struct gfs2_inode *dip = GFS2_I(sdp->sd_jindex);
struct qstr name;
/* "journal" + up to 10 decimal digits of an unsigned int + NUL fits in 20 */
char buf[20];
struct gfs2_jdesc *jd;
int error;
name.name = buf;
mutex_lock(&sdp->sd_jindex_mutex);
for (;;) {
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
if (error)
break;
/* Probe for the first journal number we do not know about yet */
name.len = sprintf(buf, "journal%u", sdp->sd_journals);
name.hash = gfs2_disk_hash(name.name, name.len);
error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL);
if (error == -ENOENT) {
/* No more journals: success, leave with the glock held */
error = 0;
break;
}
gfs2_glock_dq_uninit(ji_gh);
if (error)
break;
/* Preset the error so that the allocation failure path can just break */
error = -ENOMEM;
jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL);
if (!jd)
break;
jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL);
if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
if (!jd->jd_inode)
error = -ENOENT;
else
error = PTR_ERR(jd->jd_inode);
kfree(jd);
break;
}
/* Publish the new descriptor; its jid is its position in discovery order */
spin_lock(&sdp->sd_jindex_spin);
jd->jd_jid = sdp->sd_journals++;
list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
spin_unlock(&sdp->sd_jindex_spin);
}
mutex_unlock(&sdp->sd_jindex_mutex);
return error;
}
/**
* gfs2_jindex_free - Clear all the journal index information
* @sdp: The GFS2 superblock
*
*/
void gfs2_jindex_free(struct gfs2_sbd *sdp)
{
struct list_head list;
struct gfs2_jdesc *jd;
spin_lock(&sdp->sd_jindex_spin);
list_add(&list, &sdp->sd_jindex_list);
list_del_init(&sdp->sd_jindex_list);
sdp->sd_journals = 0;
spin_unlock(&sdp->sd_jindex_spin);
while (!list_empty(&list)) {
jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
list_del(&jd->jd_list);
iput(jd->jd_inode);
kfree(jd);
}
}
/*
 * Return the journal descriptor on @head whose jd_jid equals @jid, or NULL
 * if there is none.  Does no locking of its own; the callers in this file
 * hold sd_jindex_spin around it.
 */
static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
{
	struct gfs2_jdesc *jd;

	list_for_each_entry(jd, head, jd_list) {
		if (jd->jd_jid == jid)
			return jd;
	}

	return NULL;
}
/*
 * Look up the journal descriptor with id @jid on the filesystem's journal
 * index list, under sd_jindex_spin.  Returns NULL if there is no match.
 */
struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
{
	struct gfs2_jdesc *match;

	spin_lock(&sdp->sd_jindex_spin);
	match = jdesc_find_i(&sdp->sd_jindex_list, jid);
	spin_unlock(&sdp->sd_jindex_spin);

	return match;
}
void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid)
{
struct gfs2_jdesc *jd;
spin_lock(&sdp->sd_jindex_spin);
jd = jdesc_find_i(&sdp->sd_jindex_list, jid);
if (jd)
jd->jd_dirty = 1;
spin_unlock(&sdp->sd_jindex_spin);
}
/*
 * Find the first journal descriptor flagged dirty, clear its flag and
 * return it.  Returns NULL when no journal is dirty.  The scan and the
 * flag update happen under sd_jindex_spin.
 */
struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp)
{
	struct gfs2_jdesc *jd;
	struct gfs2_jdesc *dirty = NULL;

	spin_lock(&sdp->sd_jindex_spin);
	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		if (!jd->jd_dirty)
			continue;
		jd->jd_dirty = 0;
		dirty = jd;
		break;
	}
	spin_unlock(&sdp->sd_jindex_spin);

	return dirty;
}
int gfs2_jdesc_check(struct gfs2_jdesc *jd)
{
struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
int ar;
int error;
if (ip->i_di.di_size < (8 << 20) || ip->i_di.di_size > (1 << 30) ||
(ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) {
gfs2_consist_inode(ip);
return -EIO;
}
jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift;
error = gfs2_write_alloc_required(ip, 0, ip->i_di.di_size, &ar);
if (!error && ar) {
gfs2_consist_inode(ip);
error = -EIO;
}
return error;
}
/**
 * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one
 * @sdp: the filesystem
 *
 * Under a shared hold on the transaction glock: discards cached state for
 * this node's journal, locates the journal head, requires that the head
 * carries the "unmount" flag, initialises the log pointers and quota
 * subsystem, and finally marks the journal live.
 *
 * Returns: errno
 */
int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
struct gfs2_glock *j_gl = ip->i_gl;
struct gfs2_holder t_gh;
struct gfs2_log_header_host head;
int error;
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
if (error)
return error;
/* Flush the inode's cached metadata and invalidate the journal glock's metadata before reading the head */
gfs2_meta_cache_flush(ip);
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
error = gfs2_find_jhead(sdp->sd_jdesc, &head);
if (error)
goto fail;
/* A head without the unmount flag is treated as an inconsistency */
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
gfs2_consist(sdp);
error = -EIO;
goto fail;
}
/* Initialize some head of the log stuff */
sdp->sd_log_sequence = head.lh_sequence + 1;
gfs2_log_pointers_init(sdp, head.lh_blkno);
error = gfs2_quota_init(sdp);
if (error)
goto fail;
set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
gfs2_glock_dq_uninit(&t_gh);
return 0;
fail:
/* On failure, additionally set GL_NOCACHE on the holder before dropping it */
t_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&t_gh);
return error;
}
/**
 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
 * @sdp: the filesystem
 *
 * Syncs quota and statfs data (their return values are ignored), then
 * syncs metadata, shuts the log down and clears SDF_JOURNAL_LIVE under a
 * shared hold on the transaction glock.  If that glock cannot be acquired
 * the function gives up, unless the filesystem has already been shut down
 * (SDF_SHUTDOWN), in which case it proceeds without the lock.
 *
 * Returns: errno (the result of acquiring the transaction glock)
 */
int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
struct gfs2_holder t_gh;
int error;
gfs2_quota_sync(sdp);
gfs2_statfs_sync(sdp);
error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
&t_gh);
if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return error;
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
/*
 * NOTE(review): relies on gfs2_glock_nq_init() leaving t_gh.gh_gl NULL
 * when it fails (t_gh is otherwise uninitialised) - confirm in glock.h.
 */
if (t_gh.gh_gl)
gfs2_glock_dq_uninit(&t_gh);
gfs2_quota_cleanup(sdp);
return error;
}
/**
 * gfs2_statfs_init - Load the in-core statfs counters from disk
 * @sdp: the filesystem
 *
 * Under an exclusive hold on the master statfs inode's glock, reads the
 * master statfs change record into sdp->sd_statfs_master.  Unless the
 * filesystem is mounted as a spectator, the local statfs change record is
 * read into sdp->sd_statfs_local as well.  Both records live directly
 * after the dinode in their inode's buffer.
 *
 * Returns: 0 on success, or the errno from acquiring the glock or from
 *          reading either inode buffer
 */
int gfs2_statfs_init(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct buffer_head *m_bh, *l_bh;
	struct gfs2_holder gh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out;

	if (sdp->sd_args.ar_spectator) {
		/* Spectators only need the master counters */
		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);
	} else {
		error = gfs2_meta_inode_buffer(l_ip, &l_bh);
		if (error)
			goto out_m_bh;

		spin_lock(&sdp->sd_statfs_spin);
		gfs2_statfs_change_in(m_sc, m_bh->b_data +
				      sizeof(struct gfs2_dinode));
		gfs2_statfs_change_in(l_sc, l_bh->b_data +
				      sizeof(struct gfs2_dinode));
		spin_unlock(&sdp->sd_statfs_spin);

		brelse(l_bh);
	}

out_m_bh:
	brelse(m_bh);
out:
	gfs2_glock_dq_uninit(&gh);
	/* Propagate buffer read failures instead of reporting success */
	return error;
}
/*
 * Apply deltas to this node's local statfs change record.
 *
 * Adds @total, @free and @dinodes to sdp->sd_statfs_local and writes the
 * updated record into the local statfs inode's buffer (directly after the
 * dinode), after adding that buffer to the current transaction.  If the
 * buffer cannot be read, the change is dropped without reporting an error.
 */
void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes)
{
struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
struct buffer_head *l_bh;
int error;
error = gfs2_meta_inode_buffer(l_ip, &l_bh);
if (error)
return;
/* sd_statfs_mutex is held only around adding the buffer to the transaction */
mutex_lock(&sdp->sd_statfs_mutex);
gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
mutex_unlock(&sdp->sd_statfs_mutex);
/* The counters and their on-disk image are updated together under the spinlock */
spin_lock(&sdp->sd_statfs_spin);
l_sc->sc_total += total;
l_sc->sc_free += free;
l_sc->sc_dinodes += dinodes;
gfs2_statfs_change_out(l_sc, l_bh->b_data + sizeof(struct gfs2_dinode));
spin_unlock(&sdp->sd_statfs_spin);
brelse(l_bh);
}
/**
 * gfs2_statfs_sync - Fold the local statfs changes into the master record
 * @sdp: the filesystem
 *
 * Under an exclusive hold on the master statfs inode's glock, re-reads the
 * master record from its buffer.  If the local change record is all zero
 * there is nothing more to do.  Otherwise, inside a transaction, the local
 * deltas are added to the master counters, the local record (in core and
 * in its buffer) is zeroed, and the master record is written back to its
 * buffer.
 *
 * Returns: errno
 */
int gfs2_statfs_sync(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
	struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode);
	struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct gfs2_holder gh;
	struct buffer_head *m_bh, *l_bh;
	int error;

	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
				   &gh);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(m_ip, &m_bh);
	if (error)
		goto out;

	spin_lock(&sdp->sd_statfs_spin);
	gfs2_statfs_change_in(m_sc, m_bh->b_data +
			      sizeof(struct gfs2_dinode));
	if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) {
		/* No local changes pending: only the master refresh was needed */
		spin_unlock(&sdp->sd_statfs_spin);
		goto out_bh;
	}
	spin_unlock(&sdp->sd_statfs_spin);

	error = gfs2_meta_inode_buffer(l_ip, &l_bh);
	if (error)
		goto out_bh;

	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
	if (error)
		goto out_bh2;

	mutex_lock(&sdp->sd_statfs_mutex);
	gfs2_trans_add_bh(l_ip->i_gl, l_bh, 1);
	mutex_unlock(&sdp->sd_statfs_mutex);

	spin_lock(&sdp->sd_statfs_spin);
	m_sc->sc_total += l_sc->sc_total;
	m_sc->sc_free += l_sc->sc_free;
	m_sc->sc_dinodes += l_sc->sc_dinodes;
	/*
	 * l_sc is the in-core (host) record, so size the clear by the object
	 * itself; the buffer below holds the on-disk record and is sized by
	 * the on-disk structure.
	 */
	memset(l_sc, 0, sizeof(*l_sc));
	memset(l_bh->b_data + sizeof(struct gfs2_dinode),
	       0, sizeof(struct gfs2_statfs_change));
	spin_unlock(&sdp->sd_statfs_spin);

	gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
	gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode));

	gfs2_trans_end(sdp);

out_bh2:
	brelse(l_bh);
out_bh:
	brelse(m_bh);
out:
	gfs2_glock_dq_uninit(&gh);
	return error;
}
/**
 * gfs2_statfs_i - Do a statfs
 * @sdp: the filesystem
 * @sc: filled in with the combined statfs counters
 *
 * Combines the in-core master record with this node's local deltas and
 * then clamps the result: free blocks are kept within [0, total] and the
 * dinode count is not allowed to go negative.
 *
 * Returns: errno (always 0)
 */
int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
	struct gfs2_statfs_change_host *master = &sdp->sd_statfs_master;
	struct gfs2_statfs_change_host *local = &sdp->sd_statfs_local;

	/* Take a consistent snapshot of both records */
	spin_lock(&sdp->sd_statfs_spin);
	*sc = *master;
	sc->sc_total += local->sc_total;
	sc->sc_free += local->sc_free;
	sc->sc_dinodes += local->sc_dinodes;
	spin_unlock(&sdp->sd_statfs_spin);

	if (sc->sc_free < 0)
		sc->sc_free = 0;
	if (sc->sc_free > sc->sc_total)
		sc->sc_free = sc->sc_total;
	if (sc->sc_dinodes < 0)
		sc->sc_dinodes = 0;

	return 0;
}
/**
 * statfs_slow_fill - add one resource group's counters to a statfs total
 * @rgd: the RG
 * @sc: the sc structure being accumulated into
 *
 * Runs gfs2_rgrp_verify() on the resource group and then adds its data
 * block count, free block count and dinode count to @sc.
 *
 * Returns: 0 (this implementation has no failure path)
 */
static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
struct gfs2_statfs_change_host *sc)
{
gfs2_rgrp_verify(rgd);
sc->sc_total += rgd->rd_ri.ri_data;
sc->sc_free += rgd->rd_rg.rg_free;
sc->sc_dinodes += rgd->rd_rg.rg_dinodes;
return 0;
}
/**
 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
 * @sdp: the filesystem
 * @sc: the sc info that will be returned
 *
 * Walks every resource group, keeping up to 64 asynchronous shared glock
 * requests in flight at a time, and sums the per-resource-group counters
 * into @sc as each lock is granted.
 *
 * Any error (other than a signal) will cause this routine to fall back
 * to the synchronous version.
 * NOTE(review): no such fallback is visible in this function; after the
 * first error it stops queueing new requests and stops accumulating, but
 * still waits for the outstanding requests - confirm where the fallback
 * is supposed to happen.
 *
 * FIXME: This really shouldn't busy wait like this.
 *
 * Returns: errno
 */
int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
struct gfs2_holder ri_gh;
struct gfs2_rgrpd *rgd_next;
struct gfs2_holder *gha, *gh;
unsigned int slots = 64;
unsigned int x;
int done;
int error = 0, err;
memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
/* Zeroed holder array: a slot with gh_gl == NULL is free */
gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
if (!gha)
return -ENOMEM;
error = gfs2_rindex_hold(sdp, &ri_gh);
if (error)
goto out;
rgd_next = gfs2_rgrpd_get_first(sdp);
for (;;) {
done = 1;
for (x = 0; x < slots; x++) {
gh = gha + x;
/* Slot in use and its request has completed: collect the result */
if (gh->gh_gl && gfs2_glock_poll(gh)) {
err = gfs2_glock_wait(gh);
if (err) {
gfs2_holder_uninit(gh);
error = err;
} else {
/* Only accumulate while no error has been seen */
if (!error)
error = statfs_slow_fill(
gh->gh_gl->gl_object, sc);
gfs2_glock_dq_uninit(gh);
}
}
/* Still busy: another pass is needed.  Free: queue the next resource group. */
if (gh->gh_gl)
done = 0;
else if (rgd_next && !error) {
error = gfs2_glock_nq_init(rgd_next->rd_gl,
LM_ST_SHARED,
GL_ASYNC,
gh);
rgd_next = gfs2_rgrpd_get_next(rgd_next);
done = 0;
}
if (signal_pending(current))
error = -ERESTARTSYS;
}
if (done)
break;
yield();
}
gfs2_glock_dq_uninit(&ri_gh);
out:
kfree(gha);
return error;
}
/*
 * One held journal glock in gfs2_lock_fs_check_clean(): the holder plus
 * the list linkage used to release all of them again afterwards.
 */
struct lfcc {
struct list_head list;
struct gfs2_holder gh;
};
/**
 * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all
 *                            journals are clean
 * @sdp: the file system
 * @t_gh: the hold on the transaction lock
 *
 * Takes a shared glock on every journal inode, then the transaction glock
 * in the deferred state, and finally verifies that every journal passes
 * gfs2_jdesc_check() and that its head carries the unmount flag.  The
 * journal glocks and the jindex hold are always released before
 * returning.  On success the transaction glock remains held through
 * @t_gh; on failure it is not held.
 *
 * Returns: errno (-EBUSY if some journal is not clean)
 */
static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp,
				    struct gfs2_holder *t_gh)
{
	struct gfs2_inode *ip;
	struct gfs2_holder ji_gh;
	struct gfs2_jdesc *jd;
	struct lfcc *lfcc;
	LIST_HEAD(list);
	struct gfs2_log_header_host lh;
	int error;

	error = gfs2_jindex_hold(sdp, &ji_gh);
	if (error)
		return error;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL);
		if (!lfcc) {
			error = -ENOMEM;
			goto out;
		}
		ip = GFS2_I(jd->jd_inode);
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &lfcc->gh);
		if (error) {
			kfree(lfcc);
			goto out;
		}
		list_add(&lfcc->list, &list);
	}

	error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED,
				   LM_FLAG_PRIORITY | GL_NOCACHE,
				   t_gh);
	/*
	 * Bail out if the transaction lock was not obtained.  Otherwise the
	 * loop below would overwrite the error, and we would either report
	 * success without holding the lock or release a holder that was
	 * never acquired.
	 */
	if (error)
		goto out;

	list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
		error = gfs2_jdesc_check(jd);
		if (error)
			break;
		error = gfs2_find_jhead(jd, &lh);
		if (error)
			break;
		if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
			error = -EBUSY;
			break;
		}
	}

	/* Some journal is unusable or still in use: give the lock back */
	if (error)
		gfs2_glock_dq_uninit(t_gh);

out:
	while (!list_empty(&list)) {
		lfcc = list_entry(list.next, struct lfcc, list);
		list_del(&lfcc->list);
		gfs2_glock_dq_uninit(&lfcc->gh);
		kfree(lfcc);
	}
	gfs2_glock_dq_uninit(&ji_gh);
	return error;
}
/**
 * gfs2_freeze_fs - freezes the file system
 * @sdp: the file system
 *
 * This function flushes data and meta data for all machines by
 * acquiring the transaction log exclusively. All journals are
 * ensured to be in a clean state as well.
 *
 * Freezes nest: only the first request actually takes the lock, later
 * ones just bump sd_freeze_count.  A failed first request leaves the
 * count unchanged.
 *
 * Returns: errno
 */
int gfs2_freeze_fs(struct gfs2_sbd *sdp)
{
	int error = 0;

	mutex_lock(&sdp->sd_freeze_lock);

	sdp->sd_freeze_count++;
	if (sdp->sd_freeze_count == 1) {
		error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh);
		if (error)
			sdp->sd_freeze_count--;
	}

	mutex_unlock(&sdp->sd_freeze_lock);

	return error;
}
/**
 * gfs2_unfreeze_fs - unfreezes the file system
 * @sdp: the file system
 *
 * This function allows the file system to proceed by unlocking
 * the exclusively held transaction lock. Other GFS2 nodes are
 * now free to acquire the lock shared and go on with their lives.
 *
 * Undoes one gfs2_freeze_fs(); the lock is only dropped when the freeze
 * count reaches zero.  Calling it on an unfrozen filesystem is a no-op.
 */
void gfs2_unfreeze_fs(struct gfs2_sbd *sdp)
{
	mutex_lock(&sdp->sd_freeze_lock);

	if (sdp->sd_freeze_count) {
		sdp->sd_freeze_count--;
		if (!sdp->sd_freeze_count)
			gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
	}

	mutex_unlock(&sdp->sd_freeze_lock);
}

55
fs/gfs2/super.h Normal file
View File

@@ -0,0 +1,55 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __SUPER_DOT_H__
#define __SUPER_DOT_H__
#include "incore.h"
void gfs2_tune_init(struct gfs2_tune *gt);
int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent);
int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent);
struct page *gfs2_read_super(struct super_block *sb, sector_t sector);
/* Return the current number of journals, read under sd_jindex_spin. */
static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
{
	unsigned int journals;

	spin_lock(&sdp->sd_jindex_spin);
	journals = sdp->sd_journals;
	spin_unlock(&sdp->sd_jindex_spin);

	return journals;
}
int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh);
void gfs2_jindex_free(struct gfs2_sbd *sdp);
struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid);
struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp);
int gfs2_jdesc_check(struct gfs2_jdesc *jd);
int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
int gfs2_statfs_init(struct gfs2_sbd *sdp);
void gfs2_statfs_change(struct gfs2_sbd *sdp,
s64 total, s64 free, s64 dinodes);
int gfs2_statfs_sync(struct gfs2_sbd *sdp);
int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc);
int gfs2_freeze_fs(struct gfs2_sbd *sdp);
void gfs2_unfreeze_fs(struct gfs2_sbd *sdp);
#endif /* __SUPER_DOT_H__ */

565
fs/gfs2/sys.c Normal file
View File

@@ -0,0 +1,565 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
#include "incore.h"
#include "lm.h"
#include "sys.h"
#include "super.h"
#include "glock.h"
#include "quota.h"
#include "util.h"
char *gfs2_sys_margs;
spinlock_t gfs2_sys_margs_lock;
/* sysfs "id": the VFS superblock's s_id string followed by a newline. */
static ssize_t id_show(struct gfs2_sbd *sdp, char *buf)
{
	const char *id = sdp->sd_vfs->s_id;

	return snprintf(buf, PAGE_SIZE, "%s\n", id);
}
/* sysfs "fsname": the filesystem name (sd_fsname) followed by a newline. */
static ssize_t fsname_show(struct gfs2_sbd *sdp, char *buf)
{
	const char *fsname = sdp->sd_fsname;

	return snprintf(buf, PAGE_SIZE, "%s\n", fsname);
}
/* sysfs "freeze" (read): the current freeze nesting count. */
static ssize_t freeze_show(struct gfs2_sbd *sdp, char *buf)
{
	unsigned int depth;

	mutex_lock(&sdp->sd_freeze_lock);
	depth = sdp->sd_freeze_count;
	mutex_unlock(&sdp->sd_freeze_lock);

	return snprintf(buf, PAGE_SIZE, "%u\n", depth);
}
/*
 * sysfs "freeze" (write): "1" freezes the filesystem, "0" unfreezes it.
 *
 * Requires CAP_SYS_ADMIN (-EACCES otherwise).  Any other value yields
 * -EINVAL.  If freezing fails, the failure is logged and its errno is
 * returned to the writer rather than being reported as a successful write.
 *
 * Returns: @len on success, negative errno on failure
 */
static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	int error = 0;
	int n;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	n = simple_strtol(buf, NULL, 0);

	switch (n) {
	case 0:
		gfs2_unfreeze_fs(sdp);
		break;
	case 1:
		error = gfs2_freeze_fs(sdp);
		break;
	default:
		return -EINVAL;
	}

	if (error) {
		fs_warn(sdp, "freeze %d error %d\n", n, error);
		return error;
	}

	return len;
}
/* sysfs "withdraw" (read): the state of the SDF_SHUTDOWN flag. */
static ssize_t withdraw_show(struct gfs2_sbd *sdp, char *buf)
{
	unsigned int withdrawn;

	withdrawn = test_bit(SDF_SHUTDOWN, &sdp->sd_flags);

	return snprintf(buf, PAGE_SIZE, "%u\n", withdrawn);
}
/*
 * sysfs "withdraw" (write): writing "1" withdraws this node from the
 * cluster.  Requires CAP_SYS_ADMIN; any value other than 1 is -EINVAL.
 */
static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	long request;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	request = simple_strtol(buf, NULL, 0);
	if (request != 1)
		return -EINVAL;

	gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: withdrawing from cluster at user's request\n",
		sdp->sd_fsname);

	return len;
}
/*
 * sysfs "statfs_sync" (write): writing "1" runs gfs2_statfs_sync(); its
 * result is not reported.  Requires CAP_SYS_ADMIN; other values are -EINVAL.
 */
static ssize_t statfs_sync_store(struct gfs2_sbd *sdp, const char *buf,
				 size_t len)
{
	long request;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	request = simple_strtol(buf, NULL, 0);
	if (request != 1)
		return -EINVAL;

	gfs2_statfs_sync(sdp);

	return len;
}
/*
 * sysfs "shrink" (write): writing "1" calls gfs2_gl_hash_clear() with
 * NO_WAIT.  Requires CAP_SYS_ADMIN; other values are -EINVAL.
 */
static ssize_t shrink_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
{
	long request;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	request = simple_strtol(buf, NULL, 0);
	if (request != 1)
		return -EINVAL;

	gfs2_gl_hash_clear(sdp, NO_WAIT);

	return len;
}
/*
 * sysfs "quota_sync" (write): writing "1" runs gfs2_quota_sync(); its
 * result is not reported.  Requires CAP_SYS_ADMIN; other values are -EINVAL.
 */
static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
				size_t len)
{
	long request;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	request = simple_strtol(buf, NULL, 0);
	if (request != 1)
		return -EINVAL;

	gfs2_quota_sync(sdp);

	return len;
}
/*
 * sysfs "quota_refresh_user" (write): parse a numeric id and call
 * gfs2_quota_refresh() for it with the user flag set (1).  Requires
 * CAP_SYS_ADMIN.
 */
static ssize_t quota_refresh_user_store(struct gfs2_sbd *sdp, const char *buf,
					size_t len)
{
	u32 uid;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	uid = simple_strtoul(buf, NULL, 0);
	gfs2_quota_refresh(sdp, 1, uid);

	return len;
}
/*
 * sysfs "quota_refresh_group" (write): parse a numeric id and call
 * gfs2_quota_refresh() for it with the user flag clear (0).  Requires
 * CAP_SYS_ADMIN.
 */
static ssize_t quota_refresh_group_store(struct gfs2_sbd *sdp, const char *buf,
					 size_t len)
{
	u32 gid;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	gid = simple_strtoul(buf, NULL, 0);
	gfs2_quota_refresh(sdp, 0, gid);

	return len;
}
/*
 * A per-filesystem sysfs attribute.  gfs2_attr_show()/gfs2_attr_store()
 * recover this structure from the embedded struct attribute and dispatch
 * to the callbacks below; either callback may be NULL.
 */
struct gfs2_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};
/*
 * Define a static struct gfs2_attr named gfs2_attr_<name> with the given
 * file mode and show/store callbacks.
 */
#define GFS2_ATTR(name, mode, show, store) \
static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store)
/* Read-only informational attributes */
GFS2_ATTR(id, 0444, id_show, NULL);
GFS2_ATTR(fsname, 0444, fsname_show, NULL);
/* Readable state that can also be changed by writing */
GFS2_ATTR(freeze, 0644, freeze_show, freeze_store);
/* Write-only triggers have mode 0200 and no show callback */
GFS2_ATTR(shrink, 0200, NULL, shrink_store);
GFS2_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
GFS2_ATTR(statfs_sync, 0200, NULL, statfs_sync_store);
GFS2_ATTR(quota_sync, 0200, NULL, quota_sync_store);
GFS2_ATTR(quota_refresh_user, 0200, NULL, quota_refresh_user_store);
GFS2_ATTR(quota_refresh_group, 0200, NULL, quota_refresh_group_store);
/* NULL-terminated list of default attributes, installed via gfs2_ktype. */
static struct attribute *gfs2_attrs[] = {
&gfs2_attr_id.attr,
&gfs2_attr_fsname.attr,
&gfs2_attr_freeze.attr,
&gfs2_attr_shrink.attr,
&gfs2_attr_withdraw.attr,
&gfs2_attr_statfs_sync.attr,
&gfs2_attr_quota_sync.attr,
&gfs2_attr_quota_refresh_user.attr,
&gfs2_attr_quota_refresh_group.attr,
NULL,
};
/*
 * sysfs show entry point: map the kobject and attribute back to their
 * gfs2_sbd and gfs2_attr containers and call the attribute's show
 * callback.  Attributes without a show callback read as empty (0 bytes).
 */
static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr,
			      char *buf)
{
	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);

	if (!a->show)
		return 0;

	return a->show(sdp, buf);
}
/*
 * sysfs store entry point: map the kobject and attribute back to their
 * gfs2_sbd and gfs2_attr containers and call the attribute's store
 * callback.  Attributes without a store callback accept and discard the
 * whole write.
 */
static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr,
			       const char *buf, size_t len)
{
	struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
	struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr);

	if (!a->store)
		return len;

	return a->store(sdp, buf, len);
}
/* Routes every sysfs read/write on a gfs2 kobject through the dispatchers above */
static struct sysfs_ops gfs2_attr_ops = {
.show = gfs2_attr_show,
.store = gfs2_attr_store,
};
/* kobject type for a filesystem's kobject: default attribute set plus the ops */
static struct kobj_type gfs2_ktype = {
.default_attrs = gfs2_attrs,
.sysfs_ops = &gfs2_attr_ops,
};
/* The "gfs2" kset, placed under the fs subsystem */
static struct kset gfs2_kset = {
.subsys = &fs_subsys,
.kobj = {.name = "gfs2"},
.ktype = &gfs2_ktype,
};
/*
 * display struct lm_lockstruct fields
 *
 * Each LOCKSTRUCT_ATTR(name, fmt) generates a <name>_show() function that
 * prints sdp->sd_lockstruct.ls_<name> with the given format, plus a
 * read-only attribute lockstruct_attr_<name> that refers to it.
 */
struct lockstruct_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
};
#define LOCKSTRUCT_ATTR(name, fmt) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \
} \
static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name)
LOCKSTRUCT_ATTR(jid, "%u\n");
LOCKSTRUCT_ATTR(first, "%u\n");
LOCKSTRUCT_ATTR(lvb_size, "%u\n");
LOCKSTRUCT_ATTR(flags, "%d\n");
/* NULL-terminated list of the attributes defined above */
static struct attribute *lockstruct_attrs[] = {
&lockstruct_attr_jid.attr,
&lockstruct_attr_first.attr,
&lockstruct_attr_lvb_size.attr,
&lockstruct_attr_flags.attr,
NULL,
};
/*
 * display struct gfs2_args fields
 *
 * Each ARGS_ATTR(name, fmt) generates a <name>_show() function that prints
 * sdp->sd_args.ar_<name> with the given format, plus a read-only attribute
 * args_attr_<name> that refers to it.
 */
struct args_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
};
#define ARGS_ATTR(name, fmt) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \
} \
static struct args_attr args_attr_##name = __ATTR_RO(name)
ARGS_ATTR(lockproto, "%s\n");
ARGS_ATTR(locktable, "%s\n");
ARGS_ATTR(hostdata, "%s\n");
ARGS_ATTR(spectator, "%d\n");
ARGS_ATTR(ignore_local_fs, "%d\n");
ARGS_ATTR(localcaching, "%d\n");
ARGS_ATTR(localflocks, "%d\n");
ARGS_ATTR(debug, "%d\n");
ARGS_ATTR(upgrade, "%d\n");
ARGS_ATTR(num_glockd, "%u\n");
ARGS_ATTR(posix_acl, "%d\n");
ARGS_ATTR(quota, "%u\n");
ARGS_ATTR(suiddir, "%d\n");
ARGS_ATTR(data, "%d\n");
/*
 * one oddball doesn't fit the macro mold: "noatime" is not read from
 * sd_args but from the SDF_NOATIME bit in sd_flags, normalised to 0 or 1.
 */
static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%d\n",
!!test_bit(SDF_NOATIME, &sdp->sd_flags));
}
static struct args_attr args_attr_noatime = __ATTR_RO(noatime);
/* NULL-terminated attribute list referenced by args_group. */
static struct attribute *args_attrs[] = {
&args_attr_lockproto.attr,
&args_attr_locktable.attr,
&args_attr_hostdata.attr,
&args_attr_spectator.attr,
&args_attr_ignore_local_fs.attr,
&args_attr_localcaching.attr,
&args_attr_localflocks.attr,
&args_attr_debug.attr,
&args_attr_upgrade.attr,
&args_attr_num_glockd.attr,
&args_attr_posix_acl.attr,
&args_attr_quota.attr,
&args_attr_suiddir.attr,
&args_attr_data.attr,
&args_attr_noatime.attr,
NULL,
};
/*
 * display counters from superblock
 *
 * Read-only attributes exported through the "counters" attribute group.
 */
/* Attribute wrapper with a show callback only (these files are read-only). */
struct counters_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
};
/*
 * COUNTERS_ATTR(name, fmt): define <name>_show(), which reads the atomic
 * counter sdp->sd_<name>, casts it to unsigned int and prints it with @fmt,
 * plus the matching read-only attribute object counters_attr_<name>.
 */
#define COUNTERS_ATTR(name, fmt) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, fmt, \
(unsigned int)atomic_read(&sdp->sd_##name)); \
} \
static struct counters_attr counters_attr_##name = __ATTR_RO(name)
COUNTERS_ATTR(glock_count, "%u\n");
COUNTERS_ATTR(glock_held_count, "%u\n");
COUNTERS_ATTR(inode_count, "%u\n");
COUNTERS_ATTR(reclaimed, "%u\n");
/* NULL-terminated attribute list referenced by counters_group. */
static struct attribute *counters_attrs[] = {
&counters_attr_glock_count.attr,
&counters_attr_glock_held_count.attr,
&counters_attr_inode_count.attr,
&counters_attr_reclaimed.attr,
NULL,
};
/*
* get and set struct gfs2_tune fields
*/
static ssize_t quota_scale_show(struct gfs2_sbd *sdp, char *buf)
{
return snprintf(buf, PAGE_SIZE, "%u %u\n",
sdp->sd_tune.gt_quota_scale_num,
sdp->sd_tune.gt_quota_scale_den);
}
/*
 * Parse "<numerator> <denominator>" from @buf and store both values in the
 * tunables under gt_spin.
 *
 * Returns @len on success, -EACCES without CAP_SYS_ADMIN, or -EINVAL when
 * two unsigned values cannot be parsed or the denominator is zero.
 */
static ssize_t quota_scale_store(struct gfs2_sbd *sdp, const char *buf,
				 size_t len)
{
	struct gfs2_tune *gt = &sdp->sd_tune;
	unsigned int num, den;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (sscanf(buf, "%u %u", &num, &den) != 2)
		return -EINVAL;
	if (den == 0)
		return -EINVAL;

	spin_lock(&gt->gt_spin);
	gt->gt_quota_scale_num = num;
	gt->gt_quota_scale_den = den;
	spin_unlock(&gt->gt_spin);

	return len;
}
/*
 * Common store helper for single-value tunables.
 *
 * @field:      the gfs2_tune member to overwrite
 * @check_zero: when non-zero, a parsed value of 0 is rejected
 *
 * The number is parsed with simple_strtoul() using base auto-detection and
 * written to *@field under gt_spin.  Returns @len on success, -EACCES
 * without CAP_SYS_ADMIN, or -EINVAL for a rejected zero.
 */
static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field,
			int check_zero, const char *buf, size_t len)
{
	struct gfs2_tune *gt = &sdp->sd_tune;
	unsigned int value;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	value = simple_strtoul(buf, NULL, 0);
	if (value == 0 && check_zero)
		return -EINVAL;

	spin_lock(&gt->gt_spin);
	*field = value;
	spin_unlock(&gt->gt_spin);

	return len;
}
/* Attribute wrapper for tunables: carries both a show and a store callback. */
struct tune_attr {
struct attribute attr;
ssize_t (*show)(struct gfs2_sbd *, char *);
ssize_t (*store)(struct gfs2_sbd *, const char *, size_t);
};
/*
 * TUNE_ATTR_3(name, show, store): define tune_attr_<name> with mode 0644 and
 * the given show/store callbacks.
 */
#define TUNE_ATTR_3(name, show, store) \
static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store)
/*
 * TUNE_ATTR_2(name, store): generate <name>_show(), which prints
 * sdp->sd_tune.gt_<name> as "%u\n", then define the attribute with the
 * caller-supplied store callback.
 */
#define TUNE_ATTR_2(name, store) \
static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \
{ \
return snprintf(buf, PAGE_SIZE, "%u\n", sdp->sd_tune.gt_##name); \
} \
TUNE_ATTR_3(name, name##_show, store)
/*
 * TUNE_ATTR(name, check_zero): generate <name>_store() as a call to
 * tune_set() on sdp->sd_tune.gt_<name>, plus the show routine and attribute.
 */
#define TUNE_ATTR(name, check_zero) \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{ \
return tune_set(sdp, &sdp->sd_tune.gt_##name, check_zero, buf, len); \
} \
TUNE_ATTR_2(name, name##_store)
/*
 * TUNE_ATTR_DAEMON(name, process): like TUNE_ATTR() with zero rejected, but
 * the store routine also calls wake_up_process() on sdp->sd_<process> after
 * tune_set() returns (whether or not tune_set() succeeded).
 */
#define TUNE_ATTR_DAEMON(name, process) \
static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\
{ \
ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \
wake_up_process(sdp->sd_##process); \
return r; \
} \
TUNE_ATTR_2(name, name##_store)
/* Tunables for which a value of zero is accepted. */
TUNE_ATTR(demote_secs, 0);
TUNE_ATTR(incore_log_blocks, 0);
TUNE_ATTR(log_flush_secs, 0);
TUNE_ATTR(jindex_refresh_secs, 0);
TUNE_ATTR(quota_warn_period, 0);
TUNE_ATTR(quota_quantum, 0);
TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(reclaim_limit, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(new_files_directio, 0);
/* Tunables for which a value of zero is rejected with -EINVAL. */
TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
TUNE_ATTR(stall_secs, 1);
TUNE_ATTR(statfs_quantum, 1);
/* Tunables whose store routine also wakes the named sdp->sd_* task. */
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
TUNE_ATTR_DAEMON(logd_secs, logd_process);
TUNE_ATTR_DAEMON(quotad_secs, quotad_process);
/* Two-value tunable with hand-written show/store routines. */
TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store);
/* NULL-terminated attribute list referenced by tune_group. */
static struct attribute *tune_attrs[] = {
&tune_attr_demote_secs.attr,
&tune_attr_incore_log_blocks.attr,
&tune_attr_log_flush_secs.attr,
&tune_attr_jindex_refresh_secs.attr,
&tune_attr_quota_warn_period.attr,
&tune_attr_quota_quantum.attr,
&tune_attr_atime_quantum.attr,
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_reclaim_limit.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_quota_simul_sync.attr,
&tune_attr_quota_cache_secs.attr,
&tune_attr_stall_secs.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_scand_secs.attr,
&tune_attr_recoverd_secs.attr,
&tune_attr_logd_secs.attr,
&tune_attr_quotad_secs.attr,
&tune_attr_quota_scale.attr,
&tune_attr_new_files_jdata.attr,
&tune_attr_new_files_directio.attr,
NULL,
};
/*
 * Named attribute groups created on each filesystem kobject by
 * gfs2_sys_fs_add() and removed by gfs2_sys_fs_del().
 */
static struct attribute_group lockstruct_group = {
.name = "lockstruct",
.attrs = lockstruct_attrs,
};
static struct attribute_group counters_group = {
.name = "counters",
.attrs = counters_attrs,
};
static struct attribute_group args_group = {
.name = "args",
.attrs = args_attrs,
};
static struct attribute_group tune_group = {
.name = "tune",
.attrs = tune_attrs,
};
int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
{
int error;
sdp->sd_kobj.kset = &gfs2_kset;
sdp->sd_kobj.ktype = &gfs2_ktype;
error = kobject_set_name(&sdp->sd_kobj, "%s", sdp->sd_table_name);
if (error)
goto fail;
error = kobject_register(&sdp->sd_kobj);
if (error)
goto fail;
error = sysfs_create_group(&sdp->sd_kobj, &lockstruct_group);
if (error)
goto fail_reg;
error = sysfs_create_group(&sdp->sd_kobj, &counters_group);
if (error)
goto fail_lockstruct;
error = sysfs_create_group(&sdp->sd_kobj, &args_group);
if (error)
goto fail_counters;
error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
if (error)
goto fail_args;
return 0;
fail_args:
sysfs_remove_group(&sdp->sd_kobj, &args_group);
fail_counters:
sysfs_remove_group(&sdp->sd_kobj, &counters_group);
fail_lockstruct:
sysfs_remove_group(&sdp->sd_kobj, &lockstruct_group);
fail_reg:
kobject_unregister(&sdp->sd_kobj);
fail:
fs_err(sdp, "error %d adding sysfs files", error);
return error;
}
/**
 * gfs2_sys_fs_del - remove the sysfs groups and kobject of a filesystem
 * @sdp: the filesystem
 *
 * Removes the groups in the reverse of the order used by gfs2_sys_fs_add(),
 * then unregisters the kobject.
 */
void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
{
	struct kobject *kobj = &sdp->sd_kobj;

	sysfs_remove_group(kobj, &tune_group);
	sysfs_remove_group(kobj, &args_group);
	sysfs_remove_group(kobj, &counters_group);
	sysfs_remove_group(kobj, &lockstruct_group);

	kobject_unregister(kobj);
}
/**
 * gfs2_sys_init - set up the GFS2 sysfs state
 *
 * Clears gfs2_sys_margs, initialises its spinlock and registers gfs2_kset.
 *
 * Returns: the result of kset_register()
 */
int gfs2_sys_init(void)
{
gfs2_sys_margs = NULL;
spin_lock_init(&gfs2_sys_margs_lock);
return kset_register(&gfs2_kset);
}
/**
 * gfs2_sys_uninit - tear down the GFS2 sysfs state
 *
 * Frees the gfs2_sys_margs string (kfree() accepts NULL) and unregisters
 * gfs2_kset.
 */
void gfs2_sys_uninit(void)
{
kfree(gfs2_sys_margs);
kset_unregister(&gfs2_kset);
}

27
fs/gfs2/sys.h Normal file
View File

@@ -0,0 +1,27 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __SYS_DOT_H__
#define __SYS_DOT_H__
#include <linux/spinlock.h>
struct gfs2_sbd;
/* Allow args to be passed to GFS2 when using an initial ram disk */
extern char *gfs2_sys_margs;
/* Lock initialised alongside gfs2_sys_margs in gfs2_sys_init(). */
extern spinlock_t gfs2_sys_margs_lock;
/* Create / remove the per-filesystem sysfs kobject and attribute groups. */
int gfs2_sys_fs_add(struct gfs2_sbd *sdp);
void gfs2_sys_fs_del(struct gfs2_sbd *sdp);
/* Register / unregister the top-level "gfs2" kset. */
int gfs2_sys_init(void);
void gfs2_sys_uninit(void);
#endif /* __SYS_DOT_H__ */

184
fs/gfs2/trans.c Normal file
View File

@@ -0,0 +1,184 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/kallsyms.h>
#include <linux/lm_interface.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "trans.h"
#include "util.h"
/**
 * gfs2_trans_begin - start a transaction for the current task
 * @sdp: the filesystem
 * @blocks: number of blocks the caller intends to add to the transaction
 * @revokes: number of revokes the caller intends to add
 *
 * Allocates a struct gfs2_trans, takes the transaction glock in shared mode,
 * reserves log space and stores the transaction in current->journal_info.
 * It is a BUG to call this with a transaction already attached to the task,
 * or with both @blocks and @revokes equal to zero.
 *
 * Returns: 0 on success, -ENOMEM if the transaction cannot be allocated,
 *          -EROFS if SDF_JOURNAL_LIVE is not set, or the error returned by
 *          gfs2_glock_nq() / gfs2_log_reserve()
 */
int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
unsigned int revokes)
{
struct gfs2_trans *tr;
int error;
BUG_ON(current->journal_info);
BUG_ON(blocks == 0 && revokes == 0);
tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS);
if (!tr)
return -ENOMEM;
/* Remember the caller so gfs2_trans_end() can report where we started. */
tr->tr_ip = (unsigned long)__builtin_return_address(0);
tr->tr_blocks = blocks;
tr->tr_revokes = revokes;
/*
 * Log reservation: 1 block always, plus 6 + blocks when buffers are
 * expected, plus the blocks needed to hold the revokes.
 * NOTE(review): the meaning of the constants 1 and 6 is not visible
 * here -- confirm against the log code.
 */
tr->tr_reserved = 1;
if (blocks)
tr->tr_reserved += 6 + blocks;
if (revokes)
tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
sizeof(u64));
INIT_LIST_HEAD(&tr->tr_list_buf);
gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
error = gfs2_glock_nq(&tr->tr_t_gh);
if (error)
goto fail_holder_uninit;
if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
/* Flag the holder GL_NOCACHE before it is dequeued below. */
tr->tr_t_gh.gh_flags |= GL_NOCACHE;
error = -EROFS;
goto fail_gunlock;
}
error = gfs2_log_reserve(sdp, tr->tr_reserved);
if (error)
goto fail_gunlock;
current->journal_info = tr;
return 0;
/* Unwind in reverse order of acquisition. */
fail_gunlock:
gfs2_glock_dq(&tr->tr_t_gh);
fail_holder_uninit:
gfs2_holder_uninit(&tr->tr_t_gh);
kfree(tr);
return error;
}
/**
 * gfs2_trans_end - finish the current task's transaction
 * @sdp: the filesystem
 *
 * Detaches the transaction from current->journal_info.  An untouched
 * transaction only gives back its log reservation; a touched one is checked
 * against the buffer/revoke counts promised to gfs2_trans_begin() and then
 * committed to the log.  In both cases the transaction glock is dropped and
 * the transaction freed.  After a commit on an MS_SYNCHRONOUS mount the log
 * is flushed as well.
 */
void gfs2_trans_end(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr = current->journal_info;
	int committed;

	BUG_ON(!tr);
	current->journal_info = NULL;

	/* Sampled up front: tr is freed before the flush decision below. */
	committed = tr->tr_touched ? 1 : 0;

	if (committed) {
		if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) {
			fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u ",
			       tr->tr_num_buf, tr->tr_blocks);
			print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
		}
		if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) {
			fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u ",
			       tr->tr_num_revoke, tr->tr_revokes);
			print_symbol(KERN_WARNING "GFS2: Transaction created at: %s\n", tr->tr_ip);
		}
		gfs2_log_commit(sdp, tr);
	} else {
		gfs2_log_release(sdp, tr->tr_reserved);
	}

	gfs2_glock_dq(&tr->tr_t_gh);
	gfs2_holder_uninit(&tr->tr_t_gh);
	kfree(tr);

	if (committed && (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS))
		gfs2_log_flush(sdp, NULL);
}
/**
 * gfs2_trans_add_gl - add a glock's log element via lops_add()
 * @gl: the glock
 */
void gfs2_trans_add_gl(struct gfs2_glock *gl)
{
lops_add(gl->gl_sbd, &gl->gl_le);
}
/**
 * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction
 * @gl: the glock the buffer belongs to
 * @bh: The buffer to add
 * @meta: True in the case of adding metadata
 *
 * A buffer without bufdata gets one attached first; a buffer that already
 * has bufdata must belong to @gl (asserted).
 */
void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta)
{
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_bufdata *bd = bh->b_private;

	if (!bd) {
		gfs2_attach_bufdata(gl, bh, meta);
		bd = bh->b_private;
	} else {
		gfs2_assert(sdp, bd->bd_gl == gl);
	}

	lops_add(sdp, &bd->bd_le);
}
/**
 * gfs2_trans_add_revoke - queue a revoke for a block
 * @sdp: the filesystem
 * @blkno: the block number to revoke
 *
 * The revoke is allocated with __GFP_NOFAIL, so the result is used without
 * a NULL check.
 */
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno)
{
	struct gfs2_revoke *rv;

	rv = kmalloc(sizeof(*rv), GFP_NOFS | __GFP_NOFAIL);

	lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
	rv->rv_blkno = blkno;
	lops_add(sdp, &rv->rv_le);
}
/**
 * gfs2_trans_add_unrevoke - cancel a pending revoke for a block
 * @sdp: the filesystem
 * @blkno: the block number
 *
 * Searches sd_log_le_revoke under the log lock for the first revoke of
 * @blkno.  If one is found it is unlinked, sd_log_num_revoke is decremented,
 * the revoke is freed and the current transaction's tr_num_revoke_rm is
 * incremented.  Nothing happens when no revoke matches.
 */
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno)
{
	struct gfs2_revoke *rv;
	struct gfs2_revoke *hit = NULL;
	struct gfs2_trans *tr;

	gfs2_log_lock(sdp);
	list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) {
		if (rv->rv_blkno != blkno)
			continue;

		list_del(&rv->rv_le.le_list);
		gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke);
		sdp->sd_log_num_revoke--;
		hit = rv;
		break;
	}
	gfs2_log_unlock(sdp);

	if (!hit)
		return;

	/* Free and account outside the log lock. */
	tr = current->journal_info;
	kfree(hit);
	tr->tr_num_revoke_rm++;
}
/**
 * gfs2_trans_add_rg - add a resource group's log element via lops_add()
 * @rgd: the resource group
 */
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
{
lops_add(rgd->rd_sbd, &rgd->rd_le);
}

39
fs/gfs2/trans.h Normal file
View File

@@ -0,0 +1,39 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __TRANS_DOT_H__
#define __TRANS_DOT_H__
#include <linux/buffer_head.h>
struct gfs2_sbd;
struct gfs2_rgrpd;
struct gfs2_glock;
/*
 * Per-item reservation constants.
 * NOTE(review): presumably block counts that callers sum to build the
 * @blocks argument of gfs2_trans_begin() -- confirm at the call sites.
 */
#define RES_DINODE 1
#define RES_INDIRECT 1
#define RES_JDATA 1
#define RES_DATA 1
#define RES_LEAF 1
#define RES_RG_BIT 2
#define RES_EATTR 1
#define RES_STATFS 1
#define RES_QUOTA 2
/* Start / finish the transaction attached to current->journal_info. */
int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
unsigned int revokes);
void gfs2_trans_end(struct gfs2_sbd *sdp);
/* Add individual items to the log via the lops layer. */
void gfs2_trans_add_gl(struct gfs2_glock *gl);
void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno);
void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno);
void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
#endif /* __TRANS_DOT_H__ */

244
fs/gfs2/util.c Normal file
View File

@@ -0,0 +1,244 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
#include <asm/uaccess.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "lm.h"
#include "util.h"
/*
 * Slab cache pointers shared across GFS2 (declared extern in util.h).
 * NOTE(review): they are only defined here; creation and destruction happen
 * elsewhere -- presumably in the module init/exit code.
 */
struct kmem_cache *gfs2_glock_cachep __read_mostly;
struct kmem_cache *gfs2_inode_cachep __read_mostly;
struct kmem_cache *gfs2_bufdata_cachep __read_mostly;
/**
 * gfs2_assert_i - log a fatal assertion failure for a filesystem
 * @sdp: the filesystem
 *
 * Only prints the KERN_EMERG message; the gfs2_assert() macro in util.h
 * calls BUG() afterwards.
 */
void gfs2_assert_i(struct gfs2_sbd *sdp)
{
printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n",
sdp->sd_fsname);
}
/**
 * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
 * @sdp: the filesystem
 * @assertion: text of the failed assertion
 * @function: name of the function containing the assertion
 * @file: source file containing the assertion
 * @line: source line of the assertion
 *
 * Requests a withdraw with a message naming the assertion and its location,
 * then dumps the stack.
 *
 * Returns: -1 if this call withdrew the machine,
 *          -2 if it was already withdrawn
 */
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
			   const char *function, char *file, unsigned int line)
{
	int withdrew;

	withdrew = gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: assertion \"%s\" failed\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname, assertion,
		sdp->sd_fsname, function, file, line);
	dump_stack();

	if (withdrew)
		return -1;
	return -2;
}
/**
 * gfs2_assert_warn_i - Print a message to the console if @assertion is false
 * @sdp: the filesystem
 * @assertion: text of the failed assertion
 * @function: name of the function containing the assertion
 * @file: source file containing the assertion
 * @line: source line of the assertion
 *
 * Warnings are rate limited: nothing is printed until gt_complain_secs
 * seconds have passed since sd_last_warning.  When the ar_debug mount
 * argument is set the warning ends in BUG(); otherwise the stack is dumped
 * and sd_last_warning is updated.
 *
 * Returns: -1 if we printed something
 *          -2 if we didn't
 */
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
		       const char *function, char *file, unsigned int line)
{
	unsigned long quiet_until;

	quiet_until = sdp->sd_last_warning +
		      gfs2_tune_get(sdp, gt_complain_secs) * HZ;
	if (time_before(jiffies, quiet_until))
		return -2;

	printk(KERN_WARNING
	       "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
	       "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
	       sdp->sd_fsname, assertion,
	       sdp->sd_fsname, function, file, line);

	if (!sdp->sd_args.ar_debug)
		dump_stack();
	else
		BUG();

	sdp->sd_last_warning = jiffies;

	return -1;
}
/**
 * gfs2_consist_i - Flag a filesystem consistency error and withdraw
 * @sdp: the filesystem
 * @cluster_wide: not used by this function
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * Returns: the result of gfs2_lm_withdraw():
 *          -1 if this call withdrew the machine,
 *          0 if it was already withdrawn
 */
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
		   char *file, unsigned int line)
{
	return gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: filesystem consistency error\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, function, file, line);
}
/**
 * gfs2_consist_inode_i - Flag an inode consistency error and withdraw
 * @ip: the inode found to be inconsistent
 * @cluster_wide: not used by this function
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * The message includes the inode's formal inode number and block address.
 *
 * Returns: the result of gfs2_lm_withdraw():
 *          -1 if this call withdrew the machine,
 *          0 if it was already withdrawn
 */
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
			 const char *function, char *file, unsigned int line)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned long long formal_ino = ip->i_num.no_formal_ino;
	unsigned long long addr = ip->i_num.no_addr;

	return gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: filesystem consistency error\n"
		"GFS2: fsid=%s: inode = %llu %llu\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, formal_ino, addr,
		sdp->sd_fsname, function, file, line);
}
/**
 * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
 * @rgd: the resource group found to be inconsistent
 * @cluster_wide: not used by this function
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * The message includes the resource group's ri_addr.
 *
 * Returns: the result of gfs2_lm_withdraw():
 *          -1 if this call withdrew the machine,
 *          0 if it was already withdrawn
 */
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
			 const char *function, char *file, unsigned int line)
{
	struct gfs2_sbd *sdp = rgd->rd_sbd;
	unsigned long long rg_addr = rgd->rd_ri.ri_addr;

	return gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: filesystem consistency error\n"
		"GFS2: fsid=%s: RG = %llu\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, rg_addr,
		sdp->sd_fsname, function, file, line);
}
/**
 * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw
 * @sdp: the filesystem
 * @bh: the buffer holding the bad metadata block
 * @type: short description of what was wrong, printed in the message
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * Returns: -1 if this call withdrew the machine,
 *          -2 if it was already withdrawn
 */
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *type, const char *function, char *file,
		       unsigned int line)
{
	unsigned long long blkno = bh->b_blocknr;
	int withdrew;

	withdrew = gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: invalid metadata block\n"
		"GFS2: fsid=%s: bh = %llu (%s)\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, blkno, type,
		sdp->sd_fsname, function, file, line);

	if (withdrew)
		return -1;
	return -2;
}
/**
 * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw
 * @sdp: the filesystem
 * @bh: the buffer holding the bad metadata block
 * @type: the metadata type that was expected
 * @t: the metadata type that was found
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * Returns: -1 if this call withdrew the machine,
 *          -2 if it was already withdrawn
 */
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
			   u16 type, u16 t, const char *function,
			   char *file, unsigned int line)
{
	unsigned long long blkno = bh->b_blocknr;
	int withdrew;

	withdrew = gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: invalid metadata block\n"
		"GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, blkno, type, t,
		sdp->sd_fsname, function, file, line);

	if (withdrew)
		return -1;
	return -2;
}
/**
 * gfs2_io_error_i - Flag an I/O error and withdraw
 * @sdp: the filesystem
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * Returns: the result of gfs2_lm_withdraw():
 *          -1 if this call withdrew the machine,
 *          0 if it was already withdrawn
 */
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
		    unsigned int line)
{
	return gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: I/O error\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, function, file, line);
}
/**
 * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw
 * @sdp: the filesystem
 * @bh: the buffer the I/O error occurred on
 * @function: name of the reporting function
 * @file: source file of the report
 * @line: source line of the report
 *
 * The message includes the buffer's block number.
 *
 * Returns: the result of gfs2_lm_withdraw():
 *          -1 if this call withdrew the machine,
 *          0 if it was already withdrawn
 */
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line)
{
	unsigned long long blkno = bh->b_blocknr;

	return gfs2_lm_withdraw(sdp,
		"GFS2: fsid=%s: fatal: I/O error\n"
		"GFS2: fsid=%s: block = %llu\n"
		"GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
		sdp->sd_fsname,
		sdp->sd_fsname, blkno,
		sdp->sd_fsname, function, file, line);
}
/**
 * gfs2_icbit_munge - set or clear one bit in a chunked bitmap
 * @sdp: the filesystem (used for the assertion)
 * @bitmap: array of chunk pointers, each chunk covering 8 * PAGE_SIZE bits
 * @bit: index of the bit across all chunks
 * @new_value: non-zero to set the bit, zero to clear it
 *
 * Asserts (with withdraw) that the bit actually changes state; the bit is
 * written regardless of the assertion's outcome.
 */
void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
		      unsigned int bit, int new_value)
{
	unsigned int chunk = bit / (8 * PAGE_SIZE);
	unsigned int within = bit % (8 * PAGE_SIZE);
	unsigned char *byte = &bitmap[chunk][within / 8];
	unsigned char mask = 1 << (within % 8);
	int old_value = *byte & mask;

	/* Expression text is stringified into the withdraw message. */
	gfs2_assert_withdraw(sdp, !old_value != !new_value);

	if (new_value)
		*byte |= mask;
	else
		*byte &= ~mask;
}

168
fs/gfs2/util.h Normal file
View File

@@ -0,0 +1,168 @@
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU General Public License version 2.
*/
#ifndef __UTIL_DOT_H__
#define __UTIL_DOT_H__
#include "incore.h"
/*
 * printk() wrappers that prefix every message with "GFS2: fsid=<fsname>: ".
 * @fs is a struct gfs2_sbd pointer (its sd_fsname is printed); @fmt must be
 * a string literal because it is concatenated with the prefix.
 */
#define fs_printk(level, fs, fmt, arg...) \
printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg)
#define fs_info(fs, fmt, arg...) \
fs_printk(KERN_INFO , fs , fmt , ## arg)
#define fs_warn(fs, fmt, arg...) \
fs_printk(KERN_WARNING , fs , fmt , ## arg)
#define fs_err(fs, fmt, arg...) \
fs_printk(KERN_ERR, fs , fmt , ## arg)
void gfs2_assert_i(struct gfs2_sbd *sdp);
/*
 * gfs2_assert(sdp, assertion): hard assertion.  When @assertion is false,
 * log it through gfs2_assert_i() and then BUG().  Statement macro; it has
 * no value.
 */
#define gfs2_assert(sdp, assertion) \
do { \
if (unlikely(!(assertion))) { \
gfs2_assert_i(sdp); \
BUG(); \
} \
} while (0)
int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
const char *function, char *file, unsigned int line);
/*
 * gfs2_assert_withdraw(sdp, assertion): evaluates to 0 when @assertion
 * holds; otherwise calls gfs2_assert_withdraw_i() with the stringified
 * assertion and call-site location and evaluates to its (non-zero) result.
 */
#define gfs2_assert_withdraw(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
__FUNCTION__, __FILE__, __LINE__))
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
const char *function, char *file, unsigned int line);
/*
 * gfs2_assert_warn(sdp, assertion): evaluates to 0 when @assertion holds;
 * otherwise calls gfs2_assert_warn_i() with the stringified assertion and
 * call-site location and evaluates to its (non-zero) result.
 */
#define gfs2_assert_warn(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
__FUNCTION__, __FILE__, __LINE__))
/*
 * Consistency-error reporters.  Each macro passes 0 for cluster_wide and
 * supplies the call-site function, file and line automatically.
 */
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist(sdp) \
gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist_inode(ip) \
gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist_rgrpd(rgd) \
gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *type, const char *function,
		       char *file, unsigned int line);
/*
 * Check that the buffer starts with a metadata header carrying GFS2_MAGIC.
 * Returns 0 when the magic matches, otherwise the result of
 * gfs2_meta_check_ii() (which reports the block and withdraws).
 */
static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
				    struct buffer_head *bh,
				    const char *function,
				    char *file, unsigned int line)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;

	if (likely(be32_to_cpu(mh->mh_magic) == GFS2_MAGIC))
		return 0;

	return gfs2_meta_check_ii(sdp, bh, "magic number", function,
				  file, line);
}
/* Call-site wrapper: supplies function, file and line automatically. */
#define gfs2_meta_check(sdp, bh) \
gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
			   u16 type, u16 t,
			   const char *function,
			   char *file, unsigned int line);
/*
 * Check both the magic number and the metadata type of a buffer's header.
 * Returns 0 when the magic is GFS2_MAGIC and the type equals @type;
 * otherwise the result of gfs2_meta_check_ii() (bad magic) or
 * gfs2_metatype_check_ii() (wrong type).
 *
 * NOTE(review): mh_type is converted with be32_to_cpu() but held in a u16,
 * so only its low 16 bits take part in the comparison -- confirm that this
 * is intended.
 */
static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
					struct buffer_head *bh,
					u16 type,
					const char *function,
					char *file, unsigned int line)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
	u16 t;

	if (unlikely(be32_to_cpu(mh->mh_magic) != GFS2_MAGIC))
		return gfs2_meta_check_ii(sdp, bh, "magic number", function,
					  file, line);

	t = be32_to_cpu(mh->mh_type);
	if (likely(t == type))
		return 0;

	return gfs2_metatype_check_ii(sdp, bh, type, t, function,
				      file, line);
}
/* Call-site wrapper: supplies function, file and line automatically. */
#define gfs2_metatype_check(sdp, bh, type) \
gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
/*
 * Write @type and @format into the metadata header at the start of the
 * buffer, converting both to big-endian 32-bit values.
 */
static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
				     u16 format)
{
	struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;

	mh->mh_type = cpu_to_be32(type);
	mh->mh_format = cpu_to_be32(format);
}
/*
 * I/O error reporters.  The macros supply the call-site function, file and
 * line and evaluate to the int returned by the corresponding *_i() function.
 *
 * The expansions deliberately carry no trailing semicolon, so the macros can
 * be used as expressions (e.g. inside an if condition) and in an unbraced
 * if/else without producing an empty extra statement.
 */
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
		    char *file, unsigned int line);
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__)
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       const char *function, char *file, unsigned int line);
#define gfs2_io_error_bh(sdp, bh) \
gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
extern struct kmem_cache *gfs2_glock_cachep;
extern struct kmem_cache *gfs2_inode_cachep;
extern struct kmem_cache *gfs2_bufdata_cachep;
/*
 * Read one tunable under gt_spin, the same lock the sysfs store routines
 * hold while updating tunables.
 */
static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
					   unsigned int *p)
{
	unsigned int value;

	spin_lock(&gt->gt_spin);
	value = *p;
	spin_unlock(&gt->gt_spin);

	return value;
}
/* gfs2_tune_get(sdp, field): locked read of sdp->sd_tune.<field>. */
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap,
unsigned int bit, int new_value);
#endif /* __UTIL_DOT_H__ */