Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

16
fs/jfs/Makefile Normal file
View File

@@ -0,0 +1,16 @@
#
# Makefile for the Linux JFS filesystem routines.
#
obj-$(CONFIG_JFS_FS) += jfs.o
jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
jfs_unicode.o jfs_dtree.o jfs_inode.o \
jfs_extent.o symlink.o jfs_metapage.o \
jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o \
resize.o xattr.o ioctl.o
jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
EXTRA_CFLAGS += -D_JFS_4K

246
fs/jfs/acl.c Normal file
View File

@@ -0,0 +1,246 @@
/*
* Copyright (C) International Business Machines Corp., 2002-2004
* Copyright (C) Andreas Gruenbacher, 2001
* Copyright (C) Linus Torvalds, 1991, 1992
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/quotaops.h>
#include <linux/posix_acl_xattr.h>
#include "jfs_incore.h"
#include "jfs_txnmgr.h"
#include "jfs_xattr.h"
#include "jfs_acl.h"
static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
{
struct posix_acl *acl;
char *ea_name;
struct jfs_inode_info *ji = JFS_IP(inode);
struct posix_acl **p_acl;
int size;
char *value = NULL;
switch(type) {
case ACL_TYPE_ACCESS:
ea_name = POSIX_ACL_XATTR_ACCESS;
p_acl = &ji->i_acl;
break;
case ACL_TYPE_DEFAULT:
ea_name = POSIX_ACL_XATTR_DEFAULT;
p_acl = &ji->i_default_acl;
break;
default:
return ERR_PTR(-EINVAL);
}
if (*p_acl != JFS_ACL_NOT_CACHED)
return posix_acl_dup(*p_acl);
size = __jfs_getxattr(inode, ea_name, NULL, 0);
if (size > 0) {
value = kmalloc(size, GFP_KERNEL);
if (!value)
return ERR_PTR(-ENOMEM);
size = __jfs_getxattr(inode, ea_name, value, size);
}
if (size < 0) {
if (size == -ENODATA) {
*p_acl = NULL;
acl = NULL;
} else
acl = ERR_PTR(size);
} else {
acl = posix_acl_from_xattr(value, size);
if (!IS_ERR(acl))
*p_acl = posix_acl_dup(acl);
}
kfree(value);
return acl;
}
static int jfs_set_acl(tid_t tid, struct inode *inode, int type,
struct posix_acl *acl)
{
char *ea_name;
struct jfs_inode_info *ji = JFS_IP(inode);
struct posix_acl **p_acl;
int rc;
int size = 0;
char *value = NULL;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
switch(type) {
case ACL_TYPE_ACCESS:
ea_name = POSIX_ACL_XATTR_ACCESS;
p_acl = &ji->i_acl;
break;
case ACL_TYPE_DEFAULT:
ea_name = POSIX_ACL_XATTR_DEFAULT;
p_acl = &ji->i_default_acl;
if (!S_ISDIR(inode->i_mode))
return acl ? -EACCES : 0;
break;
default:
return -EINVAL;
}
if (acl) {
size = posix_acl_xattr_size(acl->a_count);
value = kmalloc(size, GFP_KERNEL);
if (!value)
return -ENOMEM;
rc = posix_acl_to_xattr(acl, value, size);
if (rc < 0)
goto out;
}
rc = __jfs_setxattr(tid, inode, ea_name, value, size, 0);
out:
kfree(value);
if (!rc) {
if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED))
posix_acl_release(*p_acl);
*p_acl = posix_acl_dup(acl);
}
return rc;
}
static int jfs_check_acl(struct inode *inode, int mask)
{
struct jfs_inode_info *ji = JFS_IP(inode);
if (ji->i_acl == JFS_ACL_NOT_CACHED) {
struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl))
return PTR_ERR(acl);
posix_acl_release(acl);
}
if (ji->i_acl)
return posix_acl_permission(inode, ji->i_acl, mask);
return -EAGAIN;
}
int jfs_permission(struct inode *inode, int mask, struct nameidata *nd)
{
return generic_permission(inode, mask, jfs_check_acl);
}
int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
struct posix_acl *clone;
mode_t mode;
int rc = 0;
if (S_ISLNK(inode->i_mode))
return 0;
acl = jfs_get_acl(dir, ACL_TYPE_DEFAULT);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
if (S_ISDIR(inode->i_mode)) {
rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
if (rc)
goto cleanup;
}
clone = posix_acl_clone(acl, GFP_KERNEL);
if (!clone) {
rc = -ENOMEM;
goto cleanup;
}
mode = inode->i_mode;
rc = posix_acl_create_masq(clone, &mode);
if (rc >= 0) {
inode->i_mode = mode;
if (rc > 0)
rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS,
clone);
}
posix_acl_release(clone);
cleanup:
posix_acl_release(acl);
} else
inode->i_mode &= ~current->fs->umask;
JFS_IP(inode)->mode2 = (JFS_IP(inode)->mode2 & 0xffff0000) |
inode->i_mode;
return rc;
}
static int jfs_acl_chmod(struct inode *inode)
{
struct posix_acl *acl, *clone;
int rc;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
clone = posix_acl_clone(acl, GFP_KERNEL);
posix_acl_release(acl);
if (!clone)
return -ENOMEM;
rc = posix_acl_chmod_masq(clone, inode->i_mode);
if (!rc) {
tid_t tid = txBegin(inode->i_sb, 0);
mutex_lock(&JFS_IP(inode)->commit_mutex);
rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone);
if (!rc)
rc = txCommit(tid, 1, &inode, 0);
txEnd(tid);
mutex_unlock(&JFS_IP(inode)->commit_mutex);
}
posix_acl_release(clone);
return rc;
}
int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = dentry->d_inode;
int rc;
rc = inode_change_ok(inode, iattr);
if (rc)
return rc;
if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
(iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
if (DQUOT_TRANSFER(inode, iattr))
return -EDQUOT;
}
rc = inode_setattr(inode, iattr);
if (!rc && (iattr->ia_valid & ATTR_MODE))
rc = jfs_acl_chmod(inode);
return rc;
}

49
fs/jfs/endian24.h Normal file
View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) International Business Machines Corp., 2001
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_ENDIAN24
#define _H_ENDIAN24
/*
* endian24.h:
*
* Endian conversion for 24-byte data
*
*/
#define __swab24(x) \
({ \
__u32 __x = (x); \
((__u32)( \
((__x & (__u32)0x000000ffUL) << 16) | \
(__x & (__u32)0x0000ff00UL) | \
((__x & (__u32)0x00ff0000UL) >> 16) )); \
})
#if (defined(__KERNEL__) && defined(__LITTLE_ENDIAN)) || (defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN))
#define __cpu_to_le24(x) ((__u32)(x))
#define __le24_to_cpu(x) ((__u32)(x))
#else
#define __cpu_to_le24(x) __swab24(x)
#define __le24_to_cpu(x) __swab24(x)
#endif
#ifdef __KERNEL__
#define cpu_to_le24 __cpu_to_le24
#define le24_to_cpu __le24_to_cpu
#endif
#endif /* !_H_ENDIAN24 */

117
fs/jfs/file.c Normal file
View File

@@ -0,0 +1,117 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_dmap.h"
#include "jfs_txnmgr.h"
#include "jfs_xattr.h"
#include "jfs_acl.h"
#include "jfs_debug.h"
int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
int rc = 0;
if (!(inode->i_state & I_DIRTY) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
return rc;
}
rc |= jfs_commit_inode(inode, 1);
return rc ? -EIO : 0;
}
static int jfs_open(struct inode *inode, struct file *file)
{
int rc;
if ((rc = generic_file_open(inode, file)))
return rc;
/*
* We attempt to allow only one "active" file open per aggregate
* group. Otherwise, appending to files in parallel can cause
* fragmentation within the files.
*
* If the file is empty, it was probably just created and going
* to be written to. If it has a size, we'll hold off until the
* file is actually grown.
*/
if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE &&
(inode->i_size == 0)) {
struct jfs_inode_info *ji = JFS_IP(inode);
spin_lock_irq(&ji->ag_lock);
if (ji->active_ag == -1) {
ji->active_ag = ji->agno;
atomic_inc(
&JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
}
spin_unlock_irq(&ji->ag_lock);
}
return 0;
}
static int jfs_release(struct inode *inode, struct file *file)
{
struct jfs_inode_info *ji = JFS_IP(inode);
spin_lock_irq(&ji->ag_lock);
if (ji->active_ag != -1) {
struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
atomic_dec(&bmap->db_active[ji->active_ag]);
ji->active_ag = -1;
}
spin_unlock_irq(&ji->ag_lock);
return 0;
}
const struct inode_operations jfs_file_inode_operations = {
.truncate = jfs_truncate,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
.listxattr = jfs_listxattr,
.removexattr = jfs_removexattr,
#ifdef CONFIG_JFS_POSIX_ACL
.setattr = jfs_setattr,
.permission = jfs_permission,
#endif
};
const struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
.write = do_sync_write,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.fsync = jfs_fsync,
.release = jfs_release,
.ioctl = jfs_ioctl,
};

365
fs/jfs/inode.c Normal file
View File

@@ -0,0 +1,365 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/mpage.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
#include "jfs_imap.h"
#include "jfs_extent.h"
#include "jfs_unicode.h"
#include "jfs_debug.h"
void jfs_read_inode(struct inode *inode)
{
if (diRead(inode)) {
make_bad_inode(inode);
return;
}
if (S_ISREG(inode->i_mode)) {
inode->i_op = &jfs_file_inode_operations;
inode->i_fop = &jfs_file_operations;
inode->i_mapping->a_ops = &jfs_aops;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &jfs_dir_inode_operations;
inode->i_fop = &jfs_dir_operations;
} else if (S_ISLNK(inode->i_mode)) {
if (inode->i_size >= IDATASIZE) {
inode->i_op = &page_symlink_inode_operations;
inode->i_mapping->a_ops = &jfs_aops;
} else
inode->i_op = &jfs_symlink_inode_operations;
} else {
inode->i_op = &jfs_file_inode_operations;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
}
jfs_set_inode_flags(inode);
}
/*
* Workhorse of both fsync & write_inode
*/
int jfs_commit_inode(struct inode *inode, int wait)
{
int rc = 0;
tid_t tid;
static int noisy = 5;
jfs_info("In jfs_commit_inode, inode = 0x%p", inode);
/*
* Don't commit if inode has been committed since last being
* marked dirty, or if it has been deleted.
*/
if (inode->i_nlink == 0 || !test_cflag(COMMIT_Dirty, inode))
return 0;
if (isReadOnly(inode)) {
/* kernel allows writes to devices on read-only
* partitions and may think inode is dirty
*/
if (!special_file(inode->i_mode) && noisy) {
jfs_err("jfs_commit_inode(0x%p) called on "
"read-only volume", inode);
jfs_err("Is remount racy?");
noisy--;
}
return 0;
}
tid = txBegin(inode->i_sb, COMMIT_INODE);
mutex_lock(&JFS_IP(inode)->commit_mutex);
/*
* Retest inode state after taking commit_mutex
*/
if (inode->i_nlink && test_cflag(COMMIT_Dirty, inode))
rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
txEnd(tid);
mutex_unlock(&JFS_IP(inode)->commit_mutex);
return rc;
}
int jfs_write_inode(struct inode *inode, int wait)
{
if (test_cflag(COMMIT_Nolink, inode))
return 0;
/*
* If COMMIT_DIRTY is not set, the inode isn't really dirty.
* It has been committed since the last change, but was still
* on the dirty inode list.
*/
if (!test_cflag(COMMIT_Dirty, inode)) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, wait);
return 0;
}
if (jfs_commit_inode(inode, wait)) {
jfs_err("jfs_write_inode: jfs_commit_inode failed!");
return -EIO;
} else
return 0;
}
void jfs_delete_inode(struct inode *inode)
{
jfs_info("In jfs_delete_inode, inode = 0x%p", inode);
if (!is_bad_inode(inode) &&
(JFS_IP(inode)->fileset == FILESYSTEM_I)) {
truncate_inode_pages(&inode->i_data, 0);
if (test_cflag(COMMIT_Freewmap, inode))
jfs_free_zero_link(inode);
diFree(inode);
/*
* Free the inode from the quota allocation.
*/
DQUOT_INIT(inode);
DQUOT_FREE_INODE(inode);
DQUOT_DROP(inode);
}
clear_inode(inode);
}
void jfs_dirty_inode(struct inode *inode)
{
static int noisy = 5;
if (isReadOnly(inode)) {
if (!special_file(inode->i_mode) && noisy) {
/* kernel allows writes to devices on read-only
* partitions and may try to mark inode dirty
*/
jfs_err("jfs_dirty_inode called on read-only volume");
jfs_err("Is remount racy?");
noisy--;
}
return;
}
set_cflag(COMMIT_Dirty, inode);
}
int jfs_get_block(struct inode *ip, sector_t lblock,
struct buffer_head *bh_result, int create)
{
s64 lblock64 = lblock;
int rc = 0;
xad_t xad;
s64 xaddr;
int xflag;
s32 xlen = bh_result->b_size >> ip->i_blkbits;
/*
* Take appropriate lock on inode
*/
if (create)
IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
else
IREAD_LOCK(ip, RDWRLOCK_NORMAL);
if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
(!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
xaddr) {
if (xflag & XAD_NOTRECORDED) {
if (!create)
/*
* Allocated but not recorded, read treats
* this as a hole
*/
goto unlock;
#ifdef _JFS_4K
XADoffset(&xad, lblock64);
XADlength(&xad, xlen);
XADaddress(&xad, xaddr);
#else /* _JFS_4K */
/*
* As long as block size = 4K, this isn't a problem.
* We should mark the whole page not ABNR, but how
* will we know to mark the other blocks BH_New?
*/
BUG();
#endif /* _JFS_4K */
rc = extRecord(ip, &xad);
if (rc)
goto unlock;
set_buffer_new(bh_result);
}
map_bh(bh_result, ip->i_sb, xaddr);
bh_result->b_size = xlen << ip->i_blkbits;
goto unlock;
}
if (!create)
goto unlock;
/*
* Allocate a new block
*/
#ifdef _JFS_4K
if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
goto unlock;
rc = extAlloc(ip, xlen, lblock64, &xad, false);
if (rc)
goto unlock;
set_buffer_new(bh_result);
map_bh(bh_result, ip->i_sb, addressXAD(&xad));
bh_result->b_size = lengthXAD(&xad) << ip->i_blkbits;
#else /* _JFS_4K */
/*
* We need to do whatever it takes to keep all but the last buffers
* in 4K pages - see jfs_write.c
*/
BUG();
#endif /* _JFS_4K */
unlock:
/*
* Release lock on inode
*/
if (create)
IWRITE_UNLOCK(ip);
else
IREAD_UNLOCK(ip);
return rc;
}
static int jfs_writepage(struct page *page, struct writeback_control *wbc)
{
return nobh_writepage(page, jfs_get_block, wbc);
}
static int jfs_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
return mpage_writepages(mapping, wbc, jfs_get_block);
}
static int jfs_readpage(struct file *file, struct page *page)
{
return mpage_readpage(page, jfs_get_block);
}
static int jfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
}
static int jfs_prepare_write(struct file *file,
struct page *page, unsigned from, unsigned to)
{
return nobh_prepare_write(page, from, to, jfs_get_block);
}
static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
{
return generic_block_bmap(mapping, block, jfs_get_block);
}
static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset, unsigned long nr_segs)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, jfs_get_block, NULL);
}
const struct address_space_operations jfs_aops = {
.readpage = jfs_readpage,
.readpages = jfs_readpages,
.writepage = jfs_writepage,
.writepages = jfs_writepages,
.sync_page = block_sync_page,
.prepare_write = jfs_prepare_write,
.commit_write = nobh_commit_write,
.bmap = jfs_bmap,
.direct_IO = jfs_direct_IO,
};
/*
* Guts of jfs_truncate. Called with locks already held. Can be called
* with directory for truncating directory index table.
*/
void jfs_truncate_nolock(struct inode *ip, loff_t length)
{
loff_t newsize;
tid_t tid;
ASSERT(length >= 0);
if (test_cflag(COMMIT_Nolink, ip)) {
xtTruncate(0, ip, length, COMMIT_WMAP);
return;
}
do {
tid = txBegin(ip->i_sb, 0);
/*
* The commit_mutex cannot be taken before txBegin.
* txBegin may block and there is a chance the inode
* could be marked dirty and need to be committed
* before txBegin unblocks
*/
mutex_lock(&JFS_IP(ip)->commit_mutex);
newsize = xtTruncate(tid, ip, length,
COMMIT_TRUNCATE | COMMIT_PWMAP);
if (newsize < 0) {
txEnd(tid);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
break;
}
ip->i_mtime = ip->i_ctime = CURRENT_TIME;
mark_inode_dirty(ip);
txCommit(tid, 1, &ip, 0);
txEnd(tid);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
} while (newsize > length); /* Truncate isn't always atomic */
}
void jfs_truncate(struct inode *ip)
{
jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
nobh_truncate_page(ip->i_mapping, ip->i_size);
IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
jfs_truncate_nolock(ip, ip->i_size);
IWRITE_UNLOCK(ip);
}

107
fs/jfs/ioctl.c Normal file
View File

@@ -0,0 +1,107 @@
/*
* linux/fs/jfs/ioctl.c
*
* Copyright (C) 2006 Herbert Poetzl
* adapted from Remy Card's ext2/ioctl.c
*/
#include <linux/fs.h>
#include <linux/ctype.h>
#include <linux/capability.h>
#include <linux/time.h>
#include <linux/sched.h>
#include <asm/current.h>
#include <asm/uaccess.h>
#include "jfs_incore.h"
#include "jfs_dinode.h"
#include "jfs_inode.h"
static struct {
long jfs_flag;
long ext2_flag;
} jfs_map[] = {
{JFS_NOATIME_FL, FS_NOATIME_FL},
{JFS_DIRSYNC_FL, FS_DIRSYNC_FL},
{JFS_SYNC_FL, FS_SYNC_FL},
{JFS_SECRM_FL, FS_SECRM_FL},
{JFS_UNRM_FL, FS_UNRM_FL},
{JFS_APPEND_FL, FS_APPEND_FL},
{JFS_IMMUTABLE_FL, FS_IMMUTABLE_FL},
{0, 0},
};
static long jfs_map_ext2(unsigned long flags, int from)
{
int index=0;
long mapped=0;
while (jfs_map[index].jfs_flag) {
if (from) {
if (jfs_map[index].ext2_flag & flags)
mapped |= jfs_map[index].jfs_flag;
} else {
if (jfs_map[index].jfs_flag & flags)
mapped |= jfs_map[index].ext2_flag;
}
index++;
}
return mapped;
}
int jfs_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
unsigned long arg)
{
struct jfs_inode_info *jfs_inode = JFS_IP(inode);
unsigned int flags;
switch (cmd) {
case JFS_IOC_GETFLAGS:
flags = jfs_inode->mode2 & JFS_FL_USER_VISIBLE;
flags = jfs_map_ext2(flags, 0);
return put_user(flags, (int __user *) arg);
case JFS_IOC_SETFLAGS: {
unsigned int oldflags;
if (IS_RDONLY(inode))
return -EROFS;
if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
return -EACCES;
if (get_user(flags, (int __user *) arg))
return -EFAULT;
flags = jfs_map_ext2(flags, 1);
if (!S_ISDIR(inode->i_mode))
flags &= ~JFS_DIRSYNC_FL;
oldflags = jfs_inode->mode2;
/*
* The IMMUTABLE and APPEND_ONLY flags can only be changed by
* the relevant capability.
*/
if ((oldflags & JFS_IMMUTABLE_FL) ||
((flags ^ oldflags) &
(JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
}
flags = flags & JFS_FL_USER_MODIFIABLE;
flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
return 0;
}
default:
return -ENOTTY;
}
}

36
fs/jfs/jfs_acl.h Normal file
View File

@@ -0,0 +1,36 @@
/*
* Copyright (C) International Business Machines Corp., 2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_ACL
#define _H_JFS_ACL
#ifdef CONFIG_JFS_POSIX_ACL
int jfs_permission(struct inode *, int, struct nameidata *);
int jfs_init_acl(tid_t, struct inode *, struct inode *);
int jfs_setattr(struct dentry *, struct iattr *);
#else
static inline int jfs_init_acl(tid_t tid, struct inode *inode,
struct inode *dir)
{
return 0;
}
#endif
#endif /* _H_JFS_ACL */

172
fs/jfs/jfs_btree.h Normal file
View File

@@ -0,0 +1,172 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_BTREE
#define _H_JFS_BTREE
/*
* jfs_btree.h: B+-tree
*
* JFS B+-tree (dtree and xtree) common definitions
*/
/*
* basic btree page - btpage
*
struct btpage {
s64 next; right sibling bn
s64 prev; left sibling bn
u8 flag;
u8 rsrvd[7]; type specific
s64 self; self address
u8 entry[4064];
}; */
/* btpaget_t flag */
#define BT_TYPE 0x07 /* B+-tree index */
#define BT_ROOT 0x01 /* root page */
#define BT_LEAF 0x02 /* leaf page */
#define BT_INTERNAL 0x04 /* internal page */
#define BT_RIGHTMOST 0x10 /* rightmost page */
#define BT_LEFTMOST 0x20 /* leftmost page */
#define BT_SWAPPED 0x80 /* used by fsck for endian swapping */
/* btorder (in inode) */
#define BT_RANDOM 0x0000
#define BT_SEQUENTIAL 0x0001
#define BT_LOOKUP 0x0010
#define BT_INSERT 0x0020
#define BT_DELETE 0x0040
/*
* btree page buffer cache access
*/
#define BT_IS_ROOT(MP) (((MP)->xflag & COMMIT_PAGE) == 0)
/* get page from buffer page */
#define BT_PAGE(IP, MP, TYPE, ROOT)\
(BT_IS_ROOT(MP) ? (TYPE *)&JFS_IP(IP)->ROOT : (TYPE *)(MP)->data)
/* get the page buffer and the page for specified block address */
#define BT_GETPAGE(IP, BN, MP, TYPE, SIZE, P, RC, ROOT)\
{\
if ((BN) == 0)\
{\
MP = (struct metapage *)&JFS_IP(IP)->bxflag;\
P = (TYPE *)&JFS_IP(IP)->ROOT;\
RC = 0;\
}\
else\
{\
MP = read_metapage((IP), BN, SIZE, 1);\
if (MP) {\
RC = 0;\
P = (MP)->data;\
} else {\
P = NULL;\
jfs_err("bread failed!");\
RC = -EIO;\
}\
}\
}
#define BT_MARK_DIRTY(MP, IP)\
{\
if (BT_IS_ROOT(MP))\
mark_inode_dirty(IP);\
else\
mark_metapage_dirty(MP);\
}
/* put the page buffer */
#define BT_PUTPAGE(MP)\
{\
if (! BT_IS_ROOT(MP)) \
release_metapage(MP); \
}
/*
* btree traversal stack
*
* record the path traversed during the search;
* top frame record the leaf page/entry selected.
*/
struct btframe { /* stack frame */
s64 bn; /* 8: */
s16 index; /* 2: */
s16 lastindex; /* 2: unused */
struct metapage *mp; /* 4/8: */
}; /* (16/24) */
struct btstack {
struct btframe *top;
int nsplit;
struct btframe stack[MAXTREEHEIGHT];
};
#define BT_CLR(btstack)\
(btstack)->top = (btstack)->stack
#define BT_STACK_FULL(btstack)\
( (btstack)->top == &((btstack)->stack[MAXTREEHEIGHT-1]))
#define BT_PUSH(BTSTACK, BN, INDEX)\
{\
assert(!BT_STACK_FULL(BTSTACK));\
(BTSTACK)->top->bn = BN;\
(BTSTACK)->top->index = INDEX;\
++(BTSTACK)->top;\
}
#define BT_POP(btstack)\
( (btstack)->top == (btstack)->stack ? NULL : --(btstack)->top )
#define BT_STACK(btstack)\
( (btstack)->top == (btstack)->stack ? NULL : (btstack)->top )
static inline void BT_STACK_DUMP(struct btstack *btstack)
{
int i;
printk("btstack dump:\n");
for (i = 0; i < MAXTREEHEIGHT; i++)
printk(KERN_ERR "bn = %Lx, index = %d\n",
(long long)btstack->stack[i].bn,
btstack->stack[i].index);
}
/* retrieve search results */
#define BT_GETSEARCH(IP, LEAF, BN, MP, TYPE, P, INDEX, ROOT)\
{\
BN = (LEAF)->bn;\
MP = (LEAF)->mp;\
if (BN)\
P = (TYPE *)MP->data;\
else\
P = (TYPE *)&JFS_IP(IP)->ROOT;\
INDEX = (LEAF)->index;\
}
/* put the page buffer of search */
#define BT_PUTSEARCH(BTSTACK)\
{\
if (! BT_IS_ROOT((BTSTACK)->top->mp))\
release_metapage((BTSTACK)->top->mp);\
}
#endif /* _H_JFS_BTREE */

144
fs/jfs/jfs_debug.c Normal file
View File

@@ -0,0 +1,144 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_debug.h"
#ifdef CONFIG_JFS_DEBUG
void dump_mem(char *label, void *data, int length)
{
int i, j;
int *intptr = data;
char *charptr = data;
char buf[10], line[80];
printk("%s: dump of %d bytes of data at 0x%p\n\n", label, length,
data);
for (i = 0; i < length; i += 16) {
line[0] = 0;
for (j = 0; (j < 4) && (i + j * 4 < length); j++) {
sprintf(buf, " %08x", intptr[i / 4 + j]);
strcat(line, buf);
}
buf[0] = ' ';
buf[2] = 0;
for (j = 0; (j < 16) && (i + j < length); j++) {
buf[1] =
isprint(charptr[i + j]) ? charptr[i + j] : '.';
strcat(line, buf);
}
printk("%s\n", line);
}
}
#endif
#ifdef PROC_FS_JFS /* see jfs_debug.h */
static struct proc_dir_entry *base;
#ifdef CONFIG_JFS_DEBUG
static int loglevel_read(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
int len;
len = sprintf(page, "%d\n", jfsloglevel);
len -= off;
*start = page + off;
if (len > count)
len = count;
else
*eof = 1;
if (len < 0)
len = 0;
return len;
}
static int loglevel_write(struct file *file, const char __user *buffer,
unsigned long count, void *data)
{
char c;
if (get_user(c, buffer))
return -EFAULT;
/* yes, I know this is an ASCIIism. --hch */
if (c < '0' || c > '9')
return -EINVAL;
jfsloglevel = c - '0';
return count;
}
#endif
static struct {
const char *name;
read_proc_t *read_fn;
write_proc_t *write_fn;
} Entries[] = {
#ifdef CONFIG_JFS_STATISTICS
{ "lmstats", jfs_lmstats_read, },
{ "txstats", jfs_txstats_read, },
{ "xtstat", jfs_xtstat_read, },
{ "mpstat", jfs_mpstat_read, },
#endif
#ifdef CONFIG_JFS_DEBUG
{ "TxAnchor", jfs_txanchor_read, },
{ "loglevel", loglevel_read, loglevel_write }
#endif
};
#define NPROCENT ARRAY_SIZE(Entries)
void jfs_proc_init(void)
{
int i;
if (!(base = proc_mkdir("jfs", proc_root_fs)))
return;
base->owner = THIS_MODULE;
for (i = 0; i < NPROCENT; i++) {
struct proc_dir_entry *p;
if ((p = create_proc_entry(Entries[i].name, 0, base))) {
p->read_proc = Entries[i].read_fn;
p->write_proc = Entries[i].write_fn;
}
}
}
void jfs_proc_clean(void)
{
int i;
if (base) {
for (i = 0; i < NPROCENT; i++)
remove_proc_entry(Entries[i].name, base);
remove_proc_entry("jfs", proc_root_fs);
}
}
#endif /* PROC_FS_JFS */

124
fs/jfs/jfs_debug.h Normal file
View File

@@ -0,0 +1,124 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_DEBUG
#define _H_JFS_DEBUG
/*
* jfs_debug.h
*
* global debug message, data structure/macro definitions
* under control of CONFIG_JFS_DEBUG, CONFIG_JFS_STATISTICS;
*/
/*
* Create /proc/fs/jfs if procfs is enabled andeither
* CONFIG_JFS_DEBUG or CONFIG_JFS_STATISTICS is defined
*/
#if defined(CONFIG_PROC_FS) && (defined(CONFIG_JFS_DEBUG) || defined(CONFIG_JFS_STATISTICS))
#define PROC_FS_JFS
extern void jfs_proc_init(void);
extern void jfs_proc_clean(void);
#endif
/*
* assert with traditional printf/panic
*/
#define assert(p) do { \
if (!(p)) { \
printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \
__FILE__, __LINE__, #p); \
BUG(); \
} \
} while (0)
/*
* debug ON
* --------
*/
#ifdef CONFIG_JFS_DEBUG
#define ASSERT(p) assert(p)
/* printk verbosity */
#define JFS_LOGLEVEL_ERR 1
#define JFS_LOGLEVEL_WARN 2
#define JFS_LOGLEVEL_DEBUG 3
#define JFS_LOGLEVEL_INFO 4
extern int jfsloglevel;
extern void dump_mem(char *label, void *data, int length);
extern int jfs_txanchor_read(char *, char **, off_t, int, int *, void *);
/* information message: e.g., configuration, major event */
#define jfs_info(fmt, arg...) do { \
if (jfsloglevel >= JFS_LOGLEVEL_INFO) \
printk(KERN_INFO fmt "\n", ## arg); \
} while (0)
/* debug message: ad hoc */
#define jfs_debug(fmt, arg...) do { \
if (jfsloglevel >= JFS_LOGLEVEL_DEBUG) \
printk(KERN_DEBUG fmt "\n", ## arg); \
} while (0)
/* warn message: */
#define jfs_warn(fmt, arg...) do { \
if (jfsloglevel >= JFS_LOGLEVEL_WARN) \
printk(KERN_WARNING fmt "\n", ## arg); \
} while (0)
/* error event message: e.g., i/o error */
#define jfs_err(fmt, arg...) do { \
if (jfsloglevel >= JFS_LOGLEVEL_ERR) \
printk(KERN_ERR fmt "\n", ## arg); \
} while (0)
/*
* debug OFF
* ---------
*/
#else /* CONFIG_JFS_DEBUG */
#define dump_mem(label,data,length) do {} while (0)
#define ASSERT(p) do {} while (0)
#define jfs_info(fmt, arg...) do {} while (0)
#define jfs_debug(fmt, arg...) do {} while (0)
#define jfs_warn(fmt, arg...) do {} while (0)
#define jfs_err(fmt, arg...) do {} while (0)
#endif /* CONFIG_JFS_DEBUG */
/*
* statistics
* ----------
*/
#ifdef CONFIG_JFS_STATISTICS
extern int jfs_lmstats_read(char *, char **, off_t, int, int *, void *);
extern int jfs_txstats_read(char *, char **, off_t, int, int *, void *);
extern int jfs_mpstat_read(char *, char **, off_t, int, int *, void *);
extern int jfs_xtstat_read(char *, char **, off_t, int, int *, void *);
#define INCREMENT(x) ((x)++)
#define DECREMENT(x) ((x)--)
#define HIGHWATERMARK(x,y) ((x) = max((x), (y)))
#else
#define INCREMENT(x)
#define DECREMENT(x)
#define HIGHWATERMARK(x,y)
#endif /* CONFIG_JFS_STATISTICS */
#endif /* _H_JFS_DEBUG */

174
fs/jfs/jfs_dinode.h Normal file
View File

@@ -0,0 +1,174 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2001
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_DINODE
#define _H_JFS_DINODE
/*
* jfs_dinode.h: on-disk inode manager
*/
#define INODESLOTSIZE 128
#define L2INODESLOTSIZE 7
#define log2INODESIZE 9 /* log2(bytes per dinode) */
/*
* on-disk inode : 512 bytes
*
* note: align 64-bit fields on 8-byte boundary.
*/
struct dinode {
/*
* I. base area (128 bytes)
* ------------------------
*
* define generic/POSIX attributes
*/
__le32 di_inostamp; /* 4: stamp to show inode belongs to fileset */
__le32 di_fileset; /* 4: fileset number */
__le32 di_number; /* 4: inode number, aka file serial number */
__le32 di_gen; /* 4: inode generation number */
pxd_t di_ixpxd; /* 8: inode extent descriptor */
__le64 di_size; /* 8: size */
__le64 di_nblocks; /* 8: number of blocks allocated */
__le32 di_nlink; /* 4: number of links to the object */
__le32 di_uid; /* 4: user id of owner */
__le32 di_gid; /* 4: group id of owner */
__le32 di_mode; /* 4: attribute, format and permission */
struct timestruc_t di_atime; /* 8: time last data accessed */
struct timestruc_t di_ctime; /* 8: time last status changed */
struct timestruc_t di_mtime; /* 8: time last data modified */
struct timestruc_t di_otime; /* 8: time created */
dxd_t di_acl; /* 16: acl descriptor */
dxd_t di_ea; /* 16: ea descriptor */
__le32 di_next_index; /* 4: Next available dir_table index */
__le32 di_acltype; /* 4: Type of ACL */
/*
* Extension Areas.
*
* Historically, the inode was partitioned into 4 128-byte areas,
* the last 3 being defined as unions which could have multiple
* uses. The first 96 bytes had been completely unused until
* an index table was added to the directory. It is now more
* useful to describe the last 3/4 of the inode as a single
* union. We would probably be better off redesigning the
* entire structure from scratch, but we don't want to break
* commonality with OS/2's JFS at this time.
*/
union {
struct {
/*
* This table contains the information needed to
* find a directory entry from a 32-bit index.
* If the index is small enough, the table is inline,
* otherwise, an x-tree root overlays this table
*/
struct dir_table_slot _table[12]; /* 96: inline */
dtroot_t _dtroot; /* 288: dtree root */
} _dir; /* (384) */
#define di_dirtable u._dir._table
#define di_dtroot u._dir._dtroot
#define di_parent di_dtroot.header.idotdot
#define di_DASD di_dtroot.header.DASD
struct {
union {
u8 _data[96]; /* 96: unused */
struct {
void *_imap; /* 4: unused */
__le32 _gengen; /* 4: generator */
} _imap;
} _u1; /* 96: */
#define di_gengen u._file._u1._imap._gengen
union {
xtpage_t _xtroot;
struct {
u8 unused[16]; /* 16: */
dxd_t _dxd; /* 16: */
union {
__le32 _rdev; /* 4: */
u8 _fastsymlink[128];
} _u;
u8 _inlineea[128];
} _special;
} _u2;
} _file;
#define di_xtroot u._file._u2._xtroot
#define di_dxd u._file._u2._special._dxd
#define di_btroot di_xtroot
#define di_inlinedata u._file._u2._special._u
#define di_rdev u._file._u2._special._u._rdev
#define di_fastsymlink u._file._u2._special._u._fastsymlink
#define di_inlineea u._file._u2._special._inlineea
} u;
};
/* extended mode bits (on-disk inode di_mode) */
#define IFJOURNAL 0x00010000 /* journalled file */
#define ISPARSE 0x00020000 /* sparse file enabled */
#define INLINEEA 0x00040000 /* inline EA area free */
#define ISWAPFILE 0x00800000 /* file open for pager swap space */
/* more extended mode bits: attributes for OS/2 */
#define IREADONLY 0x02000000 /* no write access to file */
#define IHIDDEN 0x04000000 /* hidden file */
#define ISYSTEM 0x08000000 /* system file */
#define IDIRECTORY 0x20000000 /* directory (shadow of real bit) */
#define IARCHIVE 0x40000000 /* file archive bit */
#define INEWNAME 0x80000000 /* non-8.3 filename format */
#define IRASH 0x4E000000 /* mask for changeable attributes */
#define ATTRSHIFT 25 /* bits to shift to move attribute
specification to mode position */
/* extended attributes for Linux */
#define JFS_NOATIME_FL 0x00080000 /* do not update atime */
#define JFS_DIRSYNC_FL 0x00100000 /* dirsync behaviour */
#define JFS_SYNC_FL 0x00200000 /* Synchronous updates */
#define JFS_SECRM_FL 0x00400000 /* Secure deletion */
#define JFS_UNRM_FL 0x00800000 /* allow for undelete */
#define JFS_APPEND_FL 0x01000000 /* writes to file may only append */
#define JFS_IMMUTABLE_FL 0x02000000 /* Immutable file */
#define JFS_FL_USER_VISIBLE 0x03F80000
#define JFS_FL_USER_MODIFIABLE 0x03F80000
#define JFS_FL_INHERIT 0x03C80000
/* These are identical to EXT[23]_IOC_GETFLAGS/SETFLAGS */
#define JFS_IOC_GETFLAGS _IOR('f', 1, long)
#define JFS_IOC_SETFLAGS _IOW('f', 2, long)
#endif /*_H_JFS_DINODE */

3999
fs/jfs/jfs_dmap.c Normal file

File diff suppressed because it is too large Load Diff

314
fs/jfs/jfs_dmap.h Normal file
View File

@@ -0,0 +1,314 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_DMAP
#define _H_JFS_DMAP
#include "jfs_txnmgr.h"
#define BMAPVERSION 1 /* version number */
#define TREESIZE (256+64+16+4+1) /* size of a dmap tree */
#define LEAFIND (64+16+4+1) /* index of 1st leaf of a dmap tree */
#define LPERDMAP 256 /* num leaves per dmap tree */
#define L2LPERDMAP 8 /* l2 number of leaves per dmap tree */
#define DBWORD 32 /* # of blks covered by a map word */
#define L2DBWORD 5 /* l2 # of blks covered by a mword */
#define BUDMIN L2DBWORD /* max free string in a map word */
#define BPERDMAP (LPERDMAP * DBWORD) /* num of blks per dmap */
#define L2BPERDMAP 13 /* l2 num of blks per dmap */
#define CTLTREESIZE (1024+256+64+16+4+1) /* size of a dmapctl tree */
#define CTLLEAFIND (256+64+16+4+1) /* idx of 1st leaf of a dmapctl tree */
#define LPERCTL 1024 /* num of leaves per dmapctl tree */
#define L2LPERCTL 10 /* l2 num of leaves per dmapctl tree */
#define ROOT 0 /* index of the root of a tree */
#define NOFREE ((s8) -1) /* no blocks free */
#define MAXAG 128 /* max number of allocation groups */
#define L2MAXAG 7 /* l2 max num of AG */
#define L2MINAGSZ 25 /* l2 of minimum AG size in bytes */
#define BMAPBLKNO 0 /* lblkno of bmap within the map */
/*
* maximum l2 number of disk blocks at the various dmapctl levels.
*/
#define L2MAXL0SIZE (L2BPERDMAP + 1 * L2LPERCTL)
#define L2MAXL1SIZE (L2BPERDMAP + 2 * L2LPERCTL)
#define L2MAXL2SIZE (L2BPERDMAP + 3 * L2LPERCTL)
/*
* maximum number of disk blocks at the various dmapctl levels.
*/
#define MAXL0SIZE ((s64)1 << L2MAXL0SIZE)
#define MAXL1SIZE ((s64)1 << L2MAXL1SIZE)
#define MAXL2SIZE ((s64)1 << L2MAXL2SIZE)
#define MAXMAPSIZE MAXL2SIZE /* maximum aggregate map size */
/*
* determine the maximum free string for four (lower level) nodes
* of the tree.
*/
static __inline signed char TREEMAX(signed char *cp)
{
signed char tmp1, tmp2;
tmp1 = max(*(cp+2), *(cp+3));
tmp2 = max(*(cp), *(cp+1));
return max(tmp1, tmp2);
}
/*
* convert disk block number to the logical block number of the dmap
* describing the disk block. s is the log2(number of logical blocks per page)
*
* The calculation figures out how many logical pages are in front of the dmap.
* - the number of dmaps preceding it
* - the number of L0 pages preceding its L0 page
* - the number of L1 pages preceding its L1 page
* - 3 is added to account for the L2, L1, and L0 page for this dmap
* - 1 is added to account for the control page of the map.
*/
#define BLKTODMAP(b,s) \
((((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1) << (s))
/*
* convert disk block number to the logical block number of the LEVEL 0
* dmapctl describing the disk block. s is the log2(number of logical blocks
* per page)
*
* The calculation figures out how many logical pages are in front of the L0.
* - the number of dmap pages preceding it
* - the number of L0 pages preceding it
* - the number of L1 pages preceding its L1 page
* - 2 is added to account for the L2, and L1 page for this L0
* - 1 is added to account for the control page of the map.
*/
#define BLKTOL0(b,s) \
(((((b) >> 23) << 10) + ((b) >> 23) + ((b) >> 33) + 2 + 1) << (s))
/*
* convert disk block number to the logical block number of the LEVEL 1
* dmapctl describing the disk block. s is the log2(number of logical blocks
* per page)
*
* The calculation figures out how many logical pages are in front of the L1.
* - the number of dmap pages preceding it
* - the number of L0 pages preceding it
* - the number of L1 pages preceding it
* - 1 is added to account for the L2 page
* - 1 is added to account for the control page of the map.
*/
#define BLKTOL1(b,s) \
(((((b) >> 33) << 20) + (((b) >> 33) << 10) + ((b) >> 33) + 1 + 1) << (s))
/*
* convert disk block number to the logical block number of the dmapctl
* at the specified level which describes the disk block.
*/
#define BLKTOCTL(b,s,l) \
(((l) == 2) ? 1 : ((l) == 1) ? BLKTOL1((b),(s)) : BLKTOL0((b),(s)))
/*
* convert aggregate map size to the zero origin dmapctl level of the
* top dmapctl.
*/
#define BMAPSZTOLEV(size) \
(((size) <= MAXL0SIZE) ? 0 : ((size) <= MAXL1SIZE) ? 1 : 2)
/* convert disk block number to allocation group number.
*/
#define BLKTOAG(b,sbi) ((b) >> ((sbi)->bmap->db_agl2size))
/* convert allocation group number to starting disk block
* number.
*/
#define AGTOBLK(a,ip) \
((s64)(a) << (JFS_SBI((ip)->i_sb)->bmap->db_agl2size))
/*
* dmap summary tree
*
* dmaptree must be consistent with dmapctl.
*/
struct dmaptree {
__le32 nleafs; /* 4: number of tree leafs */
__le32 l2nleafs; /* 4: l2 number of tree leafs */
__le32 leafidx; /* 4: index of first tree leaf */
__le32 height; /* 4: height of the tree */
s8 budmin; /* 1: min l2 tree leaf value to combine */
s8 stree[TREESIZE]; /* TREESIZE: tree */
u8 pad[2]; /* 2: pad to word boundary */
}; /* - 360 - */
/*
* dmap page per 8K blocks bitmap
*/
struct dmap {
__le32 nblocks; /* 4: num blks covered by this dmap */
__le32 nfree; /* 4: num of free blks in this dmap */
__le64 start; /* 8: starting blkno for this dmap */
struct dmaptree tree; /* 360: dmap tree */
u8 pad[1672]; /* 1672: pad to 2048 bytes */
__le32 wmap[LPERDMAP]; /* 1024: bits of the working map */
__le32 pmap[LPERDMAP]; /* 1024: bits of the persistent map */
}; /* - 4096 - */
/*
* disk map control page per level.
*
* dmapctl must be consistent with dmaptree.
*/
struct dmapctl {
__le32 nleafs; /* 4: number of tree leafs */
__le32 l2nleafs; /* 4: l2 number of tree leafs */
__le32 leafidx; /* 4: index of the first tree leaf */
__le32 height; /* 4: height of tree */
s8 budmin; /* 1: minimum l2 tree leaf value */
s8 stree[CTLTREESIZE]; /* CTLTREESIZE: dmapctl tree */
u8 pad[2714]; /* 2714: pad to 4096 */
}; /* - 4096 - */
/*
* common definition for dmaptree within dmap and dmapctl
*/
typedef union dmtree {
struct dmaptree t1;
struct dmapctl t2;
} dmtree_t;
/* macros for accessing fields within dmtree */
#define dmt_nleafs t1.nleafs
#define dmt_l2nleafs t1.l2nleafs
#define dmt_leafidx t1.leafidx
#define dmt_height t1.height
#define dmt_budmin t1.budmin
#define dmt_stree t1.stree
/*
* on-disk aggregate disk allocation map descriptor.
*/
struct dbmap_disk {
__le64 dn_mapsize; /* 8: number of blocks in aggregate */
__le64 dn_nfree; /* 8: num free blks in aggregate map */
__le32 dn_l2nbperpage; /* 4: number of blks per page */
__le32 dn_numag; /* 4: total number of ags */
__le32 dn_maxlevel; /* 4: number of active ags */
__le32 dn_maxag; /* 4: max active alloc group number */
__le32 dn_agpref; /* 4: preferred alloc group (hint) */
__le32 dn_aglevel; /* 4: dmapctl level holding the AG */
__le32 dn_agheigth; /* 4: height in dmapctl of the AG */
__le32 dn_agwidth; /* 4: width in dmapctl of the AG */
__le32 dn_agstart; /* 4: start tree index at AG height */
__le32 dn_agl2size; /* 4: l2 num of blks per alloc group */
__le64 dn_agfree[MAXAG];/* 8*MAXAG: per AG free count */
__le64 dn_agsize; /* 8: num of blks per alloc group */
s8 dn_maxfreebud; /* 1: max free buddy system */
u8 pad[3007]; /* 3007: pad to 4096 */
}; /* - 4096 - */
struct dbmap {
s64 dn_mapsize; /* number of blocks in aggregate */
s64 dn_nfree; /* num free blks in aggregate map */
int dn_l2nbperpage; /* number of blks per page */
int dn_numag; /* total number of ags */
int dn_maxlevel; /* number of active ags */
int dn_maxag; /* max active alloc group number */
int dn_agpref; /* preferred alloc group (hint) */
int dn_aglevel; /* dmapctl level holding the AG */
int dn_agheigth; /* height in dmapctl of the AG */
int dn_agwidth; /* width in dmapctl of the AG */
int dn_agstart; /* start tree index at AG height */
int dn_agl2size; /* l2 num of blks per alloc group */
s64 dn_agfree[MAXAG]; /* per AG free count */
s64 dn_agsize; /* num of blks per alloc group */
signed char dn_maxfreebud; /* max free buddy system */
}; /* - 4096 - */
/*
* in-memory aggregate disk allocation map descriptor.
*/
struct bmap {
struct dbmap db_bmap; /* on-disk aggregate map descriptor */
struct inode *db_ipbmap; /* ptr to aggregate map incore inode */
struct mutex db_bmaplock; /* aggregate map lock */
atomic_t db_active[MAXAG]; /* count of active, open files in AG */
u32 *db_DBmap;
};
/* macros for accessing fields within in-memory aggregate map descriptor */
#define db_mapsize db_bmap.dn_mapsize
#define db_nfree db_bmap.dn_nfree
#define db_agfree db_bmap.dn_agfree
#define db_agsize db_bmap.dn_agsize
#define db_agl2size db_bmap.dn_agl2size
#define db_agwidth db_bmap.dn_agwidth
#define db_agheigth db_bmap.dn_agheigth
#define db_agstart db_bmap.dn_agstart
#define db_numag db_bmap.dn_numag
#define db_maxlevel db_bmap.dn_maxlevel
#define db_aglevel db_bmap.dn_aglevel
#define db_agpref db_bmap.dn_agpref
#define db_maxag db_bmap.dn_maxag
#define db_maxfreebud db_bmap.dn_maxfreebud
#define db_l2nbperpage db_bmap.dn_l2nbperpage
/*
* macros for various conversions needed by the allocators.
* blkstol2(), cntlz(), and cnttz() are operating system dependent functions.
*/
/* convert number of blocks to log2 number of blocks, rounding up to
* the next log2 value if blocks is not a l2 multiple.
*/
#define BLKSTOL2(d) (blkstol2(d))
/* convert number of leafs to log2 leaf value */
#define NLSTOL2BSZ(n) (31 - cntlz((n)) + BUDMIN)
/* convert leaf index to log2 leaf value */
#define LITOL2BSZ(n,m,b) ((((n) == 0) ? (m) : cnttz((n))) + (b))
/* convert a block number to a dmap control leaf index */
#define BLKTOCTLLEAF(b,m) \
(((b) & (((s64)1 << ((m) + L2LPERCTL)) - 1)) >> (m))
/* convert log2 leaf value to buddy size */
#define BUDSIZE(s,m) (1 << ((s) - (m)))
/*
* external references.
*/
extern int dbMount(struct inode *ipbmap);
extern int dbUnmount(struct inode *ipbmap, int mounterror);
extern int dbFree(struct inode *ipbmap, s64 blkno, s64 nblocks);
extern int dbUpdatePMap(struct inode *ipbmap,
int free, s64 blkno, s64 nblocks, struct tblock * tblk);
extern int dbNextAG(struct inode *ipbmap);
extern int dbAlloc(struct inode *ipbmap, s64 hint, s64 nblocks, s64 * results);
extern int dbReAlloc(struct inode *ipbmap,
s64 blkno, s64 nblocks, s64 addnblocks, s64 * results);
extern int dbSync(struct inode *ipbmap);
extern int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks);
extern int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks);
extern void dbFinalizeBmap(struct inode *ipbmap);
extern s64 dbMapFileSizeToMapSize(struct inode *ipbmap);
#endif /* _H_JFS_DMAP */

4572
fs/jfs/jfs_dtree.c Normal file

File diff suppressed because it is too large Load Diff

272
fs/jfs/jfs_dtree.h Normal file
View File

@@ -0,0 +1,272 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_DTREE
#define _H_JFS_DTREE
/*
* jfs_dtree.h: directory B+-tree manager
*/
#include "jfs_btree.h"
typedef union {
struct {
tid_t tid;
struct inode *ip;
u32 ino;
} leaf;
pxd_t xd;
} ddata_t;
/*
* entry segment/slot
*
* an entry consists of type dependent head/only segment/slot and
* additional segments/slots linked vi next field;
* N.B. last/only segment of entry is terminated by next = -1;
*/
/*
* directory page slot
*/
struct dtslot {
s8 next; /* 1: */
s8 cnt; /* 1: */
__le16 name[15]; /* 30: */
}; /* (32) */
#define DATASLOTSIZE 16
#define L2DATASLOTSIZE 4
#define DTSLOTSIZE 32
#define L2DTSLOTSIZE 5
#define DTSLOTHDRSIZE 2
#define DTSLOTDATASIZE 30
#define DTSLOTDATALEN 15
/*
* internal node entry head/only segment
*/
struct idtentry {
pxd_t xd; /* 8: child extent descriptor */
s8 next; /* 1: */
u8 namlen; /* 1: */
__le16 name[11]; /* 22: 2-byte aligned */
}; /* (32) */
#define DTIHDRSIZE 10
#define DTIHDRDATALEN 11
/* compute number of slots for entry */
#define NDTINTERNAL(klen) ( ((4 + (klen)) + (15 - 1)) / 15 )
/*
* leaf node entry head/only segment
*
* For legacy filesystems, name contains 13 wchars -- no index field
*/
struct ldtentry {
__le32 inumber; /* 4: 4-byte aligned */
s8 next; /* 1: */
u8 namlen; /* 1: */
__le16 name[11]; /* 22: 2-byte aligned */
__le32 index; /* 4: index into dir_table */
}; /* (32) */
#define DTLHDRSIZE 6
#define DTLHDRDATALEN_LEGACY 13 /* Old (OS/2) format */
#define DTLHDRDATALEN 11
/*
* dir_table used for directory traversal during readdir
*/
/*
* Keep persistent index for directory entries
*/
#define DO_INDEX(INODE) (JFS_SBI((INODE)->i_sb)->mntflag & JFS_DIR_INDEX)
/*
* Maximum entry in inline directory table
*/
#define MAX_INLINE_DIRTABLE_ENTRY 13
struct dir_table_slot {
u8 rsrvd; /* 1: */
u8 flag; /* 1: 0 if free */
u8 slot; /* 1: slot within leaf page of entry */
u8 addr1; /* 1: upper 8 bits of leaf page address */
__le32 addr2; /* 4: lower 32 bits of leaf page address -OR-
index of next entry when this entry was deleted */
}; /* (8) */
/*
* flag values
*/
#define DIR_INDEX_VALID 1
#define DIR_INDEX_FREE 0
#define DTSaddress(dir_table_slot, address64)\
{\
(dir_table_slot)->addr1 = ((u64)address64) >> 32;\
(dir_table_slot)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
}
#define addressDTS(dts)\
( ((s64)((dts)->addr1)) << 32 | __le32_to_cpu((dts)->addr2) )
/* compute number of slots for entry */
#define NDTLEAF_LEGACY(klen) ( ((2 + (klen)) + (15 - 1)) / 15 )
#define NDTLEAF NDTINTERNAL
/*
* directory root page (in-line in on-disk inode):
*
* cf. dtpage_t below.
*/
typedef union {
struct {
struct dasd DASD; /* 16: DASD limit/usage info */
u8 flag; /* 1: */
u8 nextindex; /* 1: next free entry in stbl */
s8 freecnt; /* 1: free count */
s8 freelist; /* 1: freelist header */
__le32 idotdot; /* 4: parent inode number */
s8 stbl[8]; /* 8: sorted entry index table */
} header; /* (32) */
struct dtslot slot[9];
} dtroot_t;
#define PARENT(IP) \
(le32_to_cpu(JFS_IP(IP)->i_dtroot.header.idotdot))
#define DTROOTMAXSLOT 9
#define dtEmpty(IP) (JFS_IP(IP)->i_dtroot.header.nextindex == 0)
/*
* directory regular page:
*
* entry slot array of 32 byte slot
*
* sorted entry slot index table (stbl):
* contiguous slots at slot specified by stblindex,
* 1-byte per entry
* 512 byte block: 16 entry tbl (1 slot)
* 1024 byte block: 32 entry tbl (1 slot)
* 2048 byte block: 64 entry tbl (2 slot)
* 4096 byte block: 128 entry tbl (4 slot)
*
* data area:
* 512 byte block: 16 - 2 = 14 slot
* 1024 byte block: 32 - 2 = 30 slot
* 2048 byte block: 64 - 3 = 61 slot
* 4096 byte block: 128 - 5 = 123 slot
*
* N.B. index is 0-based; index fields refer to slot index
* except nextindex which refers to entry index in stbl;
* end of entry stot list or freelist is marked with -1.
*/
typedef union {
struct {
__le64 next; /* 8: next sibling */
__le64 prev; /* 8: previous sibling */
u8 flag; /* 1: */
u8 nextindex; /* 1: next entry index in stbl */
s8 freecnt; /* 1: */
s8 freelist; /* 1: slot index of head of freelist */
u8 maxslot; /* 1: number of slots in page slot[] */
u8 stblindex; /* 1: slot index of start of stbl */
u8 rsrvd[2]; /* 2: */
pxd_t self; /* 8: self pxd */
} header; /* (32) */
struct dtslot slot[128];
} dtpage_t;
#define DTPAGEMAXSLOT 128
#define DT8THPGNODEBYTES 512
#define DT8THPGNODETSLOTS 1
#define DT8THPGNODESLOTS 16
#define DTQTRPGNODEBYTES 1024
#define DTQTRPGNODETSLOTS 1
#define DTQTRPGNODESLOTS 32
#define DTHALFPGNODEBYTES 2048
#define DTHALFPGNODETSLOTS 2
#define DTHALFPGNODESLOTS 64
#define DTFULLPGNODEBYTES 4096
#define DTFULLPGNODETSLOTS 4
#define DTFULLPGNODESLOTS 128
#define DTENTRYSTART 1
/* get sorted entry table of the page */
#define DT_GETSTBL(p) ( ((p)->header.flag & BT_ROOT) ?\
((dtroot_t *)(p))->header.stbl : \
(s8 *)&(p)->slot[(p)->header.stblindex] )
/*
* Flags for dtSearch
*/
#define JFS_CREATE 1
#define JFS_LOOKUP 2
#define JFS_REMOVE 3
#define JFS_RENAME 4
#define DIRENTSIZ(namlen) \
( (sizeof(struct dirent) - 2*(JFS_NAME_MAX+1) + 2*((namlen)+1) + 3) &~ 3 )
/*
* Maximum file offset for directories.
*/
#define DIREND INT_MAX
/*
* external declarations
*/
extern void dtInitRoot(tid_t tid, struct inode *ip, u32 idotdot);
extern int dtSearch(struct inode *ip, struct component_name * key,
ino_t * data, struct btstack * btstack, int flag);
extern int dtInsert(tid_t tid, struct inode *ip, struct component_name * key,
ino_t * ino, struct btstack * btstack);
extern int dtDelete(tid_t tid, struct inode *ip, struct component_name * key,
ino_t * data, int flag);
extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key,
ino_t * orig_ino, ino_t new_ino, int flag);
extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir);
#endif /* !_H_JFS_DTREE */

663
fs/jfs/jfs_extent.c Normal file
View File

@@ -0,0 +1,663 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
#include "jfs_extent.h"
#include "jfs_debug.h"
/*
* forward references
*/
static int extBalloc(struct inode *, s64, s64 *, s64 *);
#ifdef _NOTYET
static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
#endif
static s64 extRoundDown(s64 nb);
#define DPD(a) (printk("(a): %d\n",(a)))
#define DPC(a) (printk("(a): %c\n",(a)))
#define DPL1(a) \
{ \
if ((a) >> 32) \
printk("(a): %x%08x ",(a)); \
else \
printk("(a): %x ",(a) << 32); \
}
#define DPL(a) \
{ \
if ((a) >> 32) \
printk("(a): %x%08x\n",(a)); \
else \
printk("(a): %x\n",(a) << 32); \
}
#define DPD1(a) (printk("(a): %d ",(a)))
#define DPX(a) (printk("(a): %08x\n",(a)))
#define DPX1(a) (printk("(a): %08x ",(a)))
#define DPS(a) (printk("%s\n",(a)))
#define DPE(a) (printk("\nENTERING: %s\n",(a)))
#define DPE1(a) (printk("\nENTERING: %s",(a)))
#define DPS1(a) (printk(" %s ",(a)))
/*
* NAME: extAlloc()
*
* FUNCTION: allocate an extent for a specified page range within a
* file.
*
* PARAMETERS:
* ip - the inode of the file.
* xlen - requested extent length.
* pno - the starting page number with the file.
* xp - pointer to an xad. on entry, xad describes an
* extent that is used as an allocation hint if the
* xaddr of the xad is non-zero. on successful exit,
* the xad describes the newly allocated extent.
* abnr - bool indicating whether the newly allocated extent
* should be marked as allocated but not recorded.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
* -ENOSPC - insufficient disk resources.
*/
int
extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
{
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
s64 nxlen, nxaddr, xoff, hint, xaddr = 0;
int rc;
int xflag;
/* This blocks if we are low on resources */
txBeginAnon(ip->i_sb);
/* Avoid race with jfs_commit_inode() */
mutex_lock(&JFS_IP(ip)->commit_mutex);
/* validate extent length */
if (xlen > MAXXLEN)
xlen = MAXXLEN;
/* get the page's starting extent offset */
xoff = pno << sbi->l2nbperpage;
/* check if an allocation hint was provided */
if ((hint = addressXAD(xp))) {
/* get the size of the extent described by the hint */
nxlen = lengthXAD(xp);
/* check if the hint is for the portion of the file
* immediately previous to the current allocation
* request and if hint extent has the same abnr
* value as the current request. if so, we can
* extend the hint extent to include the current
* extent if we can allocate the blocks immediately
* following the hint extent.
*/
if (offsetXAD(xp) + nxlen == xoff &&
abnr == ((xp->flag & XAD_NOTRECORDED) ? true : false))
xaddr = hint + nxlen;
/* adjust the hint to the last block of the extent */
hint += (nxlen - 1);
}
/* allocate the disk blocks for the extent. initially, extBalloc()
* will try to allocate disk blocks for the requested size (xlen).
* if this fails (xlen contiguous free blocks not avaliable), it'll
* try to allocate a smaller number of blocks (producing a smaller
* extent), with this smaller number of blocks consisting of the
* requested number of blocks rounded down to the next smaller
* power of 2 number (i.e. 16 -> 8). it'll continue to round down
* and retry the allocation until the number of blocks to allocate
* is smaller than the number of blocks per page.
*/
nxlen = xlen;
if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
mutex_unlock(&JFS_IP(ip)->commit_mutex);
return (rc);
}
/* Allocate blocks to quota. */
if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
dbFree(ip, nxaddr, (s64) nxlen);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
return -EDQUOT;
}
/* determine the value of the extent flag */
xflag = abnr ? XAD_NOTRECORDED : 0;
/* if we can extend the hint extent to cover the current request,
* extend it. otherwise, insert a new extent to
* cover the current request.
*/
if (xaddr && xaddr == nxaddr)
rc = xtExtend(0, ip, xoff, (int) nxlen, 0);
else
rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0);
/* if the extend or insert failed,
* free the newly allocated blocks and return the error.
*/
if (rc) {
dbFree(ip, nxaddr, nxlen);
DQUOT_FREE_BLOCK(ip, nxlen);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
return (rc);
}
/* set the results of the extent allocation */
XADaddress(xp, nxaddr);
XADlength(xp, nxlen);
XADoffset(xp, xoff);
xp->flag = xflag;
mark_inode_dirty(ip);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
/*
* COMMIT_SyncList flags an anonymous tlock on page that is on
* sync list.
* We need to commit the inode to get the page written disk.
*/
if (test_and_clear_cflag(COMMIT_Synclist,ip))
jfs_commit_inode(ip, 0);
return (0);
}
#ifdef _NOTYET
/*
* NAME: extRealloc()
*
* FUNCTION: extend the allocation of a file extent containing a
* partial back last page.
*
* PARAMETERS:
* ip - the inode of the file.
* cp - cbuf for the partial backed last page.
* xlen - request size of the resulting extent.
* xp - pointer to an xad. on successful exit, the xad
* describes the newly allocated extent.
* abnr - bool indicating whether the newly allocated extent
* should be marked as allocated but not recorded.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
* -ENOSPC - insufficient disk resources.
*/
int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
{
struct super_block *sb = ip->i_sb;
s64 xaddr, xlen, nxaddr, delta, xoff;
s64 ntail, nextend, ninsert;
int rc, nbperpage = JFS_SBI(sb)->nbperpage;
int xflag;
/* This blocks if we are low on resources */
txBeginAnon(ip->i_sb);
mutex_lock(&JFS_IP(ip)->commit_mutex);
/* validate extent length */
if (nxlen > MAXXLEN)
nxlen = MAXXLEN;
/* get the extend (partial) page's disk block address and
* number of blocks.
*/
xaddr = addressXAD(xp);
xlen = lengthXAD(xp);
xoff = offsetXAD(xp);
/* if the extend page is abnr and if the request is for
* the extent to be allocated and recorded,
* make the page allocated and recorded.
*/
if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
xp->flag = 0;
if ((rc = xtUpdate(0, ip, xp)))
goto exit;
}
/* try to allocated the request number of blocks for the
* extent. dbRealloc() first tries to satisfy the request
* by extending the allocation in place. otherwise, it will
* try to allocate a new set of blocks large enough for the
* request. in satisfying a request, dbReAlloc() may allocate
* less than what was request but will always allocate enough
* space as to satisfy the extend page.
*/
if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
goto exit;
/* Allocat blocks to quota. */
if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
dbFree(ip, nxaddr, (s64) nxlen);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
return -EDQUOT;
}
delta = nxlen - xlen;
/* check if the extend page is not abnr but the request is abnr
* and the allocated disk space is for more than one page. if this
* is the case, there is a miss match of abnr between the extend page
* and the one or more pages following the extend page. as a result,
* two extents will have to be manipulated. the first will be that
* of the extent of the extend page and will be manipulated thru
* an xtExtend() or an xtTailgate(), depending upon whether the
* disk allocation occurred as an inplace extension. the second
* extent will be manipulated (created) through an xtInsert() and
* will be for the pages following the extend page.
*/
if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) {
ntail = nbperpage;
nextend = ntail - xlen;
ninsert = nxlen - nbperpage;
xflag = XAD_NOTRECORDED;
} else {
ntail = nxlen;
nextend = delta;
ninsert = 0;
xflag = xp->flag;
}
/* if we were able to extend the disk allocation in place,
* extend the extent. otherwise, move the extent to a
* new disk location.
*/
if (xaddr == nxaddr) {
/* extend the extent */
if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
dbFree(ip, xaddr + xlen, delta);
DQUOT_FREE_BLOCK(ip, nxlen);
goto exit;
}
} else {
/*
* move the extent to a new location:
*
* xtTailgate() accounts for relocated tail extent;
*/
if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
dbFree(ip, nxaddr, nxlen);
DQUOT_FREE_BLOCK(ip, nxlen);
goto exit;
}
}
/* check if we need to also insert a new extent */
if (ninsert) {
/* perform the insert. if it fails, free the blocks
* to be inserted and make it appear that we only did
* the xtExtend() or xtTailgate() above.
*/
xaddr = nxaddr + ntail;
if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert,
&xaddr, 0)) {
dbFree(ip, xaddr, (s64) ninsert);
delta = nextend;
nxlen = ntail;
xflag = 0;
}
}
/* set the return results */
XADaddress(xp, nxaddr);
XADlength(xp, nxlen);
XADoffset(xp, xoff);
xp->flag = xflag;
mark_inode_dirty(ip);
exit:
mutex_unlock(&JFS_IP(ip)->commit_mutex);
return (rc);
}
#endif /* _NOTYET */
/*
* NAME: extHint()
*
* FUNCTION: produce an extent allocation hint for a file offset.
*
* PARAMETERS:
* ip - the inode of the file.
* offset - file offset for which the hint is needed.
* xp - pointer to the xad that is to be filled in with
* the hint.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
*/
int extHint(struct inode *ip, s64 offset, xad_t * xp)
{
struct super_block *sb = ip->i_sb;
struct xadlist xadl;
struct lxdlist lxdl;
lxd_t lxd;
s64 prev;
int rc, nbperpage = JFS_SBI(sb)->nbperpage;
/* init the hint as "no hint provided" */
XADaddress(xp, 0);
/* determine the starting extent offset of the page previous
* to the page containing the offset.
*/
prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage;
/* if the offsets in the first page of the file,
* no hint provided.
*/
if (prev < 0)
return (0);
/* prepare to lookup the previous page's extent info */
lxdl.maxnlxd = 1;
lxdl.nlxd = 1;
lxdl.lxd = &lxd;
LXDoffset(&lxd, prev)
LXDlength(&lxd, nbperpage);
xadl.maxnxad = 1;
xadl.nxad = 0;
xadl.xad = xp;
/* perform the lookup */
if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
return (rc);
/* check if not extent exists for the previous page.
* this is possible for sparse files.
*/
if (xadl.nxad == 0) {
// assert(ISSPARSE(ip));
return (0);
}
/* only preserve the abnr flag within the xad flags
* of the returned hint.
*/
xp->flag &= XAD_NOTRECORDED;
if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
jfs_error(ip->i_sb, "extHint: corrupt xtree");
return -EIO;
}
return (0);
}
/*
* NAME: extRecord()
*
* FUNCTION: change a page with a file from not recorded to recorded.
*
* PARAMETERS:
* ip - inode of the file.
* cp - cbuf of the file page.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
* -ENOSPC - insufficient disk resources.
*/
int extRecord(struct inode *ip, xad_t * xp)
{
int rc;
txBeginAnon(ip->i_sb);
mutex_lock(&JFS_IP(ip)->commit_mutex);
/* update the extent */
rc = xtUpdate(0, ip, xp);
mutex_unlock(&JFS_IP(ip)->commit_mutex);
return rc;
}
#ifdef _NOTYET
/*
* NAME: extFill()
*
* FUNCTION: allocate disk space for a file page that represents
* a file hole.
*
* PARAMETERS:
* ip - the inode of the file.
* cp - cbuf of the file page represent the hole.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
* -ENOSPC - insufficient disk resources.
*/
int extFill(struct inode *ip, xad_t * xp)
{
int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
// assert(ISSPARSE(ip));
/* initialize the extent allocation hint */
XADaddress(xp, 0);
/* allocate an extent to fill the hole */
if ((rc = extAlloc(ip, nbperpage, blkno, xp, false)))
return (rc);
assert(lengthPXD(xp) == nbperpage);
return (0);
}
#endif /* _NOTYET */
/*
* NAME: extBalloc()
*
* FUNCTION: allocate disk blocks to form an extent.
*
* initially, we will try to allocate disk blocks for the
* requested size (nblocks). if this fails (nblocks
* contiguous free blocks not avaliable), we'll try to allocate
* a smaller number of blocks (producing a smaller extent), with
* this smaller number of blocks consisting of the requested
* number of blocks rounded down to the next smaller power of 2
* number (i.e. 16 -> 8). we'll continue to round down and
* retry the allocation until the number of blocks to allocate
* is smaller than the number of blocks per page.
*
* PARAMETERS:
* ip - the inode of the file.
* hint - disk block number to be used as an allocation hint.
* *nblocks - pointer to an s64 value. on entry, this value specifies
* the desired number of block to be allocated. on successful
* exit, this value is set to the number of blocks actually
* allocated.
* blkno - pointer to a block address that is filled in on successful
* return with the starting block number of the newly
* allocated block range.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
* -ENOSPC - insufficient disk resources.
*/
static int
extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
{
struct jfs_inode_info *ji = JFS_IP(ip);
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
s64 nb, nblks, daddr, max;
int rc, nbperpage = sbi->nbperpage;
struct bmap *bmp = sbi->bmap;
int ag;
/* get the number of blocks to initially attempt to allocate.
* we'll first try the number of blocks requested unless this
* number is greater than the maximum number of contiguous free
* blocks in the map. in that case, we'll start off with the
* maximum free.
*/
max = (s64) 1 << bmp->db_maxfreebud;
if (*nblocks >= max && *nblocks > nbperpage)
nb = nblks = (max > nbperpage) ? max : nbperpage;
else
nb = nblks = *nblocks;
/* try to allocate blocks */
while ((rc = dbAlloc(ip, hint, nb, &daddr)) != 0) {
/* if something other than an out of space error,
* stop and return this error.
*/
if (rc != -ENOSPC)
return (rc);
/* decrease the allocation request size */
nb = min(nblks, extRoundDown(nb));
/* give up if we cannot cover a page */
if (nb < nbperpage)
return (rc);
}
*nblocks = nb;
*blkno = daddr;
if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
ag = BLKTOAG(daddr, sbi);
spin_lock_irq(&ji->ag_lock);
if (ji->active_ag == -1) {
atomic_inc(&bmp->db_active[ag]);
ji->active_ag = ag;
} else if (ji->active_ag != ag) {
atomic_dec(&bmp->db_active[ji->active_ag]);
atomic_inc(&bmp->db_active[ag]);
ji->active_ag = ag;
}
spin_unlock_irq(&ji->ag_lock);
}
return (0);
}
#ifdef _NOTYET
/*
* NAME: extBrealloc()
*
* FUNCTION: attempt to extend an extent's allocation.
*
* Initially, we will try to extend the extent's allocation
* in place. If this fails, we'll try to move the extent
* to a new set of blocks. If moving the extent, we initially
* will try to allocate disk blocks for the requested size
* (newnblks). if this fails (new contiguous free blocks not
* avaliable), we'll try to allocate a smaller number of
* blocks (producing a smaller extent), with this smaller
* number of blocks consisting of the requested number of
* blocks rounded down to the next smaller power of 2
* number (i.e. 16 -> 8). We'll continue to round down and
* retry the allocation until the number of blocks to allocate
* is smaller than the number of blocks per page.
*
* PARAMETERS:
* ip - the inode of the file.
* blkno - starting block number of the extents current allocation.
* nblks - number of blocks within the extents current allocation.
* newnblks - pointer to a s64 value. on entry, this value is the
* the new desired extent size (number of blocks). on
* successful exit, this value is set to the extent's actual
* new size (new number of blocks).
* newblkno - the starting block number of the extents new allocation.
*
* RETURN VALUES:
* 0 - success
* -EIO - i/o error.
* -ENOSPC - insufficient disk resources.
*/
static int
extBrealloc(struct inode *ip,
s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno)
{
int rc;
/* try to extend in place */
if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) {
*newblkno = blkno;
return (0);
} else {
if (rc != -ENOSPC)
return (rc);
}
/* in place extension not possible.
* try to move the extent to a new set of blocks.
*/
return (extBalloc(ip, blkno, newnblks, newblkno));
}
#endif /* _NOTYET */
/*
* NAME: extRoundDown()
*
* FUNCTION: round down a specified number of blocks to the next
* smallest power of 2 number.
*
* PARAMETERS:
* nb - the inode of the file.
*
* RETURN VALUES:
* next smallest power of 2 number.
*/
static s64 extRoundDown(s64 nb)
{
int i;
u64 m, k;
for (i = 0, m = (u64) 1 << 63; i < 64; i++, m >>= 1) {
if (m & nb)
break;
}
i = 63 - i;
k = (u64) 1 << i;
k = ((k - 1) & nb) ? k : k >> 1;
return (k);
}

31
fs/jfs/jfs_extent.h Normal file
View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2001
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_EXTENT
#define _H_JFS_EXTENT
/* get block allocation allocation hint as location of disk inode */
#define INOHINT(ip) \
(addressPXD(&(JFS_IP(ip)->ixpxd)) + lengthPXD(&(JFS_IP(ip)->ixpxd)) - 1)
extern int extAlloc(struct inode *, s64, s64, xad_t *, bool);
extern int extFill(struct inode *, xad_t *);
extern int extHint(struct inode *, s64, xad_t *);
extern int extRealloc(struct inode *, s64, xad_t *, bool);
extern int extRecord(struct inode *, xad_t *);
#endif /* _H_JFS_EXTENT */

283
fs/jfs/jfs_filsys.h Normal file
View File

@@ -0,0 +1,283 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2003
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_FILSYS
#define _H_JFS_FILSYS
/*
* jfs_filsys.h
*
* file system (implementation-dependent) constants
*
* refer to <limits.h> for system wide implementation-dependent constants
*/
/*
* file system option (superblock flag)
*/
/* directory option */
#define JFS_UNICODE 0x00000001 /* unicode name */
/* mount time flags for error handling */
#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */
#define JFS_ERR_CONTINUE 0x00000004 /* continue */
#define JFS_ERR_PANIC 0x00000008 /* panic */
/* Quota support */
#define JFS_USRQUOTA 0x00000010
#define JFS_GRPQUOTA 0x00000020
/* mount time flag to disable journaling to disk */
#define JFS_NOINTEGRITY 0x00000040
/* commit option */
#define JFS_COMMIT 0x00000f00 /* commit option mask */
#define JFS_GROUPCOMMIT 0x00000100 /* group (of 1) commit */
#define JFS_LAZYCOMMIT 0x00000200 /* lazy commit */
#define JFS_TMPFS 0x00000400 /* temporary file system -
* do not log/commit:
* Never implemented
*/
/* log logical volume option */
#define JFS_INLINELOG 0x00000800 /* inline log within file system */
#define JFS_INLINEMOVE 0x00001000 /* inline log being moved */
/* Secondary aggregate inode table */
#define JFS_BAD_SAIT 0x00010000 /* current secondary ait is bad */
/* sparse regular file support */
#define JFS_SPARSE 0x00020000 /* sparse regular file */
/* DASD Limits F226941 */
#define JFS_DASD_ENABLED 0x00040000 /* DASD limits enabled */
#define JFS_DASD_PRIME 0x00080000 /* Prime DASD usage on boot */
/* big endian flag */
#define JFS_SWAP_BYTES 0x00100000 /* running on big endian computer */
/* Directory index */
#define JFS_DIR_INDEX 0x00200000 /* Persistent index for */
/* platform options */
#define JFS_LINUX 0x10000000 /* Linux support */
#define JFS_DFS 0x20000000 /* DCE DFS LFS support */
/* Never implemented */
#define JFS_OS2 0x40000000 /* OS/2 support */
/* case-insensitive name/directory support */
#define JFS_AIX 0x80000000 /* AIX support */
/* POSIX name/directory support - Never implemented*/
/*
* buffer cache configuration
*/
/* page size */
#ifdef PSIZE
#undef PSIZE
#endif
#define PSIZE 4096 /* page size (in byte) */
#define L2PSIZE 12 /* log2(PSIZE) */
#define POFFSET 4095 /* offset within page */
/* buffer page size */
#define BPSIZE PSIZE
/*
* fs fundamental size
*
* PSIZE >= file system block size >= PBSIZE >= DISIZE
*/
#define PBSIZE 512 /* physical block size (in byte) */
#define L2PBSIZE 9 /* log2(PBSIZE) */
#define DISIZE 512 /* on-disk inode size (in byte) */
#define L2DISIZE 9 /* log2(DISIZE) */
#define IDATASIZE 256 /* inode inline data size */
#define IXATTRSIZE 128 /* inode inline extended attribute size */
#define XTPAGE_SIZE 4096
#define log2_PAGESIZE 12
#define IAG_SIZE 4096
#define IAG_EXTENT_SIZE 4096
#define INOSPERIAG 4096 /* number of disk inodes per iag */
#define L2INOSPERIAG 12 /* l2 number of disk inodes per iag */
#define INOSPEREXT 32 /* number of disk inode per extent */
#define L2INOSPEREXT 5 /* l2 number of disk inode per extent */
#define IXSIZE (DISIZE * INOSPEREXT) /* inode extent size */
#define INOSPERPAGE 8 /* number of disk inodes per 4K page */
#define L2INOSPERPAGE 3 /* log2(INOSPERPAGE) */
#define IAGFREELIST_LWM 64
#define INODE_EXTENT_SIZE IXSIZE /* inode extent size */
#define NUM_INODE_PER_EXTENT INOSPEREXT
#define NUM_INODE_PER_IAG INOSPERIAG
#define MINBLOCKSIZE 512
#define MAXBLOCKSIZE 4096
#define MAXFILESIZE ((s64)1 << 52)
#define JFS_LINK_MAX 0xffffffff
/* Minimum number of bytes supported for a JFS partition */
#define MINJFS (0x1000000)
#define MINJFSTEXT "16"
/*
* file system block size -> physical block size
*/
#define LBOFFSET(x) ((x) & (PBSIZE - 1))
#define LBNUMBER(x) ((x) >> L2PBSIZE)
#define LBLK2PBLK(sb,b) ((b) << (sb->s_blocksize_bits - L2PBSIZE))
#define PBLK2LBLK(sb,b) ((b) >> (sb->s_blocksize_bits - L2PBSIZE))
/* size in byte -> last page number */
#define SIZE2PN(size) ( ((s64)((size) - 1)) >> (L2PSIZE) )
/* size in byte -> last file system block number */
#define SIZE2BN(size, l2bsize) ( ((s64)((size) - 1)) >> (l2bsize) )
/*
* fixed physical block address (physical block size = 512 byte)
*
* NOTE: since we can't guarantee a physical block size of 512 bytes the use of
* these macros should be removed and the byte offset macros used instead.
*/
#define SUPER1_B 64 /* primary superblock */
#define AIMAP_B (SUPER1_B + 8) /* 1st extent of aggregate inode map */
#define AITBL_B (AIMAP_B + 16) /*
* 1st extent of aggregate inode table
*/
#define SUPER2_B (AITBL_B + 32) /* 2ndary superblock pbn */
#define BMAP_B (SUPER2_B + 8) /* block allocation map */
/*
* SIZE_OF_SUPER defines the total amount of space reserved on disk for the
* superblock. This is not the same as the superblock structure, since all of
* this space is not currently being used.
*/
#define SIZE_OF_SUPER PSIZE
/*
* SIZE_OF_AG_TABLE defines the amount of space reserved to hold the AG table
*/
#define SIZE_OF_AG_TABLE PSIZE
/*
* SIZE_OF_MAP_PAGE defines the amount of disk space reserved for each page of
* the inode allocation map (to hold iag)
*/
#define SIZE_OF_MAP_PAGE PSIZE
/*
* fixed byte offset address
*/
#define SUPER1_OFF 0x8000 /* primary superblock */
#define AIMAP_OFF (SUPER1_OFF + SIZE_OF_SUPER)
/*
* Control page of aggregate inode map
* followed by 1st extent of map
*/
#define AITBL_OFF (AIMAP_OFF + (SIZE_OF_MAP_PAGE << 1))
/*
* 1st extent of aggregate inode table
*/
#define SUPER2_OFF (AITBL_OFF + INODE_EXTENT_SIZE)
/*
* secondary superblock
*/
#define BMAP_OFF (SUPER2_OFF + SIZE_OF_SUPER)
/*
* block allocation map
*/
/*
* The following macro is used to indicate the number of reserved disk blocks at
* the front of an aggregate, in terms of physical blocks. This value is
* currently defined to be 32K. This turns out to be the same as the primary
* superblock's address, since it directly follows the reserved blocks.
*/
#define AGGR_RSVD_BLOCKS SUPER1_B
/*
* The following macro is used to indicate the number of reserved bytes at the
* front of an aggregate. This value is currently defined to be 32K. This
* turns out to be the same as the primary superblock's byte offset, since it
* directly follows the reserved blocks.
*/
#define AGGR_RSVD_BYTES SUPER1_OFF
/*
* The following macro defines the byte offset for the first inode extent in
* the aggregate inode table. This allows us to find the self inode to find the
* rest of the table. Currently this value is 44K.
*/
#define AGGR_INODE_TABLE_START AITBL_OFF
/*
* fixed reserved inode number
*/
/* aggregate inode */
#define AGGR_RESERVED_I 0 /* aggregate inode (reserved) */
#define AGGREGATE_I 1 /* aggregate inode map inode */
#define BMAP_I 2 /* aggregate block allocation map inode */
#define LOG_I 3 /* aggregate inline log inode */
#define BADBLOCK_I 4 /* aggregate bad block inode */
#define FILESYSTEM_I 16 /* 1st/only fileset inode in ait:
* fileset inode map inode
*/
/* per fileset inode */
#define FILESET_RSVD_I 0 /* fileset inode (reserved) */
#define FILESET_EXT_I 1 /* fileset inode extension */
#define ROOT_I 2 /* fileset root inode */
#define ACL_I 3 /* fileset ACL inode */
#define FILESET_OBJECT_I 4 /* the first fileset inode available for a file
* or directory or link...
*/
#define FIRST_FILESET_INO 16 /* the first aggregate inode which describes
* an inode. (To fsck this is also the first
* inode in part 2 of the agg inode table.)
*/
/*
* directory configuration
*/
#define JFS_NAME_MAX 255
#define JFS_PATH_MAX BPSIZE
/*
* file system state (superblock state)
*/
#define FM_CLEAN 0x00000000 /* file system is unmounted and clean */
#define FM_MOUNT 0x00000001 /* file system is mounted cleanly */
#define FM_DIRTY 0x00000002 /* file system was not unmounted and clean
* when mounted or
* commit failure occurred while being mounted:
* fsck() must be run to repair
*/
#define FM_LOGREDO 0x00000004 /* log based recovery (logredo()) failed:
* fsck() must be run to repair
*/
#define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */
#endif /* _H_JFS_FILSYS */

3202
fs/jfs/jfs_imap.c Normal file

File diff suppressed because it is too large Load Diff

175
fs/jfs/jfs_imap.h Normal file
View File

@@ -0,0 +1,175 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_IMAP
#define _H_JFS_IMAP
#include "jfs_txnmgr.h"
/*
* jfs_imap.h: disk inode manager
*/
#define EXTSPERIAG 128 /* number of disk inode extent per iag */
#define IMAPBLKNO 0 /* lblkno of dinomap within inode map */
#define SMAPSZ 4 /* number of words per summary map */
#define EXTSPERSUM 32 /* number of extents per summary map entry */
#define L2EXTSPERSUM 5 /* l2 number of extents per summary map */
#define PGSPERIEXT 4 /* number of 4K pages per dinode extent */
#define MAXIAGS ((1<<20)-1) /* maximum number of iags */
#define MAXAG 128 /* maximum number of allocation groups */
#define AMAPSIZE 512 /* bytes in the IAG allocation maps */
#define SMAPSIZE 16 /* bytes in the IAG summary maps */
/* convert inode number to iag number */
#define INOTOIAG(ino) ((ino) >> L2INOSPERIAG)
/* convert iag number to logical block number of the iag page */
#define IAGTOLBLK(iagno,l2nbperpg) (((iagno) + 1) << (l2nbperpg))
/* get the starting block number of the 4K page of an inode extent
* that contains ino.
*/
#define INOPBLK(pxd,ino,l2nbperpg) (addressPXD((pxd)) + \
((((ino) & (INOSPEREXT-1)) >> L2INOSPERPAGE) << (l2nbperpg)))
/*
* inode allocation map:
*
* inode allocation map consists of
* . the inode map control page and
* . inode allocation group pages (per 4096 inodes)
* which are addressed by standard JFS xtree.
*/
/*
* inode allocation group page (per 4096 inodes of an AG)
*/
struct iag {
__le64 agstart; /* 8: starting block of ag */
__le32 iagnum; /* 4: inode allocation group number */
__le32 inofreefwd; /* 4: ag inode free list forward */
__le32 inofreeback; /* 4: ag inode free list back */
__le32 extfreefwd; /* 4: ag inode extent free list forward */
__le32 extfreeback; /* 4: ag inode extent free list back */
__le32 iagfree; /* 4: iag free list */
/* summary map: 1 bit per inode extent */
__le32 inosmap[SMAPSZ]; /* 16: sum map of mapwords w/ free inodes;
* note: this indicates free and backed
* inodes, if the extent is not backed the
* value will be 1. if the extent is
* backed but all inodes are being used the
* value will be 1. if the extent is
* backed but at least one of the inodes is
* free the value will be 0.
*/
__le32 extsmap[SMAPSZ]; /* 16: sum map of mapwords w/ free extents */
__le32 nfreeinos; /* 4: number of free inodes */
__le32 nfreeexts; /* 4: number of free extents */
/* (72) */
u8 pad[1976]; /* 1976: pad to 2048 bytes */
/* allocation bit map: 1 bit per inode (0 - free, 1 - allocated) */
__le32 wmap[EXTSPERIAG]; /* 512: working allocation map */
__le32 pmap[EXTSPERIAG]; /* 512: persistent allocation map */
pxd_t inoext[EXTSPERIAG]; /* 1024: inode extent addresses */
}; /* (4096) */
/*
* per AG control information (in inode map control page)
*/
struct iagctl_disk {
__le32 inofree; /* 4: free inode list anchor */
__le32 extfree; /* 4: free extent list anchor */
__le32 numinos; /* 4: number of backed inodes */
__le32 numfree; /* 4: number of free inodes */
}; /* (16) */
struct iagctl {
int inofree; /* free inode list anchor */
int extfree; /* free extent list anchor */
int numinos; /* number of backed inodes */
int numfree; /* number of free inodes */
};
/*
* per fileset/aggregate inode map control page
*/
struct dinomap_disk {
__le32 in_freeiag; /* 4: free iag list anchor */
__le32 in_nextiag; /* 4: next free iag number */
__le32 in_numinos; /* 4: num of backed inodes */
__le32 in_numfree; /* 4: num of free backed inodes */
__le32 in_nbperiext; /* 4: num of blocks per inode extent */
__le32 in_l2nbperiext; /* 4: l2 of in_nbperiext */
__le32 in_diskblock; /* 4: for standalone test driver */
__le32 in_maxag; /* 4: for standalone test driver */
u8 pad[2016]; /* 2016: pad to 2048 */
struct iagctl_disk in_agctl[MAXAG]; /* 2048: AG control information */
}; /* (4096) */
struct dinomap {
int in_freeiag; /* free iag list anchor */
int in_nextiag; /* next free iag number */
int in_numinos; /* num of backed inodes */
int in_numfree; /* num of free backed inodes */
int in_nbperiext; /* num of blocks per inode extent */
int in_l2nbperiext; /* l2 of in_nbperiext */
int in_diskblock; /* for standalone test driver */
int in_maxag; /* for standalone test driver */
struct iagctl in_agctl[MAXAG]; /* AG control information */
};
/*
* In-core inode map control page
*/
struct inomap {
struct dinomap im_imap; /* 4096: inode allocation control */
struct inode *im_ipimap; /* 4: ptr to inode for imap */
struct mutex im_freelock; /* 4: iag free list lock */
struct mutex im_aglock[MAXAG]; /* 512: per AG locks */
u32 *im_DBGdimap;
atomic_t im_numinos; /* num of backed inodes */
atomic_t im_numfree; /* num of free backed inodes */
};
#define im_freeiag im_imap.in_freeiag
#define im_nextiag im_imap.in_nextiag
#define im_agctl im_imap.in_agctl
#define im_nbperiext im_imap.in_nbperiext
#define im_l2nbperiext im_imap.in_l2nbperiext
/* for standalone testdriver
*/
#define im_diskblock im_imap.in_diskblock
#define im_maxag im_imap.in_maxag
extern int diFree(struct inode *);
extern int diAlloc(struct inode *, bool, struct inode *);
extern int diSync(struct inode *);
/* external references */
extern int diUpdatePMap(struct inode *ipimap, unsigned long inum,
bool is_free, struct tblock * tblk);
extern int diExtendFS(struct inode *ipimap, struct inode *ipbmap);
extern int diMount(struct inode *);
extern int diUnmount(struct inode *, int);
extern int diRead(struct inode *);
extern struct inode *diReadSpecial(struct super_block *, ino_t, int);
extern void diWriteSpecial(struct inode *, int);
extern void diFreeSpecial(struct inode *);
extern int diWrite(tid_t tid, struct inode *);
#endif /* _H_JFS_IMAP */

229
fs/jfs/jfs_incore.h Normal file
View File

@@ -0,0 +1,229 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_INCORE
#define _H_JFS_INCORE
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include "jfs_types.h"
#include "jfs_xtree.h"
#include "jfs_dtree.h"
/*
* JFS magic number
*/
#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */
/*
* JFS-private inode information
*/
struct jfs_inode_info {
int fileset; /* fileset number (always 16)*/
uint mode2; /* jfs-specific mode */
uint saved_uid; /* saved for uid mount option */
uint saved_gid; /* saved for gid mount option */
pxd_t ixpxd; /* inode extent descriptor */
dxd_t acl; /* dxd describing acl */
dxd_t ea; /* dxd describing ea */
time_t otime; /* time created */
uint next_index; /* next available directory entry index */
int acltype; /* Type of ACL */
short btorder; /* access order */
short btindex; /* btpage entry index*/
struct inode *ipimap; /* inode map */
long cflag; /* commit flags */
u16 bxflag; /* xflag of pseudo buffer? */
unchar agno; /* ag number */
signed char active_ag; /* ag currently allocating from */
lid_t blid; /* lid of pseudo buffer? */
lid_t atlhead; /* anonymous tlock list head */
lid_t atltail; /* anonymous tlock list tail */
spinlock_t ag_lock; /* protects active_ag */
struct list_head anon_inode_list; /* inodes having anonymous txns */
/*
* rdwrlock serializes xtree between reads & writes and synchronizes
* changes to special inodes. It's use would be redundant on
* directories since the i_mutex taken in the VFS is sufficient.
*/
struct rw_semaphore rdwrlock;
/*
* commit_mutex serializes transaction processing on an inode.
* It must be taken after beginning a transaction (txBegin), since
* dirty inodes may be committed while a new transaction on the
* inode is blocked in txBegin or TxBeginAnon
*/
struct mutex commit_mutex;
/* xattr_sem allows us to access the xattrs without taking i_mutex */
struct rw_semaphore xattr_sem;
lid_t xtlid; /* lid of xtree lock on directory */
#ifdef CONFIG_JFS_POSIX_ACL
struct posix_acl *i_acl;
struct posix_acl *i_default_acl;
#endif
union {
struct {
xtpage_t _xtroot; /* 288: xtree root */
struct inomap *_imap; /* 4: inode map header */
} file;
struct {
struct dir_table_slot _table[12]; /* 96: dir index */
dtroot_t _dtroot; /* 288: dtree root */
} dir;
struct {
unchar _unused[16]; /* 16: */
dxd_t _dxd; /* 16: */
unchar _inline[128]; /* 128: inline symlink */
/* _inline_ea may overlay the last part of
* file._xtroot if maxentry = XTROOTINITSLOT
*/
unchar _inline_ea[128]; /* 128: inline extended attr */
} link;
} u;
u32 dev; /* will die when we get wide dev_t */
struct inode vfs_inode;
};
#define i_xtroot u.file._xtroot
#define i_imap u.file._imap
#define i_dirtable u.dir._table
#define i_dtroot u.dir._dtroot
#define i_inline u.link._inline
#define i_inline_ea u.link._inline_ea
#define JFS_ACL_NOT_CACHED ((void *)-1)
#define IREAD_LOCK(ip, subclass) \
down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
#define IWRITE_LOCK(ip, subclass) \
down_write_nested(&JFS_IP(ip)->rdwrlock, subclass)
#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock)
/*
* cflag
*/
enum cflags {
COMMIT_Nolink, /* inode committed with zero link count */
COMMIT_Inlineea, /* commit inode inline EA */
COMMIT_Freewmap, /* free WMAP at iClose() */
COMMIT_Dirty, /* Inode is really dirty */
COMMIT_Dirtable, /* commit changes to di_dirtable */
COMMIT_Stale, /* data extent is no longer valid */
COMMIT_Synclist, /* metadata pages on group commit synclist */
};
/*
* commit_mutex nesting subclasses:
*/
enum commit_mutex_class
{
COMMIT_MUTEX_PARENT,
COMMIT_MUTEX_CHILD,
COMMIT_MUTEX_SECOND_PARENT, /* Renaming */
COMMIT_MUTEX_VICTIM /* Inode being unlinked due to rename */
};
/*
* rdwrlock subclasses:
* The dmap inode may be locked while a normal inode or the imap inode are
* locked.
*/
enum rdwrlock_class
{
RDWRLOCK_NORMAL,
RDWRLOCK_IMAP,
RDWRLOCK_DMAP
};
#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag))
#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag))
#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag))
#define test_and_clear_cflag(flag, ip) \
test_and_clear_bit(flag, &(JFS_IP(ip)->cflag))
/*
* JFS-private superblock information.
*/
struct jfs_sb_info {
struct super_block *sb; /* Point back to vfs super block */
unsigned long mntflag; /* aggregate attributes */
struct inode *ipbmap; /* block map inode */
struct inode *ipaimap; /* aggregate inode map inode */
struct inode *ipaimap2; /* secondary aimap inode */
struct inode *ipimap; /* aggregate inode map inode */
struct jfs_log *log; /* log */
struct list_head log_list; /* volumes associated with a journal */
short bsize; /* logical block size */
short l2bsize; /* log2 logical block size */
short nbperpage; /* blocks per page */
short l2nbperpage; /* log2 blocks per page */
short l2niperblk; /* log2 inodes per page */
dev_t logdev; /* external log device */
uint aggregate; /* volume identifier in log record */
pxd_t logpxd; /* pxd describing log */
pxd_t fsckpxd; /* pxd describing fsck wkspc */
pxd_t ait2; /* pxd describing AIT copy */
char uuid[16]; /* 128-bit uuid for volume */
char loguuid[16]; /* 128-bit uuid for log */
/*
* commit_state is used for synchronization of the jfs_commit
* threads. It is protected by LAZY_LOCK().
*/
int commit_state; /* commit state */
/* Formerly in ipimap */
uint gengen; /* inode generation generator*/
uint inostamp; /* shows inode belongs to fileset*/
/* Formerly in ipbmap */
struct bmap *bmap; /* incore bmap descriptor */
struct nls_table *nls_tab; /* current codepage */
struct inode *direct_inode; /* metadata inode */
uint state; /* mount/recovery state */
unsigned long flag; /* mount time flags */
uint p_state; /* state prior to going no integrity */
uint uid; /* uid to override on-disk uid */
uint gid; /* gid to override on-disk gid */
uint umask; /* umask to override on-disk umask */
};
/* jfs_sb_info commit_state */
#define IN_LAZYCOMMIT 1
static inline struct jfs_inode_info *JFS_IP(struct inode *inode)
{
return list_entry(inode, struct jfs_inode_info, vfs_inode);
}
static inline int jfs_dirtable_inline(struct inode *inode)
{
return (JFS_IP(inode)->next_index <= (MAX_INLINE_DIRTABLE_ENTRY + 1));
}
static inline struct jfs_sb_info *JFS_SBI(struct super_block *sb)
{
return sb->s_fs_info;
}
static inline int isReadOnly(struct inode *inode)
{
if (JFS_SBI(inode->i_sb)->log)
return 0;
return 1;
}
#endif /* _H_JFS_INCORE */

143
fs/jfs/jfs_inode.c Normal file
View File

@@ -0,0 +1,143 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
#include "jfs_imap.h"
#include "jfs_dinode.h"
#include "jfs_debug.h"
void jfs_set_inode_flags(struct inode *inode)
{
unsigned int flags = JFS_IP(inode)->mode2;
inode->i_flags &= ~(S_IMMUTABLE | S_APPEND |
S_NOATIME | S_DIRSYNC | S_SYNC);
if (flags & JFS_IMMUTABLE_FL)
inode->i_flags |= S_IMMUTABLE;
if (flags & JFS_APPEND_FL)
inode->i_flags |= S_APPEND;
if (flags & JFS_NOATIME_FL)
inode->i_flags |= S_NOATIME;
if (flags & JFS_DIRSYNC_FL)
inode->i_flags |= S_DIRSYNC;
if (flags & JFS_SYNC_FL)
inode->i_flags |= S_SYNC;
}
/*
* NAME: ialloc()
*
* FUNCTION: Allocate a new inode
*
*/
struct inode *ialloc(struct inode *parent, umode_t mode)
{
struct super_block *sb = parent->i_sb;
struct inode *inode;
struct jfs_inode_info *jfs_inode;
int rc;
inode = new_inode(sb);
if (!inode) {
jfs_warn("ialloc: new_inode returned NULL!");
return ERR_PTR(-ENOMEM);
}
jfs_inode = JFS_IP(inode);
rc = diAlloc(parent, S_ISDIR(mode), inode);
if (rc) {
jfs_warn("ialloc: diAlloc returned %d!", rc);
if (rc == -EIO)
make_bad_inode(inode);
iput(inode);
return ERR_PTR(rc);
}
inode->i_uid = current->fsuid;
if (parent->i_mode & S_ISGID) {
inode->i_gid = parent->i_gid;
if (S_ISDIR(mode))
mode |= S_ISGID;
} else
inode->i_gid = current->fsgid;
/*
* New inodes need to save sane values on disk when
* uid & gid mount options are used
*/
jfs_inode->saved_uid = inode->i_uid;
jfs_inode->saved_gid = inode->i_gid;
/*
* Allocate inode to quota.
*/
if (DQUOT_ALLOC_INODE(inode)) {
DQUOT_DROP(inode);
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
return ERR_PTR(-EDQUOT);
}
inode->i_mode = mode;
/* inherit flags from parent */
jfs_inode->mode2 = JFS_IP(parent)->mode2 & JFS_FL_INHERIT;
if (S_ISDIR(mode)) {
jfs_inode->mode2 |= IDIRECTORY;
jfs_inode->mode2 &= ~JFS_DIRSYNC_FL;
}
else {
jfs_inode->mode2 |= INLINEEA | ISPARSE;
if (S_ISLNK(mode))
jfs_inode->mode2 &= ~(JFS_IMMUTABLE_FL|JFS_APPEND_FL);
}
jfs_inode->mode2 |= mode;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
jfs_inode->otime = inode->i_ctime.tv_sec;
inode->i_generation = JFS_SBI(sb)->gengen++;
jfs_inode->cflag = 0;
/* Zero remaining fields */
memset(&jfs_inode->acl, 0, sizeof(dxd_t));
memset(&jfs_inode->ea, 0, sizeof(dxd_t));
jfs_inode->next_index = 0;
jfs_inode->acltype = 0;
jfs_inode->btorder = 0;
jfs_inode->btindex = 0;
jfs_inode->bxflag = 0;
jfs_inode->blid = 0;
jfs_inode->atlhead = 0;
jfs_inode->atltail = 0;
jfs_inode->xtlid = 0;
jfs_set_inode_flags(inode);
jfs_info("ialloc returns inode = 0x%p\n", inode);
return inode;
}

44
fs/jfs/jfs_inode.h Normal file
View File

@@ -0,0 +1,44 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2001
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_INODE
#define _H_JFS_INODE
extern struct inode *ialloc(struct inode *, umode_t);
extern int jfs_fsync(struct file *, struct dentry *, int);
extern int jfs_ioctl(struct inode *, struct file *,
unsigned int, unsigned long);
extern void jfs_read_inode(struct inode *);
extern int jfs_commit_inode(struct inode *, int);
extern int jfs_write_inode(struct inode*, int);
extern void jfs_delete_inode(struct inode *);
extern void jfs_dirty_inode(struct inode *);
extern void jfs_truncate(struct inode *);
extern void jfs_truncate_nolock(struct inode *, loff_t);
extern void jfs_free_zero_link(struct inode *);
extern struct dentry *jfs_get_parent(struct dentry *dentry);
extern void jfs_set_inode_flags(struct inode *);
extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern const struct address_space_operations jfs_aops;
extern const struct inode_operations jfs_dir_inode_operations;
extern const struct file_operations jfs_dir_operations;
extern const struct inode_operations jfs_file_inode_operations;
extern const struct file_operations jfs_file_operations;
extern const struct inode_operations jfs_symlink_inode_operations;
extern struct dentry_operations jfs_ci_dentry_operations;
#endif /* _H_JFS_INODE */

52
fs/jfs/jfs_lock.h Normal file
View File

@@ -0,0 +1,52 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2001
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_LOCK
#define _H_JFS_LOCK
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/sched.h>
/*
* jfs_lock.h
*/
/*
* Conditional sleep where condition is protected by spinlock
*
* lock_cmd and unlock_cmd take and release the spinlock
*/
#define __SLEEP_COND(wq, cond, lock_cmd, unlock_cmd) \
do { \
DECLARE_WAITQUEUE(__wait, current); \
\
add_wait_queue(&wq, &__wait); \
for (;;) { \
set_current_state(TASK_UNINTERRUPTIBLE);\
if (cond) \
break; \
unlock_cmd; \
io_schedule(); \
lock_cmd; \
} \
current->state = TASK_RUNNING; \
remove_wait_queue(&wq, &__wait); \
} while (0)
#endif /* _H_JFS_LOCK */

2542
fs/jfs/jfs_logmgr.c Normal file

File diff suppressed because it is too large Load Diff

513
fs/jfs/jfs_logmgr.h Normal file
View File

@@ -0,0 +1,513 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_LOGMGR
#define _H_JFS_LOGMGR
#include "jfs_filsys.h"
#include "jfs_lock.h"
/*
* log manager configuration parameters
*/
/* log page size */
#define LOGPSIZE 4096
#define L2LOGPSIZE 12
#define LOGPAGES 16 /* Log pages per mounted file system */
/*
* log logical volume
*
* a log is used to make the commit operation on journalled
* files within the same logical volume group atomic.
* a log is implemented with a logical volume.
* there is one log per logical volume group.
*
* block 0 of the log logical volume is not used (ipl etc).
* block 1 contains a log "superblock" and is used by logFormat(),
* lmLogInit(), lmLogShutdown(), and logRedo() to record status
* of the log but is not otherwise used during normal processing.
* blocks 2 - (N-1) are used to contain log records.
*
* when a volume group is varied-on-line, logRedo() must have
* been executed before the file systems (logical volumes) in
* the volume group can be mounted.
*/
/*
* log superblock (block 1 of logical volume)
*/
#define LOGSUPER_B 1
#define LOGSTART_B 2
#define LOGMAGIC 0x87654321
#define LOGVERSION 1
#define MAX_ACTIVE 128 /* Max active file systems sharing log */
struct logsuper {
__le32 magic; /* 4: log lv identifier */
__le32 version; /* 4: version number */
__le32 serial; /* 4: log open/mount counter */
__le32 size; /* 4: size in number of LOGPSIZE blocks */
__le32 bsize; /* 4: logical block size in byte */
__le32 l2bsize; /* 4: log2 of bsize */
__le32 flag; /* 4: option */
__le32 state; /* 4: state - see below */
__le32 end; /* 4: addr of last log record set by logredo */
char uuid[16]; /* 16: 128-bit journal uuid */
char label[16]; /* 16: journal label */
struct {
char uuid[16];
} active[MAX_ACTIVE]; /* 2048: active file systems list */
};
#define NULL_UUID "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
/* log flag: commit option (see jfs_filsys.h) */
/* log state */
#define LOGMOUNT 0 /* log mounted by lmLogInit() */
#define LOGREDONE 1 /* log shutdown by lmLogShutdown().
* log redo completed by logredo().
*/
#define LOGWRAP 2 /* log wrapped */
#define LOGREADERR 3 /* log read error detected in logredo() */
/*
* log logical page
*
* (this comment should be rewritten !)
* the header and trailer structures (h,t) will normally have
* the same page and eor value.
* An exception to this occurs when a complete page write is not
* accomplished on a power failure. Since the hardware may "split write"
* sectors in the page, any out of order sequence may occur during powerfail
* and needs to be recognized during log replay. The xor value is
* an "exclusive or" of all log words in the page up to eor. This
* 32 bit eor is stored with the top 16 bits in the header and the
* bottom 16 bits in the trailer. logredo can easily recognize pages
* that were not completed by reconstructing this eor and checking
* the log page.
*
* Previous versions of the operating system did not allow split
* writes and detected partially written records in logredo by
* ordering the updates to the header, trailer, and the move of data
* into the logdata area. The order: (1) data is moved (2) header
* is updated (3) trailer is updated. In logredo, when the header
* differed from the trailer, the header and trailer were reconciled
* as follows: if h.page != t.page they were set to the smaller of
* the two and h.eor and t.eor set to 8 (i.e. empty page). if (only)
* h.eor != t.eor they were set to the smaller of their two values.
*/
struct logpage {
struct { /* header */
__le32 page; /* 4: log sequence page number */
__le16 rsrvd; /* 2: */
__le16 eor; /* 2: end-of-log offset of lasrt record write */
} h;
__le32 data[LOGPSIZE / 4 - 4]; /* log record area */
struct { /* trailer */
__le32 page; /* 4: normally the same as h.page */
__le16 rsrvd; /* 2: */
__le16 eor; /* 2: normally the same as h.eor */
} t;
};
#define LOGPHDRSIZE 8 /* log page header size */
#define LOGPTLRSIZE 8 /* log page trailer size */
/*
* log record
*
* (this comment should be rewritten !)
* jfs uses only "after" log records (only a single writer is allowed
* in a page, pages are written to temporary paging space if
* if they must be written to disk before commit, and i/o is
* scheduled for modified pages to their home location after
* the log records containing the after values and the commit
* record is written to the log on disk, undo discards the copy
* in main-memory.)
*
* a log record consists of a data area of variable length followed by
* a descriptor of fixed size LOGRDSIZE bytes.
* the data area is rounded up to an integral number of 4-bytes and
* must be no longer than LOGPSIZE.
* the descriptor is of size of multiple of 4-bytes and aligned on a
* 4-byte boundary.
* records are packed one after the other in the data area of log pages.
* (sometimes a DUMMY record is inserted so that at least one record ends
* on every page or the longest record is placed on at most two pages).
* the field eor in page header/trailer points to the byte following
* the last record on a page.
*/
/* log record types */
#define LOG_COMMIT 0x8000
#define LOG_SYNCPT 0x4000
#define LOG_MOUNT 0x2000
#define LOG_REDOPAGE 0x0800
#define LOG_NOREDOPAGE 0x0080
#define LOG_NOREDOINOEXT 0x0040
#define LOG_UPDATEMAP 0x0008
#define LOG_NOREDOFILE 0x0001
/* REDOPAGE/NOREDOPAGE log record data type */
#define LOG_INODE 0x0001
#define LOG_XTREE 0x0002
#define LOG_DTREE 0x0004
#define LOG_BTROOT 0x0010
#define LOG_EA 0x0020
#define LOG_ACL 0x0040
#define LOG_DATA 0x0080
#define LOG_NEW 0x0100
#define LOG_EXTEND 0x0200
#define LOG_RELOCATE 0x0400
#define LOG_DIR_XTREE 0x0800 /* Xtree is in directory inode */
/* UPDATEMAP log record descriptor type */
#define LOG_ALLOCXADLIST 0x0080
#define LOG_ALLOCPXDLIST 0x0040
#define LOG_ALLOCXAD 0x0020
#define LOG_ALLOCPXD 0x0010
#define LOG_FREEXADLIST 0x0008
#define LOG_FREEPXDLIST 0x0004
#define LOG_FREEXAD 0x0002
#define LOG_FREEPXD 0x0001
struct lrd {
/*
* type independent area
*/
__le32 logtid; /* 4: log transaction identifier */
__le32 backchain; /* 4: ptr to prev record of same transaction */
__le16 type; /* 2: record type */
__le16 length; /* 2: length of data in record (in byte) */
__le32 aggregate; /* 4: file system lv/aggregate */
/* (16) */
/*
* type dependent area (20)
*/
union {
/*
* COMMIT: commit
*
* transaction commit: no type-dependent information;
*/
/*
* REDOPAGE: after-image
*
* apply after-image;
*
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
*/
struct {
__le32 fileset; /* 4: fileset number */
__le32 inode; /* 4: inode number */
__le16 type; /* 2: REDOPAGE record type */
__le16 l2linesize; /* 2: log2 of line size */
pxd_t pxd; /* 8: on-disk page pxd */
} redopage; /* (20) */
/*
* NOREDOPAGE: the page is freed
*
* do not apply after-image records which precede this record
* in the log with the same page block number to this page.
*
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
*/
struct {
__le32 fileset; /* 4: fileset number */
__le32 inode; /* 4: inode number */
__le16 type; /* 2: NOREDOPAGE record type */
__le16 rsrvd; /* 2: reserved */
pxd_t pxd; /* 8: on-disk page pxd */
} noredopage; /* (20) */
/*
* UPDATEMAP: update block allocation map
*
* either in-line PXD,
* or out-of-line XADLIST;
*
* N.B. REDOPAGE, NOREDOPAGE, and UPDATEMAP must be same format;
*/
struct {
__le32 fileset; /* 4: fileset number */
__le32 inode; /* 4: inode number */
__le16 type; /* 2: UPDATEMAP record type */
__le16 nxd; /* 2: number of extents */
pxd_t pxd; /* 8: pxd */
} updatemap; /* (20) */
/*
* NOREDOINOEXT: the inode extent is freed
*
* do not apply after-image records which precede this
* record in the log with the any of the 4 page block
* numbers in this inode extent.
*
* NOTE: The fileset and pxd fields MUST remain in
* the same fields in the REDOPAGE record format.
*
*/
struct {
__le32 fileset; /* 4: fileset number */
__le32 iagnum; /* 4: IAG number */
__le32 inoext_idx; /* 4: inode extent index */
pxd_t pxd; /* 8: on-disk page pxd */
} noredoinoext; /* (20) */
/*
* SYNCPT: log sync point
*
* replay log upto syncpt address specified;
*/
struct {
__le32 sync; /* 4: syncpt address (0 = here) */
} syncpt;
/*
* MOUNT: file system mount
*
* file system mount: no type-dependent information;
*/
/*
* ? FREEXTENT: free specified extent(s)
*
* free specified extent(s) from block allocation map
* N.B.: nextents should be length of data/sizeof(xad_t)
*/
struct {
__le32 type; /* 4: FREEXTENT record type */
__le32 nextent; /* 4: number of extents */
/* data: PXD or XAD list */
} freextent;
/*
* ? NOREDOFILE: this file is freed
*
* do not apply records which precede this record in the log
* with the same inode number.
*
* NOREDOFILE must be the first to be written at commit
* (last to be read in logredo()) - it prevents
* replay of preceding updates of all preceding generations
* of the inumber esp. the on-disk inode itself.
*/
struct {
__le32 fileset; /* 4: fileset number */
__le32 inode; /* 4: inode number */
} noredofile;
/*
* ? NEWPAGE:
*
* metadata type dependent
*/
struct {
__le32 fileset; /* 4: fileset number */
__le32 inode; /* 4: inode number */
__le32 type; /* 4: NEWPAGE record type */
pxd_t pxd; /* 8: on-disk page pxd */
} newpage;
/*
* ? DUMMY: filler
*
* no type-dependent information
*/
} log;
}; /* (36) */
#define LOGRDSIZE (sizeof(struct lrd))
/*
* line vector descriptor
*/
struct lvd {
__le16 offset;
__le16 length;
};
/*
* log logical volume
*/
struct jfs_log {
struct list_head sb_list;/* This is used to sync metadata
* before writing syncpt.
*/
struct list_head journal_list; /* Global list */
struct block_device *bdev; /* 4: log lv pointer */
int serial; /* 4: log mount serial number */
s64 base; /* @8: log extent address (inline log ) */
int size; /* 4: log size in log page (in page) */
int l2bsize; /* 4: log2 of bsize */
long flag; /* 4: flag */
struct lbuf *lbuf_free; /* 4: free lbufs */
wait_queue_head_t free_wait; /* 4: */
/* log write */
int logtid; /* 4: log tid */
int page; /* 4: page number of eol page */
int eor; /* 4: eor of last record in eol page */
struct lbuf *bp; /* 4: current log page buffer */
struct mutex loglock; /* 4: log write serialization lock */
/* syncpt */
int nextsync; /* 4: bytes to write before next syncpt */
int active; /* 4: */
wait_queue_head_t syncwait; /* 4: */
/* commit */
uint cflag; /* 4: */
struct list_head cqueue; /* FIFO commit queue */
struct tblock *flush_tblk; /* tblk we're waiting on for flush */
int gcrtc; /* 4: GC_READY transaction count */
struct tblock *gclrt; /* 4: latest GC_READY transaction */
spinlock_t gclock; /* 4: group commit lock */
int logsize; /* 4: log data area size in byte */
int lsn; /* 4: end-of-log */
int clsn; /* 4: clsn */
int syncpt; /* 4: addr of last syncpt record */
int sync; /* 4: addr from last logsync() */
struct list_head synclist; /* 8: logsynclist anchor */
spinlock_t synclock; /* 4: synclist lock */
struct lbuf *wqueue; /* 4: log pageout queue */
int count; /* 4: count */
char uuid[16]; /* 16: 128-bit uuid of log device */
int no_integrity; /* 3: flag to disable journaling to disk */
};
/*
* Log flag
*/
#define log_INLINELOG 1
#define log_SYNCBARRIER 2
#define log_QUIESCE 3
#define log_FLUSH 4
/*
* group commit flag
*/
/* jfs_log */
#define logGC_PAGEOUT 0x00000001
/* tblock/lbuf */
#define tblkGC_QUEUE 0x0001
#define tblkGC_READY 0x0002
#define tblkGC_COMMIT 0x0004
#define tblkGC_COMMITTED 0x0008
#define tblkGC_EOP 0x0010
#define tblkGC_FREE 0x0020
#define tblkGC_LEADER 0x0040
#define tblkGC_ERROR 0x0080
#define tblkGC_LAZY 0x0100 // D230860
#define tblkGC_UNLOCKED 0x0200 // D230860
/*
* log cache buffer header
*/
struct lbuf {
struct jfs_log *l_log; /* 4: log associated with buffer */
/*
* data buffer base area
*/
uint l_flag; /* 4: pageout control flags */
struct lbuf *l_wqnext; /* 4: write queue link */
struct lbuf *l_freelist; /* 4: freelistlink */
int l_pn; /* 4: log page number */
int l_eor; /* 4: log record eor */
int l_ceor; /* 4: committed log record eor */
s64 l_blkno; /* 8: log page block number */
caddr_t l_ldata; /* 4: data page */
struct page *l_page; /* The page itself */
uint l_offset; /* Offset of l_ldata within the page */
wait_queue_head_t l_ioevent; /* 4: i/o done event */
};
/* Reuse l_freelist for redrive list */
#define l_redrive_next l_freelist
/*
* logsynclist block
*
* common logsyncblk prefix for jbuf_t and tblock
*/
struct logsyncblk {
u16 xflag; /* flags */
u16 flag; /* only meaninful in tblock */
lid_t lid; /* lock id */
s32 lsn; /* log sequence number */
struct list_head synclist; /* log sync list link */
};
/*
* logsynclist serialization (per log)
*/
#define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
#define LOGSYNC_UNLOCK(log, flags) \
spin_unlock_irqrestore(&(log)->synclock, flags)
/* compute the difference in bytes of lsn from sync point */
#define logdiff(diff, lsn, log)\
{\
diff = (lsn) - (log)->syncpt;\
if (diff < 0)\
diff += (log)->logsize;\
}
extern int lmLogOpen(struct super_block *sb);
extern int lmLogClose(struct super_block *sb);
extern int lmLogShutdown(struct jfs_log * log);
extern int lmLogInit(struct jfs_log * log);
extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
extern int lmGroupCommit(struct jfs_log *, struct tblock *);
extern int jfsIOWait(void *);
extern void jfs_flush_journal(struct jfs_log * log, int wait);
extern void jfs_syncpt(struct jfs_log *log, int hard_sync);
#endif /* _H_JFS_LOGMGR */

845
fs/jfs/jfs_metapage.c Normal file
View File

@@ -0,0 +1,845 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2005
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
#ifdef CONFIG_JFS_STATISTICS
static struct {
uint pagealloc; /* # of page allocations */
uint pagefree; /* # of page frees */
uint lockwait; /* # of sleeping lock_metapage() calls */
} mpStat;
#endif
#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
static inline void unlock_metapage(struct metapage *mp)
{
clear_bit(META_locked, &mp->flag);
wake_up(&mp->wait);
}
static inline void __lock_metapage(struct metapage *mp)
{
DECLARE_WAITQUEUE(wait, current);
INCREMENT(mpStat.lockwait);
add_wait_queue_exclusive(&mp->wait, &wait);
do {
set_current_state(TASK_UNINTERRUPTIBLE);
if (metapage_locked(mp)) {
unlock_page(mp->page);
io_schedule();
lock_page(mp->page);
}
} while (trylock_metapage(mp));
__set_current_state(TASK_RUNNING);
remove_wait_queue(&mp->wait, &wait);
}
/*
* Must have mp->page locked
*/
static inline void lock_metapage(struct metapage *mp)
{
if (trylock_metapage(mp))
__lock_metapage(mp);
}
#define METAPOOL_MIN_PAGES 32
static struct kmem_cache *metapage_cache;
static mempool_t *metapage_mempool;
#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
#if MPS_PER_PAGE > 1
struct meta_anchor {
int mp_count;
atomic_t io_count;
struct metapage *mp[MPS_PER_PAGE];
};
#define mp_anchor(page) ((struct meta_anchor *)page_private(page))
static inline struct metapage *page_to_mp(struct page *page, uint offset)
{
if (!PagePrivate(page))
return NULL;
return mp_anchor(page)->mp[offset >> L2PSIZE];
}
static inline int insert_metapage(struct page *page, struct metapage *mp)
{
struct meta_anchor *a;
int index;
int l2mp_blocks; /* log2 blocks per metapage */
if (PagePrivate(page))
a = mp_anchor(page);
else {
a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
if (!a)
return -ENOMEM;
set_page_private(page, (unsigned long)a);
SetPagePrivate(page);
kmap(page);
}
if (mp) {
l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
a->mp_count++;
a->mp[index] = mp;
}
return 0;
}
static inline void remove_metapage(struct page *page, struct metapage *mp)
{
struct meta_anchor *a = mp_anchor(page);
int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
int index;
index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
BUG_ON(a->mp[index] != mp);
a->mp[index] = NULL;
if (--a->mp_count == 0) {
kfree(a);
set_page_private(page, 0);
ClearPagePrivate(page);
kunmap(page);
}
}
static inline void inc_io(struct page *page)
{
atomic_inc(&mp_anchor(page)->io_count);
}
static inline void dec_io(struct page *page, void (*handler) (struct page *))
{
if (atomic_dec_and_test(&mp_anchor(page)->io_count))
handler(page);
}
#else
static inline struct metapage *page_to_mp(struct page *page, uint offset)
{
return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
}
static inline int insert_metapage(struct page *page, struct metapage *mp)
{
if (mp) {
set_page_private(page, (unsigned long)mp);
SetPagePrivate(page);
kmap(page);
}
return 0;
}
static inline void remove_metapage(struct page *page, struct metapage *mp)
{
set_page_private(page, 0);
ClearPagePrivate(page);
kunmap(page);
}
#define inc_io(page) do {} while(0)
#define dec_io(page, handler) handler(page)
#endif
static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
{
struct metapage *mp = (struct metapage *)foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
mp->lid = 0;
mp->lsn = 0;
mp->flag = 0;
mp->data = NULL;
mp->clsn = 0;
mp->log = NULL;
set_bit(META_free, &mp->flag);
init_waitqueue_head(&mp->wait);
}
}
static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
{
return mempool_alloc(metapage_mempool, gfp_mask);
}
static inline void free_metapage(struct metapage *mp)
{
mp->flag = 0;
set_bit(META_free, &mp->flag);
mempool_free(mp, metapage_mempool);
}
int __init metapage_init(void)
{
/*
* Allocate the metapage structures
*/
metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
0, 0, init_once, NULL);
if (metapage_cache == NULL)
return -ENOMEM;
metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
metapage_cache);
if (metapage_mempool == NULL) {
kmem_cache_destroy(metapage_cache);
return -ENOMEM;
}
return 0;
}
void metapage_exit(void)
{
mempool_destroy(metapage_mempool);
kmem_cache_destroy(metapage_cache);
}
static inline void drop_metapage(struct page *page, struct metapage *mp)
{
if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
test_bit(META_io, &mp->flag))
return;
remove_metapage(page, mp);
INCREMENT(mpStat.pagefree);
free_metapage(mp);
}
/*
* Metapage address space operations
*/
static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
unsigned int *len)
{
int rc = 0;
int xflag;
s64 xaddr;
sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
inode->i_blkbits;
if (lblock >= file_blocks)
return 0;
if (lblock + *len > file_blocks)
*len = file_blocks - lblock;
if (inode->i_ino) {
rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
if ((rc == 0) && *len)
lblock = (sector_t)xaddr;
else
lblock = 0;
} /* else no mapping */
return lblock;
}
static void last_read_complete(struct page *page)
{
if (!PageError(page))
SetPageUptodate(page);
unlock_page(page);
}
static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
int err)
{
struct page *page = bio->bi_private;
if (bio->bi_size)
return 1;
if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_ERR "metapage_read_end_io: I/O error\n");
SetPageError(page);
}
dec_io(page, last_read_complete);
bio_put(bio);
return 0;
}
static void remove_from_logsync(struct metapage *mp)
{
struct jfs_log *log = mp->log;
unsigned long flags;
/*
* This can race. Recheck that log hasn't been set to null, and after
* acquiring logsync lock, recheck lsn
*/
if (!log)
return;
LOGSYNC_LOCK(log, flags);
if (mp->lsn) {
mp->log = NULL;
mp->lsn = 0;
mp->clsn = 0;
log->count--;
list_del(&mp->synclist);
}
LOGSYNC_UNLOCK(log, flags);
}
static void last_write_complete(struct page *page)
{
struct metapage *mp;
unsigned int offset;
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (mp && test_bit(META_io, &mp->flag)) {
if (mp->lsn)
remove_from_logsync(mp);
clear_bit(META_io, &mp->flag);
}
/*
* I'd like to call drop_metapage here, but I don't think it's
* safe unless I have the page locked
*/
}
end_page_writeback(page);
}
static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
int err)
{
struct page *page = bio->bi_private;
BUG_ON(!PagePrivate(page));
if (bio->bi_size)
return 1;
if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk(KERN_ERR "metapage_write_end_io: I/O error\n");
SetPageError(page);
}
dec_io(page, last_write_complete);
bio_put(bio);
return 0;
}
static int metapage_writepage(struct page *page, struct writeback_control *wbc)
{
struct bio *bio = NULL;
unsigned int block_offset; /* block offset of mp within page */
struct inode *inode = page->mapping->host;
unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
unsigned int len;
unsigned int xlen;
struct metapage *mp;
int redirty = 0;
sector_t lblock;
sector_t pblock;
sector_t next_block = 0;
sector_t page_start;
unsigned long bio_bytes = 0;
unsigned long bio_offset = 0;
unsigned int offset;
page_start = (sector_t)page->index <<
(PAGE_CACHE_SHIFT - inode->i_blkbits);
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (!mp || !test_bit(META_dirty, &mp->flag))
continue;
if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
redirty = 1;
/*
* Make sure this page isn't blocked indefinitely.
* If the journal isn't undergoing I/O, push it
*/
if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
jfs_flush_journal(mp->log, 0);
continue;
}
clear_bit(META_dirty, &mp->flag);
block_offset = offset >> inode->i_blkbits;
lblock = page_start + block_offset;
if (bio) {
if (xlen && lblock == next_block) {
/* Contiguous, in memory & on disk */
len = min(xlen, blocks_per_mp);
xlen -= len;
bio_bytes += len << inode->i_blkbits;
set_bit(META_io, &mp->flag);
continue;
}
/* Not contiguous */
if (bio_add_page(bio, page, bio_bytes, bio_offset) <
bio_bytes)
goto add_failed;
/*
* Increment counter before submitting i/o to keep
* count from hitting zero before we're through
*/
inc_io(page);
if (!bio->bi_size)
goto dump_bio;
submit_bio(WRITE, bio);
bio = NULL;
} else {
set_page_writeback(page);
inc_io(page);
}
xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
pblock = metapage_get_blocks(inode, lblock, &xlen);
if (!pblock) {
/* Need better error handling */
printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
dec_io(page, last_write_complete);
continue;
}
set_bit(META_io, &mp->flag);
len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage);
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_write_end_io;
bio->bi_private = page;
/* Don't call bio_add_page yet, we may add to this vec */
bio_offset = offset;
bio_bytes = len << inode->i_blkbits;
xlen -= len;
next_block = lblock + len;
}
if (bio) {
if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
goto add_failed;
if (!bio->bi_size)
goto dump_bio;
submit_bio(WRITE, bio);
}
if (redirty)
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
add_failed:
/* We should never reach here, since we're only adding one vec */
printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
goto skip;
dump_bio:
dump_mem("bio", bio, sizeof(*bio));
skip:
bio_put(bio);
unlock_page(page);
dec_io(page, last_write_complete);
return -EIO;
}
static int metapage_readpage(struct file *fp, struct page *page)
{
struct inode *inode = page->mapping->host;
struct bio *bio = NULL;
unsigned int block_offset;
unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
sector_t page_start; /* address of page in fs blocks */
sector_t pblock;
unsigned int xlen;
unsigned int len;
unsigned int offset;
BUG_ON(!PageLocked(page));
page_start = (sector_t)page->index <<
(PAGE_CACHE_SHIFT - inode->i_blkbits);
block_offset = 0;
while (block_offset < blocks_per_page) {
xlen = blocks_per_page - block_offset;
pblock = metapage_get_blocks(inode, page_start + block_offset,
&xlen);
if (pblock) {
if (!PagePrivate(page))
insert_metapage(page, NULL);
inc_io(page);
if (bio)
submit_bio(READ, bio);
bio = bio_alloc(GFP_NOFS, 1);
bio->bi_bdev = inode->i_sb->s_bdev;
bio->bi_sector = pblock << (inode->i_blkbits - 9);
bio->bi_end_io = metapage_read_end_io;
bio->bi_private = page;
len = xlen << inode->i_blkbits;
offset = block_offset << inode->i_blkbits;
if (bio_add_page(bio, page, len, offset) < len)
goto add_failed;
block_offset += xlen;
} else
block_offset++;
}
if (bio)
submit_bio(READ, bio);
else
unlock_page(page);
return 0;
add_failed:
printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
bio_put(bio);
dec_io(page, last_read_complete);
return -EIO;
}
static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
{
struct metapage *mp;
int ret = 1;
unsigned int offset;
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (!mp)
continue;
jfs_info("metapage_releasepage: mp = 0x%p", mp);
if (mp->count || mp->nohomeok ||
test_bit(META_dirty, &mp->flag)) {
jfs_info("count = %ld, nohomeok = %d", mp->count,
mp->nohomeok);
ret = 0;
continue;
}
if (mp->lsn)
remove_from_logsync(mp);
remove_metapage(page, mp);
INCREMENT(mpStat.pagefree);
free_metapage(mp);
}
return ret;
}
static void metapage_invalidatepage(struct page *page, unsigned long offset)
{
BUG_ON(offset);
BUG_ON(PageWriteback(page));
metapage_releasepage(page, 0);
}
const struct address_space_operations jfs_metapage_aops = {
.readpage = metapage_readpage,
.writepage = metapage_writepage,
.sync_page = block_sync_page,
.releasepage = metapage_releasepage,
.invalidatepage = metapage_invalidatepage,
.set_page_dirty = __set_page_dirty_nobuffers,
};
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
unsigned int size, int absolute,
unsigned long new)
{
int l2BlocksPerPage;
int l2bsize;
struct address_space *mapping;
struct metapage *mp = NULL;
struct page *page;
unsigned long page_index;
unsigned long page_offset;
jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
inode->i_ino, lblock, absolute);
l2bsize = inode->i_blkbits;
l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
page_index = lblock >> l2BlocksPerPage;
page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
if ((page_offset + size) > PAGE_CACHE_SIZE) {
jfs_err("MetaData crosses page boundary!!");
jfs_err("lblock = %lx, size = %d", lblock, size);
dump_stack();
return NULL;
}
if (absolute)
mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
else {
/*
* If an nfs client tries to read an inode that is larger
* than any existing inodes, we may try to read past the
* end of the inode map
*/
if ((lblock << inode->i_blkbits) >= inode->i_size)
return NULL;
mapping = inode->i_mapping;
}
if (new && (PSIZE == PAGE_CACHE_SIZE)) {
page = grab_cache_page(mapping, page_index);
if (!page) {
jfs_err("grab_cache_page failed!");
return NULL;
}
SetPageUptodate(page);
} else {
page = read_mapping_page(mapping, page_index, NULL);
if (IS_ERR(page) || !PageUptodate(page)) {
jfs_err("read_mapping_page failed!");
return NULL;
}
lock_page(page);
}
mp = page_to_mp(page, page_offset);
if (mp) {
if (mp->logical_size != size) {
jfs_error(inode->i_sb,
"__get_metapage: mp->logical_size != size");
jfs_err("logical_size = %d, size = %d",
mp->logical_size, size);
dump_stack();
goto unlock;
}
mp->count++;
lock_metapage(mp);
if (test_bit(META_discard, &mp->flag)) {
if (!new) {
jfs_error(inode->i_sb,
"__get_metapage: using a "
"discarded metapage");
discard_metapage(mp);
goto unlock;
}
clear_bit(META_discard, &mp->flag);
}
} else {
INCREMENT(mpStat.pagealloc);
mp = alloc_metapage(GFP_NOFS);
mp->page = page;
mp->flag = 0;
mp->xflag = COMMIT_PAGE;
mp->count = 1;
mp->nohomeok = 0;
mp->logical_size = size;
mp->data = page_address(page) + page_offset;
mp->index = lblock;
if (unlikely(insert_metapage(page, mp))) {
free_metapage(mp);
goto unlock;
}
lock_metapage(mp);
}
if (new) {
jfs_info("zeroing mp = 0x%p", mp);
memset(mp->data, 0, PSIZE);
}
unlock_page(page);
jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
return mp;
unlock:
unlock_page(page);
return NULL;
}
void grab_metapage(struct metapage * mp)
{
jfs_info("grab_metapage: mp = 0x%p", mp);
page_cache_get(mp->page);
lock_page(mp->page);
mp->count++;
lock_metapage(mp);
unlock_page(mp->page);
}
void force_metapage(struct metapage *mp)
{
struct page *page = mp->page;
jfs_info("force_metapage: mp = 0x%p", mp);
set_bit(META_forcewrite, &mp->flag);
clear_bit(META_sync, &mp->flag);
page_cache_get(page);
lock_page(page);
set_page_dirty(page);
write_one_page(page, 1);
clear_bit(META_forcewrite, &mp->flag);
page_cache_release(page);
}
void hold_metapage(struct metapage *mp)
{
lock_page(mp->page);
}
void put_metapage(struct metapage *mp)
{
if (mp->count || mp->nohomeok) {
/* Someone else will release this */
unlock_page(mp->page);
return;
}
page_cache_get(mp->page);
mp->count++;
lock_metapage(mp);
unlock_page(mp->page);
release_metapage(mp);
}
void release_metapage(struct metapage * mp)
{
struct page *page = mp->page;
jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
BUG_ON(!page);
lock_page(page);
unlock_metapage(mp);
assert(mp->count);
if (--mp->count || mp->nohomeok) {
unlock_page(page);
page_cache_release(page);
return;
}
if (test_bit(META_dirty, &mp->flag)) {
set_page_dirty(page);
if (test_bit(META_sync, &mp->flag)) {
clear_bit(META_sync, &mp->flag);
write_one_page(page, 1);
lock_page(page); /* write_one_page unlocks the page */
}
} else if (mp->lsn) /* discard_metapage doesn't remove it */
remove_from_logsync(mp);
/* Try to keep metapages from using up too much memory */
drop_metapage(page, mp);
unlock_page(page);
page_cache_release(page);
}
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
sector_t lblock;
int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
int BlocksPerPage = 1 << l2BlocksPerPage;
/* All callers are interested in block device's mapping */
struct address_space *mapping =
JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
struct metapage *mp;
struct page *page;
unsigned int offset;
/*
* Mark metapages to discard. They will eventually be
* released, but should not be written.
*/
for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
lblock += BlocksPerPage) {
page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
if (!page)
continue;
for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (!mp)
continue;
if (mp->index < addr)
continue;
if (mp->index >= addr + len)
break;
clear_bit(META_dirty, &mp->flag);
set_bit(META_discard, &mp->flag);
if (mp->lsn)
remove_from_logsync(mp);
}
unlock_page(page);
page_cache_release(page);
}
}
#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
int *eof, void *data)
{
int len = 0;
off_t begin;
len += sprintf(buffer,
"JFS Metapage statistics\n"
"=======================\n"
"page allocations = %d\n"
"page frees = %d\n"
"lock waits = %d\n",
mpStat.pagealloc,
mpStat.pagefree,
mpStat.lockwait);
begin = offset;
*start = buffer + begin;
len -= begin;
if (len > length)
len = length;
else
*eof = 1;
if (len < 0)
len = 0;
return len;
}
#endif

155
fs/jfs/jfs_metapage.h Normal file
View File

@@ -0,0 +1,155 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_METAPAGE
#define _H_JFS_METAPAGE
#include <linux/pagemap.h>
struct metapage {
/* Common logsyncblk prefix (see jfs_logmgr.h) */
u16 xflag;
u16 unused;
lid_t lid;
int lsn;
struct list_head synclist;
/* End of logsyncblk prefix */
unsigned long flag; /* See Below */
unsigned long count; /* Reference count */
void *data; /* Data pointer */
sector_t index; /* block address of page */
wait_queue_head_t wait;
/* implementation */
struct page *page;
unsigned int logical_size;
/* Journal management */
int clsn;
int nohomeok;
struct jfs_log *log;
};
/* metapage flag */
#define META_locked 0
#define META_free 1
#define META_dirty 2
#define META_sync 3
#define META_discard 4
#define META_forcewrite 5
#define META_io 6
#define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag)
/* function prototypes */
extern int metapage_init(void);
extern void metapage_exit(void);
extern struct metapage *__get_metapage(struct inode *inode,
unsigned long lblock, unsigned int size,
int absolute, unsigned long new);
#define read_metapage(inode, lblock, size, absolute)\
__get_metapage(inode, lblock, size, absolute, false)
#define get_metapage(inode, lblock, size, absolute)\
__get_metapage(inode, lblock, size, absolute, true)
extern void release_metapage(struct metapage *);
extern void grab_metapage(struct metapage *);
extern void force_metapage(struct metapage *);
/*
* hold_metapage and put_metapage are used in conjuction. The page lock
* is not dropped between the two, so no other threads can get or release
* the metapage
*/
extern void hold_metapage(struct metapage *);
extern void put_metapage(struct metapage *);
static inline void write_metapage(struct metapage *mp)
{
set_bit(META_dirty, &mp->flag);
release_metapage(mp);
}
static inline void flush_metapage(struct metapage *mp)
{
set_bit(META_sync, &mp->flag);
write_metapage(mp);
}
static inline void discard_metapage(struct metapage *mp)
{
clear_bit(META_dirty, &mp->flag);
set_bit(META_discard, &mp->flag);
release_metapage(mp);
}
static inline void metapage_nohomeok(struct metapage *mp)
{
struct page *page = mp->page;
lock_page(page);
if (!mp->nohomeok++) {
mark_metapage_dirty(mp);
page_cache_get(page);
wait_on_page_writeback(page);
}
unlock_page(page);
}
/*
* This serializes access to mp->lsn when metapages are added to logsynclist
* without setting nohomeok. i.e. updating imap & dmap
*/
static inline void metapage_wait_for_io(struct metapage *mp)
{
if (test_bit(META_io, &mp->flag))
wait_on_page_writeback(mp->page);
}
/*
* This is called when already holding the metapage
*/
static inline void _metapage_homeok(struct metapage *mp)
{
if (!--mp->nohomeok)
page_cache_release(mp->page);
}
static inline void metapage_homeok(struct metapage *mp)
{
hold_metapage(mp);
_metapage_homeok(mp);
put_metapage(mp);
}
extern const struct address_space_operations jfs_metapage_aops;
/*
* This routines invalidate all pages for an extent.
*/
extern void __invalidate_metapages(struct inode *, s64, int);
#define invalidate_pxd_metapages(ip, pxd) \
__invalidate_metapages((ip), addressPXD(&(pxd)), lengthPXD(&(pxd)))
#define invalidate_dxd_metapages(ip, dxd) \
__invalidate_metapages((ip), addressDXD(&(dxd)), lengthDXD(&(dxd)))
#define invalidate_xad_metapages(ip, xad) \
__invalidate_metapages((ip), addressXAD(&(xad)), lengthXAD(&(xad)))
#endif /* _H_JFS_METAPAGE */

507
fs/jfs/jfs_mount.c Normal file
View File

@@ -0,0 +1,507 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Module: jfs_mount.c
*
* note: file system in transition to aggregate/fileset:
*
* file system mount is interpreted as the mount of aggregate,
* if not already mounted, and mount of the single/only fileset in
* the aggregate;
*
* a file system/aggregate is represented by an internal inode
* (aka mount inode) initialized with aggregate superblock;
* each vfs represents a fileset, and points to its "fileset inode
* allocation map inode" (aka fileset inode):
* (an aggregate itself is structured recursively as a filset:
* an internal vfs is constructed and points to its "fileset inode
* allocation map inode" (aka aggregate inode) where each inode
* represents a fileset inode) so that inode number is mapped to
* on-disk inode in uniform way at both aggregate and fileset level;
*
* each vnode/inode of a fileset is linked to its vfs (to facilitate
* per fileset inode operations, e.g., unmount of a fileset, etc.);
* each inode points to the mount inode (to facilitate access to
* per aggregate information, e.g., block size, etc.) as well as
* its file set inode.
*
* aggregate
* ipmnt
* mntvfs -> fileset ipimap+ -> aggregate ipbmap -> aggregate ipaimap;
* fileset vfs -> vp(1) <-> ... <-> vp(n) <->vproot;
*/
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "jfs_metapage.h"
#include "jfs_debug.h"
/*
* forward references
*/
static int chkSuper(struct super_block *);
static int logMOUNT(struct super_block *sb);
/*
* NAME: jfs_mount(sb)
*
* FUNCTION: vfs_mount()
*
* PARAMETER: sb - super block
*
* RETURN: -EBUSY - device already mounted or open for write
* -EBUSY - cvrdvp already mounted;
* -EBUSY - mount table full
* -ENOTDIR- cvrdvp not directory on a device mount
* -ENXIO - device open failure
*/
int jfs_mount(struct super_block *sb)
{
int rc = 0; /* Return code */
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct inode *ipaimap = NULL;
struct inode *ipaimap2 = NULL;
struct inode *ipimap = NULL;
struct inode *ipbmap = NULL;
/*
* read/validate superblock
* (initialize mount inode from the superblock)
*/
if ((rc = chkSuper(sb))) {
goto errout20;
}
ipaimap = diReadSpecial(sb, AGGREGATE_I, 0);
if (ipaimap == NULL) {
jfs_err("jfs_mount: Faild to read AGGREGATE_I");
rc = -EIO;
goto errout20;
}
sbi->ipaimap = ipaimap;
jfs_info("jfs_mount: ipaimap:0x%p", ipaimap);
/*
* initialize aggregate inode allocation map
*/
if ((rc = diMount(ipaimap))) {
jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc);
goto errout21;
}
/*
* open aggregate block allocation map
*/
ipbmap = diReadSpecial(sb, BMAP_I, 0);
if (ipbmap == NULL) {
rc = -EIO;
goto errout22;
}
jfs_info("jfs_mount: ipbmap:0x%p", ipbmap);
sbi->ipbmap = ipbmap;
/*
* initialize aggregate block allocation map
*/
if ((rc = dbMount(ipbmap))) {
jfs_err("jfs_mount: dbMount failed w/rc = %d", rc);
goto errout22;
}
/*
* open the secondary aggregate inode allocation map
*
* This is a duplicate of the aggregate inode allocation map.
*
* hand craft a vfs in the same fashion as we did to read ipaimap.
* By adding INOSPEREXT (32) to the inode number, we are telling
* diReadSpecial that we are reading from the secondary aggregate
* inode table. This also creates a unique entry in the inode hash
* table.
*/
if ((sbi->mntflag & JFS_BAD_SAIT) == 0) {
ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1);
if (ipaimap2 == 0) {
jfs_err("jfs_mount: Faild to read AGGREGATE_I");
rc = -EIO;
goto errout35;
}
sbi->ipaimap2 = ipaimap2;
jfs_info("jfs_mount: ipaimap2:0x%p", ipaimap2);
/*
* initialize secondary aggregate inode allocation map
*/
if ((rc = diMount(ipaimap2))) {
jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d",
rc);
goto errout35;
}
} else
/* Secondary aggregate inode table is not valid */
sbi->ipaimap2 = NULL;
/*
* mount (the only/single) fileset
*/
/*
* open fileset inode allocation map (aka fileset inode)
*/
ipimap = diReadSpecial(sb, FILESYSTEM_I, 0);
if (ipimap == NULL) {
jfs_err("jfs_mount: Failed to read FILESYSTEM_I");
/* open fileset secondary inode allocation map */
rc = -EIO;
goto errout40;
}
jfs_info("jfs_mount: ipimap:0x%p", ipimap);
/* map further access of per fileset inodes by the fileset inode */
sbi->ipimap = ipimap;
/* initialize fileset inode allocation map */
if ((rc = diMount(ipimap))) {
jfs_err("jfs_mount: diMount failed w/rc = %d", rc);
goto errout41;
}
goto out;
/*
* unwind on error
*/
errout41: /* close fileset inode allocation map inode */
diFreeSpecial(ipimap);
errout40: /* fileset closed */
/* close secondary aggregate inode allocation map */
if (ipaimap2) {
diUnmount(ipaimap2, 1);
diFreeSpecial(ipaimap2);
}
errout35:
/* close aggregate block allocation map */
dbUnmount(ipbmap, 1);
diFreeSpecial(ipbmap);
errout22: /* close aggregate inode allocation map */
diUnmount(ipaimap, 1);
errout21: /* close aggregate inodes */
diFreeSpecial(ipaimap);
errout20: /* aggregate closed */
out:
if (rc)
jfs_err("Mount JFS Failure: %d", rc);
return rc;
}
/*
* NAME: jfs_mount_rw(sb, remount)
*
* FUNCTION: Completes read-write mount, or remounts read-only volume
* as read-write
*/
int jfs_mount_rw(struct super_block *sb, int remount)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
int rc;
/*
* If we are re-mounting a previously read-only volume, we want to
* re-read the inode and block maps, since fsck.jfs may have updated
* them.
*/
if (remount) {
if (chkSuper(sb) || (sbi->state != FM_CLEAN))
return -EINVAL;
truncate_inode_pages(sbi->ipimap->i_mapping, 0);
truncate_inode_pages(sbi->ipbmap->i_mapping, 0);
diUnmount(sbi->ipimap, 1);
if ((rc = diMount(sbi->ipimap))) {
jfs_err("jfs_mount_rw: diMount failed!");
return rc;
}
dbUnmount(sbi->ipbmap, 1);
if ((rc = dbMount(sbi->ipbmap))) {
jfs_err("jfs_mount_rw: dbMount failed!");
return rc;
}
}
/*
* open/initialize log
*/
if ((rc = lmLogOpen(sb)))
return rc;
/*
* update file system superblock;
*/
if ((rc = updateSuper(sb, FM_MOUNT))) {
jfs_err("jfs_mount: updateSuper failed w/rc = %d", rc);
lmLogClose(sb);
return rc;
}
/*
* write MOUNT log record of the file system
*/
logMOUNT(sb);
return rc;
}
/*
* chkSuper()
*
* validate the superblock of the file system to be mounted and
* get the file system parameters.
*
* returns
* 0 with fragsize set if check successful
* error code if not successful
*/
static int chkSuper(struct super_block *sb)
{
int rc = 0;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_superblock *j_sb;
struct buffer_head *bh;
int AIM_bytesize, AIT_bytesize;
int expected_AIM_bytesize, expected_AIT_bytesize;
s64 AIM_byte_addr, AIT_byte_addr, fsckwsp_addr;
s64 byte_addr_diff0, byte_addr_diff1;
s32 bsize;
if ((rc = readSuper(sb, &bh)))
return rc;
j_sb = (struct jfs_superblock *)bh->b_data;
/*
* validate superblock
*/
/* validate fs signature */
if (strncmp(j_sb->s_magic, JFS_MAGIC, 4) ||
le32_to_cpu(j_sb->s_version) > JFS_VERSION) {
rc = -EINVAL;
goto out;
}
bsize = le32_to_cpu(j_sb->s_bsize);
#ifdef _JFS_4K
if (bsize != PSIZE) {
jfs_err("Currently only 4K block size supported!");
rc = -EINVAL;
goto out;
}
#endif /* _JFS_4K */
jfs_info("superblock: flag:0x%08x state:0x%08x size:0x%Lx",
le32_to_cpu(j_sb->s_flag), le32_to_cpu(j_sb->s_state),
(unsigned long long) le64_to_cpu(j_sb->s_size));
/* validate the descriptors for Secondary AIM and AIT */
if ((j_sb->s_flag & cpu_to_le32(JFS_BAD_SAIT)) !=
cpu_to_le32(JFS_BAD_SAIT)) {
expected_AIM_bytesize = 2 * PSIZE;
AIM_bytesize = lengthPXD(&(j_sb->s_aim2)) * bsize;
expected_AIT_bytesize = 4 * PSIZE;
AIT_bytesize = lengthPXD(&(j_sb->s_ait2)) * bsize;
AIM_byte_addr = addressPXD(&(j_sb->s_aim2)) * bsize;
AIT_byte_addr = addressPXD(&(j_sb->s_ait2)) * bsize;
byte_addr_diff0 = AIT_byte_addr - AIM_byte_addr;
fsckwsp_addr = addressPXD(&(j_sb->s_fsckpxd)) * bsize;
byte_addr_diff1 = fsckwsp_addr - AIT_byte_addr;
if ((AIM_bytesize != expected_AIM_bytesize) ||
(AIT_bytesize != expected_AIT_bytesize) ||
(byte_addr_diff0 != AIM_bytesize) ||
(byte_addr_diff1 <= AIT_bytesize))
j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
}
if ((j_sb->s_flag & cpu_to_le32(JFS_GROUPCOMMIT)) !=
cpu_to_le32(JFS_GROUPCOMMIT))
j_sb->s_flag |= cpu_to_le32(JFS_GROUPCOMMIT);
/* validate fs state */
if (j_sb->s_state != cpu_to_le32(FM_CLEAN) &&
!(sb->s_flags & MS_RDONLY)) {
jfs_err("jfs_mount: Mount Failure: File System Dirty.");
rc = -EINVAL;
goto out;
}
sbi->state = le32_to_cpu(j_sb->s_state);
sbi->mntflag = le32_to_cpu(j_sb->s_flag);
/*
* JFS always does I/O by 4K pages. Don't tell the buffer cache
* that we use anything else (leave s_blocksize alone).
*/
sbi->bsize = bsize;
sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize);
/*
* For now, ignore s_pbsize, l2bfactor. All I/O going through buffer
* cache.
*/
sbi->nbperpage = PSIZE >> sbi->l2bsize;
sbi->l2nbperpage = L2PSIZE - sbi->l2bsize;
sbi->l2niperblk = sbi->l2bsize - L2DISIZE;
if (sbi->mntflag & JFS_INLINELOG)
sbi->logpxd = j_sb->s_logpxd;
else {
sbi->logdev = new_decode_dev(le32_to_cpu(j_sb->s_logdev));
memcpy(sbi->uuid, j_sb->s_uuid, sizeof(sbi->uuid));
memcpy(sbi->loguuid, j_sb->s_loguuid, sizeof(sbi->uuid));
}
sbi->fsckpxd = j_sb->s_fsckpxd;
sbi->ait2 = j_sb->s_ait2;
out:
brelse(bh);
return rc;
}
/*
* updateSuper()
*
* update synchronously superblock if it is mounted read-write.
*/
int updateSuper(struct super_block *sb, uint state)
{
struct jfs_superblock *j_sb;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct buffer_head *bh;
int rc;
if (sbi->flag & JFS_NOINTEGRITY) {
if (state == FM_DIRTY) {
sbi->p_state = state;
return 0;
} else if (state == FM_MOUNT) {
sbi->p_state = sbi->state;
state = FM_DIRTY;
} else if (state == FM_CLEAN) {
state = sbi->p_state;
} else
jfs_err("updateSuper: bad state");
} else if (sbi->state == FM_DIRTY)
return 0;
if ((rc = readSuper(sb, &bh)))
return rc;
j_sb = (struct jfs_superblock *)bh->b_data;
j_sb->s_state = cpu_to_le32(state);
sbi->state = state;
if (state == FM_MOUNT) {
/* record log's dev_t and mount serial number */
j_sb->s_logdev = cpu_to_le32(new_encode_dev(sbi->log->bdev->bd_dev));
j_sb->s_logserial = cpu_to_le32(sbi->log->serial);
} else if (state == FM_CLEAN) {
/*
* If this volume is shared with OS/2, OS/2 will need to
* recalculate DASD usage, since we don't deal with it.
*/
if (j_sb->s_flag & cpu_to_le32(JFS_DASD_ENABLED))
j_sb->s_flag |= cpu_to_le32(JFS_DASD_PRIME);
}
mark_buffer_dirty(bh);
sync_dirty_buffer(bh);
brelse(bh);
return 0;
}
/*
* readSuper()
*
* read superblock by raw sector address
*/
int readSuper(struct super_block *sb, struct buffer_head **bpp)
{
/* read in primary superblock */
*bpp = sb_bread(sb, SUPER1_OFF >> sb->s_blocksize_bits);
if (*bpp)
return 0;
/* read in secondary/replicated superblock */
*bpp = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits);
if (*bpp)
return 0;
return -EIO;
}
/*
* logMOUNT()
*
* function: write a MOUNT log record for file system.
*
* MOUNT record keeps logredo() from processing log records
* for this file system past this point in log.
* it is harmless if mount fails.
*
* note: MOUNT record is at aggregate level, not at fileset level,
* since log records of previous mounts of a fileset
* (e.g., AFTER record of extent allocation) have to be processed
* to update block allocation map at aggregate level.
*/
static int logMOUNT(struct super_block *sb)
{
struct jfs_log *log = JFS_SBI(sb)->log;
struct lrd lrd;
lrd.logtid = 0;
lrd.backchain = 0;
lrd.type = cpu_to_le16(LOG_MOUNT);
lrd.length = 0;
lrd.aggregate = cpu_to_le32(new_encode_dev(sb->s_bdev->bd_dev));
lmLog(log, NULL, &lrd, NULL);
return 0;
}

121
fs/jfs/jfs_superblock.h Normal file
View File

@@ -0,0 +1,121 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2003
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_SUPERBLOCK
#define _H_JFS_SUPERBLOCK
/*
* make the magic number something a human could read
*/
#define JFS_MAGIC "JFS1" /* Magic word */
#define JFS_VERSION 2 /* Version number: Version 2 */
#define LV_NAME_SIZE 11 /* MUST BE 11 for OS/2 boot sector */
/*
* aggregate superblock
*
* The name superblock is too close to super_block, so the name has been
* changed to jfs_superblock. The utilities are still using the old name.
*/
struct jfs_superblock {
char s_magic[4]; /* 4: magic number */
__le32 s_version; /* 4: version number */
__le64 s_size; /* 8: aggregate size in hardware/LVM blocks;
* VFS: number of blocks
*/
__le32 s_bsize; /* 4: aggregate block size in bytes;
* VFS: fragment size
*/
__le16 s_l2bsize; /* 2: log2 of s_bsize */
__le16 s_l2bfactor; /* 2: log2(s_bsize/hardware block size) */
__le32 s_pbsize; /* 4: hardware/LVM block size in bytes */
__le16 s_l2pbsize; /* 2: log2 of s_pbsize */
__le16 pad; /* 2: padding necessary for alignment */
__le32 s_agsize; /* 4: allocation group size in aggr. blocks */
__le32 s_flag; /* 4: aggregate attributes:
* see jfs_filsys.h
*/
__le32 s_state; /* 4: mount/unmount/recovery state:
* see jfs_filsys.h
*/
__le32 s_compress; /* 4: > 0 if data compression */
pxd_t s_ait2; /* 8: first extent of secondary
* aggregate inode table
*/
pxd_t s_aim2; /* 8: first extent of secondary
* aggregate inode map
*/
__le32 s_logdev; /* 4: device address of log */
__le32 s_logserial; /* 4: log serial number at aggregate mount */
pxd_t s_logpxd; /* 8: inline log extent */
pxd_t s_fsckpxd; /* 8: inline fsck work space extent */
struct timestruc_t s_time; /* 8: time last updated */
__le32 s_fsckloglen; /* 4: Number of filesystem blocks reserved for
* the fsck service log.
* N.B. These blocks are divided among the
* versions kept. This is not a per
* version size.
* N.B. These blocks are included in the
* length field of s_fsckpxd.
*/
s8 s_fscklog; /* 1: which fsck service log is most recent
* 0 => no service log data yet
* 1 => the first one
* 2 => the 2nd one
*/
char s_fpack[11]; /* 11: file system volume name
* N.B. This must be 11 bytes to
* conform with the OS/2 BootSector
* requirements
* Only used when s_version is 1
*/
/* extendfs() parameter under s_state & FM_EXTENDFS */
__le64 s_xsize; /* 8: extendfs s_size */
pxd_t s_xfsckpxd; /* 8: extendfs fsckpxd */
pxd_t s_xlogpxd; /* 8: extendfs logpxd */
/* - 128 byte boundary - */
char s_uuid[16]; /* 16: 128-bit uuid for volume */
char s_label[16]; /* 16: volume label */
char s_loguuid[16]; /* 16: 128-bit uuid for log device */
};
extern int readSuper(struct super_block *, struct buffer_head **);
extern int updateSuper(struct super_block *, uint);
extern void jfs_error(struct super_block *, const char *, ...);
extern int jfs_mount(struct super_block *);
extern int jfs_mount_rw(struct super_block *, int);
extern int jfs_umount(struct super_block *);
extern int jfs_umount_rw(struct super_block *);
extern int jfs_extendfs(struct super_block *, s64, int);
extern struct task_struct *jfsIOthread;
extern struct task_struct *jfsSyncThread;
#endif /*_H_JFS_SUPERBLOCK */

3097
fs/jfs/jfs_txnmgr.c Normal file

File diff suppressed because it is too large Load Diff

311
fs/jfs/jfs_txnmgr.h Normal file
View File

@@ -0,0 +1,311 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_TXNMGR
#define _H_JFS_TXNMGR
#include "jfs_logmgr.h"
/*
* Hide implementation of TxBlock and TxLock
*/
#define tid_to_tblock(tid) (&TxBlock[tid])
#define lid_to_tlock(lid) (&TxLock[lid])
/*
* transaction block
*/
struct tblock {
/*
* tblock and jbuf_t common area: struct logsyncblk
*
* the following 5 fields are the same as struct logsyncblk
* which is common to tblock and jbuf to form logsynclist
*/
u16 xflag; /* tx commit type */
u16 flag; /* tx commit state */
lid_t dummy; /* Must keep structures common */
s32 lsn; /* recovery lsn */
struct list_head synclist; /* logsynclist link */
/* lock management */
struct super_block *sb; /* super block */
lid_t next; /* index of first tlock of tid */
lid_t last; /* index of last tlock of tid */
wait_queue_head_t waitor; /* tids waiting on this tid */
/* log management */
u32 logtid; /* log transaction id */
/* commit management */
struct list_head cqueue; /* commit queue list */
s32 clsn; /* commit lsn */
struct lbuf *bp;
s32 pn; /* commit record log page number */
s32 eor; /* commit record eor */
wait_queue_head_t gcwait; /* group commit event list:
* ready transactions wait on this
* event for group commit completion.
*/
union {
struct inode *ip; /* inode being deleted */
pxd_t ixpxd; /* pxd of inode extent for created inode */
} u;
u32 ino; /* inode number being created */
};
extern struct tblock *TxBlock; /* transaction block table */
/* commit flags: tblk->xflag */
#define COMMIT_SYNC 0x0001 /* synchronous commit */
#define COMMIT_FORCE 0x0002 /* force pageout at end of commit */
#define COMMIT_FLUSH 0x0004 /* init flush at end of commit */
#define COMMIT_MAP 0x00f0
#define COMMIT_PMAP 0x0010 /* update pmap */
#define COMMIT_WMAP 0x0020 /* update wmap */
#define COMMIT_PWMAP 0x0040 /* update pwmap */
#define COMMIT_FREE 0x0f00
#define COMMIT_DELETE 0x0100 /* inode delete */
#define COMMIT_TRUNCATE 0x0200 /* file truncation */
#define COMMIT_CREATE 0x0400 /* inode create */
#define COMMIT_LAZY 0x0800 /* lazy commit */
#define COMMIT_PAGE 0x1000 /* Identifies element as metapage */
#define COMMIT_INODE 0x2000 /* Identifies element as inode */
/* group commit flags tblk->flag: see jfs_logmgr.h */
/*
* transaction lock
*/
struct tlock {
lid_t next; /* 2: index next lockword on tid locklist
* next lockword on freelist
*/
tid_t tid; /* 2: transaction id holding lock */
u16 flag; /* 2: lock control */
u16 type; /* 2: log type */
struct metapage *mp; /* 4/8: object page buffer locked */
struct inode *ip; /* 4/8: object */
/* (16) */
s16 lock[24]; /* 48: overlay area */
}; /* (64) */
extern struct tlock *TxLock; /* transaction lock table */
/*
* tlock flag
*/
/* txLock state */
#define tlckPAGELOCK 0x8000
#define tlckINODELOCK 0x4000
#define tlckLINELOCK 0x2000
#define tlckINLINELOCK 0x1000
/* lmLog state */
#define tlckLOG 0x0800
/* updateMap state */
#define tlckUPDATEMAP 0x0080
#define tlckDIRECTORY 0x0040
/* freeLock state */
#define tlckFREELOCK 0x0008
#define tlckWRITEPAGE 0x0004
#define tlckFREEPAGE 0x0002
/*
* tlock type
*/
#define tlckTYPE 0xfe00
#define tlckINODE 0x8000
#define tlckXTREE 0x4000
#define tlckDTREE 0x2000
#define tlckMAP 0x1000
#define tlckEA 0x0800
#define tlckACL 0x0400
#define tlckDATA 0x0200
#define tlckBTROOT 0x0100
#define tlckOPERATION 0x00ff
#define tlckGROW 0x0001 /* file grow */
#define tlckREMOVE 0x0002 /* file delete */
#define tlckTRUNCATE 0x0004 /* file truncate */
#define tlckRELOCATE 0x0008 /* file/directory relocate */
#define tlckENTRY 0x0001 /* directory insert/delete */
#define tlckEXTEND 0x0002 /* directory extend in-line */
#define tlckSPLIT 0x0010 /* splited page */
#define tlckNEW 0x0020 /* new page from split */
#define tlckFREE 0x0040 /* free page */
#define tlckRELINK 0x0080 /* update sibling pointer */
/*
* linelock for lmLog()
*
* note: linelock and its variations are overlaid
* at tlock.lock: watch for alignment;
*/
struct lv {
u8 offset; /* 1: */
u8 length; /* 1: */
}; /* (2) */
#define TLOCKSHORT 20
#define TLOCKLONG 28
struct linelock {
lid_t next; /* 2: next linelock */
s8 maxcnt; /* 1: */
s8 index; /* 1: */
u16 flag; /* 2: */
u8 type; /* 1: */
u8 l2linesize; /* 1: log2 of linesize */
/* (8) */
struct lv lv[20]; /* 40: */
}; /* (48) */
#define dt_lock linelock
struct xtlock {
lid_t next; /* 2: */
s8 maxcnt; /* 1: */
s8 index; /* 1: */
u16 flag; /* 2: */
u8 type; /* 1: */
u8 l2linesize; /* 1: log2 of linesize */
/* (8) */
struct lv header; /* 2: */
struct lv lwm; /* 2: low water mark */
struct lv hwm; /* 2: high water mark */
struct lv twm; /* 2: */
/* (16) */
s32 pxdlock[8]; /* 32: */
}; /* (48) */
/*
* maplock for txUpdateMap()
*
* note: maplock and its variations are overlaid
* at tlock.lock/linelock: watch for alignment;
* N.B. next field may be set by linelock, and should not
* be modified by maplock;
* N.B. index of the first pxdlock specifies index of next
* free maplock (i.e., number of maplock) in the tlock;
*/
struct maplock {
lid_t next; /* 2: */
u8 maxcnt; /* 2: */
u8 index; /* 2: next free maplock index */
u16 flag; /* 2: */
u8 type; /* 1: */
u8 count; /* 1: number of pxd/xad */
/* (8) */
pxd_t pxd; /* 8: */
}; /* (16): */
/* maplock flag */
#define mlckALLOC 0x00f0
#define mlckALLOCXADLIST 0x0080
#define mlckALLOCPXDLIST 0x0040
#define mlckALLOCXAD 0x0020
#define mlckALLOCPXD 0x0010
#define mlckFREE 0x000f
#define mlckFREEXADLIST 0x0008
#define mlckFREEPXDLIST 0x0004
#define mlckFREEXAD 0x0002
#define mlckFREEPXD 0x0001
#define pxd_lock maplock
struct xdlistlock {
lid_t next; /* 2: */
u8 maxcnt; /* 2: */
u8 index; /* 2: */
u16 flag; /* 2: */
u8 type; /* 1: */
u8 count; /* 1: number of pxd/xad */
/* (8) */
/*
* We need xdlist to be 64 bits (8 bytes), regardless of
* whether void * is 32 or 64 bits
*/
union {
void *_xdlist; /* pxd/xad list */
s64 pad; /* 8: Force 64-bit xdlist size */
} union64;
}; /* (16): */
#define xdlist union64._xdlist
/*
* commit
*
* parameter to the commit manager routines
*/
struct commit {
tid_t tid; /* tid = index of tblock */
int flag; /* flags */
struct jfs_log *log; /* log */
struct super_block *sb; /* superblock */
int nip; /* number of entries in iplist */
struct inode **iplist; /* list of pointers to inodes */
/* log record descriptor on 64-bit boundary */
struct lrd lrd; /* : log record descriptor */
};
/*
* external declarations
*/
extern int jfs_tlocks_low;
extern int txInit(void);
extern void txExit(void);
extern struct tlock *txLock(tid_t, struct inode *, struct metapage *, int);
extern struct tlock *txMaplock(tid_t, struct inode *, int);
extern int txCommit(tid_t, int, struct inode **, int);
extern tid_t txBegin(struct super_block *, int);
extern void txBeginAnon(struct super_block *);
extern void txEnd(tid_t);
extern void txAbort(tid_t, int);
extern struct linelock *txLinelock(struct linelock *);
extern void txFreeMap(struct inode *, struct maplock *, struct tblock *, int);
extern void txEA(tid_t, struct inode *, dxd_t *, dxd_t *);
extern void txFreelock(struct inode *);
extern int lmLog(struct jfs_log *, struct tblock *, struct lrd *,
struct tlock *);
extern void txQuiesce(struct super_block *);
extern void txResume(struct super_block *);
extern void txLazyUnlock(struct tblock *);
extern int jfs_lazycommit(void *);
extern int jfs_sync(void *);
#endif /* _H_JFS_TXNMGR */

188
fs/jfs/jfs_types.h Normal file
View File

@@ -0,0 +1,188 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_TYPES
#define _H_JFS_TYPES
/*
* jfs_types.h:
*
* basic type/utility definitions
*
* note: this header file must be the 1st include file
* of JFS include list in all JFS .c file.
*/
#include <linux/types.h>
#include <linux/nls.h>
#include "endian24.h"
/*
* transaction and lock id's
*
* Don't change these without carefully considering the impact on the
* size and alignment of all of the linelock variants
*/
typedef u16 tid_t;
typedef u16 lid_t;
/*
* Almost identical to Linux's timespec, but not quite
*/
struct timestruc_t {
__le32 tv_sec;
__le32 tv_nsec;
};
/*
* handy
*/
#define LEFTMOSTONE 0x80000000
#define HIGHORDER 0x80000000u /* high order bit on */
#define ONES 0xffffffffu /* all bit on */
/*
* logical xd (lxd)
*/
typedef struct {
unsigned len:24;
unsigned off1:8;
u32 off2;
} lxd_t;
/* lxd_t field construction */
#define LXDlength(lxd, length32) ( (lxd)->len = length32 )
#define LXDoffset(lxd, offset64)\
{\
(lxd)->off1 = ((s64)offset64) >> 32;\
(lxd)->off2 = (offset64) & 0xffffffff;\
}
/* lxd_t field extraction */
#define lengthLXD(lxd) ( (lxd)->len )
#define offsetLXD(lxd)\
( ((s64)((lxd)->off1)) << 32 | (lxd)->off2 )
/* lxd list */
struct lxdlist {
s16 maxnlxd;
s16 nlxd;
lxd_t *lxd;
};
/*
* physical xd (pxd)
*/
typedef struct {
unsigned len:24;
unsigned addr1:8;
__le32 addr2;
} pxd_t;
/* xd_t field construction */
#define PXDlength(pxd, length32) ((pxd)->len = __cpu_to_le24(length32))
#define PXDaddress(pxd, address64)\
{\
(pxd)->addr1 = ((s64)address64) >> 32;\
(pxd)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
}
/* xd_t field extraction */
#define lengthPXD(pxd) __le24_to_cpu((pxd)->len)
#define addressPXD(pxd)\
( ((s64)((pxd)->addr1)) << 32 | __le32_to_cpu((pxd)->addr2))
#define MAXTREEHEIGHT 8
/* pxd list */
struct pxdlist {
s16 maxnpxd;
s16 npxd;
pxd_t pxd[MAXTREEHEIGHT];
};
/*
* data extent descriptor (dxd)
*/
typedef struct {
unsigned flag:8; /* 1: flags */
unsigned rsrvd:24;
__le32 size; /* 4: size in byte */
unsigned len:24; /* 3: length in unit of fsblksize */
unsigned addr1:8; /* 1: address in unit of fsblksize */
__le32 addr2; /* 4: address in unit of fsblksize */
} dxd_t; /* - 16 - */
/* dxd_t flags */
#define DXD_INDEX 0x80 /* B+-tree index */
#define DXD_INLINE 0x40 /* in-line data extent */
#define DXD_EXTENT 0x20 /* out-of-line single extent */
#define DXD_FILE 0x10 /* out-of-line file (inode) */
#define DXD_CORRUPT 0x08 /* Inconsistency detected */
/* dxd_t field construction
* Conveniently, the PXD macros work for DXD
*/
#define DXDlength PXDlength
#define DXDaddress PXDaddress
#define lengthDXD lengthPXD
#define addressDXD addressPXD
#define DXDsize(dxd, size32) ((dxd)->size = cpu_to_le32(size32))
#define sizeDXD(dxd) le32_to_cpu((dxd)->size)
/*
* directory entry argument
*/
struct component_name {
int namlen;
wchar_t *name;
};
/*
* DASD limit information - stored in directory inode
*/
struct dasd {
u8 thresh; /* Alert Threshold (in percent) */
u8 delta; /* Alert Threshold delta (in percent) */
u8 rsrvd1;
u8 limit_hi; /* DASD limit (in logical blocks) */
__le32 limit_lo; /* DASD limit (in logical blocks) */
u8 rsrvd2[3];
u8 used_hi; /* DASD usage (in logical blocks) */
__le32 used_lo; /* DASD usage (in logical blocks) */
};
#define DASDLIMIT(dasdp) \
(((u64)((dasdp)->limit_hi) << 32) + __le32_to_cpu((dasdp)->limit_lo))
#define setDASDLIMIT(dasdp, limit)\
{\
(dasdp)->limit_hi = ((u64)limit) >> 32;\
(dasdp)->limit_lo = __cpu_to_le32(limit);\
}
#define DASDUSED(dasdp) \
(((u64)((dasdp)->used_hi) << 32) + __le32_to_cpu((dasdp)->used_lo))
#define setDASDUSED(dasdp, used)\
{\
(dasdp)->used_hi = ((u64)used) >> 32;\
(dasdp)->used_lo = __cpu_to_le32(used);\
}
#endif /* !_H_JFS_TYPES */

168
fs/jfs/jfs_umount.c Normal file
View File

@@ -0,0 +1,168 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* jfs_umount.c
*
* note: file system in transition to aggregate/fileset:
* (ref. jfs_mount.c)
*
* file system unmount is interpreted as mount of the single/only
* fileset in the aggregate and, if unmount of the last fileset,
* as unmount of the aggerate;
*/
#include <linux/fs.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "jfs_metapage.h"
#include "jfs_debug.h"
/*
* NAME: jfs_umount(vfsp, flags, crp)
*
* FUNCTION: vfs_umount()
*
* PARAMETERS: vfsp - virtual file system pointer
* flags - unmount for shutdown
* crp - credential
*
* RETURN : EBUSY - device has open files
*/
int jfs_umount(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct inode *ipbmap = sbi->ipbmap;
struct inode *ipimap = sbi->ipimap;
struct inode *ipaimap = sbi->ipaimap;
struct inode *ipaimap2 = sbi->ipaimap2;
struct jfs_log *log;
int rc = 0;
jfs_info("UnMount JFS: sb:0x%p", sb);
/*
* update superblock and close log
*
* if mounted read-write and log based recovery was enabled
*/
if ((log = sbi->log))
/*
* Wait for outstanding transactions to be written to log:
*/
jfs_flush_journal(log, 2);
/*
* close fileset inode allocation map (aka fileset inode)
*/
diUnmount(ipimap, 0);
diFreeSpecial(ipimap);
sbi->ipimap = NULL;
/*
* close secondary aggregate inode allocation map
*/
ipaimap2 = sbi->ipaimap2;
if (ipaimap2) {
diUnmount(ipaimap2, 0);
diFreeSpecial(ipaimap2);
sbi->ipaimap2 = NULL;
}
/*
* close aggregate inode allocation map
*/
ipaimap = sbi->ipaimap;
diUnmount(ipaimap, 0);
diFreeSpecial(ipaimap);
sbi->ipaimap = NULL;
/*
* close aggregate block allocation map
*/
dbUnmount(ipbmap, 0);
diFreeSpecial(ipbmap);
sbi->ipimap = NULL;
/*
* Make sure all metadata makes it to disk before we mark
* the superblock as clean
*/
filemap_write_and_wait(sbi->direct_inode->i_mapping);
/*
* ensure all file system file pages are propagated to their
* home blocks on disk (and their in-memory buffer pages are
* invalidated) BEFORE updating file system superblock state
* (to signify file system is unmounted cleanly, and thus in
* consistent state) and log superblock active file system
* list (to signify skip logredo()).
*/
if (log) { /* log = NULL if read-only mount */
updateSuper(sb, FM_CLEAN);
/*
* close log:
*
* remove file system from log active file system list.
*/
rc = lmLogClose(sb);
}
jfs_info("UnMount JFS Complete: rc = %d", rc);
return rc;
}
int jfs_umount_rw(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
if (!log)
return 0;
/*
* close log:
*
* remove file system from log active file system list.
*/
jfs_flush_journal(log, 2);
/*
* Make sure all metadata makes it to disk
*/
dbSync(sbi->ipbmap);
diSync(sbi->ipimap);
/*
* Note that we have to do this even if sync_blockdev() will
* do exactly the same a few instructions later: We can't
* mark the superblock clean before everything is flushed to
* disk.
*/
filemap_write_and_wait(sbi->direct_inode->i_mapping);
updateSuper(sb, FM_CLEAN);
return lmLogClose(sb);
}

138
fs/jfs/jfs_unicode.c Normal file
View File

@@ -0,0 +1,138 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/slab.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_unicode.h"
#include "jfs_debug.h"
/*
* NAME: jfs_strfromUCS()
*
* FUNCTION: Convert little-endian unicode string to character string
*
*/
int jfs_strfromUCS_le(char *to, const __le16 * from,
int len, struct nls_table *codepage)
{
int i;
int outlen = 0;
static int warn_again = 5; /* Only warn up to 5 times total */
int warn = !!warn_again; /* once per string */
if (codepage) {
for (i = 0; (i < len) && from[i]; i++) {
int charlen;
charlen =
codepage->uni2char(le16_to_cpu(from[i]),
&to[outlen],
NLS_MAX_CHARSET_SIZE);
if (charlen > 0)
outlen += charlen;
else
to[outlen++] = '?';
}
} else {
for (i = 0; (i < len) && from[i]; i++) {
if (unlikely(le16_to_cpu(from[i]) & 0xff00)) {
to[i] = '?';
if (unlikely(warn)) {
warn--;
warn_again--;
printk(KERN_ERR
"non-latin1 character 0x%x found in JFS file name\n",
le16_to_cpu(from[i]));
printk(KERN_ERR
"mount with iocharset=utf8 to access\n");
}
}
else
to[i] = (char) (le16_to_cpu(from[i]));
}
outlen = i;
}
to[outlen] = 0;
return outlen;
}
/*
* NAME: jfs_strtoUCS()
*
* FUNCTION: Convert character string to unicode string
*
*/
static int jfs_strtoUCS(wchar_t * to, const unsigned char *from, int len,
struct nls_table *codepage)
{
int charlen;
int i;
if (codepage) {
for (i = 0; len && *from; i++, from += charlen, len -= charlen)
{
charlen = codepage->char2uni(from, len, &to[i]);
if (charlen < 1) {
jfs_err("jfs_strtoUCS: char2uni returned %d.",
charlen);
jfs_err("charset = %s, char = 0x%x",
codepage->charset, *from);
return charlen;
}
}
} else {
for (i = 0; (i < len) && from[i]; i++)
to[i] = (wchar_t) from[i];
}
to[i] = 0;
return i;
}
/*
* NAME: get_UCSname()
*
* FUNCTION: Allocate and translate to unicode string
*
*/
int get_UCSname(struct component_name * uniName, struct dentry *dentry)
{
struct nls_table *nls_tab = JFS_SBI(dentry->d_sb)->nls_tab;
int length = dentry->d_name.len;
if (length > JFS_NAME_MAX)
return -ENAMETOOLONG;
uniName->name =
kmalloc((length + 1) * sizeof(wchar_t), GFP_NOFS);
if (uniName->name == NULL)
return -ENOMEM;
uniName->namlen = jfs_strtoUCS(uniName->name, dentry->d_name.name,
length, nls_tab);
if (uniName->namlen < 0) {
kfree(uniName->name);
return uniName->namlen;
}
return 0;
}

155
fs/jfs/jfs_unicode.h Normal file
View File

@@ -0,0 +1,155 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_UNICODE
#define _H_JFS_UNICODE
#include <asm/byteorder.h>
#include "jfs_types.h"
typedef struct {
wchar_t start;
wchar_t end;
signed char *table;
} UNICASERANGE;
extern signed char UniUpperTable[512];
extern UNICASERANGE UniUpperRange[];
extern int get_UCSname(struct component_name *, struct dentry *);
extern int jfs_strfromUCS_le(char *, const __le16 *, int, struct nls_table *);
#define free_UCSname(COMP) kfree((COMP)->name)
/*
* UniStrcpy: Copy a string
*/
static inline wchar_t *UniStrcpy(wchar_t * ucs1, const wchar_t * ucs2)
{
wchar_t *anchor = ucs1; /* save the start of result string */
while ((*ucs1++ = *ucs2++));
return anchor;
}
/*
* UniStrncpy: Copy length limited string with pad
*/
static inline __le16 *UniStrncpy_le(__le16 * ucs1, const __le16 * ucs2,
size_t n)
{
__le16 *anchor = ucs1;
while (n-- && *ucs2) /* Copy the strings */
*ucs1++ = *ucs2++;
n++;
while (n--) /* Pad with nulls */
*ucs1++ = 0;
return anchor;
}
/*
* UniStrncmp_le: Compare length limited string - native to little-endian
*/
static inline int UniStrncmp_le(const wchar_t * ucs1, const __le16 * ucs2,
size_t n)
{
if (!n)
return 0; /* Null strings are equal */
while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) {
ucs1++;
ucs2++;
}
return (int) *ucs1 - (int) __le16_to_cpu(*ucs2);
}
/*
* UniStrncpy_to_le: Copy length limited string with pad to little-endian
*/
static inline __le16 *UniStrncpy_to_le(__le16 * ucs1, const wchar_t * ucs2,
size_t n)
{
__le16 *anchor = ucs1;
while (n-- && *ucs2) /* Copy the strings */
*ucs1++ = cpu_to_le16(*ucs2++);
n++;
while (n--) /* Pad with nulls */
*ucs1++ = 0;
return anchor;
}
/*
* UniStrncpy_from_le: Copy length limited string with pad from little-endian
*/
static inline wchar_t *UniStrncpy_from_le(wchar_t * ucs1, const __le16 * ucs2,
size_t n)
{
wchar_t *anchor = ucs1;
while (n-- && *ucs2) /* Copy the strings */
*ucs1++ = __le16_to_cpu(*ucs2++);
n++;
while (n--) /* Pad with nulls */
*ucs1++ = 0;
return anchor;
}
/*
* UniToupper: Convert a unicode character to upper case
*/
static inline wchar_t UniToupper(wchar_t uc)
{
UNICASERANGE *rp;
if (uc < sizeof(UniUpperTable)) { /* Latin characters */
return uc + UniUpperTable[uc]; /* Use base tables */
} else {
rp = UniUpperRange; /* Use range tables */
while (rp->start) {
if (uc < rp->start) /* Before start of range */
return uc; /* Uppercase = input */
if (uc <= rp->end) /* In range */
return uc + rp->table[uc - rp->start];
rp++; /* Try next range */
}
}
return uc; /* Past last range */
}
/*
* UniStrupr: Upper case a unicode string
*/
static inline wchar_t *UniStrupr(wchar_t * upin)
{
wchar_t *up;
up = upin;
while (*up) { /* For all characters */
*up = UniToupper(*up);
up++;
}
return upin; /* Return input pointer */
}
#endif /* !_H_JFS_UNICODE */

134
fs/jfs/jfs_uniupr.c Normal file
View File

@@ -0,0 +1,134 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include "jfs_unicode.h"
/*
* Latin upper case
*/
signed char UniUpperTable[512] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 000-00f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 010-01f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 020-02f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 030-03f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 040-04f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 050-05f */
0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 060-06f */
-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, 0, 0, 0, 0, 0, /* 070-07f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 080-08f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 090-09f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0a0-0af */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0b0-0bf */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0c0-0cf */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0d0-0df */
-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 0e0-0ef */
-32,-32,-32,-32,-32,-32,-32, 0,-32,-32,-32,-32,-32,-32,-32,121, /* 0f0-0ff */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 100-10f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 110-11f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 120-12f */
0, 0, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 130-13f */
-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, /* 140-14f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 150-15f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 160-16f */
0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1, 0, -1, 0, /* 170-17f */
0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, /* 180-18f */
0, 0, -1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, /* 190-19f */
0, -1, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, /* 1a0-1af */
-1, 0, 0, 0, -1, 0, -1, 0, 0, -1, 0, 0, 0, -1, 0, 0, /* 1b0-1bf */
0, 0, 0, 0, 0, -1, -2, 0, -1, -2, 0, -1, -2, 0, -1, 0, /* 1c0-1cf */
-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1,-79, 0, -1, /* 1d0-1df */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e0-1ef */
0, 0, -1, -2, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, -1, /* 1f0-1ff */
};
/* Upper case range - Greek */
static signed char UniCaseRangeU03a0[47] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,-38,-37,-37,-37, /* 3a0-3af */
0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 3b0-3bf */
-32,-32,-31,-32,-32,-32,-32,-32,-32,-32,-32,-32,-64,-63,-63,
};
/* Upper case range - Cyrillic */
static signed char UniCaseRangeU0430[48] = {
-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 430-43f */
-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* 440-44f */
0,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80,-80, 0,-80,-80, /* 450-45f */
};
/* Upper case range - Extended cyrillic */
static signed char UniCaseRangeU0490[61] = {
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 490-49f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4a0-4af */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 4b0-4bf */
0, 0, -1, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1,
};
/* Upper case range - Extended latin and greek */
static signed char UniCaseRangeU1e00[509] = {
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e00-1e0f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e10-1e1f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e20-1e2f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e30-1e3f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e40-1e4f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e50-1e5f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e60-1e6f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e70-1e7f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1e80-1e8f */
0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0,-59, 0, -1, 0, -1, /* 1e90-1e9f */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ea0-1eaf */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1eb0-1ebf */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ec0-1ecf */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ed0-1edf */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, /* 1ee0-1eef */
0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, /* 1ef0-1eff */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f00-1f0f */
8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f10-1f1f */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f20-1f2f */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f30-1f3f */
8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f40-1f4f */
0, 8, 0, 8, 0, 8, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f50-1f5f */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f60-1f6f */
74, 74, 86, 86, 86, 86,100,100, 0, 0,112,112,126,126, 0, 0, /* 1f70-1f7f */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f80-1f8f */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1f90-1f9f */
8, 8, 8, 8, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fa0-1faf */
8, 8, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fb0-1fbf */
0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fc0-1fcf */
8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fd0-1fdf */
8, 8, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1fe0-1fef */
0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Upper case range - Wide latin */
static signed char UniCaseRangeUff40[27] = {
0,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32, /* ff40-ff4f */
-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,-32,
};
/*
* Upper Case Range
*/
UNICASERANGE UniUpperRange[] = {
{ 0x03a0, 0x03ce, UniCaseRangeU03a0 },
{ 0x0430, 0x045f, UniCaseRangeU0430 },
{ 0x0490, 0x04cc, UniCaseRangeU0490 },
{ 0x1e00, 0x1ffc, UniCaseRangeU1e00 },
{ 0xff40, 0xff5a, UniCaseRangeUff40 },
{ 0 }
};

74
fs/jfs/jfs_xattr.h Normal file
View File

@@ -0,0 +1,74 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef H_JFS_XATTR
#define H_JFS_XATTR
/*
* jfs_ea_list describe the on-disk format of the extended attributes.
* I know the null-terminator is redundant since namelen is stored, but
* I am maintaining compatibility with OS/2 where possible.
*/
struct jfs_ea {
u8 flag; /* Unused? */
u8 namelen; /* Length of name */
__le16 valuelen; /* Length of value */
char name[0]; /* Attribute name (includes null-terminator) */
}; /* Value immediately follows name */
struct jfs_ea_list {
__le32 size; /* overall size */
struct jfs_ea ea[0]; /* Variable length list */
};
/* Macros for defining maxiumum number of bytes supported for EAs */
#define MAXEASIZE 65535
#define MAXEALISTSIZE MAXEASIZE
/*
* some macros for dealing with variable length EA lists.
*/
#define EA_SIZE(ea) \
(sizeof (struct jfs_ea) + (ea)->namelen + 1 + \
le16_to_cpu((ea)->valuelen))
#define NEXT_EA(ea) ((struct jfs_ea *) (((char *) (ea)) + (EA_SIZE (ea))))
#define FIRST_EA(ealist) ((ealist)->ea)
#define EALIST_SIZE(ealist) le32_to_cpu((ealist)->size)
#define END_EALIST(ealist) \
((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist)))
extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *,
size_t, int);
extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t,
int);
extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t);
extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
extern int jfs_removexattr(struct dentry *, const char *);
#ifdef CONFIG_JFS_SECURITY
extern int jfs_init_security(tid_t, struct inode *, struct inode *);
#else
static inline int jfs_init_security(tid_t tid, struct inode *inode,
struct inode *dir)
{
return 0;
}
#endif
#endif /* H_JFS_XATTR */

4181
fs/jfs/jfs_xtree.c Normal file

File diff suppressed because it is too large Load Diff

134
fs/jfs/jfs_xtree.h Normal file
View File

@@ -0,0 +1,134 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _H_JFS_XTREE
#define _H_JFS_XTREE
/*
* jfs_xtree.h: extent allocation descriptor B+-tree manager
*/
#include "jfs_btree.h"
/*
* extent allocation descriptor (xad)
*/
typedef struct xad {
unsigned flag:8; /* 1: flag */
unsigned rsvrd:16; /* 2: reserved */
unsigned off1:8; /* 1: offset in unit of fsblksize */
__le32 off2; /* 4: offset in unit of fsblksize */
unsigned len:24; /* 3: length in unit of fsblksize */
unsigned addr1:8; /* 1: address in unit of fsblksize */
__le32 addr2; /* 4: address in unit of fsblksize */
} xad_t; /* (16) */
#define MAXXLEN ((1 << 24) - 1)
#define XTSLOTSIZE 16
#define L2XTSLOTSIZE 4
/* xad_t field construction */
#define XADoffset(xad, offset64)\
{\
(xad)->off1 = ((u64)offset64) >> 32;\
(xad)->off2 = __cpu_to_le32((offset64) & 0xffffffff);\
}
#define XADaddress(xad, address64)\
{\
(xad)->addr1 = ((u64)address64) >> 32;\
(xad)->addr2 = __cpu_to_le32((address64) & 0xffffffff);\
}
#define XADlength(xad, length32) (xad)->len = __cpu_to_le24(length32)
/* xad_t field extraction */
#define offsetXAD(xad)\
( ((s64)((xad)->off1)) << 32 | __le32_to_cpu((xad)->off2))
#define addressXAD(xad)\
( ((s64)((xad)->addr1)) << 32 | __le32_to_cpu((xad)->addr2))
#define lengthXAD(xad) __le24_to_cpu((xad)->len)
/* xad list */
struct xadlist {
s16 maxnxad;
s16 nxad;
xad_t *xad;
};
/* xad_t flags */
#define XAD_NEW 0x01 /* new */
#define XAD_EXTENDED 0x02 /* extended */
#define XAD_COMPRESSED 0x04 /* compressed with recorded length */
#define XAD_NOTRECORDED 0x08 /* allocated but not recorded */
#define XAD_COW 0x10 /* copy-on-write */
/* possible values for maxentry */
#define XTROOTINITSLOT_DIR 6
#define XTROOTINITSLOT 10
#define XTROOTMAXSLOT 18
#define XTPAGEMAXSLOT 256
#define XTENTRYSTART 2
/*
* xtree page:
*/
typedef union {
struct xtheader {
__le64 next; /* 8: */
__le64 prev; /* 8: */
u8 flag; /* 1: */
u8 rsrvd1; /* 1: */
__le16 nextindex; /* 2: next index = number of entries */
__le16 maxentry; /* 2: max number of entries */
__le16 rsrvd2; /* 2: */
pxd_t self; /* 8: self */
} header; /* (32) */
xad_t xad[XTROOTMAXSLOT]; /* 16 * maxentry: xad array */
} xtpage_t;
/*
* external declaration
*/
extern int xtLookup(struct inode *ip, s64 lstart, s64 llen,
int *pflag, s64 * paddr, int *plen, int flag);
extern int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
struct xadlist * xadlist, int flag);
extern void xtInitRoot(tid_t tid, struct inode *ip);
extern int xtInsert(tid_t tid, struct inode *ip,
int xflag, s64 xoff, int xlen, s64 * xaddrp, int flag);
extern int xtExtend(tid_t tid, struct inode *ip, s64 xoff, int xlen,
int flag);
#ifdef _NOTYET
extern int xtTailgate(tid_t tid, struct inode *ip,
s64 xoff, int xlen, s64 xaddr, int flag);
#endif
extern int xtUpdate(tid_t tid, struct inode *ip, struct xad *nxad);
extern int xtDelete(tid_t tid, struct inode *ip, s64 xoff, int xlen,
int flag);
extern s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int type);
extern s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size);
extern int xtRelocate(tid_t tid, struct inode *ip,
xad_t * oxad, s64 nxaddr, int xtype);
extern int xtAppend(tid_t tid,
struct inode *ip, int xflag, s64 xoff, int maxblocks,
int *xlenp, s64 * xaddrp, int flag);
#endif /* !_H_JFS_XTREE */

1574
fs/jfs/namei.c Normal file

File diff suppressed because it is too large Load Diff

539
fs/jfs/resize.c Normal file
View File

@@ -0,0 +1,539 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/quotaops.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dinode.h"
#include "jfs_imap.h"
#include "jfs_dmap.h"
#include "jfs_superblock.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
#define BITSPERPAGE (PSIZE << 3)
#define L2MEGABYTE 20
#define MEGABYTE (1 << L2MEGABYTE)
#define MEGABYTE32 (MEGABYTE << 5)
/* convert block number to bmap file page number */
#define BLKTODMAPN(b)\
(((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
/*
* jfs_extendfs()
*
* function: extend file system;
*
* |-------------------------------|----------|----------|
* file system space fsck inline log
* workspace space
*
* input:
* new LVSize: in LV blocks (required)
* new LogSize: in LV blocks (optional)
* new FSSize: in LV blocks (optional)
*
* new configuration:
* 1. set new LogSize as specified or default from new LVSize;
* 2. compute new FSCKSize from new LVSize;
* 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
* assert(new FSSize >= old FSSize),
* i.e., file system must not be shrinked;
*/
int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
{
int rc = 0;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct inode *ipbmap = sbi->ipbmap;
struct inode *ipbmap2;
struct inode *ipimap = sbi->ipimap;
struct jfs_log *log = sbi->log;
struct bmap *bmp = sbi->bmap;
s64 newLogAddress, newFSCKAddress;
int newFSCKSize;
s64 newMapSize = 0, mapSize;
s64 XAddress, XSize, nblocks, xoff, xaddr, t64;
s64 oldLVSize;
s64 newFSSize;
s64 VolumeSize;
int newNpages = 0, nPages, newPage, xlen, t32;
int tid;
int log_formatted = 0;
struct inode *iplist[1];
struct jfs_superblock *j_sb, *j_sb2;
uint old_agsize;
struct buffer_head *bh, *bh2;
/* If the volume hasn't grown, get out now */
if (sbi->mntflag & JFS_INLINELOG)
oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd);
else
oldLVSize = addressPXD(&sbi->fsckpxd) +
lengthPXD(&sbi->fsckpxd);
if (oldLVSize >= newLVSize) {
printk(KERN_WARNING
"jfs_extendfs: volume hasn't grown, returning\n");
goto out;
}
VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
if (VolumeSize) {
if (newLVSize > VolumeSize) {
printk(KERN_WARNING "jfs_extendfs: invalid size\n");
rc = -EINVAL;
goto out;
}
} else {
/* check the device */
bh = sb_bread(sb, newLVSize - 1);
if (!bh) {
printk(KERN_WARNING "jfs_extendfs: invalid size\n");
rc = -EINVAL;
goto out;
}
bforget(bh);
}
/* Can't extend write-protected drive */
if (isReadOnly(ipbmap)) {
printk(KERN_WARNING "jfs_extendfs: read-only file system\n");
rc = -EROFS;
goto out;
}
/*
* reconfigure LV spaces
* ---------------------
*
* validate new size, or, if not specified, determine new size
*/
/*
* reconfigure inline log space:
*/
if ((sbi->mntflag & JFS_INLINELOG)) {
if (newLogSize == 0) {
/*
* no size specified: default to 1/256 of aggregate
* size; rounded up to a megabyte boundary;
*/
newLogSize = newLVSize >> 8;
t32 = (1 << (20 - sbi->l2bsize)) - 1;
newLogSize = (newLogSize + t32) & ~t32;
newLogSize =
min(newLogSize, MEGABYTE32 >> sbi->l2bsize);
} else {
/*
* convert the newLogSize to fs blocks.
*
* Since this is given in megabytes, it will always be
* an even number of pages.
*/
newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize;
}
} else
newLogSize = 0;
newLogAddress = newLVSize - newLogSize;
/*
* reconfigure fsck work space:
*
* configure it to the end of the logical volume regardless of
* whether file system extends to the end of the aggregate;
* Need enough 4k pages to cover:
* - 1 bit per block in aggregate rounded up to BPERDMAP boundary
* - 1 extra page to handle control page and intermediate level pages
* - 50 extra pages for the chkdsk service log
*/
t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP)
<< L2BPERDMAP;
t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50;
newFSCKSize = t32 << sbi->l2nbperpage;
newFSCKAddress = newLogAddress - newFSCKSize;
/*
* compute new file system space;
*/
newFSSize = newLVSize - newLogSize - newFSCKSize;
/* file system cannot be shrinked */
if (newFSSize < bmp->db_mapsize) {
rc = -EINVAL;
goto out;
}
/*
* If we're expanding enough that the inline log does not overlap
* the old one, we can format the new log before we quiesce the
* filesystem.
*/
if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) {
if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
goto out;
log_formatted = 1;
}
/*
* quiesce file system
*
* (prepare to move the inline log and to prevent map update)
*
* block any new transactions and wait for completion of
* all wip transactions and flush modified pages s.t.
* on-disk file system is in consistent state and
* log is not required for recovery.
*/
txQuiesce(sb);
/* Reset size of direct inode */
sbi->direct_inode->i_size = sb->s_bdev->bd_inode->i_size;
if (sbi->mntflag & JFS_INLINELOG) {
/*
* deactivate old inline log
*/
lmLogShutdown(log);
/*
* mark on-disk super block for fs in transition;
*
* update on-disk superblock for the new space configuration
* of inline log space and fsck work space descriptors:
* N.B. FS descriptor is NOT updated;
*
* crash recovery:
* logredo(): if FM_EXTENDFS, return to fsck() for cleanup;
* fsck(): if FM_EXTENDFS, reformat inline log and fsck
* workspace from superblock inline log descriptor and fsck
* workspace descriptor;
*/
/* read in superblock */
if ((rc = readSuper(sb, &bh)))
goto error_out;
j_sb = (struct jfs_superblock *)bh->b_data;
/* mark extendfs() in progress */
j_sb->s_state |= cpu_to_le32(FM_EXTENDFS);
j_sb->s_xsize = cpu_to_le64(newFSSize);
PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress);
PXDlength(&j_sb->s_xfsckpxd, newFSCKSize);
PXDaddress(&j_sb->s_xlogpxd, newLogAddress);
PXDlength(&j_sb->s_xlogpxd, newLogSize);
/* synchronously update superblock */
mark_buffer_dirty(bh);
sync_dirty_buffer(bh);
brelse(bh);
/*
* format new inline log synchronously;
*
* crash recovery: if log move in progress,
* reformat log and exit success;
*/
if (!log_formatted)
if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
goto error_out;
/*
* activate new log
*/
log->base = newLogAddress;
log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits);
if ((rc = lmLogInit(log)))
goto error_out;
}
/*
* extend block allocation map
* ---------------------------
*
* extendfs() for new extension, retry after crash recovery;
*
* note: both logredo() and fsck() rebuild map from
* the bitmap and configuration parameter from superblock
* (disregarding all other control information in the map);
*
* superblock:
* s_size: aggregate size in physical blocks;
*/
/*
* compute the new block allocation map configuration
*
* map dinode:
* di_size: map file size in byte;
* di_nblocks: number of blocks allocated for map file;
* di_mapsize: number of blocks in aggregate (covered by map);
* map control page:
* db_mapsize: number of blocks in aggregate (covered by map);
*/
newMapSize = newFSSize;
/* number of data pages of new bmap file:
* roundup new size to full dmap page boundary and
* add 1 extra dmap page for next extendfs()
*/
t64 = (newMapSize - 1) + BPERDMAP;
newNpages = BLKTODMAPN(t64) + 1;
/*
* extend map from current map (WITHOUT growing mapfile)
*
* map new extension with unmapped part of the last partial
* dmap page, if applicable, and extra page(s) allocated
* at end of bmap by mkfs() or previous extendfs();
*/
extendBmap:
/* compute number of blocks requested to extend */
mapSize = bmp->db_mapsize;
XAddress = mapSize; /* eXtension Address */
XSize = newMapSize - mapSize; /* eXtension Size */
old_agsize = bmp->db_agsize; /* We need to know if this changes */
/* compute number of blocks that can be extended by current mapfile */
t64 = dbMapFileSizeToMapSize(ipbmap);
if (mapSize > t64) {
printk(KERN_ERR "jfs_extendfs: mapSize (0x%Lx) > t64 (0x%Lx)\n",
(long long) mapSize, (long long) t64);
rc = -EIO;
goto error_out;
}
nblocks = min(t64 - mapSize, XSize);
/*
* update map pages for new extension:
*
* update/init dmap and bubble up the control hierarchy
* incrementally fold up dmaps into upper levels;
* update bmap control page;
*/
if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
goto error_out;
/*
* the map now has extended to cover additional nblocks:
* dn_mapsize = oldMapsize + nblocks;
*/
/* ipbmap->i_mapsize += nblocks; */
XSize -= nblocks;
/*
* grow map file to cover remaining extension
* and/or one extra dmap page for next extendfs();
*
* allocate new map pages and its backing blocks, and
* update map file xtree
*/
/* compute number of data pages of current bmap file */
nPages = ipbmap->i_size >> L2PSIZE;
/* need to grow map file ? */
if (nPages == newNpages)
goto finalizeBmap;
/*
* grow bmap file for the new map pages required:
*
* allocate growth at the start of newly extended region;
* bmap file only grows sequentially, i.e., both data pages
* and possibly xtree index pages may grow in append mode,
* s.t. logredo() can reconstruct pre-extension state
* by washing away bmap file of pages outside s_size boundary;
*/
/*
* journal map file growth as if a regular file growth:
* (note: bmap is created with di_mode = IFJOURNAL|IFREG);
*
* journaling of bmap file growth is not required since
* logredo() do/can not use log records of bmap file growth
* but it provides careful write semantics, pmap update, etc.;
*/
/* synchronous write of data pages: bmap data pages are
* cached in meta-data cache, and not written out
* by txCommit();
*/
filemap_fdatawait(ipbmap->i_mapping);
filemap_write_and_wait(ipbmap->i_mapping);
diWriteSpecial(ipbmap, 0);
newPage = nPages; /* first new page number */
xoff = newPage << sbi->l2nbperpage;
xlen = (newNpages - nPages) << sbi->l2nbperpage;
xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1);
xaddr = XAddress;
tid = txBegin(sb, COMMIT_FORCE);
if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) {
txEnd(tid);
goto error_out;
}
/* update bmap file size */
ipbmap->i_size += xlen << sbi->l2bsize;
inode_add_bytes(ipbmap, xlen << sbi->l2bsize);
iplist[0] = ipbmap;
rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
txEnd(tid);
if (rc)
goto error_out;
/*
* map file has been grown now to cover extension to further out;
* di_size = new map file size;
*
* if huge extension, the previous extension based on previous
* map file size may not have been sufficient to cover whole extension
* (it could have been used up for new map pages),
* but the newly grown map file now covers lot bigger new free space
* available for further extension of map;
*/
/* any more blocks to extend ? */
if (XSize)
goto extendBmap;
finalizeBmap:
/* finalize bmap */
dbFinalizeBmap(ipbmap);
/*
* update inode allocation map
* ---------------------------
*
* move iag lists from old to new iag;
* agstart field is not updated for logredo() to reconstruct
* iag lists if system crash occurs.
* (computation of ag number from agstart based on agsize
* will correctly identify the new ag);
*/
/* if new AG size the same as old AG size, done! */
if (bmp->db_agsize != old_agsize) {
if ((rc = diExtendFS(ipimap, ipbmap)))
goto error_out;
/* finalize imap */
if ((rc = diSync(ipimap)))
goto error_out;
}
/*
* finalize
* --------
*
* extension is committed when on-disk super block is
* updated with new descriptors: logredo will recover
* crash before it to pre-extension state;
*/
/* sync log to skip log replay of bmap file growth transaction; */
/* lmLogSync(log, 1); */
/*
* synchronous write bmap global control page;
* for crash before completion of write
* logredo() will recover to pre-extendfs state;
* for crash after completion of write,
* logredo() will recover post-extendfs state;
*/
if ((rc = dbSync(ipbmap)))
goto error_out;
/*
* copy primary bmap inode to secondary bmap inode
*/
ipbmap2 = diReadSpecial(sb, BMAP_I, 1);
if (ipbmap2 == NULL) {
printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n");
goto error_out;
}
memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288);
ipbmap2->i_size = ipbmap->i_size;
ipbmap2->i_blocks = ipbmap->i_blocks;
diWriteSpecial(ipbmap2, 1);
diFreeSpecial(ipbmap2);
/*
* update superblock
*/
if ((rc = readSuper(sb, &bh)))
goto error_out;
j_sb = (struct jfs_superblock *)bh->b_data;
/* mark extendfs() completion */
j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS);
j_sb->s_size = cpu_to_le64(bmp->db_mapsize <<
le16_to_cpu(j_sb->s_l2bfactor));
j_sb->s_agsize = cpu_to_le32(bmp->db_agsize);
/* update inline log space descriptor */
if (sbi->mntflag & JFS_INLINELOG) {
PXDaddress(&(j_sb->s_logpxd), newLogAddress);
PXDlength(&(j_sb->s_logpxd), newLogSize);
}
/* record log's mount serial number */
j_sb->s_logserial = cpu_to_le32(log->serial);
/* update fsck work space descriptor */
PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress);
PXDlength(&(j_sb->s_fsckpxd), newFSCKSize);
j_sb->s_fscklog = 1;
/* sb->s_fsckloglen remains the same */
/* Update secondary superblock */
bh2 = sb_bread(sb, SUPER2_OFF >> sb->s_blocksize_bits);
if (bh2) {
j_sb2 = (struct jfs_superblock *)bh2->b_data;
memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock));
mark_buffer_dirty(bh);
sync_dirty_buffer(bh2);
brelse(bh2);
}
/* write primary superblock */
mark_buffer_dirty(bh);
sync_dirty_buffer(bh);
brelse(bh);
goto resume;
error_out:
jfs_error(sb, "jfs_extendfs");
resume:
/*
* resume file system transactions
*/
txResume(sb);
out:
return rc;
}

874
fs/jfs/super.c Normal file
View File

@@ -0,0 +1,874 @@
/*
* Copyright (C) International Business Machines Corp., 2000-2004
* Portions Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/parser.h>
#include <linux/completion.h>
#include <linux/vfs.h>
#include <linux/mount.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/posix_acl.h>
#include <linux/buffer_head.h>
#include <asm/uaccess.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_inode.h"
#include "jfs_metapage.h"
#include "jfs_superblock.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "jfs_acl.h"
#include "jfs_debug.h"
MODULE_DESCRIPTION("The Journaled Filesystem (JFS)");
MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM");
MODULE_LICENSE("GPL");
static struct kmem_cache * jfs_inode_cachep;
static const struct super_operations jfs_super_operations;
static struct export_operations jfs_export_operations;
static struct file_system_type jfs_fs_type;
#define MAX_COMMIT_THREADS 64
static int commit_threads = 0;
module_param(commit_threads, int, 0);
MODULE_PARM_DESC(commit_threads, "Number of commit threads");
static struct task_struct *jfsCommitThread[MAX_COMMIT_THREADS];
struct task_struct *jfsIOthread;
struct task_struct *jfsSyncThread;
#ifdef CONFIG_JFS_DEBUG
int jfsloglevel = JFS_LOGLEVEL_WARN;
module_param(jfsloglevel, int, 0644);
MODULE_PARM_DESC(jfsloglevel, "Specify JFS loglevel (0, 1 or 2)");
#endif
static void jfs_handle_error(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
if (sb->s_flags & MS_RDONLY)
return;
updateSuper(sb, FM_DIRTY);
if (sbi->flag & JFS_ERR_PANIC)
panic("JFS (device %s): panic forced after error\n",
sb->s_id);
else if (sbi->flag & JFS_ERR_REMOUNT_RO) {
jfs_err("ERROR: (device %s): remounting filesystem "
"as read-only\n",
sb->s_id);
sb->s_flags |= MS_RDONLY;
}
/* nothing is done for continue beyond marking the superblock dirty */
}
void jfs_error(struct super_block *sb, const char * function, ...)
{
static char error_buf[256];
va_list args;
va_start(args, function);
vsnprintf(error_buf, sizeof(error_buf), function, args);
va_end(args);
printk(KERN_ERR "ERROR: (device %s): %s\n", sb->s_id, error_buf);
jfs_handle_error(sb);
}
static struct inode *jfs_alloc_inode(struct super_block *sb)
{
struct jfs_inode_info *jfs_inode;
jfs_inode = kmem_cache_alloc(jfs_inode_cachep, GFP_NOFS);
if (!jfs_inode)
return NULL;
return &jfs_inode->vfs_inode;
}
static void jfs_destroy_inode(struct inode *inode)
{
struct jfs_inode_info *ji = JFS_IP(inode);
BUG_ON(!list_empty(&ji->anon_inode_list));
spin_lock_irq(&ji->ag_lock);
if (ji->active_ag != -1) {
struct bmap *bmap = JFS_SBI(inode->i_sb)->bmap;
atomic_dec(&bmap->db_active[ji->active_ag]);
ji->active_ag = -1;
}
spin_unlock_irq(&ji->ag_lock);
#ifdef CONFIG_JFS_POSIX_ACL
if (ji->i_acl != JFS_ACL_NOT_CACHED) {
posix_acl_release(ji->i_acl);
ji->i_acl = JFS_ACL_NOT_CACHED;
}
if (ji->i_default_acl != JFS_ACL_NOT_CACHED) {
posix_acl_release(ji->i_default_acl);
ji->i_default_acl = JFS_ACL_NOT_CACHED;
}
#endif
kmem_cache_free(jfs_inode_cachep, ji);
}
static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct jfs_sb_info *sbi = JFS_SBI(dentry->d_sb);
s64 maxinodes;
struct inomap *imap = JFS_IP(sbi->ipimap)->i_imap;
jfs_info("In jfs_statfs");
buf->f_type = JFS_SUPER_MAGIC;
buf->f_bsize = sbi->bsize;
buf->f_blocks = sbi->bmap->db_mapsize;
buf->f_bfree = sbi->bmap->db_nfree;
buf->f_bavail = sbi->bmap->db_nfree;
/*
* If we really return the number of allocated & free inodes, some
* applications will fail because they won't see enough free inodes.
* We'll try to calculate some guess as to how may inodes we can
* really allocate
*
* buf->f_files = atomic_read(&imap->im_numinos);
* buf->f_ffree = atomic_read(&imap->im_numfree);
*/
maxinodes = min((s64) atomic_read(&imap->im_numinos) +
((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
<< L2INOSPEREXT), (s64) 0xffffffffLL);
buf->f_files = maxinodes;
buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
atomic_read(&imap->im_numfree));
buf->f_namelen = JFS_NAME_MAX;
return 0;
}
static void jfs_put_super(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
int rc;
jfs_info("In jfs_put_super");
rc = jfs_umount(sb);
if (rc)
jfs_err("jfs_umount failed with return code %d", rc);
if (sbi->nls_tab)
unload_nls(sbi->nls_tab);
sbi->nls_tab = NULL;
truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
iput(sbi->direct_inode);
sbi->direct_inode = NULL;
kfree(sbi);
}
enum {
Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize,
Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota,
Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask
};
static match_table_t tokens = {
{Opt_integrity, "integrity"},
{Opt_nointegrity, "nointegrity"},
{Opt_iocharset, "iocharset=%s"},
{Opt_resize, "resize=%u"},
{Opt_resize_nosize, "resize"},
{Opt_errors, "errors=%s"},
{Opt_ignore, "noquota"},
{Opt_ignore, "quota"},
{Opt_usrquota, "usrquota"},
{Opt_grpquota, "grpquota"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_umask, "umask=%u"},
{Opt_err, NULL}
};
static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
int *flag)
{
void *nls_map = (void *)-1; /* -1: no change; NULL: none */
char *p;
struct jfs_sb_info *sbi = JFS_SBI(sb);
*newLVSize = 0;
if (!options)
return 1;
while ((p = strsep(&options, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case Opt_integrity:
*flag &= ~JFS_NOINTEGRITY;
break;
case Opt_nointegrity:
*flag |= JFS_NOINTEGRITY;
break;
case Opt_ignore:
/* Silently ignore the quota options */
/* Don't do anything ;-) */
break;
case Opt_iocharset:
if (nls_map && nls_map != (void *) -1)
unload_nls(nls_map);
if (!strcmp(args[0].from, "none"))
nls_map = NULL;
else {
nls_map = load_nls(args[0].from);
if (!nls_map) {
printk(KERN_ERR
"JFS: charset not found\n");
goto cleanup;
}
}
break;
case Opt_resize:
{
char *resize = args[0].from;
*newLVSize = simple_strtoull(resize, &resize, 0);
break;
}
case Opt_resize_nosize:
{
*newLVSize = sb->s_bdev->bd_inode->i_size >>
sb->s_blocksize_bits;
if (*newLVSize == 0)
printk(KERN_ERR
"JFS: Cannot determine volume size\n");
break;
}
case Opt_errors:
{
char *errors = args[0].from;
if (!errors || !*errors)
goto cleanup;
if (!strcmp(errors, "continue")) {
*flag &= ~JFS_ERR_REMOUNT_RO;
*flag &= ~JFS_ERR_PANIC;
*flag |= JFS_ERR_CONTINUE;
} else if (!strcmp(errors, "remount-ro")) {
*flag &= ~JFS_ERR_CONTINUE;
*flag &= ~JFS_ERR_PANIC;
*flag |= JFS_ERR_REMOUNT_RO;
} else if (!strcmp(errors, "panic")) {
*flag &= ~JFS_ERR_CONTINUE;
*flag &= ~JFS_ERR_REMOUNT_RO;
*flag |= JFS_ERR_PANIC;
} else {
printk(KERN_ERR
"JFS: %s is an invalid error handler\n",
errors);
goto cleanup;
}
break;
}
#ifdef CONFIG_QUOTA
case Opt_quota:
case Opt_usrquota:
*flag |= JFS_USRQUOTA;
break;
case Opt_grpquota:
*flag |= JFS_GRPQUOTA;
break;
#else
case Opt_usrquota:
case Opt_grpquota:
case Opt_quota:
printk(KERN_ERR
"JFS: quota operations not supported\n");
break;
#endif
case Opt_uid:
{
char *uid = args[0].from;
sbi->uid = simple_strtoul(uid, &uid, 0);
break;
}
case Opt_gid:
{
char *gid = args[0].from;
sbi->gid = simple_strtoul(gid, &gid, 0);
break;
}
case Opt_umask:
{
char *umask = args[0].from;
sbi->umask = simple_strtoul(umask, &umask, 8);
if (sbi->umask & ~0777) {
printk(KERN_ERR
"JFS: Invalid value of umask\n");
goto cleanup;
}
break;
}
default:
printk("jfs: Unrecognized mount option \"%s\" "
" or missing value\n", p);
goto cleanup;
}
}
if (nls_map != (void *) -1) {
/* Discard old (if remount) */
if (sbi->nls_tab)
unload_nls(sbi->nls_tab);
sbi->nls_tab = nls_map;
}
return 1;
cleanup:
if (nls_map && nls_map != (void *) -1)
unload_nls(nls_map);
return 0;
}
static int jfs_remount(struct super_block *sb, int *flags, char *data)
{
s64 newLVSize = 0;
int rc = 0;
int flag = JFS_SBI(sb)->flag;
if (!parse_options(data, sb, &newLVSize, &flag)) {
return -EINVAL;
}
if (newLVSize) {
if (sb->s_flags & MS_RDONLY) {
printk(KERN_ERR
"JFS: resize requires volume to be mounted read-write\n");
return -EROFS;
}
rc = jfs_extendfs(sb, newLVSize, 0);
if (rc)
return rc;
}
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
/*
* Invalidate any previously read metadata. fsck may have
* changed the on-disk data since we mounted r/o
*/
truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
JFS_SBI(sb)->flag = flag;
return jfs_mount_rw(sb, 1);
}
if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
rc = jfs_umount_rw(sb);
JFS_SBI(sb)->flag = flag;
return rc;
}
if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
if (!(sb->s_flags & MS_RDONLY)) {
rc = jfs_umount_rw(sb);
if (rc)
return rc;
JFS_SBI(sb)->flag = flag;
return jfs_mount_rw(sb, 1);
}
JFS_SBI(sb)->flag = flag;
return 0;
}
static int jfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct jfs_sb_info *sbi;
struct inode *inode;
int rc;
s64 newLVSize = 0;
int flag;
jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags);
if (!new_valid_dev(sb->s_bdev->bd_dev))
return -EOVERFLOW;
sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
sbi->sb = sb;
sbi->uid = sbi->gid = sbi->umask = -1;
/* initialize the mount flag and determine the default error handler */
flag = JFS_ERR_REMOUNT_RO;
if (!parse_options((char *) data, sb, &newLVSize, &flag)) {
kfree(sbi);
return -EINVAL;
}
sbi->flag = flag;
#ifdef CONFIG_JFS_POSIX_ACL
sb->s_flags |= MS_POSIXACL;
#endif
if (newLVSize) {
printk(KERN_ERR "resize option for remount only\n");
return -EINVAL;
}
/*
* Initialize blocksize to 4K.
*/
sb_set_blocksize(sb, PSIZE);
/*
* Set method vectors.
*/
sb->s_op = &jfs_super_operations;
sb->s_export_op = &jfs_export_operations;
/*
* Initialize direct-mapping inode/address-space
*/
inode = new_inode(sb);
if (inode == NULL)
goto out_kfree;
inode->i_ino = 0;
inode->i_nlink = 1;
inode->i_size = sb->s_bdev->bd_inode->i_size;
inode->i_mapping->a_ops = &jfs_metapage_aops;
insert_inode_hash(inode);
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
sbi->direct_inode = inode;
rc = jfs_mount(sb);
if (rc) {
if (!silent) {
jfs_err("jfs_mount failed w/return code = %d", rc);
}
goto out_mount_failed;
}
if (sb->s_flags & MS_RDONLY)
sbi->log = NULL;
else {
rc = jfs_mount_rw(sb, 0);
if (rc) {
if (!silent) {
jfs_err("jfs_mount_rw failed, return code = %d",
rc);
}
goto out_no_rw;
}
}
sb->s_magic = JFS_SUPER_MAGIC;
inode = iget(sb, ROOT_I);
if (!inode || is_bad_inode(inode))
goto out_no_root;
sb->s_root = d_alloc_root(inode);
if (!sb->s_root)
goto out_no_root;
if (sbi->mntflag & JFS_OS2)
sb->s_root->d_op = &jfs_ci_dentry_operations;
/* logical blocks are represented by 40 bits in pxd_t, etc. */
sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
#if BITS_PER_LONG == 32
/*
* Page cache is indexed by long.
* I would use MAX_LFS_FILESIZE, but it's only half as big
*/
sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
#endif
sb->s_time_gran = 1;
return 0;
out_no_root:
jfs_err("jfs_read_super: get root inode failed");
if (inode)
iput(inode);
out_no_rw:
rc = jfs_umount(sb);
if (rc) {
jfs_err("jfs_umount failed with return code %d", rc);
}
out_mount_failed:
filemap_write_and_wait(sbi->direct_inode->i_mapping);
truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
make_bad_inode(sbi->direct_inode);
iput(sbi->direct_inode);
sbi->direct_inode = NULL;
out_kfree:
if (sbi->nls_tab)
unload_nls(sbi->nls_tab);
kfree(sbi);
return -EINVAL;
}
static void jfs_write_super_lockfs(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
if (!(sb->s_flags & MS_RDONLY)) {
txQuiesce(sb);
lmLogShutdown(log);
updateSuper(sb, FM_CLEAN);
}
}
static void jfs_unlockfs(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
int rc = 0;
if (!(sb->s_flags & MS_RDONLY)) {
updateSuper(sb, FM_MOUNT);
if ((rc = lmLogInit(log)))
jfs_err("jfs_unlock failed with return code %d", rc);
else
txResume(sb);
}
}
static int jfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
mnt);
}
static int jfs_sync_fs(struct super_block *sb, int wait)
{
struct jfs_log *log = JFS_SBI(sb)->log;
/* log == NULL indicates read-only mount */
if (log) {
jfs_flush_journal(log, wait);
jfs_syncpt(log, 0);
}
return 0;
}
static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
{
struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb);
if (sbi->uid != -1)
seq_printf(seq, ",uid=%d", sbi->uid);
if (sbi->gid != -1)
seq_printf(seq, ",gid=%d", sbi->gid);
if (sbi->umask != -1)
seq_printf(seq, ",umask=%03o", sbi->umask);
if (sbi->flag & JFS_NOINTEGRITY)
seq_puts(seq, ",nointegrity");
#ifdef CONFIG_QUOTA
if (sbi->flag & JFS_USRQUOTA)
seq_puts(seq, ",usrquota");
if (sbi->flag & JFS_GRPQUOTA)
seq_puts(seq, ",grpquota");
#endif
return 0;
}
#ifdef CONFIG_QUOTA
/* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and noone else should touch the files)
* we don't have to be afraid of races */
static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off)
{
struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> sb->s_blocksize_bits;
int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t toread;
struct buffer_head tmp_bh;
struct buffer_head *bh;
loff_t i_size = i_size_read(inode);
if (off > i_size)
return 0;
if (off+len > i_size)
len = i_size-off;
toread = len;
while (toread > 0) {
tocopy = sb->s_blocksize - offset < toread ?
sb->s_blocksize - offset : toread;
tmp_bh.b_state = 0;
tmp_bh.b_size = 1 << inode->i_blkbits;
err = jfs_get_block(inode, blk, &tmp_bh, 0);
if (err)
return err;
if (!buffer_mapped(&tmp_bh)) /* A hole? */
memset(data, 0, tocopy);
else {
bh = sb_bread(sb, tmp_bh.b_blocknr);
if (!bh)
return -EIO;
memcpy(data, bh->b_data+offset, tocopy);
brelse(bh);
}
offset = 0;
toread -= tocopy;
data += tocopy;
blk++;
}
return len;
}
/* Write to quotafile */
static ssize_t jfs_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off)
{
struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> sb->s_blocksize_bits;
int err = 0;
int offset = off & (sb->s_blocksize - 1);
int tocopy;
size_t towrite = len;
struct buffer_head tmp_bh;
struct buffer_head *bh;
mutex_lock(&inode->i_mutex);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
tmp_bh.b_size = 1 << inode->i_blkbits;
err = jfs_get_block(inode, blk, &tmp_bh, 1);
if (err)
goto out;
if (offset || tocopy != sb->s_blocksize)
bh = sb_bread(sb, tmp_bh.b_blocknr);
else
bh = sb_getblk(sb, tmp_bh.b_blocknr);
if (!bh) {
err = -EIO;
goto out;
}
lock_buffer(bh);
memcpy(bh->b_data+offset, data, tocopy);
flush_dcache_page(bh->b_page);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
unlock_buffer(bh);
brelse(bh);
offset = 0;
towrite -= tocopy;
data += tocopy;
blk++;
}
out:
if (len == towrite)
return err;
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
mutex_unlock(&inode->i_mutex);
return len - towrite;
}
#endif
static const struct super_operations jfs_super_operations = {
.alloc_inode = jfs_alloc_inode,
.destroy_inode = jfs_destroy_inode,
.read_inode = jfs_read_inode,
.dirty_inode = jfs_dirty_inode,
.write_inode = jfs_write_inode,
.delete_inode = jfs_delete_inode,
.put_super = jfs_put_super,
.sync_fs = jfs_sync_fs,
.write_super_lockfs = jfs_write_super_lockfs,
.unlockfs = jfs_unlockfs,
.statfs = jfs_statfs,
.remount_fs = jfs_remount,
.show_options = jfs_show_options,
#ifdef CONFIG_QUOTA
.quota_read = jfs_quota_read,
.quota_write = jfs_quota_write,
#endif
};
static struct export_operations jfs_export_operations = {
.get_parent = jfs_get_parent,
};
static struct file_system_type jfs_fs_type = {
.owner = THIS_MODULE,
.name = "jfs",
.get_sb = jfs_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
static void init_once(void *foo, struct kmem_cache * cachep, unsigned long flags)
{
struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
memset(jfs_ip, 0, sizeof(struct jfs_inode_info));
INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
init_rwsem(&jfs_ip->rdwrlock);
mutex_init(&jfs_ip->commit_mutex);
init_rwsem(&jfs_ip->xattr_sem);
spin_lock_init(&jfs_ip->ag_lock);
jfs_ip->active_ag = -1;
#ifdef CONFIG_JFS_POSIX_ACL
jfs_ip->i_acl = JFS_ACL_NOT_CACHED;
jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED;
#endif
inode_init_once(&jfs_ip->vfs_inode);
}
}
static int __init init_jfs_fs(void)
{
int i;
int rc;
jfs_inode_cachep =
kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0,
SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
init_once, NULL);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
/*
* Metapage initialization
*/
rc = metapage_init();
if (rc) {
jfs_err("metapage_init failed w/rc = %d", rc);
goto free_slab;
}
/*
* Transaction Manager initialization
*/
rc = txInit();
if (rc) {
jfs_err("txInit failed w/rc = %d", rc);
goto free_metapage;
}
/*
* I/O completion thread (endio)
*/
jfsIOthread = kthread_run(jfsIOWait, NULL, "jfsIO");
if (IS_ERR(jfsIOthread)) {
rc = PTR_ERR(jfsIOthread);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
goto end_txmngr;
}
if (commit_threads < 1)
commit_threads = num_online_cpus();
if (commit_threads > MAX_COMMIT_THREADS)
commit_threads = MAX_COMMIT_THREADS;
for (i = 0; i < commit_threads; i++) {
jfsCommitThread[i] = kthread_run(jfs_lazycommit, NULL, "jfsCommit");
if (IS_ERR(jfsCommitThread[i])) {
rc = PTR_ERR(jfsCommitThread[i]);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
commit_threads = i;
goto kill_committask;
}
}
jfsSyncThread = kthread_run(jfs_sync, NULL, "jfsSync");
if (IS_ERR(jfsSyncThread)) {
rc = PTR_ERR(jfsSyncThread);
jfs_err("init_jfs_fs: fork failed w/rc = %d", rc);
goto kill_committask;
}
#ifdef PROC_FS_JFS
jfs_proc_init();
#endif
return register_filesystem(&jfs_fs_type);
kill_committask:
for (i = 0; i < commit_threads; i++)
kthread_stop(jfsCommitThread[i]);
kthread_stop(jfsIOthread);
end_txmngr:
txExit();
free_metapage:
metapage_exit();
free_slab:
kmem_cache_destroy(jfs_inode_cachep);
return rc;
}
static void __exit exit_jfs_fs(void)
{
int i;
jfs_info("exit_jfs_fs called");
txExit();
metapage_exit();
kthread_stop(jfsIOthread);
for (i = 0; i < commit_threads; i++)
kthread_stop(jfsCommitThread[i]);
kthread_stop(jfsSyncThread);
#ifdef PROC_FS_JFS
jfs_proc_clean();
#endif
unregister_filesystem(&jfs_fs_type);
kmem_cache_destroy(jfs_inode_cachep);
}
module_init(init_jfs_fs)
module_exit(exit_jfs_fs)

40
fs/jfs/symlink.c Normal file
View File

@@ -0,0 +1,40 @@
/*
* Copyright (C) Christoph Hellwig, 2001-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/namei.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_xattr.h"
static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char *s = JFS_IP(dentry->d_inode)->i_inline;
nd_set_link(nd, s);
return NULL;
}
const struct inode_operations jfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = jfs_follow_link,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
.listxattr = jfs_listxattr,
.removexattr = jfs_removexattr,
};

1140
fs/jfs/xattr.c Normal file

File diff suppressed because it is too large Load Diff