diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-09 15:18:49 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-09 15:18:49 -0800 |
commit | 31aeb6c815549948571eec988ad9728c27d7a68d (patch) | |
tree | e155438be253924ebb1b792182e406947369b3eb | |
parent | c40f6f8bbc4cbd2902671aacd587400ddca62627 (diff) | |
parent | fc55584175589b70f4c30cb594f09f4bd6ad5d40 (diff) | |
download | linux-3.10-31aeb6c815549948571eec988ad9728c27d7a68d.tar.gz linux-3.10-31aeb6c815549948571eec988ad9728c27d7a68d.tar.bz2 linux-3.10-31aeb6c815549948571eec988ad9728c27d7a68d.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-linus
* git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squashfs-linus:
MAINTAINERS: squashfs entry
Squashfs: documentation
Squashfs: initrd support
Squashfs: Kconfig entry
Squashfs: Makefiles
Squashfs: header files
Squashfs: block operations
Squashfs: cache operations
Squashfs: uid/gid lookup operations
Squashfs: fragment block operations
Squashfs: export operations
Squashfs: super block operations
Squashfs: symlink operations
Squashfs: regular file operations
Squashfs: directory readdir operations
Squashfs: directory lookup operations
Squashfs: inode operations
-rw-r--r-- | Documentation/filesystems/squashfs.txt | 225 | ||||
-rw-r--r-- | MAINTAINERS | 7 | ||||
-rw-r--r-- | fs/Kconfig | 52 | ||||
-rw-r--r-- | fs/Makefile | 1 | ||||
-rw-r--r-- | fs/squashfs/Makefile | 8 | ||||
-rw-r--r-- | fs/squashfs/block.c | 274 | ||||
-rw-r--r-- | fs/squashfs/cache.c | 412 | ||||
-rw-r--r-- | fs/squashfs/dir.c | 235 | ||||
-rw-r--r-- | fs/squashfs/export.c | 155 | ||||
-rw-r--r-- | fs/squashfs/file.c | 502 | ||||
-rw-r--r-- | fs/squashfs/fragment.c | 98 | ||||
-rw-r--r-- | fs/squashfs/id.c | 94 | ||||
-rw-r--r-- | fs/squashfs/inode.c | 346 | ||||
-rw-r--r-- | fs/squashfs/namei.c | 242 | ||||
-rw-r--r-- | fs/squashfs/squashfs.h | 90 | ||||
-rw-r--r-- | fs/squashfs/squashfs_fs.h | 381 | ||||
-rw-r--r-- | fs/squashfs/squashfs_fs_i.h | 45 | ||||
-rw-r--r-- | fs/squashfs/squashfs_fs_sb.h | 76 | ||||
-rw-r--r-- | fs/squashfs/super.c | 440 | ||||
-rw-r--r-- | fs/squashfs/symlink.c | 118 | ||||
-rw-r--r-- | init/do_mounts_rd.c | 14 |
21 files changed, 3815 insertions, 0 deletions
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt new file mode 100644 index 00000000000..3e79e4a7a39 --- /dev/null +++ b/Documentation/filesystems/squashfs.txt @@ -0,0 +1,225 @@ +SQUASHFS 4.0 FILESYSTEM +======================= + +Squashfs is a compressed read-only filesystem for Linux. +It uses zlib compression to compress files, inodes and directories. +Inodes in the system are very small and all blocks are packed to minimise +data overhead. Block sizes greater than 4K are supported up to a maximum +of 1Mbytes (default block size 128K). + +Squashfs is intended for general read-only filesystem use, for archival +use (i.e. in cases where a .tar.gz file may be used), and in constrained +block device/memory systems (e.g. embedded systems) where low overhead is +needed. + +Mailing list: squashfs-devel@lists.sourceforge.net +Web site: www.squashfs.org + +1. FILESYSTEM FEATURES +---------------------- + +Squashfs filesystem features versus Cramfs: + + Squashfs Cramfs + +Max filesystem size: 2^64 16 MiB +Max file size: ~ 2 TiB 16 MiB +Max files: unlimited unlimited +Max directories: unlimited unlimited +Max entries per directory: unlimited unlimited +Max block size: 1 MiB 4 KiB +Metadata compression: yes no +Directory indexes: yes no +Sparse file support: yes no +Tail-end packing (fragments): yes no +Exportable (NFS etc.): yes no +Hard link support: yes no +"." and ".." in readdir: yes no +Real inode numbers: yes no +32-bit uids/gids: yes no +File creation time: yes no +Xattr and ACL support: no no + +Squashfs compresses data, inodes and directories. In addition, inode and +directory data are highly compacted, and packed on byte boundaries. Each +compressed inode is on average 8 bytes in length (the exact length varies on +file type, i.e. regular file, directory, symbolic link, and block/char device +inodes have different sizes). + +2. USING SQUASHFS +----------------- + +As squashfs is a read-only filesystem, the mksquashfs program must be used to +create populated squashfs filesystems. This and other squashfs utilities +can be obtained from http://www.squashfs.org. Usage instructions can be +obtained from this site also. + + +3. SQUASHFS FILESYSTEM DESIGN +----------------------------- + +A squashfs filesystem consists of seven parts, packed together on a byte +alignment: + + --------------- + | superblock | + |---------------| + | datablocks | + | & fragments | + |---------------| + | inode table | + |---------------| + | directory | + | table | + |---------------| + | fragment | + | table | + |---------------| + | export | + | table | + |---------------| + | uid/gid | + | lookup table | + --------------- + +Compressed data blocks are written to the filesystem as files are read from +the source directory, and checked for duplicates. Once all file data has been +written the completed inode, directory, fragment, export and uid/gid lookup +tables are written. + +3.1 Inodes +---------- + +Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each +compressed block is prefixed by a two byte length, the top bit is set if the +block is uncompressed. A block will be uncompressed if the -noI option is set, +or if the compressed block was larger than the uncompressed block. + +Inodes are packed into the metadata blocks, and are not aligned to block +boundaries, therefore inodes overlap compressed blocks. Inodes are identified +by a 48-bit number which encodes the location of the compressed metadata block +containing the inode, and the byte offset into that block where the inode is +placed (<block, offset>). + +To maximise compression there are different inodes for each file type +(regular file, directory, device, etc.), the inode contents and length +varying with the type. + +To further maximise compression, two types of regular file inode and +directory inode are defined: inodes optimised for frequently occurring +regular files and directories, and extended types where extra +information has to be stored. + +3.2 Directories +--------------- + +Like inodes, directories are packed into compressed metadata blocks, stored +in a directory table. Directories are accessed using the start address of +the metablock containing the directory and the offset into the +decompressed block (<block, offset>). + +Directories are organised in a slightly complex way, and are not simply +a list of file names. The organisation takes advantage of the +fact that (in most cases) the inodes of the files will be in the same +compressed metadata block, and therefore, can share the start block. +Directories are therefore organised in a two level list, a directory +header containing the shared start block value, and a sequence of directory +entries, each of which share the shared start block. A new directory header +is written once/if the inode start block changes. The directory +header/directory entry list is repeated as many times as necessary. + +Directories are sorted, and can contain a directory index to speed up +file lookup. Directory indexes store one entry per metablock, each entry +storing the index/filename mapping to the first directory header +in each metadata block. Directories are sorted in alphabetical order, +and at lookup the index is scanned linearly looking for the first filename +alphabetically larger than the filename being looked up. At this point the +location of the metadata block the filename is in has been found. +The general idea of the index is ensure only one metadata block needs to be +decompressed to do a lookup irrespective of the length of the directory. +This scheme has the advantage that it doesn't require extra memory overhead +and doesn't require much extra storage on disk. + +3.3 File data +------------- + +Regular files consist of a sequence of contiguous compressed blocks, and/or a +compressed fragment block (tail-end packed block). The compressed size +of each datablock is stored in a block list contained within the +file inode. + +To speed up access to datablocks when reading 'large' files (256 Mbytes or +larger), the code implements an index cache that caches the mapping from +block index to datablock location on disk. + +The index cache allows Squashfs to handle large files (up to 1.75 TiB) while +retaining a simple and space-efficient block list on disk. The cache +is split into slots, caching up to eight 224 GiB files (128 KiB blocks). +Larger files use multiple slots, with 1.75 TiB files using all 8 slots. +The index cache is designed to be memory efficient, and by default uses +16 KiB. + +3.4 Fragment lookup table +------------------------- + +Regular files can contain a fragment index which is mapped to a fragment +location on disk and compressed size using a fragment lookup table. This +fragment lookup table is itself stored compressed into metadata blocks. +A second index table is used to locate these. This second index table for +speed of access (and because it is small) is read at mount time and cached +in memory. + +3.5 Uid/gid lookup table +------------------------ + +For space efficiency regular files store uid and gid indexes, which are +converted to 32-bit uids/gids using an id look up table. This table is +stored compressed into metadata blocks. A second index table is used to +locate these. This second index table for speed of access (and because it +is small) is read at mount time and cached in memory. + +3.6 Export table +---------------- + +To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems +can optionally (disabled with the -no-exports Mksquashfs option) contain +an inode number to inode disk location lookup table. This is required to +enable Squashfs to map inode numbers passed in filehandles to the inode +location on disk, which is necessary when the export code reinstantiates +expired/flushed inodes. + +This table is stored compressed into metadata blocks. A second index table is +used to locate these. This second index table for speed of access (and because +it is small) is read at mount time and cached in memory. + + +4. TODOS AND OUTSTANDING ISSUES +------------------------------- + +4.1 Todo list +------------- + +Implement Xattr and ACL support. The Squashfs 4.0 filesystem layout has hooks +for these but the code has not been written. Once the code has been written +the existing layout should not require modification. + +4.2 Squashfs internal cache +--------------------------- + +Blocks in Squashfs are compressed. To avoid repeatedly decompressing +recently accessed data Squashfs uses two small metadata and fragment caches. + +The cache is not used for file datablocks, these are decompressed and cached in +the page-cache in the normal way. The cache is used to temporarily cache +fragment and metadata blocks which have been read as a result of a metadata +(i.e. inode or directory) or fragment access. Because metadata and fragments +are packed together into blocks (to gain greater compression) the read of a +particular piece of metadata or fragment will retrieve other metadata/fragments +which have been packed with it, these because of locality-of-reference may be +read in the near future. Temporarily caching them ensures they are available +for near future access without requiring an additional read and decompress. + +In the future this internal cache may be replaced with an implementation which +uses the kernel page cache. Because the page cache operates on page sized +units this may introduce additional complexity in terms of locking and +associated race conditions. diff --git a/MAINTAINERS b/MAINTAINERS index 57e0309243c..6f65a269cb1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4081,6 +4081,13 @@ L: cbe-oss-dev@ozlabs.org W: http://www.ibm.com/developerworks/power/cell/ S: Supported +SQUASHFS FILE SYSTEM +P: Phillip Lougher +M: phillip@lougher.demon.co.uk +L: squashfs-devel@lists.sourceforge.net (subscribers-only) +W: http://squashfs.org.uk +S: Maintained + SRM (Alpha) environment access P: Jan-Benedict Glaw M: jbglaw@lug-owl.de diff --git a/fs/Kconfig b/fs/Kconfig index 02cff86af1b..51307b0fdf0 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -932,6 +932,58 @@ config CRAMFS If unsure, say N. +config SQUASHFS + tristate "SquashFS 4.0 - Squashed file system support" + depends on BLOCK + select ZLIB_INFLATE + help + Saying Y here includes support for SquashFS 4.0 (a Compressed + Read-Only File System). Squashfs is a highly compressed read-only + filesystem for Linux. It uses zlib compression to compress both + files, inodes and directories. Inodes in the system are very small + and all blocks are packed to minimise data overhead. Block sizes + greater than 4K are supported up to a maximum of 1 Mbytes (default + block size 128K). SquashFS 4.0 supports 64 bit filesystems and files + (larger than 4GB), full uid/gid information, hard links and + timestamps. + + Squashfs is intended for general read-only filesystem use, for + archival use (i.e. in cases where a .tar.gz file may be used), and in + embedded systems where low overhead is needed. Further information + and tools are available from http://squashfs.sourceforge.net. + + If you want to compile this as a module ( = code which can be + inserted in and removed from the running kernel whenever you want), + say M here and read <file:Documentation/modules.txt>. The module + will be called squashfs. Note that the root file system (the one + containing the directory /) cannot be compiled as a module. + + If unsure, say N. + +config SQUASHFS_EMBEDDED + + bool "Additional option for memory-constrained systems" + depends on SQUASHFS + default n + help + Saying Y here allows you to specify cache size. + + If unsure, say N. + +config SQUASHFS_FRAGMENT_CACHE_SIZE + int "Number of fragments cached" if SQUASHFS_EMBEDDED + depends on SQUASHFS + default "3" + help + By default SquashFS caches the last 3 fragments read from + the filesystem. Increasing this amount may mean SquashFS + has to re-read fragments less often from disk, at the expense + of extra system memory. Decreasing this amount will mean + SquashFS uses less memory at the expense of extra reads from disk. + + Note there must be at least one cached fragment. Anything + much more than three will probably not make much difference. + config VXFS_FS tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" depends on BLOCK diff --git a/fs/Makefile b/fs/Makefile index bc4e14df108..38bc735c67a 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -74,6 +74,7 @@ obj-$(CONFIG_JBD) += jbd/ obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ +obj-$(CONFIG_SQUASHFS) += squashfs/ obj-y += ramfs/ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile new file mode 100644 index 00000000000..8258cf9a031 --- /dev/null +++ b/fs/squashfs/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the linux squashfs routines. +# + +obj-$(CONFIG_SQUASHFS) += squashfs.o +squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o +squashfs-y += namei.o super.o symlink.o +#squashfs-y += squashfs2_0.o diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c new file mode 100644 index 00000000000..c837dfc2b3c --- /dev/null +++ b/fs/squashfs/block.c @@ -0,0 +1,274 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * block.c + */ + +/* + * This file implements the low-level routines to read and decompress + * datablocks and metadata blocks. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/string.h> +#include <linux/buffer_head.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Read the metadata block length, this is stored in the first two + * bytes of the metadata block. + */ +static struct buffer_head *get_block_length(struct super_block *sb, + u64 *cur_index, int *offset, int *length) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + struct buffer_head *bh; + + bh = sb_bread(sb, *cur_index); + if (bh == NULL) + return NULL; + + if (msblk->devblksize - *offset == 1) { + *length = (unsigned char) bh->b_data[*offset]; + put_bh(bh); + bh = sb_bread(sb, ++(*cur_index)); + if (bh == NULL) + return NULL; + *length |= (unsigned char) bh->b_data[0] << 8; + *offset = 1; + } else { + *length = (unsigned char) bh->b_data[*offset] | + (unsigned char) bh->b_data[*offset + 1] << 8; + *offset += 2; + } + + return bh; +} + + +/* + * Read and decompress a metadata block or datablock. Length is non-zero + * if a datablock is being read (the size is stored elsewhere in the + * filesystem), otherwise the length is obtained from the first two bytes of + * the metadata block. A bit in the length field indicates if the block + * is stored uncompressed in the filesystem (usually because compression + * generated a larger block - this does occasionally happen with zlib). + */ +int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, + int length, u64 *next_index, int srclength) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + struct buffer_head **bh; + int offset = index & ((1 << msblk->devblksize_log2) - 1); + u64 cur_index = index >> msblk->devblksize_log2; + int bytes, compressed, b = 0, k = 0, page = 0, avail; + + + bh = kcalloc((msblk->block_size >> msblk->devblksize_log2) + 1, + sizeof(*bh), GFP_KERNEL); + if (bh == NULL) + return -ENOMEM; + + if (length) { + /* + * Datablock. + */ + bytes = -offset; + compressed = SQUASHFS_COMPRESSED_BLOCK(length); + length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); + if (next_index) + *next_index = index + length; + + TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", + index, compressed ? "" : "un", length, srclength); + + if (length < 0 || length > srclength || + (index + length) > msblk->bytes_used) + goto read_failure; + + for (b = 0; bytes < length; b++, cur_index++) { + bh[b] = sb_getblk(sb, cur_index); + if (bh[b] == NULL) + goto block_release; + bytes += msblk->devblksize; + } + ll_rw_block(READ, b, bh); + } else { + /* + * Metadata block. + */ + if ((index + 2) > msblk->bytes_used) + goto read_failure; + + bh[0] = get_block_length(sb, &cur_index, &offset, &length); + if (bh[0] == NULL) + goto read_failure; + b = 1; + + bytes = msblk->devblksize - offset; + compressed = SQUASHFS_COMPRESSED(length); + length = SQUASHFS_COMPRESSED_SIZE(length); + if (next_index) + *next_index = index + length + 2; + + TRACE("Block @ 0x%llx, %scompressed size %d\n", index, + compressed ? "" : "un", length); + + if (length < 0 || length > srclength || + (index + length) > msblk->bytes_used) + goto block_release; + + for (; bytes < length; b++) { + bh[b] = sb_getblk(sb, ++cur_index); + if (bh[b] == NULL) + goto block_release; + bytes += msblk->devblksize; + } + ll_rw_block(READ, b - 1, bh + 1); + } + + if (compressed) { + int zlib_err = 0, zlib_init = 0; + + /* + * Uncompress block. + */ + + mutex_lock(&msblk->read_data_mutex); + + msblk->stream.avail_out = 0; + msblk->stream.avail_in = 0; + + bytes = length; + do { + if (msblk->stream.avail_in == 0 && k < b) { + avail = min(bytes, msblk->devblksize - offset); + bytes -= avail; + wait_on_buffer(bh[k]); + if (!buffer_uptodate(bh[k])) + goto release_mutex; + + if (avail == 0) { + offset = 0; + put_bh(bh[k++]); + continue; + } + + msblk->stream.next_in = bh[k]->b_data + offset; + msblk->stream.avail_in = avail; + offset = 0; + } + + if (msblk->stream.avail_out == 0) { + msblk->stream.next_out = buffer[page++]; + msblk->stream.avail_out = PAGE_CACHE_SIZE; + } + + if (!zlib_init) { + zlib_err = zlib_inflateInit(&msblk->stream); + if (zlib_err != Z_OK) { + ERROR("zlib_inflateInit returned" + " unexpected result 0x%x," + " srclength %d\n", zlib_err, + srclength); + goto release_mutex; + } + zlib_init = 1; + } + + zlib_err = zlib_inflate(&msblk->stream, Z_NO_FLUSH); + + if (msblk->stream.avail_in == 0 && k < b) + put_bh(bh[k++]); + } while (zlib_err == Z_OK); + + if (zlib_err != Z_STREAM_END) { + ERROR("zlib_inflate returned unexpected result" + " 0x%x, srclength %d, avail_in %d," + " avail_out %d\n", zlib_err, srclength, + msblk->stream.avail_in, + msblk->stream.avail_out); + goto release_mutex; + } + + zlib_err = zlib_inflateEnd(&msblk->stream); + if (zlib_err != Z_OK) { + ERROR("zlib_inflateEnd returned unexpected result 0x%x," + " srclength %d\n", zlib_err, srclength); + goto release_mutex; + } + length = msblk->stream.total_out; + mutex_unlock(&msblk->read_data_mutex); + } else { + /* + * Block is uncompressed. + */ + int i, in, pg_offset = 0; + + for (i = 0; i < b; i++) { + wait_on_buffer(bh[i]); + if (!buffer_uptodate(bh[i])) + goto block_release; + } + + for (bytes = length; k < b; k++) { + in = min(bytes, msblk->devblksize - offset); + bytes -= in; + while (in) { + if (pg_offset == PAGE_CACHE_SIZE) { + page++; + pg_offset = 0; + } + avail = min_t(int, in, PAGE_CACHE_SIZE - + pg_offset); + memcpy(buffer[page] + pg_offset, + bh[k]->b_data + offset, avail); + in -= avail; + pg_offset += avail; + offset += avail; + } + offset = 0; + put_bh(bh[k]); + } + } + + kfree(bh); + return length; + +release_mutex: + mutex_unlock(&msblk->read_data_mutex); + +block_release: + for (; k < b; k++) + put_bh(bh[k]); + +read_failure: + ERROR("sb_bread failed reading block 0x%llx\n", cur_index); + kfree(bh); + return -EIO; +} diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c new file mode 100644 index 00000000000..f29eda16d25 --- /dev/null +++ b/fs/squashfs/cache.c @@ -0,0 +1,412 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * cache.c + */ + +/* + * Blocks in Squashfs are compressed. To avoid repeatedly decompressing + * recently accessed data Squashfs uses two small metadata and fragment caches. + * + * This file implements a generic cache implementation used for both caches, + * plus functions layered ontop of the generic cache implementation to + * access the metadata and fragment caches. + * + * To avoid out of memory and fragmentation isssues with vmalloc the cache + * uses sequences of kmalloced PAGE_CACHE_SIZE buffers. + * + * It should be noted that the cache is not used for file datablocks, these + * are decompressed and cached in the page-cache in the normal way. The + * cache is only used to temporarily cache fragment and metadata blocks + * which have been read as as a result of a metadata (i.e. inode or + * directory) or fragment access. Because metadata and fragments are packed + * together into blocks (to gain greater compression) the read of a particular + * piece of metadata or fragment will retrieve other metadata/fragments which + * have been packed with it, these because of locality-of-reference may be read + * in the near future. Temporarily caching them ensures they are available for + * near future access without requiring an additional read and decompress. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/zlib.h> +#include <linux/pagemap.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Look-up block in cache, and increment usage count. If not in cache, read + * and decompress it from disk. + */ +struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, + struct squashfs_cache *cache, u64 block, int length) +{ + int i, n; + struct squashfs_cache_entry *entry; + + spin_lock(&cache->lock); + + while (1) { + for (i = 0; i < cache->entries; i++) + if (cache->entry[i].block == block) + break; + + if (i == cache->entries) { + /* + * Block not in cache, if all cache entries are used + * go to sleep waiting for one to become available. + */ + if (cache->unused == 0) { + cache->num_waiters++; + spin_unlock(&cache->lock); + wait_event(cache->wait_queue, cache->unused); + spin_lock(&cache->lock); + cache->num_waiters--; + continue; + } + + /* + * At least one unused cache entry. A simple + * round-robin strategy is used to choose the entry to + * be evicted from the cache. + */ + i = cache->next_blk; + for (n = 0; n < cache->entries; n++) { + if (cache->entry[i].refcount == 0) + break; + i = (i + 1) % cache->entries; + } + + cache->next_blk = (i + 1) % cache->entries; + entry = &cache->entry[i]; + + /* + * Initialise choosen cache entry, and fill it in from + * disk. + */ + cache->unused--; + entry->block = block; + entry->refcount = 1; + entry->pending = 1; + entry->num_waiters = 0; + entry->error = 0; + spin_unlock(&cache->lock); + + entry->length = squashfs_read_data(sb, entry->data, + block, length, &entry->next_index, + cache->block_size); + + spin_lock(&cache->lock); + + if (entry->length < 0) + entry->error = entry->length; + + entry->pending = 0; + + /* + * While filling this entry one or more other processes + * have looked it up in the cache, and have slept + * waiting for it to become available. + */ + if (entry->num_waiters) { + spin_unlock(&cache->lock); + wake_up_all(&entry->wait_queue); + } else + spin_unlock(&cache->lock); + + goto out; + } + + /* + * Block already in cache. Increment refcount so it doesn't + * get reused until we're finished with it, if it was + * previously unused there's one less cache entry available + * for reuse. + */ + entry = &cache->entry[i]; + if (entry->refcount == 0) + cache->unused--; + entry->refcount++; + + /* + * If the entry is currently being filled in by another process + * go to sleep waiting for it to become available. + */ + if (entry->pending) { + entry->num_waiters++; + spin_unlock(&cache->lock); + wait_event(entry->wait_queue, !entry->pending); + } else + spin_unlock(&cache->lock); + + goto out; + } + +out: + TRACE("Got %s %d, start block %lld, refcount %d, error %d\n", + cache->name, i, entry->block, entry->refcount, entry->error); + + if (entry->error) + ERROR("Unable to read %s cache entry [%llx]\n", cache->name, + block); + return entry; +} + + +/* + * Release cache entry, once usage count is zero it can be reused. + */ +void squashfs_cache_put(struct squashfs_cache_entry *entry) +{ + struct squashfs_cache *cache = entry->cache; + + spin_lock(&cache->lock); + entry->refcount--; + if (entry->refcount == 0) { + cache->unused++; + /* + * If there's any processes waiting for a block to become + * available, wake one up. + */ + if (cache->num_waiters) { + spin_unlock(&cache->lock); + wake_up(&cache->wait_queue); + return; + } + } + spin_unlock(&cache->lock); +} + +/* + * Delete cache reclaiming all kmalloced buffers. + */ +void squashfs_cache_delete(struct squashfs_cache *cache) +{ + int i, j; + + if (cache == NULL) + return; + + for (i = 0; i < cache->entries; i++) { + if (cache->entry[i].data) { + for (j = 0; j < cache->pages; j++) + kfree(cache->entry[i].data[j]); + kfree(cache->entry[i].data); + } + } + + kfree(cache->entry); + kfree(cache); +} + + +/* + * Initialise cache allocating the specified number of entries, each of + * size block_size. To avoid vmalloc fragmentation issues each entry + * is allocated as a sequence of kmalloced PAGE_CACHE_SIZE buffers. + */ +struct squashfs_cache *squashfs_cache_init(char *name, int entries, + int block_size) +{ + int i, j; + struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL); + + if (cache == NULL) { + ERROR("Failed to allocate %s cache\n", name); + return NULL; + } + + cache->entry = kcalloc(entries, sizeof(*(cache->entry)), GFP_KERNEL); + if (cache->entry == NULL) { + ERROR("Failed to allocate %s cache\n", name); + goto cleanup; + } + + cache->next_blk = 0; + cache->unused = entries; + cache->entries = entries; + cache->block_size = block_size; + cache->pages = block_size >> PAGE_CACHE_SHIFT; + cache->name = name; + cache->num_waiters = 0; + spin_lock_init(&cache->lock); + init_waitqueue_head(&cache->wait_queue); + + for (i = 0; i < entries; i++) { + struct squashfs_cache_entry *entry = &cache->entry[i]; + + init_waitqueue_head(&cache->entry[i].wait_queue); + entry->cache = cache; + entry->block = SQUASHFS_INVALID_BLK; + entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL); + if (entry->data == NULL) { + ERROR("Failed to allocate %s cache entry\n", name); + goto cleanup; + } + + for (j = 0; j < cache->pages; j++) { + entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + if (entry->data[j] == NULL) { + ERROR("Failed to allocate %s buffer\n", name); + goto cleanup; + } + } + } + + return cache; + +cleanup: + squashfs_cache_delete(cache); + return NULL; +} + + +/* + * Copy upto length bytes from cache entry to buffer starting at offset bytes + * into the cache entry. If there's not length bytes then copy the number of + * bytes available. In all cases return the number of bytes copied. + */ +int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry, + int offset, int length) +{ + int remaining = length; + + if (length == 0) + return 0; + else if (buffer == NULL) + return min(length, entry->length - offset); + + while (offset < entry->length) { + void *buff = entry->data[offset / PAGE_CACHE_SIZE] + + (offset % PAGE_CACHE_SIZE); + int bytes = min_t(int, entry->length - offset, + PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE)); + + if (bytes >= remaining) { + memcpy(buffer, buff, remaining); + remaining = 0; + break; + } + + memcpy(buffer, buff, bytes); + buffer += bytes; + remaining -= bytes; + offset += bytes; + } + + return length - remaining; +} + + +/* + * Read length bytes from metadata position <block, offset> (block is the + * start of the compressed block on disk, and offset is the offset into + * the block once decompressed). Data is packed into consecutive blocks, + * and length bytes may require reading more than one block. + */ +int squashfs_read_metadata(struct super_block *sb, void *buffer, + u64 *block, int *offset, int length) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + int bytes, copied = length; + struct squashfs_cache_entry *entry; + + TRACE("Entered squashfs_read_metadata [%llx:%x]\n", *block, *offset); + + while (length) { + entry = squashfs_cache_get(sb, msblk->block_cache, *block, 0); + if (entry->error) + return entry->error; + else if (*offset >= entry->length) + return -EIO; + + bytes = squashfs_copy_data(buffer, entry, *offset, length); + if (buffer) + buffer += bytes; + length -= bytes; + *offset += bytes; + + if (*offset == entry->length) { + *block = entry->next_index; + *offset = 0; + } + + squashfs_cache_put(entry); + } + + return copied; +} + + +/* + * Look-up in the fragmment cache the fragment located at <start_block> in the + * filesystem. If necessary read and decompress it from disk. + */ +struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *sb, + u64 start_block, int length) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + + return squashfs_cache_get(sb, msblk->fragment_cache, start_block, + length); +} + + +/* + * Read and decompress the datablock located at <start_block> in the + * filesystem. The cache is used here to avoid duplicating locking and + * read/decompress code. + */ +struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb, + u64 start_block, int length) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + + return squashfs_cache_get(sb, msblk->read_page, start_block, length); +} + + +/* + * Read a filesystem table (uncompressed sequence of bytes) from disk + */ +int squashfs_read_table(struct super_block *sb, void *buffer, u64 block, + int length) +{ + int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + int i, res; + void **data = kcalloc(pages, sizeof(void *), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) + data[i] = buffer; + res = squashfs_read_data(sb, data, block, length | + SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length); + kfree(data); + return res; +} diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c new file mode 100644 index 00000000000..566b0eaed86 --- /dev/null +++ b/fs/squashfs/dir.c @@ -0,0 +1,235 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * dir.c + */ + +/* + * This file implements code to read directories from disk. + * + * See namei.c for a description of directory organisation on disk. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +static const unsigned char squashfs_filetype_table[] = { + DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK +}; + +/* + * Lookup offset (f_pos) in the directory index, returning the + * metadata block containing it. + * + * If we get an error reading the index then return the part of the index + * (if any) we have managed to read - the index isn't essential, just + * quicker. + */ +static int get_dir_index_using_offset(struct super_block *sb, + u64 *next_block, int *next_offset, u64 index_start, int index_offset, + int i_count, u64 f_pos) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + int err, i, index, length = 0; + struct squashfs_dir_index dir_index; + + TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n", + i_count, f_pos); + + /* + * Translate from external f_pos to the internal f_pos. This + * is offset by 3 because we invent "." and ".." entries which are + * not actually stored in the directory. + */ + if (f_pos < 3) + return f_pos; + f_pos -= 3; + + for (i = 0; i < i_count; i++) { + err = squashfs_read_metadata(sb, &dir_index, &index_start, + &index_offset, sizeof(dir_index)); + if (err < 0) + break; + + index = le32_to_cpu(dir_index.index); + if (index > f_pos) + /* + * Found the index we're looking for. + */ + break; + + err = squashfs_read_metadata(sb, NULL, &index_start, + &index_offset, le32_to_cpu(dir_index.size) + 1); + if (err < 0) + break; + + length = index; + *next_block = le32_to_cpu(dir_index.start_block) + + msblk->directory_table; + } + + *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; + + /* + * Translate back from internal f_pos to external f_pos. + */ + return length + 3; +} + + +static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + struct inode *inode = file->f_dentry->d_inode; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + u64 block = squashfs_i(inode)->start + msblk->directory_table; + int offset = squashfs_i(inode)->offset, length = 0, dir_count, size, + type, err; + unsigned int inode_number; + struct squashfs_dir_header dirh; + struct squashfs_dir_entry *dire; + + TRACE("Entered squashfs_readdir [%llx:%x]\n", block, offset); + + dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL); + if (dire == NULL) { + ERROR("Failed to allocate squashfs_dir_entry\n"); + goto finish; + } + + /* + * Return "." and ".." entries as the first two filenames in the + * directory. To maximise compression these two entries are not + * stored in the directory, and so we invent them here. + * + * It also means that the external f_pos is offset by 3 from the + * on-disk directory f_pos. + */ + while (file->f_pos < 3) { + char *name; + int i_ino; + + if (file->f_pos == 0) { + name = "."; + size = 1; + i_ino = inode->i_ino; + } else { + name = ".."; + size = 2; + i_ino = squashfs_i(inode)->parent; + } + + TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n", + dirent, name, size, file->f_pos, i_ino, + squashfs_filetype_table[1]); + + if (filldir(dirent, name, size, file->f_pos, i_ino, + squashfs_filetype_table[1]) < 0) { + TRACE("Filldir returned less than 0\n"); + goto finish; + } + + file->f_pos += size; + } + + length = get_dir_index_using_offset(inode->i_sb, &block, &offset, + squashfs_i(inode)->dir_idx_start, + squashfs_i(inode)->dir_idx_offset, + squashfs_i(inode)->dir_idx_cnt, + file->f_pos); + + while (length < i_size_read(inode)) { + /* + * Read directory header + */ + err = squashfs_read_metadata(inode->i_sb, &dirh, &block, + &offset, sizeof(dirh)); + if (err < 0) + goto failed_read; + + length += sizeof(dirh); + + dir_count = le32_to_cpu(dirh.count) + 1; + while (dir_count--) { + /* + * Read directory entry. + */ + err = squashfs_read_metadata(inode->i_sb, dire, &block, + &offset, sizeof(*dire)); + if (err < 0) + goto failed_read; + + size = le16_to_cpu(dire->size) + 1; + + err = squashfs_read_metadata(inode->i_sb, dire->name, + &block, &offset, size); + if (err < 0) + goto failed_read; + + length += sizeof(*dire) + size; + + if (file->f_pos >= length) + continue; + + dire->name[size] = '\0'; + inode_number = le32_to_cpu(dirh.inode_number) + + ((short) le16_to_cpu(dire->inode_number)); + type = le16_to_cpu(dire->type); + + TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)" + "\n", dirent, dire->name, size, + file->f_pos, + le32_to_cpu(dirh.start_block), + le16_to_cpu(dire->offset), + inode_number, + squashfs_filetype_table[type]); + + if (filldir(dirent, dire->name, size, file->f_pos, + inode_number, + squashfs_filetype_table[type]) < 0) { + TRACE("Filldir returned less than 0\n"); + goto finish; + } + + file->f_pos = length; + } + } + +finish: + kfree(dire); + return 0; + +failed_read: + ERROR("Unable to read directory block [%llx:%x]\n", block, offset); + kfree(dire); + return 0; +} + + +const struct file_operations squashfs_dir_ops = { + .read = generic_read_dir, + .readdir = squashfs_readdir +}; diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c new file mode 100644 index 00000000000..69e971d5ddc --- /dev/null +++ b/fs/squashfs/export.c @@ -0,0 +1,155 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * export.c + */ + +/* + * This file implements code to make Squashfs filesystems exportable (NFS etc.) + * + * The export code uses an inode lookup table to map inode numbers passed in + * filehandles to an inode location on disk. This table is stored compressed + * into metadata blocks. A second index table is used to locate these. This + * second index table for speed of access (and because it is small) is read at + * mount time and cached in memory. + * + * The inode lookup table is used only by the export code, inode disk + * locations are directly encoded in directories, enabling direct access + * without an intermediate lookup for all operations except the export ops. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/dcache.h> +#include <linux/exportfs.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Look-up inode number (ino) in table, returning the inode location. + */ +static long long squashfs_inode_lookup(struct super_block *sb, int ino_num) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1); + int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1); + u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]); + __le64 ino; + int err; + + TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num); + + err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino)); + if (err < 0) + return err; + + TRACE("squashfs_inode_lookup, inode = 0x%llx\n", + (u64) le64_to_cpu(ino)); + + return le64_to_cpu(ino); +} + + +static struct dentry *squashfs_export_iget(struct super_block *sb, + unsigned int ino_num) +{ + long long ino; + struct dentry *dentry = ERR_PTR(-ENOENT); + + TRACE("Entered squashfs_export_iget\n"); + + ino = squashfs_inode_lookup(sb, ino_num); + if (ino >= 0) + dentry = d_obtain_alias(squashfs_iget(sb, ino, ino_num)); + + return dentry; +} + + +static struct dentry *squashfs_fh_to_dentry(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if ((fh_type != FILEID_INO32_GEN && fh_type != FILEID_INO32_GEN_PARENT) + || fh_len < 2) + return NULL; + + return squashfs_export_iget(sb, fid->i32.ino); +} + + +static struct dentry *squashfs_fh_to_parent(struct super_block *sb, + struct fid *fid, int fh_len, int fh_type) +{ + if (fh_type != FILEID_INO32_GEN_PARENT || fh_len < 4) + return NULL; + + return squashfs_export_iget(sb, fid->i32.parent_ino); +} + + +static struct dentry *squashfs_get_parent(struct dentry *child) +{ + struct inode *inode = child->d_inode; + unsigned int parent_ino = squashfs_i(inode)->parent; + + return squashfs_export_iget(inode->i_sb, parent_ino); +} + + +/* + * Read uncompressed inode lookup table indexes off disk into memory + */ +__le64 *squashfs_read_inode_lookup_table(struct super_block *sb, + u64 lookup_table_start, unsigned int inodes) +{ + unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes); + __le64 *inode_lookup_table; + int err; + + TRACE("In read_inode_lookup_table, length %d\n", length); + + /* Allocate inode lookup table indexes */ + inode_lookup_table = kmalloc(length, GFP_KERNEL); + if (inode_lookup_table == NULL) { + ERROR("Failed to allocate inode lookup table\n"); + return ERR_PTR(-ENOMEM); + } + + err = squashfs_read_table(sb, inode_lookup_table, lookup_table_start, + length); + if (err < 0) { + ERROR("unable to read inode lookup table\n"); + kfree(inode_lookup_table); + return ERR_PTR(err); + } + + return inode_lookup_table; +} + + +const struct export_operations squashfs_export_ops = { + .fh_to_dentry = squashfs_fh_to_dentry, + .fh_to_parent = squashfs_fh_to_parent, + .get_parent = squashfs_get_parent +}; diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c new file mode 100644 index 00000000000..717767d831d --- /dev/null +++ b/fs/squashfs/file.c @@ -0,0 +1,502 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * file.c + */ + +/* + * This file contains code for handling regular files. A regular file + * consists of a sequence of contiguous compressed blocks, and/or a + * compressed fragment block (tail-end packed block). The compressed size + * of each datablock is stored in a block list contained within the + * file inode (itself stored in one or more compressed metadata blocks). + * + * To speed up access to datablocks when reading 'large' files (256 Mbytes or + * larger), the code implements an index cache that caches the mapping from + * block index to datablock location on disk. + * + * The index cache allows Squashfs to handle large files (up to 1.75 TiB) while + * retaining a simple and space-efficient block list on disk. The cache + * is split into slots, caching up to eight 224 GiB files (128 KiB blocks). + * Larger files use multiple slots, with 1.75 TiB files using all 8 slots. + * The index cache is designed to be memory efficient, and by default uses + * 16 KiB. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/mutex.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Locate cache slot in range [offset, index] for specified inode. If + * there's more than one return the slot closest to index. + */ +static struct meta_index *locate_meta_index(struct inode *inode, int offset, + int index) +{ + struct meta_index *meta = NULL; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int i; + + mutex_lock(&msblk->meta_index_mutex); + + TRACE("locate_meta_index: index %d, offset %d\n", index, offset); + + if (msblk->meta_index == NULL) + goto not_allocated; + + for (i = 0; i < SQUASHFS_META_SLOTS; i++) { + if (msblk->meta_index[i].inode_number == inode->i_ino && + msblk->meta_index[i].offset >= offset && + msblk->meta_index[i].offset <= index && + msblk->meta_index[i].locked == 0) { + TRACE("locate_meta_index: entry %d, offset %d\n", i, + msblk->meta_index[i].offset); + meta = &msblk->meta_index[i]; + offset = meta->offset; + } + } + + if (meta) + meta->locked = 1; + +not_allocated: + mutex_unlock(&msblk->meta_index_mutex); + + return meta; +} + + +/* + * Find and initialise an empty cache slot for index offset. + */ +static struct meta_index *empty_meta_index(struct inode *inode, int offset, + int skip) +{ + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + struct meta_index *meta = NULL; + int i; + + mutex_lock(&msblk->meta_index_mutex); + + TRACE("empty_meta_index: offset %d, skip %d\n", offset, skip); + + if (msblk->meta_index == NULL) { + /* + * First time cache index has been used, allocate and + * initialise. The cache index could be allocated at + * mount time but doing it here means it is allocated only + * if a 'large' file is read. + */ + msblk->meta_index = kcalloc(SQUASHFS_META_SLOTS, + sizeof(*(msblk->meta_index)), GFP_KERNEL); + if (msblk->meta_index == NULL) { + ERROR("Failed to allocate meta_index\n"); + goto failed; + } + for (i = 0; i < SQUASHFS_META_SLOTS; i++) { + msblk->meta_index[i].inode_number = 0; + msblk->meta_index[i].locked = 0; + } + msblk->next_meta_index = 0; + } + + for (i = SQUASHFS_META_SLOTS; i && + msblk->meta_index[msblk->next_meta_index].locked; i--) + msblk->next_meta_index = (msblk->next_meta_index + 1) % + SQUASHFS_META_SLOTS; + + if (i == 0) { + TRACE("empty_meta_index: failed!\n"); + goto failed; + } + + TRACE("empty_meta_index: returned meta entry %d, %p\n", + msblk->next_meta_index, + &msblk->meta_index[msblk->next_meta_index]); + + meta = &msblk->meta_index[msblk->next_meta_index]; + msblk->next_meta_index = (msblk->next_meta_index + 1) % + SQUASHFS_META_SLOTS; + + meta->inode_number = inode->i_ino; + meta->offset = offset; + meta->skip = skip; + meta->entries = 0; + meta->locked = 1; + +failed: + mutex_unlock(&msblk->meta_index_mutex); + return meta; +} + + +static void release_meta_index(struct inode *inode, struct meta_index *meta) +{ + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + mutex_lock(&msblk->meta_index_mutex); + meta->locked = 0; + mutex_unlock(&msblk->meta_index_mutex); +} + + +/* + * Read the next n blocks from the block list, starting from + * metadata block <start_block, offset>. + */ +static long long read_indexes(struct super_block *sb, int n, + u64 *start_block, int *offset) +{ + int err, i; + long long block = 0; + __le32 *blist = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); + + if (blist == NULL) { + ERROR("read_indexes: Failed to allocate block_list\n"); + return -ENOMEM; + } + + while (n) { + int blocks = min_t(int, n, PAGE_CACHE_SIZE >> 2); + + err = squashfs_read_metadata(sb, blist, start_block, + offset, blocks << 2); + if (err < 0) { + ERROR("read_indexes: reading block [%llx:%x]\n", + *start_block, *offset); + goto failure; + } + + for (i = 0; i < blocks; i++) { + int size = le32_to_cpu(blist[i]); + block += SQUASHFS_COMPRESSED_SIZE_BLOCK(size); + } + n -= blocks; + } + + kfree(blist); + return block; + +failure: + kfree(blist); + return err; +} + + +/* + * Each cache index slot has SQUASHFS_META_ENTRIES, each of which + * can cache one index -> datablock/blocklist-block mapping. We wish + * to distribute these over the length of the file, entry[0] maps index x, + * entry[1] maps index x + skip, entry[2] maps index x + 2 * skip, and so on. + * The larger the file, the greater the skip factor. The skip factor is + * limited to the size of the metadata cache (SQUASHFS_CACHED_BLKS) to ensure + * the number of metadata blocks that need to be read fits into the cache. + * If the skip factor is limited in this way then the file will use multiple + * slots. + */ +static inline int calculate_skip(int blocks) +{ + int skip = blocks / ((SQUASHFS_META_ENTRIES + 1) + * SQUASHFS_META_INDEXES); + return min(SQUASHFS_CACHED_BLKS - 1, skip + 1); +} + + +/* + * Search and grow the index cache for the specified inode, returning the + * on-disk locations of the datablock and block list metadata block + * <index_block, index_offset> for index (scaled to nearest cache index). + */ +static int fill_meta_index(struct inode *inode, int index, + u64 *index_block, int *index_offset, u64 *data_block) +{ + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int skip = calculate_skip(i_size_read(inode) >> msblk->block_log); + int offset = 0; + struct meta_index *meta; + struct meta_entry *meta_entry; + u64 cur_index_block = squashfs_i(inode)->block_list_start; + int cur_offset = squashfs_i(inode)->offset; + u64 cur_data_block = squashfs_i(inode)->start; + int err, i; + + /* + * Scale index to cache index (cache slot entry) + */ + index /= SQUASHFS_META_INDEXES * skip; + + while (offset < index) { + meta = locate_meta_index(inode, offset + 1, index); + + if (meta == NULL) { + meta = empty_meta_index(inode, offset + 1, skip); + if (meta == NULL) + goto all_done; + } else { + offset = index < meta->offset + meta->entries ? index : + meta->offset + meta->entries - 1; + meta_entry = &meta->meta_entry[offset - meta->offset]; + cur_index_block = meta_entry->index_block + + msblk->inode_table; + cur_offset = meta_entry->offset; + cur_data_block = meta_entry->data_block; + TRACE("get_meta_index: offset %d, meta->offset %d, " + "meta->entries %d\n", offset, meta->offset, + meta->entries); + TRACE("get_meta_index: index_block 0x%llx, offset 0x%x" + " data_block 0x%llx\n", cur_index_block, + cur_offset, cur_data_block); + } + + /* + * If necessary grow cache slot by reading block list. Cache + * slot is extended up to index or to the end of the slot, in + * which case further slots will be used. + */ + for (i = meta->offset + meta->entries; i <= index && + i < meta->offset + SQUASHFS_META_ENTRIES; i++) { + int blocks = skip * SQUASHFS_META_INDEXES; + long long res = read_indexes(inode->i_sb, blocks, + &cur_index_block, &cur_offset); + + if (res < 0) { + if (meta->entries == 0) + /* + * Don't leave an empty slot on read + * error allocated to this inode... + */ + meta->inode_number = 0; + err = res; + goto failed; + } + + cur_data_block += res; + meta_entry = &meta->meta_entry[i - meta->offset]; + meta_entry->index_block = cur_index_block - + msblk->inode_table; + meta_entry->offset = cur_offset; + meta_entry->data_block = cur_data_block; + meta->entries++; + offset++; + } + + TRACE("get_meta_index: meta->offset %d, meta->entries %d\n", + meta->offset, meta->entries); + + release_meta_index(inode, meta); + } + +all_done: + *index_block = cur_index_block; + *index_offset = cur_offset; + *data_block = cur_data_block; + + /* + * Scale cache index (cache slot entry) to index + */ + return offset * SQUASHFS_META_INDEXES * skip; + +failed: + release_meta_index(inode, meta); + return err; +} + + +/* + * Get the on-disk location and compressed size of the datablock + * specified by index. Fill_meta_index() does most of the work. + */ +static int read_blocklist(struct inode *inode, int index, u64 *block) +{ + u64 start; + long long blks; + int offset; + __le32 size; + int res = fill_meta_index(inode, index, &start, &offset, block); + + TRACE("read_blocklist: res %d, index %d, start 0x%llx, offset" + " 0x%x, block 0x%llx\n", res, index, start, offset, + *block); + + if (res < 0) + return res; + + /* + * res contains the index of the mapping returned by fill_meta_index(), + * this will likely be less than the desired index (because the + * meta_index cache works at a higher granularity). Read any + * extra block indexes needed. + */ + if (res < index) { + blks = read_indexes(inode->i_sb, index - res, &start, &offset); + if (blks < 0) + return (int) blks; + *block += blks; + } + + /* + * Read length of block specified by index. + */ + res = squashfs_read_metadata(inode->i_sb, &size, &start, &offset, + sizeof(size)); + if (res < 0) + return res; + return le32_to_cpu(size); +} + + +static int squashfs_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; + int bytes, i, offset = 0, sparse = 0; + struct squashfs_cache_entry *buffer = NULL; + void *pageaddr; + + int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; + int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); + int start_index = page->index & ~mask; + int end_index = start_index | mask; + int file_end = i_size_read(inode) >> msblk->block_log; + + TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", + page->index, squashfs_i(inode)->start); + + if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) + goto out; + + if (index < file_end || squashfs_i(inode)->fragment_block == + SQUASHFS_INVALID_BLK) { + /* + * Reading a datablock from disk. Need to read block list + * to get location and block size. + */ + u64 block = 0; + int bsize = read_blocklist(inode, index, &block); + if (bsize < 0) + goto error_out; + + if (bsize == 0) { /* hole */ + bytes = index == file_end ? + (i_size_read(inode) & (msblk->block_size - 1)) : + msblk->block_size; + sparse = 1; + } else { + /* + * Read and decompress datablock. + */ + buffer = squashfs_get_datablock(inode->i_sb, + block, bsize); + if (buffer->error) { + ERROR("Unable to read page, block %llx, size %x" + "\n", block, bsize); + squashfs_cache_put(buffer); + goto error_out; + } + bytes = buffer->length; + } + } else { + /* + * Datablock is stored inside a fragment (tail-end packed + * block). + */ + buffer = squashfs_get_fragment(inode->i_sb, + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + + if (buffer->error) { + ERROR("Unable to read page, block %llx, size %x\n", + squashfs_i(inode)->fragment_block, + squashfs_i(inode)->fragment_size); + squashfs_cache_put(buffer); + goto error_out; + } + bytes = i_size_read(inode) & (msblk->block_size - 1); + offset = squashfs_i(inode)->fragment_offset; + } + + /* + * Loop copying datablock into pages. As the datablock likely covers + * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly + * grab the pages from the page cache, except for the page that we've + * been called to fill. + */ + for (i = start_index; i <= end_index && bytes > 0; i++, + bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { + struct page *push_page; + int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); + + TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); + + push_page = (i == page->index) ? page : + grab_cache_page_nowait(page->mapping, i); + + if (!push_page) + continue; + + if (PageUptodate(push_page)) + goto skip_page; + + pageaddr = kmap_atomic(push_page, KM_USER0); + squashfs_copy_data(pageaddr, buffer, offset, avail); + memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); + kunmap_atomic(pageaddr, KM_USER0); + flush_dcache_page(push_page); + SetPageUptodate(push_page); +skip_page: + unlock_page(push_page); + if (i != page->index) + page_cache_release(push_page); + } + + if (!sparse) + squashfs_cache_put(buffer); + + return 0; + +error_out: + SetPageError(page); +out: + pageaddr = kmap_atomic(page, KM_USER0); + memset(pageaddr, 0, PAGE_CACHE_SIZE); + kunmap_atomic(pageaddr, KM_USER0); + flush_dcache_page(page); + if (!PageError(page)) + SetPageUptodate(page); + unlock_page(page); + + return 0; +} + + +const struct address_space_operations squashfs_aops = { + .readpage = squashfs_readpage +}; diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c new file mode 100644 index 00000000000..b5a2c15bbbc --- /dev/null +++ b/fs/squashfs/fragment.c @@ -0,0 +1,98 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * fragment.c + */ + +/* + * This file implements code to handle compressed fragments (tail-end packed + * datablocks). + * + * Regular files contain a fragment index which is mapped to a fragment + * location on disk and compressed size using a fragment lookup table. + * Like everything in Squashfs this fragment lookup table is itself stored + * compressed into metadata blocks. A second index table is used to locate + * these. This second index table for speed of access (and because it + * is small) is read at mount time and cached in memory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Look-up fragment using the fragment index table. Return the on disk + * location of the fragment and its compressed size + */ +int squashfs_frag_lookup(struct super_block *sb, unsigned int fragment, + u64 *fragment_block) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + int block = SQUASHFS_FRAGMENT_INDEX(fragment); + int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); + u64 start_block = le64_to_cpu(msblk->fragment_index[block]); + struct squashfs_fragment_entry fragment_entry; + int size; + + size = squashfs_read_metadata(sb, &fragment_entry, &start_block, + &offset, sizeof(fragment_entry)); + if (size < 0) + return size; + + *fragment_block = le64_to_cpu(fragment_entry.start_block); + size = le32_to_cpu(fragment_entry.size); + + return size; +} + + +/* + * Read the uncompressed fragment lookup table indexes off disk into memory + */ +__le64 *squashfs_read_fragment_index_table(struct super_block *sb, + u64 fragment_table_start, unsigned int fragments) +{ + unsigned int length = SQUASHFS_FRAGMENT_INDEX_BYTES(fragments); + __le64 *fragment_index; + int err; + + /* Allocate fragment lookup table indexes */ + fragment_index = kmalloc(length, GFP_KERNEL); + if (fragment_index == NULL) { + ERROR("Failed to allocate fragment index table\n"); + return ERR_PTR(-ENOMEM); + } + + err = squashfs_read_table(sb, fragment_index, fragment_table_start, + length); + if (err < 0) { + ERROR("unable to read fragment index table\n"); + kfree(fragment_index); + return ERR_PTR(err); + } + + return fragment_index; +} diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c new file mode 100644 index 00000000000..3795b837ba2 --- /dev/null +++ b/fs/squashfs/id.c @@ -0,0 +1,94 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * id.c + */ + +/* + * This file implements code to handle uids and gids. + * + * For space efficiency regular files store uid and gid indexes, which are + * converted to 32-bit uids/gids using an id look up table. This table is + * stored compressed into metadata blocks. A second index table is used to + * locate these. This second index table for speed of access (and because it + * is small) is read at mount time and cached in memory. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Map uid/gid index into real 32-bit uid/gid using the id look up table + */ +int squashfs_get_id(struct super_block *sb, unsigned int index, + unsigned int *id) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + int block = SQUASHFS_ID_BLOCK(index); + int offset = SQUASHFS_ID_BLOCK_OFFSET(index); + u64 start_block = le64_to_cpu(msblk->id_table[block]); + __le32 disk_id; + int err; + + err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset, + sizeof(disk_id)); + if (err < 0) + return err; + + *id = le32_to_cpu(disk_id); + return 0; +} + + +/* + * Read uncompressed id lookup table indexes from disk into memory + */ +__le64 *squashfs_read_id_index_table(struct super_block *sb, + u64 id_table_start, unsigned short no_ids) +{ + unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids); + __le64 *id_table; + int err; + + TRACE("In read_id_index_table, length %d\n", length); + + /* Allocate id lookup table indexes */ + id_table = kmalloc(length, GFP_KERNEL); + if (id_table == NULL) { + ERROR("Failed to allocate id index table\n"); + return ERR_PTR(-ENOMEM); + } + + err = squashfs_read_table(sb, id_table, id_table_start, length); + if (err < 0) { + ERROR("unable to read id index table\n"); + kfree(id_table); + return ERR_PTR(err); + } + + return id_table; +} diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c new file mode 100644 index 00000000000..7a63398bb85 --- /dev/null +++ b/fs/squashfs/inode.c @@ -0,0 +1,346 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * inode.c + */ + +/* + * This file implements code to create and read inodes from disk. + * + * Inodes in Squashfs are identified by a 48-bit inode which encodes the + * location of the compressed metadata block containing the inode, and the byte + * offset into that block where the inode is placed (<block, offset>). + * + * To maximise compression there are different inodes for each file type + * (regular file, directory, device, etc.), the inode contents and length + * varying with the type. + * + * To further maximise compression, two types of regular file inode and + * directory inode are defined: inodes optimised for frequently occurring + * regular files and directories, and extended types where extra + * information has to be stored. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Initialise VFS inode with the base inode information common to all + * Squashfs inode types. Sqsh_ino contains the unswapped base inode + * off disk. + */ +static int squashfs_new_inode(struct super_block *sb, struct inode *inode, + struct squashfs_base_inode *sqsh_ino) +{ + int err; + + err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &inode->i_uid); + if (err) + return err; + + err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->guid), &inode->i_gid); + if (err) + return err; + + inode->i_ino = le32_to_cpu(sqsh_ino->inode_number); + inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime); + inode->i_atime.tv_sec = inode->i_mtime.tv_sec; + inode->i_ctime.tv_sec = inode->i_mtime.tv_sec; + inode->i_mode = le16_to_cpu(sqsh_ino->mode); + inode->i_size = 0; + + return err; +} + + +struct inode *squashfs_iget(struct super_block *sb, long long ino, + unsigned int ino_number) +{ + struct inode *inode = iget_locked(sb, ino_number); + int err; + + TRACE("Entered squashfs_iget\n"); + + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + err = squashfs_read_inode(inode, ino); + if (err) { + iget_failed(inode); + return ERR_PTR(err); + } + + unlock_new_inode(inode); + return inode; +} + + +/* + * Initialise VFS inode by reading inode from inode table (compressed + * metadata). The format and amount of data read depends on type. + */ +int squashfs_read_inode(struct inode *inode, long long ino) +{ + struct super_block *sb = inode->i_sb; + struct squashfs_sb_info *msblk = sb->s_fs_info; + u64 block = SQUASHFS_INODE_BLK(ino) + msblk->inode_table; + int err, type, offset = SQUASHFS_INODE_OFFSET(ino); + union squashfs_inode squashfs_ino; + struct squashfs_base_inode *sqshb_ino = &squashfs_ino.base; + + TRACE("Entered squashfs_read_inode\n"); + + /* + * Read inode base common to all inode types. + */ + err = squashfs_read_metadata(sb, sqshb_ino, &block, + &offset, sizeof(*sqshb_ino)); + if (err < 0) + goto failed_read; + + err = squashfs_new_inode(sb, inode, sqshb_ino); + if (err) + goto failed_read; + + block = SQUASHFS_INODE_BLK(ino) + msblk->inode_table; + offset = SQUASHFS_INODE_OFFSET(ino); + + type = le16_to_cpu(sqshb_ino->inode_type); + switch (type) { + case SQUASHFS_REG_TYPE: { + unsigned int frag_offset, frag_size, frag; + u64 frag_blk; + struct squashfs_reg_inode *sqsh_ino = &squashfs_ino.reg; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + frag = le32_to_cpu(sqsh_ino->fragment); + if (frag != SQUASHFS_INVALID_FRAG) { + frag_offset = le32_to_cpu(sqsh_ino->offset); + frag_size = squashfs_frag_lookup(sb, frag, &frag_blk); + if (frag_size < 0) { + err = frag_size; + goto failed_read; + } + } else { + frag_blk = SQUASHFS_INVALID_BLK; + frag_size = 0; + frag_offset = 0; + } + + inode->i_nlink = 1; + inode->i_size = le32_to_cpu(sqsh_ino->file_size); + inode->i_fop = &generic_ro_fops; + inode->i_mode |= S_IFREG; + inode->i_blocks = ((inode->i_size - 1) >> 9) + 1; + squashfs_i(inode)->fragment_block = frag_blk; + squashfs_i(inode)->fragment_size = frag_size; + squashfs_i(inode)->fragment_offset = frag_offset; + squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block); + squashfs_i(inode)->block_list_start = block; + squashfs_i(inode)->offset = offset; + inode->i_data.a_ops = &squashfs_aops; + + TRACE("File inode %x:%x, start_block %llx, block_list_start " + "%llx, offset %x\n", SQUASHFS_INODE_BLK(ino), + offset, squashfs_i(inode)->start, block, offset); + break; + } + case SQUASHFS_LREG_TYPE: { + unsigned int frag_offset, frag_size, frag; + u64 frag_blk; + struct squashfs_lreg_inode *sqsh_ino = &squashfs_ino.lreg; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + frag = le32_to_cpu(sqsh_ino->fragment); + if (frag != SQUASHFS_INVALID_FRAG) { + frag_offset = le32_to_cpu(sqsh_ino->offset); + frag_size = squashfs_frag_lookup(sb, frag, &frag_blk); + if (frag_size < 0) { + err = frag_size; + goto failed_read; + } + } else { + frag_blk = SQUASHFS_INVALID_BLK; + frag_size = 0; + frag_offset = 0; + } + + inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); + inode->i_size = le64_to_cpu(sqsh_ino->file_size); + inode->i_fop = &generic_ro_fops; + inode->i_mode |= S_IFREG; + inode->i_blocks = ((inode->i_size - + le64_to_cpu(sqsh_ino->sparse) - 1) >> 9) + 1; + + squashfs_i(inode)->fragment_block = frag_blk; + squashfs_i(inode)->fragment_size = frag_size; + squashfs_i(inode)->fragment_offset = frag_offset; + squashfs_i(inode)->start = le64_to_cpu(sqsh_ino->start_block); + squashfs_i(inode)->block_list_start = block; + squashfs_i(inode)->offset = offset; + inode->i_data.a_ops = &squashfs_aops; + + TRACE("File inode %x:%x, start_block %llx, block_list_start " + "%llx, offset %x\n", SQUASHFS_INODE_BLK(ino), + offset, squashfs_i(inode)->start, block, offset); + break; + } + case SQUASHFS_DIR_TYPE: { + struct squashfs_dir_inode *sqsh_ino = &squashfs_ino.dir; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); + inode->i_size = le16_to_cpu(sqsh_ino->file_size); + inode->i_op = &squashfs_dir_inode_ops; + inode->i_fop = &squashfs_dir_ops; + inode->i_mode |= S_IFDIR; + squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block); + squashfs_i(inode)->offset = le16_to_cpu(sqsh_ino->offset); + squashfs_i(inode)->dir_idx_cnt = 0; + squashfs_i(inode)->parent = le32_to_cpu(sqsh_ino->parent_inode); + + TRACE("Directory inode %x:%x, start_block %llx, offset %x\n", + SQUASHFS_INODE_BLK(ino), offset, + squashfs_i(inode)->start, + le16_to_cpu(sqsh_ino->offset)); + break; + } + case SQUASHFS_LDIR_TYPE: { + struct squashfs_ldir_inode *sqsh_ino = &squashfs_ino.ldir; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); + inode->i_size = le32_to_cpu(sqsh_ino->file_size); + inode->i_op = &squashfs_dir_inode_ops; + inode->i_fop = &squashfs_dir_ops; + inode->i_mode |= S_IFDIR; + squashfs_i(inode)->start = le32_to_cpu(sqsh_ino->start_block); + squashfs_i(inode)->offset = le16_to_cpu(sqsh_ino->offset); + squashfs_i(inode)->dir_idx_start = block; + squashfs_i(inode)->dir_idx_offset = offset; + squashfs_i(inode)->dir_idx_cnt = le16_to_cpu(sqsh_ino->i_count); + squashfs_i(inode)->parent = le32_to_cpu(sqsh_ino->parent_inode); + + TRACE("Long directory inode %x:%x, start_block %llx, offset " + "%x\n", SQUASHFS_INODE_BLK(ino), offset, + squashfs_i(inode)->start, + le16_to_cpu(sqsh_ino->offset)); + break; + } + case SQUASHFS_SYMLINK_TYPE: + case SQUASHFS_LSYMLINK_TYPE: { + struct squashfs_symlink_inode *sqsh_ino = &squashfs_ino.symlink; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); + inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + inode->i_op = &page_symlink_inode_operations; + inode->i_data.a_ops = &squashfs_symlink_aops; + inode->i_mode |= S_IFLNK; + squashfs_i(inode)->start = block; + squashfs_i(inode)->offset = offset; + + TRACE("Symbolic link inode %x:%x, start_block %llx, offset " + "%x\n", SQUASHFS_INODE_BLK(ino), offset, + block, offset); + break; + } + case SQUASHFS_BLKDEV_TYPE: + case SQUASHFS_CHRDEV_TYPE: + case SQUASHFS_LBLKDEV_TYPE: + case SQUASHFS_LCHRDEV_TYPE: { + struct squashfs_dev_inode *sqsh_ino = &squashfs_ino.dev; + unsigned int rdev; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + if (type == SQUASHFS_CHRDEV_TYPE) + inode->i_mode |= S_IFCHR; + else + inode->i_mode |= S_IFBLK; + inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); + rdev = le32_to_cpu(sqsh_ino->rdev); + init_special_inode(inode, inode->i_mode, new_decode_dev(rdev)); + + TRACE("Device inode %x:%x, rdev %x\n", + SQUASHFS_INODE_BLK(ino), offset, rdev); + break; + } + case SQUASHFS_FIFO_TYPE: + case SQUASHFS_SOCKET_TYPE: + case SQUASHFS_LFIFO_TYPE: + case SQUASHFS_LSOCKET_TYPE: { + struct squashfs_ipc_inode *sqsh_ino = &squashfs_ino.ipc; + + err = squashfs_read_metadata(sb, sqsh_ino, &block, &offset, + sizeof(*sqsh_ino)); + if (err < 0) + goto failed_read; + + if (type == SQUASHFS_FIFO_TYPE) + inode->i_mode |= S_IFIFO; + else + inode->i_mode |= S_IFSOCK; + inode->i_nlink = le32_to_cpu(sqsh_ino->nlink); + init_special_inode(inode, inode->i_mode, 0); + break; + } + default: + ERROR("Unknown inode type %d in squashfs_iget!\n", type); + return -EINVAL; + } + + return 0; + +failed_read: + ERROR("Unable to read inode 0x%llx\n", ino); + return err; +} diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c new file mode 100644 index 00000000000..9e398653b22 --- /dev/null +++ b/fs/squashfs/namei.c @@ -0,0 +1,242 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * namei.c + */ + +/* + * This file implements code to do filename lookup in directories. + * + * Like inodes, directories are packed into compressed metadata blocks, stored + * in a directory table. Directories are accessed using the start address of + * the metablock containing the directory and the offset into the + * decompressed block (<block, offset>). + * + * Directories are organised in a slightly complex way, and are not simply + * a list of file names. The organisation takes advantage of the + * fact that (in most cases) the inodes of the files will be in the same + * compressed metadata block, and therefore, can share the start block. + * Directories are therefore organised in a two level list, a directory + * header containing the shared start block value, and a sequence of directory + * entries, each of which share the shared start block. A new directory header + * is written once/if the inode start block changes. The directory + * header/directory entry list is repeated as many times as necessary. + * + * Directories are sorted, and can contain a directory index to speed up + * file lookup. Directory indexes store one entry per metablock, each entry + * storing the index/filename mapping to the first directory header + * in each metadata block. Directories are sorted in alphabetical order, + * and at lookup the index is scanned linearly looking for the first filename + * alphabetically larger than the filename being looked up. At this point the + * location of the metadata block the filename is in has been found. + * The general idea of the index is ensure only one metadata block needs to be + * decompressed to do a lookup irrespective of the length of the directory. + * This scheme has the advantage that it doesn't require extra memory overhead + * and doesn't require much extra storage on disk. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/dcache.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +/* + * Lookup name in the directory index, returning the location of the metadata + * block containing it, and the directory index this represents. + * + * If we get an error reading the index then return the part of the index + * (if any) we have managed to read - the index isn't essential, just + * quicker. + */ +static int get_dir_index_using_name(struct super_block *sb, + u64 *next_block, int *next_offset, u64 index_start, + int index_offset, int i_count, const char *name, + int len) +{ + struct squashfs_sb_info *msblk = sb->s_fs_info; + int i, size, length = 0, err; + struct squashfs_dir_index *index; + char *str; + + TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); + + index = kmalloc(sizeof(*index) + SQUASHFS_NAME_LEN * 2 + 2, GFP_KERNEL); + if (index == NULL) { + ERROR("Failed to allocate squashfs_dir_index\n"); + goto out; + } + + str = &index->name[SQUASHFS_NAME_LEN + 1]; + strncpy(str, name, len); + str[len] = '\0'; + + for (i = 0; i < i_count; i++) { + err = squashfs_read_metadata(sb, index, &index_start, + &index_offset, sizeof(*index)); + if (err < 0) + break; + + + size = le32_to_cpu(index->size) + 1; + + err = squashfs_read_metadata(sb, index->name, &index_start, + &index_offset, size); + if (err < 0) + break; + + index->name[size] = '\0'; + + if (strcmp(index->name, str) > 0) + break; + + length = le32_to_cpu(index->index); + *next_block = le32_to_cpu(index->start_block) + + msblk->directory_table; + } + + *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; + kfree(index); + +out: + /* + * Return index (f_pos) of the looked up metadata block. Translate + * from internal f_pos to external f_pos which is offset by 3 because + * we invent "." and ".." entries which are not actually stored in the + * directory. + */ + return length + 3; +} + + +static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + const unsigned char *name = dentry->d_name.name; + int len = dentry->d_name.len; + struct inode *inode = NULL; + struct squashfs_sb_info *msblk = dir->i_sb->s_fs_info; + struct squashfs_dir_header dirh; + struct squashfs_dir_entry *dire; + u64 block = squashfs_i(dir)->start + msblk->directory_table; + int offset = squashfs_i(dir)->offset; + int err, length = 0, dir_count, size; + + TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset); + + dire = kmalloc(sizeof(*dire) + SQUASHFS_NAME_LEN + 1, GFP_KERNEL); + if (dire == NULL) { + ERROR("Failed to allocate squashfs_dir_entry\n"); + return ERR_PTR(-ENOMEM); + } + + if (len > SQUASHFS_NAME_LEN) { + err = -ENAMETOOLONG; + goto failed; + } + + length = get_dir_index_using_name(dir->i_sb, &block, &offset, + squashfs_i(dir)->dir_idx_start, + squashfs_i(dir)->dir_idx_offset, + squashfs_i(dir)->dir_idx_cnt, name, len); + + while (length < i_size_read(dir)) { + /* + * Read directory header. + */ + err = squashfs_read_metadata(dir->i_sb, &dirh, &block, + &offset, sizeof(dirh)); + if (err < 0) + goto read_failure; + + length += sizeof(dirh); + + dir_count = le32_to_cpu(dirh.count) + 1; + while (dir_count--) { + /* + * Read directory entry. + */ + err = squashfs_read_metadata(dir->i_sb, dire, &block, + &offset, sizeof(*dire)); + if (err < 0) + goto read_failure; + + size = le16_to_cpu(dire->size) + 1; + + err = squashfs_read_metadata(dir->i_sb, dire->name, + &block, &offset, size); + if (err < 0) + goto read_failure; + + length += sizeof(*dire) + size; + + if (name[0] < dire->name[0]) + goto exit_lookup; + + if (len == size && !strncmp(name, dire->name, len)) { + unsigned int blk, off, ino_num; + long long ino; + blk = le32_to_cpu(dirh.start_block); + off = le16_to_cpu(dire->offset); + ino_num = le32_to_cpu(dirh.inode_number) + + (short) le16_to_cpu(dire->inode_number); + ino = SQUASHFS_MKINODE(blk, off); + + TRACE("calling squashfs_iget for directory " + "entry %s, inode %x:%x, %d\n", name, + blk, off, ino_num); + + inode = squashfs_iget(dir->i_sb, ino, ino_num); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto failed; + } + + goto exit_lookup; + } + } + } + +exit_lookup: + kfree(dire); + if (inode) + return d_splice_alias(inode, dentry); + d_add(dentry, inode); + return ERR_PTR(0); + +read_failure: + ERROR("Unable to read directory block [%llx:%x]\n", + squashfs_i(dir)->start + msblk->directory_table, + squashfs_i(dir)->offset); +failed: + kfree(dire); + return ERR_PTR(err); +} + + +const struct inode_operations squashfs_dir_inode_ops = { + .lookup = squashfs_lookup +}; diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h new file mode 100644 index 00000000000..6b2515d027d --- /dev/null +++ b/fs/squashfs/squashfs.h @@ -0,0 +1,90 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * squashfs.h + */ + +#define TRACE(s, args...) pr_debug("SQUASHFS: "s, ## args) + +#define ERROR(s, args...) pr_err("SQUASHFS error: "s, ## args) + +#define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) + +static inline struct squashfs_inode_info *squashfs_i(struct inode *inode) +{ + return list_entry(inode, struct squashfs_inode_info, vfs_inode); +} + +/* block.c */ +extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, + int); + +/* cache.c */ +extern struct squashfs_cache *squashfs_cache_init(char *, int, int); +extern void squashfs_cache_delete(struct squashfs_cache *); +extern struct squashfs_cache_entry *squashfs_cache_get(struct super_block *, + struct squashfs_cache *, u64, int); +extern void squashfs_cache_put(struct squashfs_cache_entry *); +extern int squashfs_copy_data(void *, struct squashfs_cache_entry *, int, int); +extern int squashfs_read_metadata(struct super_block *, void *, u64 *, + int *, int); +extern struct squashfs_cache_entry *squashfs_get_fragment(struct super_block *, + u64, int); +extern struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *, + u64, int); +extern int squashfs_read_table(struct super_block *, void *, u64, int); + +/* export.c */ +extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, + unsigned int); + +/* fragment.c */ +extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); +extern __le64 *squashfs_read_fragment_index_table(struct super_block *, + u64, unsigned int); + +/* id.c */ +extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); +extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, + unsigned short); + +/* inode.c */ +extern struct inode *squashfs_iget(struct super_block *, long long, + unsigned int); +extern int squashfs_read_inode(struct inode *, long long); + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern const struct file_operations squashfs_dir_ops; + +/* export.c */ +extern const struct export_operations squashfs_export_ops; + +/* file.c */ +extern const struct address_space_operations squashfs_aops; + +/* namei.c */ +extern const struct inode_operations squashfs_dir_inode_ops; + +/* symlink.c */ +extern const struct address_space_operations squashfs_symlink_aops; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h new file mode 100644 index 00000000000..6840da1bf21 --- /dev/null +++ b/fs/squashfs/squashfs_fs.h @@ -0,0 +1,381 @@ +#ifndef SQUASHFS_FS +#define SQUASHFS_FS +/* + * Squashfs + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * squashfs_fs.h + */ + +#define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE +#define SQUASHFS_MAJOR 4 +#define SQUASHFS_MINOR 0 +#define SQUASHFS_MAGIC 0x73717368 +#define SQUASHFS_START 0 + +/* size of metadata (inode and directory) blocks */ +#define SQUASHFS_METADATA_SIZE 8192 +#define SQUASHFS_METADATA_LOG 13 + +/* default size of data blocks */ +#define SQUASHFS_FILE_SIZE 131072 +#define SQUASHFS_FILE_LOG 17 + +#define SQUASHFS_FILE_MAX_SIZE 1048576 +#define SQUASHFS_FILE_MAX_LOG 20 + +/* Max number of uids and gids */ +#define SQUASHFS_IDS 65536 + +/* Max length of filename (not 255) */ +#define SQUASHFS_NAME_LEN 256 + +#define SQUASHFS_INVALID_FRAG (0xffffffffU) +#define SQUASHFS_INVALID_BLK (-1LL) + +/* Filesystem flags */ +#define SQUASHFS_NOI 0 +#define SQUASHFS_NOD 1 +#define SQUASHFS_NOF 3 +#define SQUASHFS_NO_FRAG 4 +#define SQUASHFS_ALWAYS_FRAG 5 +#define SQUASHFS_DUPLICATE 6 +#define SQUASHFS_EXPORT 7 + +#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) + +#define SQUASHFS_UNCOMPRESSED_INODES(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_NOI) + +#define SQUASHFS_UNCOMPRESSED_DATA(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_NOD) + +#define SQUASHFS_UNCOMPRESSED_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_NOF) + +#define SQUASHFS_NO_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_NO_FRAG) + +#define SQUASHFS_ALWAYS_FRAGMENTS(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_ALWAYS_FRAG) + +#define SQUASHFS_DUPLICATES(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_DUPLICATE) + +#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ + SQUASHFS_EXPORT) + +/* Max number of types and file types */ +#define SQUASHFS_DIR_TYPE 1 +#define SQUASHFS_REG_TYPE 2 +#define SQUASHFS_SYMLINK_TYPE 3 +#define SQUASHFS_BLKDEV_TYPE 4 +#define SQUASHFS_CHRDEV_TYPE 5 +#define SQUASHFS_FIFO_TYPE 6 +#define SQUASHFS_SOCKET_TYPE 7 +#define SQUASHFS_LDIR_TYPE 8 +#define SQUASHFS_LREG_TYPE 9 +#define SQUASHFS_LSYMLINK_TYPE 10 +#define SQUASHFS_LBLKDEV_TYPE 11 +#define SQUASHFS_LCHRDEV_TYPE 12 +#define SQUASHFS_LFIFO_TYPE 13 +#define SQUASHFS_LSOCKET_TYPE 14 + +/* Flag whether block is compressed or uncompressed, bit is set if block is + * uncompressed */ +#define SQUASHFS_COMPRESSED_BIT (1 << 15) + +#define SQUASHFS_COMPRESSED_SIZE(B) (((B) & ~SQUASHFS_COMPRESSED_BIT) ? \ + (B) & ~SQUASHFS_COMPRESSED_BIT : SQUASHFS_COMPRESSED_BIT) + +#define SQUASHFS_COMPRESSED(B) (!((B) & SQUASHFS_COMPRESSED_BIT)) + +#define SQUASHFS_COMPRESSED_BIT_BLOCK (1 << 24) + +#define SQUASHFS_COMPRESSED_SIZE_BLOCK(B) ((B) & \ + ~SQUASHFS_COMPRESSED_BIT_BLOCK) + +#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) + +/* + * Inode number ops. Inodes consist of a compressed block number, and an + * uncompressed offset within that block + */ +#define SQUASHFS_INODE_BLK(A) ((unsigned int) ((A) >> 16)) + +#define SQUASHFS_INODE_OFFSET(A) ((unsigned int) ((A) & 0xffff)) + +#define SQUASHFS_MKINODE(A, B) ((long long)(((long long) (A)\ + << 16) + (B))) + +/* Translate between VFS mode and squashfs mode */ +#define SQUASHFS_MODE(A) ((A) & 0xfff) + +/* fragment and fragment table defines */ +#define SQUASHFS_FRAGMENT_BYTES(A) \ + ((A) * sizeof(struct squashfs_fragment_entry)) + +#define SQUASHFS_FRAGMENT_INDEX(A) (SQUASHFS_FRAGMENT_BYTES(A) / \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_FRAGMENT_INDEX_OFFSET(A) (SQUASHFS_FRAGMENT_BYTES(A) % \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_FRAGMENT_INDEXES(A) ((SQUASHFS_FRAGMENT_BYTES(A) + \ + SQUASHFS_METADATA_SIZE - 1) / \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_FRAGMENT_INDEX_BYTES(A) (SQUASHFS_FRAGMENT_INDEXES(A) *\ + sizeof(u64)) + +/* inode lookup table defines */ +#define SQUASHFS_LOOKUP_BYTES(A) ((A) * sizeof(u64)) + +#define SQUASHFS_LOOKUP_BLOCK(A) (SQUASHFS_LOOKUP_BYTES(A) / \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_LOOKUP_BLOCK_OFFSET(A) (SQUASHFS_LOOKUP_BYTES(A) % \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_LOOKUP_BLOCKS(A) ((SQUASHFS_LOOKUP_BYTES(A) + \ + SQUASHFS_METADATA_SIZE - 1) / \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_LOOKUP_BLOCK_BYTES(A) (SQUASHFS_LOOKUP_BLOCKS(A) *\ + sizeof(u64)) + +/* uid/gid lookup table defines */ +#define SQUASHFS_ID_BYTES(A) ((A) * sizeof(unsigned int)) + +#define SQUASHFS_ID_BLOCK(A) (SQUASHFS_ID_BYTES(A) / \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_ID_BLOCK_OFFSET(A) (SQUASHFS_ID_BYTES(A) % \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_ID_BLOCKS(A) ((SQUASHFS_ID_BYTES(A) + \ + SQUASHFS_METADATA_SIZE - 1) / \ + SQUASHFS_METADATA_SIZE) + +#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ + sizeof(u64)) + +/* cached data constants for filesystem */ +#define SQUASHFS_CACHED_BLKS 8 + +#define SQUASHFS_MAX_FILE_SIZE_LOG 64 + +#define SQUASHFS_MAX_FILE_SIZE (1LL << \ + (SQUASHFS_MAX_FILE_SIZE_LOG - 2)) + +#define SQUASHFS_MARKER_BYTE 0xff + +/* meta index cache */ +#define SQUASHFS_META_INDEXES (SQUASHFS_METADATA_SIZE / sizeof(unsigned int)) +#define SQUASHFS_META_ENTRIES 127 +#define SQUASHFS_META_SLOTS 8 + +struct meta_entry { + u64 data_block; + unsigned int index_block; + unsigned short offset; + unsigned short pad; +}; + +struct meta_index { + unsigned int inode_number; + unsigned int offset; + unsigned short entries; + unsigned short skip; + unsigned short locked; + unsigned short pad; + struct meta_entry meta_entry[SQUASHFS_META_ENTRIES]; +}; + + +/* + * definitions for structures on disk + */ +#define ZLIB_COMPRESSION 1 + +struct squashfs_super_block { + __le32 s_magic; + __le32 inodes; + __le32 mkfs_time; + __le32 block_size; + __le32 fragments; + __le16 compression; + __le16 block_log; + __le16 flags; + __le16 no_ids; + __le16 s_major; + __le16 s_minor; + __le64 root_inode; + __le64 bytes_used; + __le64 id_table_start; + __le64 xattr_table_start; + __le64 inode_table_start; + __le64 directory_table_start; + __le64 fragment_table_start; + __le64 lookup_table_start; +}; + +struct squashfs_dir_index { + __le32 index; + __le32 start_block; + __le32 size; + unsigned char name[0]; +}; + +struct squashfs_base_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; +}; + +struct squashfs_ipc_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le32 nlink; +}; + +struct squashfs_dev_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le32 nlink; + __le32 rdev; +}; + +struct squashfs_symlink_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le32 nlink; + __le32 symlink_size; + char symlink[0]; +}; + +struct squashfs_reg_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le32 start_block; + __le32 fragment; + __le32 offset; + __le32 file_size; + __le16 block_list[0]; +}; + +struct squashfs_lreg_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le64 start_block; + __le64 file_size; + __le64 sparse; + __le32 nlink; + __le32 fragment; + __le32 offset; + __le32 xattr; + __le16 block_list[0]; +}; + +struct squashfs_dir_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le32 start_block; + __le32 nlink; + __le16 file_size; + __le16 offset; + __le32 parent_inode; +}; + +struct squashfs_ldir_inode { + __le16 inode_type; + __le16 mode; + __le16 uid; + __le16 guid; + __le32 mtime; + __le32 inode_number; + __le32 nlink; + __le32 file_size; + __le32 start_block; + __le32 parent_inode; + __le16 i_count; + __le16 offset; + __le32 xattr; + struct squashfs_dir_index index[0]; +}; + +union squashfs_inode { + struct squashfs_base_inode base; + struct squashfs_dev_inode dev; + struct squashfs_symlink_inode symlink; + struct squashfs_reg_inode reg; + struct squashfs_lreg_inode lreg; + struct squashfs_dir_inode dir; + struct squashfs_ldir_inode ldir; + struct squashfs_ipc_inode ipc; +}; + +struct squashfs_dir_entry { + __le16 offset; + __le16 inode_number; + __le16 type; + __le16 size; + char name[0]; +}; + +struct squashfs_dir_header { + __le32 count; + __le32 start_block; + __le32 inode_number; +}; + +struct squashfs_fragment_entry { + __le64 start_block; + __le32 size; + unsigned int unused; +}; + +#endif diff --git a/fs/squashfs/squashfs_fs_i.h b/fs/squashfs/squashfs_fs_i.h new file mode 100644 index 00000000000..fbfca30c0c6 --- /dev/null +++ b/fs/squashfs/squashfs_fs_i.h @@ -0,0 +1,45 @@ +#ifndef SQUASHFS_FS_I +#define SQUASHFS_FS_I +/* + * Squashfs + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * squashfs_fs_i.h + */ + +struct squashfs_inode_info { + u64 start; + int offset; + union { + struct { + u64 fragment_block; + int fragment_size; + int fragment_offset; + u64 block_list_start; + }; + struct { + u64 dir_idx_start; + int dir_idx_offset; + int dir_idx_cnt; + int parent; + }; + }; + struct inode vfs_inode; +}; +#endif diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h new file mode 100644 index 00000000000..c8c65614dd1 --- /dev/null +++ b/fs/squashfs/squashfs_fs_sb.h @@ -0,0 +1,76 @@ +#ifndef SQUASHFS_FS_SB +#define SQUASHFS_FS_SB +/* + * Squashfs + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * squashfs_fs_sb.h + */ + +#include "squashfs_fs.h" + +struct squashfs_cache { + char *name; + int entries; + int next_blk; + int num_waiters; + int unused; + int block_size; + int pages; + spinlock_t lock; + wait_queue_head_t wait_queue; + struct squashfs_cache_entry *entry; +}; + +struct squashfs_cache_entry { + u64 block; + int length; + int refcount; + u64 next_index; + int pending; + int error; + int num_waiters; + wait_queue_head_t wait_queue; + struct squashfs_cache *cache; + void **data; +}; + +struct squashfs_sb_info { + int devblksize; + int devblksize_log2; + struct squashfs_cache *block_cache; + struct squashfs_cache *fragment_cache; + struct squashfs_cache *read_page; + int next_meta_index; + __le64 *id_table; + __le64 *fragment_index; + unsigned int *fragment_index_2; + struct mutex read_data_mutex; + struct mutex meta_index_mutex; + struct meta_index *meta_index; + z_stream stream; + __le64 *inode_lookup_table; + u64 inode_table; + u64 directory_table; + unsigned int block_size; + unsigned short block_log; + long long bytes_used; + unsigned int inodes; +}; +#endif diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c new file mode 100644 index 00000000000..a0466d7467b --- /dev/null +++ b/fs/squashfs/super.c @@ -0,0 +1,440 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * super.c + */ + +/* + * This file implements code to read the superblock, read and initialise + * in-memory structures at mount time, and all the VFS glue code to register + * the filesystem. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/pagemap.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +static struct file_system_type squashfs_fs_type; +static struct super_operations squashfs_super_ops; + +static int supported_squashfs_filesystem(short major, short minor, short comp) +{ + if (major < SQUASHFS_MAJOR) { + ERROR("Major/Minor mismatch, older Squashfs %d.%d " + "filesystems are unsupported\n", major, minor); + return -EINVAL; + } else if (major > SQUASHFS_MAJOR || minor > SQUASHFS_MINOR) { + ERROR("Major/Minor mismatch, trying to mount newer " + "%d.%d filesystem\n", major, minor); + ERROR("Please update your kernel\n"); + return -EINVAL; + } + + if (comp != ZLIB_COMPRESSION) + return -EINVAL; + + return 0; +} + + +static int squashfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct squashfs_sb_info *msblk; + struct squashfs_super_block *sblk = NULL; + char b[BDEVNAME_SIZE]; + struct inode *root; + long long root_inode; + unsigned short flags; + unsigned int fragments; + u64 lookup_table_start; + int err; + + TRACE("Entered squashfs_fill_superblock\n"); + + sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); + if (sb->s_fs_info == NULL) { + ERROR("Failed to allocate squashfs_sb_info\n"); + return -ENOMEM; + } + msblk = sb->s_fs_info; + + msblk->stream.workspace = kmalloc(zlib_inflate_workspacesize(), + GFP_KERNEL); + if (msblk->stream.workspace == NULL) { + ERROR("Failed to allocate zlib workspace\n"); + goto failure; + } + + sblk = kzalloc(sizeof(*sblk), GFP_KERNEL); + if (sblk == NULL) { + ERROR("Failed to allocate squashfs_super_block\n"); + goto failure; + } + + msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE); + msblk->devblksize_log2 = ffz(~msblk->devblksize); + + mutex_init(&msblk->read_data_mutex); + mutex_init(&msblk->meta_index_mutex); + + /* + * msblk->bytes_used is checked in squashfs_read_table to ensure reads + * are not beyond filesystem end. But as we're using + * squashfs_read_table here to read the superblock (including the value + * of bytes_used) we need to set it to an initial sensible dummy value + */ + msblk->bytes_used = sizeof(*sblk); + err = squashfs_read_table(sb, sblk, SQUASHFS_START, sizeof(*sblk)); + + if (err < 0) { + ERROR("unable to read squashfs_super_block\n"); + goto failed_mount; + } + + /* Check it is a SQUASHFS superblock */ + sb->s_magic = le32_to_cpu(sblk->s_magic); + if (sb->s_magic != SQUASHFS_MAGIC) { + if (!silent) + ERROR("Can't find a SQUASHFS superblock on %s\n", + bdevname(sb->s_bdev, b)); + err = -EINVAL; + goto failed_mount; + } + + /* Check the MAJOR & MINOR versions and compression type */ + err = supported_squashfs_filesystem(le16_to_cpu(sblk->s_major), + le16_to_cpu(sblk->s_minor), + le16_to_cpu(sblk->compression)); + if (err < 0) + goto failed_mount; + + err = -EINVAL; + + /* + * Check if there's xattrs in the filesystem. These are not + * supported in this version, so warn that they will be ignored. + */ + if (le64_to_cpu(sblk->xattr_table_start) != SQUASHFS_INVALID_BLK) + ERROR("Xattrs in filesystem, these will be ignored\n"); + + /* Check the filesystem does not extend beyond the end of the + block device */ + msblk->bytes_used = le64_to_cpu(sblk->bytes_used); + if (msblk->bytes_used < 0 || msblk->bytes_used > + i_size_read(sb->s_bdev->bd_inode)) + goto failed_mount; + + /* Check block size for sanity */ + msblk->block_size = le32_to_cpu(sblk->block_size); + if (msblk->block_size > SQUASHFS_FILE_MAX_SIZE) + goto failed_mount; + + msblk->block_log = le16_to_cpu(sblk->block_log); + if (msblk->block_log > SQUASHFS_FILE_MAX_LOG) + goto failed_mount; + + /* Check the root inode for sanity */ + root_inode = le64_to_cpu(sblk->root_inode); + if (SQUASHFS_INODE_OFFSET(root_inode) > SQUASHFS_METADATA_SIZE) + goto failed_mount; + + msblk->inode_table = le64_to_cpu(sblk->inode_table_start); + msblk->directory_table = le64_to_cpu(sblk->directory_table_start); + msblk->inodes = le32_to_cpu(sblk->inodes); + flags = le16_to_cpu(sblk->flags); + + TRACE("Found valid superblock on %s\n", bdevname(sb->s_bdev, b)); + TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(flags) + ? "un" : ""); + TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(flags) + ? "un" : ""); + TRACE("Filesystem size %lld bytes\n", msblk->bytes_used); + TRACE("Block size %d\n", msblk->block_size); + TRACE("Number of inodes %d\n", msblk->inodes); + TRACE("Number of fragments %d\n", le32_to_cpu(sblk->fragments)); + TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids)); + TRACE("sblk->inode_table_start %llx\n", msblk->inode_table); + TRACE("sblk->directory_table_start %llx\n", msblk->directory_table); + TRACE("sblk->fragment_table_start %llx\n", + (u64) le64_to_cpu(sblk->fragment_table_start)); + TRACE("sblk->id_table_start %llx\n", + (u64) le64_to_cpu(sblk->id_table_start)); + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_flags |= MS_RDONLY; + sb->s_op = &squashfs_super_ops; + + err = -ENOMEM; + + msblk->block_cache = squashfs_cache_init("metadata", + SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); + if (msblk->block_cache == NULL) + goto failed_mount; + + /* Allocate read_page block */ + msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); + if (msblk->read_page == NULL) { + ERROR("Failed to allocate read_page block\n"); + goto failed_mount; + } + + /* Allocate and read id index table */ + msblk->id_table = squashfs_read_id_index_table(sb, + le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); + if (IS_ERR(msblk->id_table)) { + err = PTR_ERR(msblk->id_table); + msblk->id_table = NULL; + goto failed_mount; + } + + fragments = le32_to_cpu(sblk->fragments); + if (fragments == 0) + goto allocate_lookup_table; + + msblk->fragment_cache = squashfs_cache_init("fragment", + SQUASHFS_CACHED_FRAGMENTS, msblk->block_size); + if (msblk->fragment_cache == NULL) { + err = -ENOMEM; + goto failed_mount; + } + + /* Allocate and read fragment index table */ + msblk->fragment_index = squashfs_read_fragment_index_table(sb, + le64_to_cpu(sblk->fragment_table_start), fragments); + if (IS_ERR(msblk->fragment_index)) { + err = PTR_ERR(msblk->fragment_index); + msblk->fragment_index = NULL; + goto failed_mount; + } + +allocate_lookup_table: + lookup_table_start = le64_to_cpu(sblk->lookup_table_start); + if (lookup_table_start == SQUASHFS_INVALID_BLK) + goto allocate_root; + + /* Allocate and read inode lookup table */ + msblk->inode_lookup_table = squashfs_read_inode_lookup_table(sb, + lookup_table_start, msblk->inodes); + if (IS_ERR(msblk->inode_lookup_table)) { + err = PTR_ERR(msblk->inode_lookup_table); + msblk->inode_lookup_table = NULL; + goto failed_mount; + } + + sb->s_export_op = &squashfs_export_ops; + +allocate_root: + root = new_inode(sb); + if (!root) { + err = -ENOMEM; + goto failed_mount; + } + + err = squashfs_read_inode(root, root_inode); + if (err) { + iget_failed(root); + goto failed_mount; + } + insert_inode_hash(root); + + sb->s_root = d_alloc_root(root); + if (sb->s_root == NULL) { + ERROR("Root inode create failed\n"); + err = -ENOMEM; + iput(root); + goto failed_mount; + } + + TRACE("Leaving squashfs_fill_super\n"); + kfree(sblk); + return 0; + +failed_mount: + squashfs_cache_delete(msblk->block_cache); + squashfs_cache_delete(msblk->fragment_cache); + squashfs_cache_delete(msblk->read_page); + kfree(msblk->inode_lookup_table); + kfree(msblk->fragment_index); + kfree(msblk->id_table); + kfree(msblk->stream.workspace); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + kfree(sblk); + return err; + +failure: + kfree(msblk->stream.workspace); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + return -ENOMEM; +} + + +static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct squashfs_sb_info *msblk = dentry->d_sb->s_fs_info; + + TRACE("Entered squashfs_statfs\n"); + + buf->f_type = SQUASHFS_MAGIC; + buf->f_bsize = msblk->block_size; + buf->f_blocks = ((msblk->bytes_used - 1) >> msblk->block_log) + 1; + buf->f_bfree = buf->f_bavail = 0; + buf->f_files = msblk->inodes; + buf->f_ffree = 0; + buf->f_namelen = SQUASHFS_NAME_LEN; + + return 0; +} + + +static int squashfs_remount(struct super_block *sb, int *flags, char *data) +{ + *flags |= MS_RDONLY; + return 0; +} + + +static void squashfs_put_super(struct super_block *sb) +{ + if (sb->s_fs_info) { + struct squashfs_sb_info *sbi = sb->s_fs_info; + squashfs_cache_delete(sbi->block_cache); + squashfs_cache_delete(sbi->fragment_cache); + squashfs_cache_delete(sbi->read_page); + kfree(sbi->id_table); + kfree(sbi->fragment_index); + kfree(sbi->meta_index); + kfree(sbi->stream.workspace); + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; + } +} + + +static int squashfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super, + mnt); +} + + +static struct kmem_cache *squashfs_inode_cachep; + + +static void init_once(void *foo) +{ + struct squashfs_inode_info *ei = foo; + + inode_init_once(&ei->vfs_inode); +} + + +static int __init init_inodecache(void) +{ + squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", + sizeof(struct squashfs_inode_info), 0, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, init_once); + + return squashfs_inode_cachep ? 0 : -ENOMEM; +} + + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(squashfs_inode_cachep); +} + + +static int __init init_squashfs_fs(void) +{ + int err = init_inodecache(); + + if (err) + return err; + + err = register_filesystem(&squashfs_fs_type); + if (err) { + destroy_inodecache(); + return err; + } + + printk(KERN_INFO "squashfs: version 4.0 (2009/01/03) " + "Phillip Lougher\n"); + + return 0; +} + + +static void __exit exit_squashfs_fs(void) +{ + unregister_filesystem(&squashfs_fs_type); + destroy_inodecache(); +} + + +static struct inode *squashfs_alloc_inode(struct super_block *sb) +{ + struct squashfs_inode_info *ei = + kmem_cache_alloc(squashfs_inode_cachep, GFP_KERNEL); + + return ei ? &ei->vfs_inode : NULL; +} + + +static void squashfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); +} + + +static struct file_system_type squashfs_fs_type = { + .owner = THIS_MODULE, + .name = "squashfs", + .get_sb = squashfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV +}; + +static struct super_operations squashfs_super_ops = { + .alloc_inode = squashfs_alloc_inode, + .destroy_inode = squashfs_destroy_inode, + .statfs = squashfs_statfs, + .put_super = squashfs_put_super, + .remount_fs = squashfs_remount +}; + +module_init(init_squashfs_fs); +module_exit(exit_squashfs_fs); +MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem"); +MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>"); +MODULE_LICENSE("GPL"); diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c new file mode 100644 index 00000000000..83d87880aac --- /dev/null +++ b/fs/squashfs/symlink.c @@ -0,0 +1,118 @@ +/* + * Squashfs - a compressed read only filesystem for Linux + * + * Copyright (c) 2002, 2003, 2004, 2005, 2006, 2007, 2008 + * Phillip Lougher <phillip@lougher.demon.co.uk> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * symlink.c + */ + +/* + * This file implements code to handle symbolic links. + * + * The data contents of symbolic links are stored inside the symbolic + * link inode within the inode table. This allows the normally small symbolic + * link to be compressed as part of the inode table, achieving much greater + * compression than if the symbolic link was compressed individually. + */ + +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/pagemap.h> +#include <linux/zlib.h> + +#include "squashfs_fs.h" +#include "squashfs_fs_sb.h" +#include "squashfs_fs_i.h" +#include "squashfs.h" + +static int squashfs_symlink_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + struct super_block *sb = inode->i_sb; + struct squashfs_sb_info *msblk = sb->s_fs_info; + int index = page->index << PAGE_CACHE_SHIFT; + u64 block = squashfs_i(inode)->start; + int offset = squashfs_i(inode)->offset; + int length = min_t(int, i_size_read(inode) - index, PAGE_CACHE_SIZE); + int bytes, copied; + void *pageaddr; + struct squashfs_cache_entry *entry; + + TRACE("Entered squashfs_symlink_readpage, page index %ld, start block " + "%llx, offset %x\n", page->index, block, offset); + + /* + * Skip index bytes into symlink metadata. + */ + if (index) { + bytes = squashfs_read_metadata(sb, NULL, &block, &offset, + index); + if (bytes < 0) { + ERROR("Unable to read symlink [%llx:%x]\n", + squashfs_i(inode)->start, + squashfs_i(inode)->offset); + goto error_out; + } + } + + /* + * Read length bytes from symlink metadata. Squashfs_read_metadata + * is not used here because it can sleep and we want to use + * kmap_atomic to map the page. Instead call the underlying + * squashfs_cache_get routine. As length bytes may overlap metadata + * blocks, we may need to call squashfs_cache_get multiple times. + */ + for (bytes = 0; bytes < length; offset = 0, bytes += copied) { + entry = squashfs_cache_get(sb, msblk->block_cache, block, 0); + if (entry->error) { + ERROR("Unable to read symlink [%llx:%x]\n", + squashfs_i(inode)->start, + squashfs_i(inode)->offset); + squashfs_cache_put(entry); + goto error_out; + } + + pageaddr = kmap_atomic(page, KM_USER0); + copied = squashfs_copy_data(pageaddr + bytes, entry, offset, + length - bytes); + if (copied == length - bytes) + memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length); + else + block = entry->next_index; + kunmap_atomic(pageaddr, KM_USER0); + squashfs_cache_put(entry); + } + + flush_dcache_page(page); + SetPageUptodate(page); + unlock_page(page); + return 0; + +error_out: + SetPageError(page); + unlock_page(page); + return 0; +} + + +const struct address_space_operations squashfs_symlink_aops = { + .readpage = squashfs_symlink_readpage +}; diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c index a7c748fa977..0f0f0cf3ba9 100644 --- a/init/do_mounts_rd.c +++ b/init/do_mounts_rd.c @@ -9,6 +9,7 @@ #include <linux/string.h> #include "do_mounts.h" +#include "../fs/squashfs/squashfs_fs.h" int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ @@ -41,6 +42,7 @@ static int __init crd_load(int in_fd, int out_fd); * ext2 * romfs * cramfs + * squashfs * gzip */ static int __init @@ -51,6 +53,7 @@ identify_ramdisk_image(int fd, int start_block) struct ext2_super_block *ext2sb; struct romfs_super_block *romfsb; struct cramfs_super *cramfsb; + struct squashfs_super_block *squashfsb; int nblocks = -1; unsigned char *buf; @@ -62,6 +65,7 @@ identify_ramdisk_image(int fd, int start_block) ext2sb = (struct ext2_super_block *) buf; romfsb = (struct romfs_super_block *) buf; cramfsb = (struct cramfs_super *) buf; + squashfsb = (struct squashfs_super_block *) buf; memset(buf, 0xe5, size); /* @@ -99,6 +103,16 @@ identify_ramdisk_image(int fd, int start_block) goto done; } + /* squashfs is at block zero too */ + if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) { + printk(KERN_NOTICE + "RAMDISK: squashfs filesystem found at block %d\n", + start_block); + nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1) + >> BLOCK_SIZE_BITS; + goto done; + } + /* * Read block 1 to test for minix and ext2 superblock */ |