Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig  213
-rw-r--r--  lib/Kconfig.debug  1157
-rw-r--r--  lib/Kconfig.kgdb  82
-rw-r--r--  lib/Kconfig.kmemcheck  94
-rw-r--r--  lib/Makefile  118
-rw-r--r--  lib/argv_split.c  101
-rw-r--r--  lib/atomic64.c  186
-rw-r--r--  lib/atomic64_test.c  166
-rw-r--r--  lib/audit.c  66
-rw-r--r--  lib/bcd.c  14
-rw-r--r--  lib/bitmap.c  1102
-rw-r--r--  lib/bitrev.c  59
-rw-r--r--  lib/btree.c  798
-rw-r--r--  lib/bug.c  183
-rw-r--r--  lib/bust_spinlocks.c  32
-rw-r--r--  lib/check_signature.c  26
-rw-r--r--  lib/checksum.c  203
-rw-r--r--  lib/cmdline.c  159
-rw-r--r--  lib/cpu-notifier-error-inject.c  63
-rw-r--r--  lib/cpumask.c  178
-rw-r--r--  lib/crc-ccitt.c  69
-rw-r--r--  lib/crc-itu-t.c  69
-rw-r--r--  lib/crc-t10dif.c  67
-rw-r--r--  lib/crc16.c  67
-rw-r--r--  lib/crc32.c  471
-rw-r--r--  lib/crc32defs.h  32
-rw-r--r--  lib/crc7.c  68
-rw-r--r--  lib/ctype.c  36
-rw-r--r--  lib/debug_locks.c  48
-rw-r--r--  lib/debugobjects.c  1049
-rw-r--r--  lib/dec_and_lock.c  34
-rw-r--r--  lib/decompress.c  59
-rw-r--r--  lib/decompress_bunzip2.c  758
-rw-r--r--  lib/decompress_inflate.c  176
-rw-r--r--  lib/decompress_unlzma.c  667
-rw-r--r--  lib/decompress_unlzo.c  217
-rw-r--r--  lib/devres.c  352
-rw-r--r--  lib/div64.c  110
-rw-r--r--  lib/dma-debug.c  1302
-rw-r--r--  lib/dump_stack.c  15
-rw-r--r--  lib/dynamic_debug.c  770
-rw-r--r--  lib/extable.c  93
-rw-r--r--  lib/fault-inject.c  315
-rw-r--r--  lib/find_last_bit.c  45
-rw-r--r--  lib/find_next_bit.c  275
-rw-r--r--  lib/flex_array.c  350
-rw-r--r--  lib/gcd.c  18
-rw-r--r--  lib/gen_crc32table.c  99
-rw-r--r--  lib/genalloc.c  215
-rw-r--r--  lib/halfmd4.c  66
-rw-r--r--  lib/hexdump.c  224
-rw-r--r--  lib/hweight.c  67
-rw-r--r--  lib/idr.c  937
-rw-r--r--  lib/inflate.c  1309
-rw-r--r--  lib/int_sqrt.c  32
-rw-r--r--  lib/iomap.c  282
-rw-r--r--  lib/iomap_copy.c  70
-rw-r--r--  lib/iommu-helper.c  40
-rw-r--r--  lib/ioremap.c  92
-rw-r--r--  lib/irq_regs.c  17
-rw-r--r--  lib/is_single_threaded.c  58
-rw-r--r--  lib/kasprintf.c  45
-rw-r--r--  lib/kernel_lock.c  143
-rw-r--r--  lib/klist.c  365
-rw-r--r--  lib/kobject.c  973
-rw-r--r--  lib/kobject_uevent.c  425
-rw-r--r--  lib/kref.c  67
-rw-r--r--  lib/lcm.c  15
-rw-r--r--  lib/libcrc32c.c  81
-rw-r--r--  lib/list_debug.c  62
-rw-r--r--  lib/list_sort.c  217
-rw-r--r--  lib/locking-selftest-hardirq.h  9
-rw-r--r--  lib/locking-selftest-mutex.h  11
-rw-r--r--  lib/locking-selftest-rlock-hardirq.h  2
-rw-r--r--  lib/locking-selftest-rlock-softirq.h  2
-rw-r--r--  lib/locking-selftest-rlock.h  14
-rw-r--r--  lib/locking-selftest-rsem.h  14
-rw-r--r--  lib/locking-selftest-softirq.h  9
-rw-r--r--  lib/locking-selftest-spin-hardirq.h  2
-rw-r--r--  lib/locking-selftest-spin-softirq.h  2
-rw-r--r--  lib/locking-selftest-spin.h  11
-rw-r--r--  lib/locking-selftest-wlock-hardirq.h  2
-rw-r--r--  lib/locking-selftest-wlock-softirq.h  2
-rw-r--r--  lib/locking-selftest-wlock.h  14
-rw-r--r--  lib/locking-selftest-wsem.h  14
-rw-r--r--  lib/locking-selftest.c  1218
-rw-r--r--  lib/lru_cache.c  560
-rw-r--r--  lib/lzo/Makefile  5
-rw-r--r--  lib/lzo/lzo1x_compress.c  226
-rw-r--r--  lib/lzo/lzo1x_decompress.c  255
-rw-r--r--  lib/lzo/lzodefs.h  43
-rw-r--r--  lib/nlattr.c  502
-rw-r--r--  lib/parser.c  231
-rw-r--r--  lib/percpu_counter.c  174
-rw-r--r--  lib/plist.c  121
-rw-r--r--  lib/prio_heap.c  70
-rw-r--r--  lib/prio_tree.c  484
-rw-r--r--  lib/proportions.c  407
-rw-r--r--  lib/radix-tree.c  1419
-rw-r--r--  lib/raid6/Makefile  75
-rw-r--r--  lib/raid6/algos.c  154
-rw-r--r--  lib/raid6/altivec.uc  130
-rw-r--r--  lib/raid6/int.uc  117
-rw-r--r--  lib/raid6/mktables.c  132
-rw-r--r--  lib/raid6/mmx.c  142
-rw-r--r--  lib/raid6/recov.c  132
-rw-r--r--  lib/raid6/sse1.c  162
-rw-r--r--  lib/raid6/sse2.c  262
-rw-r--r--  lib/raid6/test/Makefile  72
-rw-r--r--  lib/raid6/test/test.c  124
-rw-r--r--  lib/raid6/unroll.awk  20
-rw-r--r--  lib/raid6/x86.h  61
-rw-r--r--  lib/random32.c  150
-rw-r--r--  lib/ratelimit.c  67
-rw-r--r--  lib/rational.c  63
-rw-r--r--  lib/rbtree.c  459
-rw-r--r--  lib/reciprocal_div.c  9
-rw-r--r--  lib/reed_solomon/Makefile  6
-rw-r--r--  lib/reed_solomon/decode_rs.c  271
-rw-r--r--  lib/reed_solomon/encode_rs.c  54
-rw-r--r--  lib/reed_solomon/reed_solomon.c  384
-rw-r--r--  lib/rwsem-spinlock.c  323
-rw-r--r--  lib/rwsem.c  284
-rw-r--r--  lib/scatterlist.c  519
-rw-r--r--  lib/sha1.c  95
-rw-r--r--  lib/show_mem.c  63
-rw-r--r--  lib/smp_processor_id.c  55
-rw-r--r--  lib/sort.c  123
-rw-r--r--  lib/spinlock_debug.c  297
-rw-r--r--  lib/string.c  729
-rw-r--r--  lib/string_helpers.c  68
-rw-r--r--  lib/swiotlb.c  923
-rw-r--r--  lib/syscall.c  75
-rw-r--r--  lib/textsearch.c  324
-rw-r--r--  lib/ts_bm.c  207
-rw-r--r--  lib/ts_fsm.c  341
-rw-r--r--  lib/ts_kmp.c  157
-rw-r--r--  lib/uuid.c  53
-rw-r--r--  lib/vsprintf.c  2152
-rw-r--r--  lib/zlib_deflate/Makefile  11
-rw-r--r--  lib/zlib_deflate/deflate.c  1253
-rw-r--r--  lib/zlib_deflate/deflate_syms.c  18
-rw-r--r--  lib/zlib_deflate/deftree.c  1113
-rw-r--r--  lib/zlib_deflate/defutil.h  334
-rw-r--r--  lib/zlib_inflate/Makefile  19
-rw-r--r--  lib/zlib_inflate/inffast.c  363
-rw-r--r--  lib/zlib_inflate/inffast.h  11
-rw-r--r--  lib/zlib_inflate/inffixed.h  94
-rw-r--r--  lib/zlib_inflate/inflate.c  918
-rw-r--r--  lib/zlib_inflate/inflate.h  111
-rw-r--r--  lib/zlib_inflate/inflate_syms.c  20
-rw-r--r--  lib/zlib_inflate/inftrees.c  315
-rw-r--r--  lib/zlib_inflate/inftrees.h  59
-rw-r--r--  lib/zlib_inflate/infutil.c  49
-rw-r--r--  lib/zlib_inflate/infutil.h  25
155 files changed, 38803 insertions, 0 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
new file mode 100644
index 00000000..fa9bf2c0
--- /dev/null
+++ b/lib/Kconfig
@@ -0,0 +1,213 @@
+#
+# Library configuration
+#
+
+config BINARY_PRINTF
+ def_bool n
+
+menu "Library routines"
+
+config RAID6_PQ
+ tristate
+
+config BITREVERSE
+ tristate
+
+config RATIONAL
+ boolean
+
+config GENERIC_FIND_FIRST_BIT
+ bool
+
+config GENERIC_FIND_NEXT_BIT
+ bool
+
+config GENERIC_FIND_LAST_BIT
+ bool
+ default y
+
+config CRC_CCITT
+ tristate "CRC-CCITT functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC-CCITT functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC-CCITT
+ functions require M here.
+
+config CRC16
+ tristate "CRC16 functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC16 functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC16
+ functions require M here.
+
+config CRC_T10DIF
+ tristate "CRC calculation for the T10 Data Integrity Field"
+ help
+ This option is only needed if a module that's not in the
+ kernel tree needs to calculate CRC checks for use with the
+ SCSI data integrity subsystem.
+
+config CRC_ITU_T
+ tristate "CRC ITU-T V.41 functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC ITU-T V.41 functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC ITU-T V.41
+ functions require M here.
+
+config CRC32
+ tristate "CRC32 functions"
+ default y
+ select BITREVERSE
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC32 functions, but a module built outside the
+ kernel tree does. Such modules that use library CRC32 functions
+ require M here.
+
+config CRC7
+ tristate "CRC7 functions"
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC7 functions, but a module built outside
+ the kernel tree does. Such modules that use library CRC7
+ functions require M here.
+
+config LIBCRC32C
+ tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check"
+ select CRYPTO
+ select CRYPTO_CRC32C
+ help
+ This option is provided for the case where no in-kernel-tree
+ modules require CRC32c functions, but a module built outside the
+ kernel tree does. Such modules that use library CRC32c functions
+ require M here. See Castagnoli93.
+ Module will be libcrc32c.
+
+config AUDIT_GENERIC
+ bool
+ depends on AUDIT && !AUDIT_ARCH
+ default y
+
+#
+# compression support is select'ed if needed
+#
+config ZLIB_INFLATE
+ tristate
+
+config ZLIB_DEFLATE
+ tristate
+
+config LZO_COMPRESS
+ tristate
+
+config LZO_DECOMPRESS
+ tristate
+
+#
+# These all provide a common interface (hence the apparent duplication with
+# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.)
+#
+config DECOMPRESS_GZIP
+ select ZLIB_INFLATE
+ tristate
+
+config DECOMPRESS_BZIP2
+ tristate
+
+config DECOMPRESS_LZMA
+ tristate
+
+config DECOMPRESS_LZO
+ select LZO_DECOMPRESS
+ tristate
+
+#
+# Generic allocator support is selected if needed
+#
+config GENERIC_ALLOCATOR
+ boolean
+
+#
+# reed solomon support is select'ed if needed
+#
+config REED_SOLOMON
+ tristate
+
+config REED_SOLOMON_ENC8
+ boolean
+
+config REED_SOLOMON_DEC8
+ boolean
+
+config REED_SOLOMON_ENC16
+ boolean
+
+config REED_SOLOMON_DEC16
+ boolean
+
+#
+# Textsearch support is select'ed if needed
+#
+config TEXTSEARCH
+ boolean
+
+config TEXTSEARCH_KMP
+ tristate
+
+config TEXTSEARCH_BM
+ tristate
+
+config TEXTSEARCH_FSM
+ tristate
+
+config BTREE
+ boolean
+
+config HAS_IOMEM
+ boolean
+ depends on !NO_IOMEM
+ default y
+
+config HAS_IOPORT
+ boolean
+ depends on HAS_IOMEM && !NO_IOPORT
+ default y
+
+config HAS_DMA
+ boolean
+ depends on !NO_DMA
+ default y
+
+config CHECK_SIGNATURE
+ bool
+
+config CPUMASK_OFFSTACK
+ bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS
+ help
+ Use dynamic allocation for cpumask_var_t, instead of putting
+ them on the stack. This is a bit more expensive, but avoids
+ stack overflow.
+
+config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS
+ bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS
+ depends on EXPERIMENTAL && BROKEN
+
+#
+# Netlink attribute parsing support is select'ed if needed
+#
+config NLATTR
+ bool
+
+#
+# Generic 64-bit atomic support is selected if needed
+#
+config GENERIC_ATOMIC64
+ bool
+
+config LRU_CACHE
+ tristate
+
+endmenu
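
Editor's note: the promptless symbols above (the "select'ed if needed" pattern the
comments mention) are meant to be pulled in by their users rather than set directly.
A minimal sketch in the same Kconfig language of how a hypothetical driver entry
would do that; MYDRIVER and its prompt are illustrative, not part of this tree:

config MYDRIVER
	tristate "Example driver using lib/ helpers"
	depends on HAS_IOMEM
	select CRC32
	select LZO_DECOMPRESS
	help
	  Saying M here builds mydriver.ko; CRC32 and LZO_DECOMPRESS are
	  then built as well, without prompting the user for them.

The matching kbuild line would be obj-$(CONFIG_MYDRIVER) += mydriver.o in the
driver's Makefile.
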
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
new file mode 100644
index 00000000..1b4afd2e
--- /dev/null
+++ b/lib/Kconfig.debug
@@ -0,0 +1,1157 @@
+
+config PRINTK_TIME
+ bool "Show timing information on printks"
+ depends on PRINTK
+ help
+ Selecting this option causes timing information to be
+ included in printk output. This allows you to measure
+ the interval between kernel operations, including bootup
+ operations. This is useful for identifying long delays
+ in kernel startup.
+
+config ENABLE_WARN_DEPRECATED
+ bool "Enable __deprecated logic"
+ default y
+ help
+ Enable the __deprecated logic in the kernel build.
+ Disable this to suppress the "warning: 'foo' is deprecated
+ (declared at kernel/power/somefile.c:1234)" messages.
+
+config ENABLE_MUST_CHECK
+ bool "Enable __must_check logic"
+ default y
+ help
+ Enable the __must_check logic in the kernel build. Disable this to
+ suppress the "warning: ignoring return value of 'foo', declared with
+ attribute warn_unused_result" messages.
+
+config FRAME_WARN
+ int "Warn for stack frames larger than (needs gcc 4.4)"
+ range 0 8192
+ default 1024 if !64BIT
+ default 2048 if 64BIT
+ help
+ Tell gcc to warn at build time for stack frames larger than this.
+ Setting this too low will cause a lot of warnings.
+ Setting it to 0 disables the warning.
+ Requires gcc 4.4
+
+config MAGIC_SYSRQ
+ bool "Magic SysRq key"
+ depends on !UML
+ help
+ If you say Y here, you will have some control over the system even
+ if the system crashes for example during kernel debugging (e.g., you
+ will be able to flush the buffer cache to disk, reboot the system
+ immediately or dump some status information). This is accomplished
+ by pressing various keys while holding SysRq (Alt+PrintScreen). It
+ also works on a serial console (on PC hardware at least), if you
+ send a BREAK and then within 5 seconds a command keypress. The
+ keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
+ unless you really know what this hack does.
+
+config STRIP_ASM_SYMS
+ bool "Strip assembler-generated symbols during link"
+ default n
+ help
+ Strip internal assembler-generated symbols during a link (symbols
+ that look like '.Lxxx') so they don't pollute the output of
+ get_wchan() and suchlike.
+
+config UNUSED_SYMBOLS
+ bool "Enable unused/obsolete exported symbols"
+ default y if X86
+ help
+ Unused but exported symbols make the kernel needlessly bigger. For
+ that reason most of these unused exports will soon be removed. This
+ option is provided temporarily to provide a transition period in case
+ some external kernel module needs one of these symbols anyway. If you
+ encounter such a case in your module, consider if you are actually
+ using the right API. (rationale: since nobody in the kernel is using
+ this in a module, there is a pretty good chance it's actually the
+ wrong interface to use). If you really need the symbol, please send a
+ mail to the linux kernel mailing list mentioning the symbol and why
+ you really need it, and what the merge plan to the mainline kernel for
+ your module is.
+
+config DEBUG_FS
+ bool "Debug Filesystem"
+ help
+ debugfs is a virtual file system that kernel developers use to put
+ debugging files into. Enable this option to be able to read and
+ write to these files.
+
+ For detailed documentation on the debugfs API, see
+ Documentation/DocBook/filesystems.
+
+ If unsure, say N.
+
+config HEADERS_CHECK
+ bool "Run 'make headers_check' when building vmlinux"
+ depends on !UML
+ help
+ This option will extract the user-visible kernel headers whenever
+ building the kernel, and will run basic sanity checks on them to
+ ensure that exported files do not attempt to include files which
+ were not exported, etc.
+
+ If you're making modifications to header files which are
+ relevant for userspace, say 'Y', and check the headers
+ exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in
+ your build tree), to make sure they're suitable.
+
+config DEBUG_SECTION_MISMATCH
+ bool "Enable full Section mismatch analysis"
+ depends on UNDEFINED || (BLACKFIN)
+ default y
+ # This option is on purpose disabled for now.
+ # It will be enabled when we are down to a reasonable number
+ # of section mismatch warnings (< 10 for an allyesconfig build)
+ help
+ The section mismatch analysis checks if there are illegal
+ references from one section to another section.
+ Linux will during link or during runtime drop some sections
+ and any use of code/data previously in these sections will
+ most likely result in an oops.
+ In the code functions and variables are annotated with
+ __init, __devinit etc. (see full list in include/linux/init.h)
+ which results in the code/data being placed in specific sections.
+ The section mismatch analysis is always done after a full
+ kernel build but enabling this option will in addition
+ do the following:
+ - Add the option -fno-inline-functions-called-once to gcc
+ When inlining a function annotated __init in a non-init
+ function we would lose the section information and thus
+ the analysis would not catch the illegal reference.
+ This option tells gcc to inline less but will also
+ result in a larger kernel.
+ - Run the section mismatch analysis for each module/built-in.o
+ When we run the section mismatch analysis on vmlinux.o we
+ lose valuable information about where the mismatch was
+ introduced.
+ Running the analysis for each module/built-in.o file
+ will tell where the mismatch happens much closer to the
+ source. The drawback is that we will report the same
+ mismatch at least twice.
+ - Enable verbose reporting from modpost to help solving
+ the section mismatches reported.
+
+config DEBUG_KERNEL
+ bool "Kernel debugging"
+ help
+ Say Y here if you are developing drivers or trying to debug and
+ identify kernel problems.
+
+config DEBUG_SHIRQ
+ bool "Debug shared IRQ handlers"
+ depends on DEBUG_KERNEL && GENERIC_HARDIRQS
+ help
+ Enable this to generate a spurious interrupt as soon as a shared
+ interrupt handler is registered, and just before one is deregistered.
+ Drivers ought to be able to handle interrupts coming in at those
+ points; some don't and need to be caught.
+
+config LOCKUP_DETECTOR
+ bool "Detect Hard and Soft Lockups"
+ depends on DEBUG_KERNEL && !S390
+ help
+ Say Y here to enable the kernel to act as a watchdog to detect
+ hard and soft lockups.
+
+ Softlockups are bugs that cause the kernel to loop in kernel
+ mode for more than 60 seconds, without giving other tasks a
+ chance to run. The current stack trace is displayed upon
+ detection and the system will stay locked up.
+
+ Hardlockups are bugs that cause the CPU to loop in kernel mode
+ for more than 60 seconds, without letting other interrupts have a
+ chance to run. The current stack trace is displayed upon detection
+ and the system will stay locked up.
+
+ The overhead should be minimal. A periodic hrtimer runs to
+ generate interrupts and kick the watchdog task every 10-12 seconds.
+ An NMI is generated every 60 seconds or so to check for hardlockups.
+
+config HARDLOCKUP_DETECTOR
+ def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+
+config BOOTPARAM_SOFTLOCKUP_PANIC
+ bool "Panic (Reboot) On Soft Lockups"
+ depends on LOCKUP_DETECTOR
+ help
+ Say Y here to enable the kernel to panic on "soft lockups",
+ which are bugs that cause the kernel to loop in kernel
+ mode for more than 60 seconds, without giving other tasks a
+ chance to run.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ lockup has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a lockup must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+ int
+ depends on LOCKUP_DETECTOR
+ range 0 1
+ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
+config DETECT_HUNG_TASK
+ bool "Detect Hung Tasks"
+ depends on DEBUG_KERNEL
+ default DETECT_SOFTLOCKUP
+ help
+ Say Y here to enable the kernel to detect "hung tasks",
+ which are bugs that cause the task to be stuck in
+ uninterruptible "D" state indefinitiley.
+
+ When a hung task is detected, the kernel will print the
+ current stack trace (which you should report), but the
+ task will stay in uninterruptible state. If lockdep is
+ enabled then all held locks will also be reported. This
+ feature has negligible overhead.
+
+config BOOTPARAM_HUNG_TASK_PANIC
+ bool "Panic (Reboot) On Hung Tasks"
+ depends on DETECT_HUNG_TASK
+ help
+ Say Y here to enable the kernel to panic on "hung tasks",
+ which are bugs that cause the kernel to leave a task stuck
+ in uninterruptible "D" state.
+
+ The panic can be used in combination with panic_timeout,
+ to cause the system to reboot automatically after a
+ hung task has been detected. This feature is useful for
+ high-availability systems that have uptime guarantees and
+ where a hung task must be resolved ASAP.
+
+ Say N if unsure.
+
+config BOOTPARAM_HUNG_TASK_PANIC_VALUE
+ int
+ depends on DETECT_HUNG_TASK
+ range 0 1
+ default 0 if !BOOTPARAM_HUNG_TASK_PANIC
+ default 1 if BOOTPARAM_HUNG_TASK_PANIC
+
+config SCHED_DEBUG
+ bool "Collect scheduler debugging info"
+ depends on DEBUG_KERNEL && PROC_FS
+ default y
+ help
+ If you say Y here, the /proc/sched_debug file will be provided
+ that can help debug the scheduler. The runtime overhead of this
+ option is minimal.
+
+config SCHEDSTATS
+ bool "Collect scheduler statistics"
+ depends on DEBUG_KERNEL && PROC_FS
+ help
+ If you say Y here, additional code will be inserted into the
+ scheduler and related routines to collect statistics about
+ scheduler behavior and provide them in /proc/schedstat. These
+ stats may be useful for both tuning and debugging the scheduler.
+ If you aren't debugging the scheduler or trying to tune a specific
+ application, you can say N to avoid the very slight overhead
+ this adds.
+
+config TIMER_STATS
+ bool "Collect kernel timers statistics"
+ depends on DEBUG_KERNEL && PROC_FS
+ help
+ If you say Y here, additional code will be inserted into the
+ timer routines to collect statistics about kernel timers being
+ reprogrammed. The statistics can be read from /proc/timer_stats.
+ Statistics collection is started by writing 1 to /proc/timer_stats;
+ writing 0 stops it. This feature is useful to collect information
+ about timer usage patterns in kernel and userspace. This feature
+ is lightweight if enabled in the kernel config but not activated
+ (it defaults to deactivated on bootup and will only be activated
+ if some application like powertop activates it explicitly).
+
+config DEBUG_OBJECTS
+ bool "Debug object operations"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here, additional code will be inserted into the
+ kernel to track the life time of various objects and validate
+ the operations on those objects.
+
+config DEBUG_OBJECTS_SELFTEST
+ bool "Debug objects selftest"
+ depends on DEBUG_OBJECTS
+ help
+ This enables the selftest of the object debug code.
+
+config DEBUG_OBJECTS_FREE
+ bool "Debug objects in freed memory"
+ depends on DEBUG_OBJECTS
+ help
+ This enables checks on whether a k/v free operation frees an area
+ which contains an object which has not been deactivated
+ properly. This can make kmalloc/kfree-intensive workloads
+ much slower.
+
+config DEBUG_OBJECTS_TIMERS
+ bool "Debug timer objects"
+ depends on DEBUG_OBJECTS
+ help
+ If you say Y here, additional code will be inserted into the
+ timer routines to track the life time of timer objects and
+ validate the timer operations.
+
+config DEBUG_OBJECTS_WORK
+ bool "Debug work objects"
+ depends on DEBUG_OBJECTS
+ help
+ If you say Y here, additional code will be inserted into the
+ work queue routines to track the life time of work objects and
+ validate the work operations.
+
+config DEBUG_OBJECTS_RCU_HEAD
+ bool "Debug RCU callbacks objects"
+ depends on DEBUG_OBJECTS && PREEMPT
+ help
+ Enable this to turn on debugging of RCU list heads (call_rcu() usage).
+
+config DEBUG_OBJECTS_ENABLE_DEFAULT
+ int "debug_objects bootup default value (0-1)"
+ range 0 1
+ default "1"
+ depends on DEBUG_OBJECTS
+ help
+ Debug objects boot parameter default value
+
+config DEBUG_SLAB
+ bool "Debug slab memory allocations"
+ depends on DEBUG_KERNEL && SLAB && !KMEMCHECK
+ help
+ Say Y here to have the kernel do limited verification on memory
+ allocation as well as poisoning memory on free to catch use of freed
+ memory. This can make kmalloc/kfree-intensive workloads much slower.
+
+config DEBUG_SLAB_LEAK
+ bool "Memory leak debugging"
+ depends on DEBUG_SLAB
+
+config SLUB_DEBUG_ON
+ bool "SLUB debugging on by default"
+ depends on SLUB && SLUB_DEBUG && !KMEMCHECK
+ default n
+ help
+ Boot with debugging on by default. SLUB boots by default with
+ the runtime debug capabilities switched off. Enabling this is
+ equivalent to specifying the "slub_debug" parameter on boot.
+ There is no support for more fine grained debug control like
+ possible with slub_debug=xxx. SLUB debugging may be switched
+ off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
+ "slub_debug=-".
+
+config SLUB_STATS
+ default n
+ bool "Enable SLUB performance statistics"
+ depends on SLUB && SLUB_DEBUG && SYSFS
+ help
+ SLUB statistics are useful to debug SLUB's allocation behavior in
+ order to find ways to optimize the allocator. This should never be
+ enabled for production use since keeping statistics slows down
+ the allocator by a few percentage points. The slabinfo command
+ supports the determination of the most active slabs to figure
+ out which slabs are relevant to a particular load.
+ Try running: slabinfo -DA
+
+config DEBUG_KMEMLEAK
+ bool "Kernel memory leak detector"
+ depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \
+ (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE)
+
+ select DEBUG_FS if SYSFS
+ select STACKTRACE if STACKTRACE_SUPPORT
+ select KALLSYMS
+ select CRC32
+ help
+ Say Y here if you want to enable the memory leak
+ detector. The memory allocation/freeing is traced in a way
+ similar to the Boehm's conservative garbage collector, the
+ difference being that the orphan objects are not freed but
+ only shown in /sys/kernel/debug/kmemleak. Enabling this
+ feature will introduce an overhead to memory
+ allocations. See Documentation/kmemleak.txt for more
+ details.
+
+ Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances
+ of finding leaks due to the slab objects poisoning.
+
+ In order to access the kmemleak file, debugfs needs to be
+ mounted (usually at /sys/kernel/debug).
+
+config DEBUG_KMEMLEAK_EARLY_LOG_SIZE
+ int "Maximum kmemleak early log entries"
+ depends on DEBUG_KMEMLEAK
+ range 200 40000
+ default 400
+ help
+ Kmemleak must track all the memory allocations to avoid
+ reporting false positives. Since memory may be allocated or
+ freed before kmemleak is initialised, an early log buffer is
+ used to store these actions. If kmemleak reports "early log
+ buffer exceeded", please increase this value.
+
+config DEBUG_KMEMLEAK_TEST
+ tristate "Simple test for the kernel memory leak detector"
+ depends on DEBUG_KMEMLEAK
+ help
+ Say Y or M here to build a test for the kernel memory leak
+ detector. This option enables a module that explicitly leaks
+ memory.
+
+ If unsure, say N.
+
+config DEBUG_KMEMLEAK_DEFAULT_OFF
+ bool "Default kmemleak to off"
+ depends on DEBUG_KMEMLEAK
+ help
+ Say Y here to disable kmemleak by default. It can then be enabled
+ on the command line via kmemleak=on.
+
+config DEBUG_PREEMPT
+ bool "Debug preemptible kernel"
+ depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT
+ default y
+ help
+ If you say Y here then the kernel will use a debug variant of the
+ commonly used smp_processor_id() function and will print warnings
+ if kernel code uses it in a preemption-unsafe way. Also, the kernel
+ will detect preemption count underflows.
+
+config DEBUG_RT_MUTEXES
+ bool "RT Mutex debugging, deadlock detection"
+ depends on DEBUG_KERNEL && RT_MUTEXES
+ help
+ This allows rt mutex semantics violations and rt mutex related
+ deadlocks (lockups) to be detected and reported automatically.
+
+config DEBUG_PI_LIST
+ bool
+ default y
+ depends on DEBUG_RT_MUTEXES
+
+config RT_MUTEX_TESTER
+ bool "Built-in scriptable tester for rt-mutexes"
+ depends on DEBUG_KERNEL && RT_MUTEXES
+ help
+ This option enables a rt-mutex tester.
+
+config DEBUG_SPINLOCK
+ bool "Spinlock and rw-lock debugging: basic checks"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here and build SMP to catch missing spinlock initialization
+ and certain other kinds of spinlock errors commonly made. This is
+ best used in conjunction with the NMI watchdog so that spinlock
+ deadlocks are also debuggable.
+
+config DEBUG_MUTEXES
+ bool "Mutex debugging: basic checks"
+ depends on DEBUG_KERNEL
+ help
+ This feature allows mutex semantics violations to be detected and
+ reported.
+
+config DEBUG_LOCK_ALLOC
+ bool "Lock debugging: detect incorrect freeing of live locks"
+ depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ select DEBUG_SPINLOCK
+ select DEBUG_MUTEXES
+ select LOCKDEP
+ help
+ This feature will check whether any held lock (spinlock, rwlock,
+ mutex or rwsem) is incorrectly freed by the kernel, via any of the
+ memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
+ vfree(), etc.), whether a live lock is incorrectly reinitialized via
+ spin_lock_init()/mutex_init()/etc., or whether there is any lock
+ held during task exit.
+
+config PROVE_LOCKING
+ bool "Lock debugging: prove locking correctness"
+ depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ select LOCKDEP
+ select DEBUG_SPINLOCK
+ select DEBUG_MUTEXES
+ select DEBUG_LOCK_ALLOC
+ default n
+ help
+ This feature enables the kernel to prove that all locking
+ that occurs in the kernel runtime is mathematically
+ correct: that under no circumstance could an arbitrary (and
+ not yet triggered) combination of observed locking
+ sequences (on an arbitrary number of CPUs, running an
+ arbitrary number of tasks and interrupt contexts) cause a
+ deadlock.
+
+ In short, this feature enables the kernel to report locking
+ related deadlocks before they actually occur.
+
+ The proof does not depend on how hard and complex a
+ deadlock scenario would be to trigger: how many
+ participant CPUs, tasks and irq-contexts would be needed
+ for it to trigger. The proof also does not depend on
+ timing: if a race and a resulting deadlock is possible
+ theoretically (no matter how unlikely the race scenario
+ is), it will be proven so and will immediately be
+ reported by the kernel (once the event is observed that
+ makes the deadlock theoretically possible).
+
+ If a deadlock is impossible (i.e. the locking rules, as
+ observed by the kernel, are mathematically correct), the
+ kernel reports nothing.
+
+ NOTE: this feature can also be enabled for rwlocks, mutexes
+ and rwsems - in which case all dependencies between these
+ different locking variants are observed and mapped too, and
+ the proof of observed correctness is also maintained for an
+ arbitrary combination of these separate locking variants.
+
+ For more details, see Documentation/lockdep-design.txt.
+
+config PROVE_RCU
+ bool "RCU debugging: prove RCU correctness"
+ depends on PROVE_LOCKING
+ default n
+ help
+ This feature enables lockdep extensions that check for correct
+ use of RCU APIs. This is currently under development. Say Y
+ if you want to debug RCU usage or help work on the PROVE_RCU
+ feature.
+
+ Say N if you are unsure.
+
+config PROVE_RCU_REPEATEDLY
+ bool "RCU debugging: don't disable PROVE_RCU on first splat"
+ depends on PROVE_RCU
+ default n
+ help
+ By itself, PROVE_RCU will disable checking upon issuing the
+ first warning (or "splat"). This feature prevents such
+ disabling, allowing multiple RCU-lockdep warnings to be printed
+ on a single reboot.
+
+ Say N if you are unsure.
+
+config LOCKDEP
+ bool
+ depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ select STACKTRACE
+ select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE
+ select KALLSYMS
+ select KALLSYMS_ALL
+
+config LOCK_STAT
+ bool "Lock usage statistics"
+ depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ select LOCKDEP
+ select DEBUG_SPINLOCK
+ select DEBUG_MUTEXES
+ select DEBUG_LOCK_ALLOC
+ default n
+ help
+ This feature enables tracking lock contention points.
+
+ For more details, see Documentation/lockstat.txt.
+
+ This also enables lock events required by "perf lock",
+ subcommand of perf.
+ If you want to use "perf lock", you also need to turn on
+ CONFIG_EVENT_TRACING.
+
+ CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
+ (CONFIG_LOCKDEP defines "acquire" and "release" events.)
+
+config DEBUG_LOCKDEP
+ bool "Lock dependency engine debugging"
+ depends on DEBUG_KERNEL && LOCKDEP
+ help
+ If you say Y here, the lock dependency engine will do
+ additional runtime checks to debug itself, at the price
+ of more runtime overhead.
+
+config TRACE_IRQFLAGS
+ depends on DEBUG_KERNEL
+ bool
+ default y
+ depends on TRACE_IRQFLAGS_SUPPORT
+ depends on PROVE_LOCKING
+
+config DEBUG_SPINLOCK_SLEEP
+ bool "Spinlock debugging: sleep-inside-spinlock checking"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here, various routines which may sleep will become very
+ noisy if they are called with a spinlock held.
+
+config DEBUG_LOCKING_API_SELFTESTS
+ bool "Locking API boot-time self-tests"
+ depends on DEBUG_KERNEL
+ help
+ Say Y here if you want the kernel to run a short self-test during
+ bootup. The self-test checks whether common types of locking bugs
+ are detected by debugging mechanisms or not. (If you disable
+ lock debugging then those bugs won't be detected, of course.)
+ The following locking APIs are covered: spinlocks, rwlocks,
+ mutexes and rwsems.
+
+config STACKTRACE
+ bool
+ depends on STACKTRACE_SUPPORT
+
+config DEBUG_KOBJECT
+ bool "kobject debugging"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here, some extra kobject debugging messages will be sent
+ to the syslog.
+
+config DEBUG_HIGHMEM
+ bool "Highmem debugging"
+ depends on DEBUG_KERNEL && HIGHMEM
+ help
+ This option enables additional error checking for high memory systems.
+ Disable for production systems.
+
+config DEBUG_BUGVERBOSE
+ bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
+ depends on BUG
+ depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \
+ FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300
+ default y
+ help
+ Say Y here to make BUG() panics output the file name and line number
+ of the BUG call as well as the EIP and oops trace. This aids
+ debugging but costs about 70-100K of memory.
+
+config DEBUG_INFO
+ bool "Compile the kernel with debug info"
+ depends on DEBUG_KERNEL
+ help
+ If you say Y here the resulting kernel image will include
+ debugging info resulting in a larger kernel image.
+ This adds debug symbols to the kernel and modules (gcc -g), and
+ is needed if you intend to use kernel crashdump or binary object
+ tools like crash, kgdb, LKCD, gdb, etc on the kernel.
+ Say Y here only if you plan to debug the kernel.
+
+ If unsure, say N.
+
+config DEBUG_INFO_REDUCED
+ bool "Reduce debugging information"
+ depends on DEBUG_INFO
+ help
+ If you say Y here gcc is instructed to generate less debugging
+ information for structure types. This means that tools that
+ need full debugging information (like kgdb or systemtap) won't
+ be happy. But if you merely need debugging information to
+ resolve line numbers there is no loss. The advantage is that
+ build directory object sizes shrink dramatically over a full
+ DEBUG_INFO build and compile times are reduced too.
+ Only works with newer gcc versions.
+
+config DEBUG_VM
+ bool "Debug VM"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on extended checks in the virtual-memory system
+ that may impact performance.
+
+ If unsure, say N.
+
+config DEBUG_VIRTUAL
+ bool "Debug VM translations"
+ depends on DEBUG_KERNEL && X86
+ help
+ Enable some costly sanity checks in virtual to page code. This can
+ catch mistakes with virt_to_page() and friends.
+
+ If unsure, say N.
+
+config DEBUG_NOMMU_REGIONS
+ bool "Debug the global anon/private NOMMU mapping region tree"
+ depends on DEBUG_KERNEL && !MMU
+ help
+ This option causes the global tree of anonymous and private mapping
+ regions to be regularly checked for invalid topology.
+
+config DEBUG_WRITECOUNT
+ bool "Debug filesystem writers count"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to catch wrong use of the writers count in struct
+ vfsmount. This will increase the size of each file struct by
+ 32 bits.
+
+ If unsure, say N.
+
+config DEBUG_MEMORY_INIT
+ bool "Debug memory initialisation" if EMBEDDED
+ default !EMBEDDED
+ help
+ Enable this for additional checks during memory initialisation.
+ The sanity checks verify aspects of the VM such as the memory model
+ and other information provided by the architecture. Verbose
+ information will be printed at KERN_DEBUG loglevel depending
+ on the mminit_loglevel= command-line option.
+
+ If unsure, say Y
+
+config DEBUG_LIST
+ bool "Debug linked list manipulation"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on extended checks in the linked-list
+ walking routines.
+
+ If unsure, say N.
+
+config DEBUG_SG
+ bool "Debug SG table operations"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on checks on scatter-gather tables. This can
+ help find problems with drivers that do not properly initialize
+ their sg tables.
+
+ If unsure, say N.
+
+config DEBUG_NOTIFIERS
+ bool "Debug notifier call chains"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on sanity checking for notifier call chains.
+ This is most useful for kernel developers to make sure that
+ modules properly unregister themselves from notifier chains.
+ This is a relatively cheap check but if you care about maximum
+ performance, say N.
+
+config DEBUG_CREDENTIALS
+ bool "Debug credential management"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to turn on some debug checking for credential
+ management. The additional code keeps track of the number of
+ pointers from task_structs to any given cred struct, and checks to
+ see that this number never exceeds the usage count of the cred
+ struct.
+
+ Furthermore, if SELinux is enabled, this also checks that the
+ security pointer in the cred struct is never seen to be invalid.
+
+ If unsure, say N.
+
+#
+# Select this config option from the architecture Kconfig, if it
+# it is preferred to always offer frame pointers as a config
+# option on the architecture (regardless of KERNEL_DEBUG):
+#
+config ARCH_WANT_FRAME_POINTERS
+ bool
+ help
+
+config FRAME_POINTER
+ bool "Compile the kernel with frame pointers"
+ depends on DEBUG_KERNEL && \
+ (CRIS || M68K || M68KNOMMU || FRV || UML || \
+ AVR32 || SUPERH || BLACKFIN || MN10300) || \
+ ARCH_WANT_FRAME_POINTERS
+ default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
+ help
+ If you say Y here the resulting kernel image will be slightly
+ larger and slower, but it gives very useful debugging information
+ in case of kernel bugs. (precise oopses/stacktraces/warnings)
+
+config BOOT_PRINTK_DELAY
+ bool "Delay each boot printk message by N milliseconds"
+ depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY
+ help
+ This build option allows you to read kernel boot messages
+ by inserting a short delay after each one. The delay is
+ specified in milliseconds on the kernel command line,
+ using "boot_delay=N".
+
+ It is likely that you would also need to use "lpj=M" to preset
+ the "loops per jiffie" value.
+ See a previous boot log for the "lpj" value to use for your
+ system, and then set "lpj=M" before setting "boot_delay=N".
+ NOTE: Using this option may adversely affect SMP systems.
+ I.e., processors other than the first one may not boot up.
+ BOOT_PRINTK_DELAY also may cause DETECT_SOFTLOCKUP to detect
+ what it believes to be lockup conditions.
+
+config RCU_TORTURE_TEST
+ tristate "torture tests for RCU"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option provides a kernel module that runs torture tests
+ on the RCU infrastructure. The kernel module may be built
+ after the fact on the running kernel to be tested, if desired.
+
+ Say Y here if you want RCU torture tests to be built into
+ the kernel.
+ Say M if you want the RCU torture tests to build as a module.
+ Say N if you are unsure.
+
+config RCU_TORTURE_TEST_RUNNABLE
+ bool "torture tests for RCU runnable by default"
+ depends on RCU_TORTURE_TEST = y
+ default n
+ help
+ This option provides a way to build the RCU torture tests
+ directly into the kernel without them starting up at boot
+ time. You can use /proc/sys/kernel/rcutorture_runnable
+ to manually override this setting. This /proc file is
+ available only when the RCU torture tests have been built
+ into the kernel.
+
+ Say Y here if you want the RCU torture tests to start during
+ boot (you probably don't).
+ Say N here if you want the RCU torture tests to start only
+ after being manually enabled via /proc.
+
+config RCU_CPU_STALL_DETECTOR
+ bool "Check for stalled CPUs delaying RCU grace periods"
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default y
+ help
+ This option causes RCU to printk information on which
+ CPUs are delaying the current grace period, but only when
+ the grace period extends for excessive time periods.
+
+ Say N if you want to disable such checks.
+
+ Say Y if you are unsure.
+
+config RCU_CPU_STALL_VERBOSE
+ bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
+ depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
+ default y
+ help
+ This option causes RCU to printk detailed per-task information
+ for any tasks that are stalling the current RCU grace period.
+
+ Say N if you are unsure.
+
+ Say Y if you want to enable such checks.
+
+config KPROBES_SANITY_TEST
+ bool "Kprobes sanity tests"
+ depends on DEBUG_KERNEL
+ depends on KPROBES
+ default n
+ help
+ This option provides for testing basic kprobes functionality on
+ boot. A sample kprobe, jprobe and kretprobe are inserted and
+ verified for functionality.
+
+ Say N if you are unsure.
+
+config BACKTRACE_SELF_TEST
+ tristate "Self test for the backtrace code"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option provides a kernel module that can be used to test
+ the kernel stack backtrace code. This option is not useful
+ for distributions or general kernels, but only for kernel
+ developers working on architecture code.
+
+ Note that if you want to also test saved backtraces, you will
+ have to enable STACKTRACE as well.
+
+ Say N if you are unsure.
+
+config DEBUG_BLOCK_EXT_DEVT
+ bool "Force extended block device numbers and spread them"
+ depends on DEBUG_KERNEL
+ depends on BLOCK
+ default n
+ help
+ BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
+ SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
+ YOU ARE DOING. Distros, please enable this and fix whatever
+ is broken.
+
+ Conventionally, block device numbers are allocated from
+ predetermined contiguous area. However, extended block area
+ may introduce non-contiguous block device numbers. This
+ option forces most block device numbers to be allocated from
+ the extended space and spreads them to discover kernel or
+ userland code paths which assume predetermined contiguous
+ device number allocation.
+
+ Note that turning on this debug option shuffles all the
+ device numbers for all IDE and SCSI devices including libata
+ ones, so root partition specified using device number
+ directly (via rdev or root=MAJ:MIN) won't work anymore.
+ Textual device names (root=/dev/sdXn) will continue to work.
+
+ Say N if you are unsure.
+
+config DEBUG_FORCE_WEAK_PER_CPU
+ bool "Force weak per-cpu definitions"
+ depends on DEBUG_KERNEL
+ help
+ s390 and alpha require percpu variables in modules to be
+ defined weak to work around an addressing range issue which
+ puts the following two restrictions on percpu variable
+ definitions.
+
+ 1. percpu symbols must be unique whether static or not
+ 2. percpu variables can't be defined inside a function
+
+ To ensure that generic code follows the above rules, this
+ option forces all percpu variables to be defined as weak.
+
+config LKDTM
+ tristate "Linux Kernel Dump Test Tool Module"
+ depends on DEBUG_FS
+ depends on BLOCK
+ default n
+ help
+ This module enables testing of the different dumping mechanisms by
+ inducing system failures at predefined crash points.
+ If you don't need it, say N.
+ Choose M here to compile this code as a module. The module will be
+ called lkdtm.
+
+ Documentation on how to use the module can be found in
+ Documentation/fault-injection/provoke-crashes.txt
+
+config CPU_NOTIFIER_ERROR_INJECT
+ tristate "CPU notifier error injection module"
+ depends on HOTPLUG_CPU && DEBUG_KERNEL
+ help
+ This option provides a kernel module that can be used to test
+ the error handling of the cpu notifiers.
+
+ To compile this code as a module, choose M here: the module will
+ be called cpu-notifier-error-inject.
+
+ If unsure, say N.
+
+config FAULT_INJECTION
+ bool "Fault-injection framework"
+ depends on DEBUG_KERNEL
+ help
+ Provide fault-injection framework.
+ For more details, see Documentation/fault-injection/.
+
+config FAILSLAB
+ bool "Fault-injection capability for kmalloc"
+ depends on FAULT_INJECTION
+ depends on SLAB || SLUB
+ help
+ Provide fault-injection capability for kmalloc.
+
+config FAIL_PAGE_ALLOC
+ bool "Fault-injection capabilitiy for alloc_pages()"
+ depends on FAULT_INJECTION
+ help
+ Provide fault-injection capability for alloc_pages().
+
+config FAIL_MAKE_REQUEST
+ bool "Fault-injection capability for disk IO"
+ depends on FAULT_INJECTION && BLOCK
+ help
+ Provide fault-injection capability for disk IO.
+
+config FAIL_IO_TIMEOUT
+ bool "Fault-injection capability for faking disk interrupts"
+ depends on FAULT_INJECTION && BLOCK
+ help
+ Provide fault-injection capability on end IO handling. This
+ will make the block layer "forget" an interrupt as configured,
+ thus exercising the error handling.
+
+ Only works with drivers that use the generic timeout handling,
+ for others it won't do anything.
+
+config FAULT_INJECTION_DEBUG_FS
+ bool "Debugfs entries for fault-injection capabilities"
+ depends on FAULT_INJECTION && SYSFS && DEBUG_FS
+ help
+ Enable configuration of fault-injection capabilities via debugfs.
+
+config FAULT_INJECTION_STACKTRACE_FILTER
+ bool "stacktrace filter for fault-injection capabilities"
+ depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
+ depends on !X86_64
+ select STACKTRACE
+ select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE
+ help
+ Provide stacktrace filter for fault-injection capabilities
+
+config LATENCYTOP
+ bool "Latency measuring infrastructure"
+ depends on HAVE_LATENCYTOP_SUPPORT
+ depends on DEBUG_KERNEL
+ depends on STACKTRACE_SUPPORT
+ depends on PROC_FS
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
+ select KALLSYMS
+ select KALLSYMS_ALL
+ select STACKTRACE
+ select SCHEDSTATS
+ select SCHED_DEBUG
+ help
+ Enable this option if you want to use the LatencyTOP tool
+ to find out which userspace is blocking on what kernel operations.
+
+config SYSCTL_SYSCALL_CHECK
+ bool "Sysctl checks"
+ depends on SYSCTL
+ ---help---
+ sys_sysctl uses binary paths that have been found challenging
+ to properly maintain and use. This enables checks that help
+ you to keep things correct.
+
+source mm/Kconfig.debug
+source kernel/trace/Kconfig
+
+config PROVIDE_OHCI1394_DMA_INIT
+ bool "Remote debugging over FireWire early on boot"
+ depends on PCI && X86
+ help
+ If you want to debug problems which hang or crash the kernel early
+ on boot and the crashing machine has a FireWire port, you can use
+ this feature to remotely access the memory of the crashed machine
+ over FireWire. This employs remote DMA as part of the OHCI1394
+ specification which is now the standard for FireWire controllers.
+
+ With remote DMA, you can monitor the printk buffer remotely using
+ firescope and access all memory below 4GB using fireproxy from gdb.
+ Even controlling a kernel debugger is possible using remote DMA.
+
+ Usage:
+
+ If ohci1394_dma=early is used as boot parameter, it will initialize
+ all OHCI1394 controllers which are found in the PCI config space.
+
+ As all changes to the FireWire bus such as enabling and disabling
+ devices cause a bus reset and thereby disable remote DMA for all
+ devices, be sure to have the cable plugged and FireWire enabled on
+ the debugging host before booting the debug target for debugging.
+
+ This code (~1k) is freed after boot. By then, the firewire stack
+ in charge of the OHCI-1394 controllers should be used instead.
+
+ See Documentation/debugging-via-ohci1394.txt for more information.
+
+config FIREWIRE_OHCI_REMOTE_DMA
+ bool "Remote debugging over FireWire with firewire-ohci"
+ depends on FIREWIRE_OHCI
+ help
+ This option lets you use the FireWire bus for remote debugging
+ with help of the firewire-ohci driver. It enables unfiltered
+ remote DMA in firewire-ohci.
+ See Documentation/debugging-via-ohci1394.txt for more information.
+
+ If unsure, say N.
+
+config BUILD_DOCSRC
+ bool "Build targets in Documentation/ tree"
+ depends on HEADERS_CHECK
+ help
+ This option attempts to build objects from the source files in the
+ kernel Documentation/ tree.
+
+ Say N if you are unsure.
+
+config DYNAMIC_DEBUG
+ bool "Enable dynamic printk() support"
+ default n
+ depends on PRINTK
+ depends on DEBUG_FS
+ help
+
+ Compiles debug level messages into the kernel, which would not
+ otherwise be available at runtime. These messages can then be
+ enabled/disabled based on various levels of scope - per source file,
+ function, module, format string, and line number. This mechanism
+ implicitly enables all pr_debug() and dev_dbg() calls. The impact of
+ this compile option is a larger kernel text size of about 2%.
+
+ Usage:
+
+ Dynamic debugging is controlled via the 'dynamic_debug/control' file,
+ which is contained in the 'debugfs' filesystem. Thus, the debugfs
+ filesystem must first be mounted before making use of this feature.
+ We refer to the control file as <debugfs>/dynamic_debug/control. This
+ file contains a list of the debug statements that can be enabled. The
+ format for each line of the file is:
+
+ filename:lineno [module]function flags format
+
+ filename : source file of the debug statement
+ lineno : line number of the debug statement
+ module : module that contains the debug statement
+ function : function that contains the debug statement
+ flags : 'p' means the line is turned 'on' for printing
+ format : the format used for the debug statement
+
+ From a live system:
+
+ nullarbor:~ # cat <debugfs>/dynamic_debug/control
+ # filename:lineno [module]function flags format
+ fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012"
+ fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012"
+ fs/aio.c:1770 [aio]sys_io_cancel - "calling\040cancel\012"
+
+ Example usage:
+
+ // enable the message at line 1603 of file svcsock.c
+ nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
+ <debugfs>/dynamic_debug/control
+
+ // enable all the messages in file svcsock.c
+ nullarbor:~ # echo -n 'file svcsock.c +p' >
+ <debugfs>/dynamic_debug/control
+
+ // enable all the messages in the NFS server module
+ nullarbor:~ # echo -n 'module nfsd +p' >
+ <debugfs>/dynamic_debug/control
+
+ // enable all 12 messages in the function svc_process()
+ nullarbor:~ # echo -n 'func svc_process +p' >
+ <debugfs>/dynamic_debug/control
+
+ // disable all 12 messages in the function svc_process()
+ nullarbor:~ # echo -n 'func svc_process -p' >
+ <debugfs>/dynamic_debug/control
+
+ See Documentation/dynamic-debug-howto.txt for additional information.
+
+config DMA_API_DEBUG
+ bool "Enable debugging of DMA-API usage"
+ depends on HAVE_DMA_API_DEBUG
+ help
+ Enable this option to debug the use of the DMA API by device drivers.
+ With this option you will be able to detect common bugs in device
+ drivers like double-freeing of DMA mappings or freeing mappings that
+ were never allocated.
+ This option causes a performance degradation. Use only if you want
+ to debug device drivers. If unsure, say N.
+
+config ATOMIC64_SELFTEST
+ bool "Perform an atomic64_t self-test at boot"
+ help
+ Enable this option to test the atomic64_t functions at boot.
+
+ If unsure, say N.
+
+source "samples/Kconfig"
+
+source "lib/Kconfig.kgdb"
+
+source "lib/Kconfig.kmemcheck"
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
new file mode 100644
index 00000000..43cb93fa
--- /dev/null
+++ b/lib/Kconfig.kgdb
@@ -0,0 +1,82 @@
+
+config HAVE_ARCH_KGDB
+ bool
+
+menuconfig KGDB
+ bool "KGDB: kernel debugger"
+ depends on HAVE_ARCH_KGDB
+ depends on DEBUG_KERNEL && EXPERIMENTAL
+ help
+ If you say Y here, it will be possible to remotely debug the
+ kernel using gdb. It is recommended but not required, that
+ you also turn on the kernel config option
+ CONFIG_FRAME_POINTER to aid in producing more reliable stack
+ backtraces in the external debugger. Documentation of
+ kernel debugger is available at http://kgdb.sourceforge.net
+ as well as in DocBook form in Documentation/DocBook/. If
+ unsure, say N.
+
+if KGDB
+
+config KGDB_SERIAL_CONSOLE
+ tristate "KGDB: use kgdb over the serial console"
+ select CONSOLE_POLL
+ select MAGIC_SYSRQ
+ default y
+ help
+ Share a serial console with kgdb. Sysrq-g must be used
+ to break in initially.
+
+config KGDB_TESTS
+ bool "KGDB: internal test suite"
+ default n
+ help
+ This is a kgdb I/O module specifically designed to test
+ kgdb's internal functions. This kgdb I/O module is
+ intended for the development of new kgdb stubs
+ as well as for regression testing of the kgdb internals.
+ See drivers/misc/kgdbts.c for the details about
+ the tests. The most basic use of this I/O module is to boot
+ a kernel with the boot arguments "kgdbwait kgdbts=V1F100".
+
+config KGDB_TESTS_ON_BOOT
+ bool "KGDB: Run tests on boot"
+ depends on KGDB_TESTS
+ default n
+ help
+ Run the kgdb tests on boot up automatically without the need
+ to pass in a kernel parameter.
+
+config KGDB_TESTS_BOOT_STRING
+ string "KGDB: which internal kgdb tests to run"
+ depends on KGDB_TESTS_ON_BOOT
+ default "V1F100"
+ help
+ This is the command string to send to the kgdb test suite on
+ boot. See drivers/misc/kgdbts.c for detailed
+ information about other strings you could use beyond the
+ default of V1F100.
+
+config KGDB_LOW_LEVEL_TRAP
+ bool "KGDB: Allow debugging with traps in notifiers"
+ depends on X86 || MIPS
+ default n
+ help
+ This will add an extra callback to kgdb for the breakpoint
+ exception handler, which will allow kgdb to step
+ through a notify handler.
+
+config KGDB_KDB
+ bool "KGDB_KDB: include kdb frontend for kgdb"
+ default n
+ help
+ KDB frontend for the kernel.
+
+config KDB_KEYBOARD
+ bool "KGDB_KDB: keyboard as input device"
+ depends on VT && KGDB_KDB
+ default n
+ help
+ KDB can use a PS/2-type keyboard as an input device.
+
+endif # KGDB
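
Editor's note: as the KGDB_SERIAL_CONSOLE help says, sysrq-g is the initial
break-in. A sketch of a typical session under assumed settings; ttyS0 and
115200 are placeholders for your serial setup, not values from this diff:

	// boot the target sharing the console with kgdb:
	console=ttyS0,115200 kgdboc=ttyS0,115200

	// break into the debugger from a shell on the target:
	nullarbor:~ # echo g > /proc/sysrq-trigger

	// attach gdb on the host:
	host:~ $ gdb ./vmlinux
	(gdb) target remote /dev/ttyS0
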
diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck
new file mode 100644
index 00000000..846e039a
--- /dev/null
+++ b/lib/Kconfig.kmemcheck
@@ -0,0 +1,94 @@
+config HAVE_ARCH_KMEMCHECK
+ bool
+
+if HAVE_ARCH_KMEMCHECK
+
+menuconfig KMEMCHECK
+ bool "kmemcheck: trap use of uninitialized memory"
+ depends on DEBUG_KERNEL
+ depends on !X86_USE_3DNOW
+ depends on SLUB || SLAB
+ depends on !CC_OPTIMIZE_FOR_SIZE
+ depends on !FUNCTION_TRACER
+ select FRAME_POINTER
+ select STACKTRACE
+ default n
+ help
+ This option enables tracing of dynamically allocated kernel memory
+ to see if memory is used before it has been given an initial value.
+ Be aware that this requires half of your memory for bookkeeping and
+ will insert extra code at *every* read and write to tracked memory,
+ thus slowing down kernel code (but user code is unaffected).
+
+ The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable
+ or enable kmemcheck at boot-time. If the kernel is started with
+ kmemcheck=0, the large memory and CPU overhead is not incurred.
+
+choice
+ prompt "kmemcheck: default mode at boot"
+ depends on KMEMCHECK
+ default KMEMCHECK_ONESHOT_BY_DEFAULT
+ help
+ This option controls the default behaviour of kmemcheck when the
+ kernel boots and no kmemcheck= parameter is given.
+
+config KMEMCHECK_DISABLED_BY_DEFAULT
+ bool "disabled"
+ depends on KMEMCHECK
+
+config KMEMCHECK_ENABLED_BY_DEFAULT
+ bool "enabled"
+ depends on KMEMCHECK
+
+config KMEMCHECK_ONESHOT_BY_DEFAULT
+ bool "one-shot"
+ depends on KMEMCHECK
+ help
+ In one-shot mode, only the first error detected is reported before
+ kmemcheck is disabled.
+
+endchoice
+
+config KMEMCHECK_QUEUE_SIZE
+ int "kmemcheck: error queue size"
+ depends on KMEMCHECK
+ default 64
+ help
+ Select the maximum number of errors to store in the queue. Since
+ errors can occur virtually anywhere and in any context, we need a
+ temporary storage area which is guaranteed not to generate any
+ other faults. The queue will be emptied as soon as a tasklet may
+ be scheduled. If the queue is full, new error reports will be
+ lost.
+
+config KMEMCHECK_SHADOW_COPY_SHIFT
+ int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)"
+ depends on KMEMCHECK
+ range 2 8
+ default 5
+ help
+ Select the number of shadow bytes to save along with each entry of
+ the queue. These bytes indicate what parts of an allocation are
+ initialized, uninitialized, etc. and will be displayed when an
+ error is detected to help the debugging of a particular problem.
+
+config KMEMCHECK_PARTIAL_OK
+ bool "kmemcheck: allow partially uninitialized memory"
+ depends on KMEMCHECK
+ default y
+ help
+ This option works around certain GCC optimizations that produce
+ 32-bit reads from 16-bit variables where the upper 16 bits are
+ thrown away afterwards. This may of course also hide some real
+ bugs.
+
+config KMEMCHECK_BITOPS_OK
+ bool "kmemcheck: allow bit-field manipulation"
+ depends on KMEMCHECK
+ default n
+ help
+ This option silences warnings that would be generated for bit-field
+ accesses where not all the bits are initialized at the same time.
+ This may also hide some real bugs.
+
+endif
diff --git a/lib/Makefile b/lib/Makefile
new file mode 100644
index 00000000..e6a3763b
--- /dev/null
+++ b/lib/Makefile
@@ -0,0 +1,118 @@
+#
+# Makefile for some libs needed in the kernel.
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
+endif
+
+lib-y := ctype.o string.o vsprintf.o cmdline.o \
+ rbtree.o radix-tree.o dump_stack.o \
+ idr.o int_sqrt.o extable.o prio_tree.o \
+ sha1.o irq_regs.o reciprocal_div.o argv_split.o \
+ proportions.o prio_heap.o ratelimit.o show_mem.o \
+ is_single_threaded.o plist.o decompress.o flex_array.o
+
+lib-$(CONFIG_MMU) += ioremap.o
+lib-$(CONFIG_SMP) += cpumask.o
+
+lib-y += kobject.o kref.o klist.o
+
+obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
+ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
+ string_helpers.o gcd.o lcm.o list_sort.o uuid.o
+
+ifeq ($(CONFIG_DEBUG_KOBJECT),y)
+CFLAGS_kobject.o += -DDEBUG
+CFLAGS_kobject_uevent.o += -DDEBUG
+endif
+
+lib-$(CONFIG_HOTPLUG) += kobject_uevent.o
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
+obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
+obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
+lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o
+lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o
+
+CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
+obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
+
+obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
+
+ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
+ lib-y += dec_and_lock.o
+endif
+
+obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_RATIONAL) += rational.o
+obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
+obj-$(CONFIG_CRC16) += crc16.o
+obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
+obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o
+obj-$(CONFIG_CRC32) += crc32.o
+obj-$(CONFIG_CRC7) += crc7.o
+obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
+obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
+
+obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
+obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
+obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
+obj-$(CONFIG_LZO_COMPRESS) += lzo/
+obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
+obj-$(CONFIG_RAID6_PQ) += raid6/
+
+lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o
+lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
+lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
+lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
+
+obj-$(CONFIG_TEXTSEARCH) += textsearch.o
+obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
+obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
+obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
+obj-$(CONFIG_SMP) += percpu_counter.o
+obj-$(CONFIG_AUDIT_GENERIC) += audit.o
+
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
+obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o
+
+lib-$(CONFIG_GENERIC_BUG) += bug.o
+
+obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o
+
+obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o
+
+obj-$(CONFIG_NLATTR) += nlattr.o
+
+obj-$(CONFIG_LRU_CACHE) += lru_cache.o
+
+obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
+
+obj-$(CONFIG_GENERIC_CSUM) += checksum.o
+
+obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
+
+obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o
+
+hostprogs-y := gen_crc32table
+clean-files := crc32table.h
+
+$(obj)/crc32.o: $(obj)/crc32table.h
+
+quiet_cmd_crc32 = GEN $@
+ cmd_crc32 = $< > $@
+
+$(obj)/crc32table.h: $(obj)/gen_crc32table
+ $(call cmd,crc32)
diff --git a/lib/argv_split.c b/lib/argv_split.c
new file mode 100644
index 00000000..4b1b083f
--- /dev/null
+++ b/lib/argv_split.c
@@ -0,0 +1,101 @@
+/*
+ * Helper function for splitting a string into an argv-like array.
+ */
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+static const char *skip_arg(const char *cp)
+{
+ while (*cp && !isspace(*cp))
+ cp++;
+
+ return cp;
+}
+
+static int count_argc(const char *str)
+{
+ int count = 0;
+
+ while (*str) {
+ str = skip_spaces(str);
+ if (*str) {
+ count++;
+ str = skip_arg(str);
+ }
+ }
+
+ return count;
+}
+
+/**
+ * argv_free - free an argv
+ * @argv: the argument vector to be freed
+ *
+ * Frees an argv and the strings it points to.
+ */
+void argv_free(char **argv)
+{
+ char **p;
+ for (p = argv; *p; p++)
+ kfree(*p);
+
+ kfree(argv);
+}
+EXPORT_SYMBOL(argv_free);
+
+/**
+ * argv_split - split a string at whitespace, returning an argv
+ * @gfp: the GFP mask used to allocate memory
+ * @str: the string to be split
+ * @argcp: returned argument count
+ *
+ * Returns an array of pointers to strings which are split out from
+ * @str. This is performed by strictly splitting on white-space; no
+ * quote processing is performed. Multiple whitespace characters are
+ * considered to be a single argument separator. The returned array
+ * is always NULL-terminated. Returns NULL on memory allocation
+ * failure.
+ */
+char **argv_split(gfp_t gfp, const char *str, int *argcp)
+{
+ int argc = count_argc(str);
+ char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp);
+ char **argvp;
+
+ if (argv == NULL)
+ goto out;
+
+ if (argcp)
+ *argcp = argc;
+
+ argvp = argv;
+
+ while (*str) {
+ str = skip_spaces(str);
+
+ if (*str) {
+ const char *p = str;
+ char *t;
+
+ str = skip_arg(str);
+
+ t = kstrndup(p, str-p, gfp);
+ if (t == NULL)
+ goto fail;
+ *argvp++ = t;
+ }
+ }
+ *argvp = NULL;
+
+ out:
+ return argv;
+
+ fail:
+ argv_free(argv);
+ return NULL;
+}
+EXPORT_SYMBOL(argv_split);
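A minimal usage sketch (hypothetical caller, not part of this file), pairing
argv_split() with argv_free() as documented above:

	static int demo_split(void)
	{
		int argc;
		char **argv = argv_split(GFP_KERNEL, "  one two\tthree ", &argc);

		if (!argv)
			return -ENOMEM;
		/* argc == 3; argv[0..2] are "one", "two", "three"; argv[3] is NULL */
		argv_free(argv);
		return 0;
	}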
diff --git a/lib/atomic64.c b/lib/atomic64.c
new file mode 100644
index 00000000..a21c12bc
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,186 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable. Since this is expected to be used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS 16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+ spinlock_t lock;
+ char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp;
+
+static inline spinlock_t *lock_addr(const atomic64_t *v)
+{
+ unsigned long addr = (unsigned long) v;
+
+ addr >>= L1_CACHE_SHIFT;
+ addr ^= (addr >> 8) ^ (addr >> 16);
+ return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_read);
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+
+ spin_lock_irqsave(lock, flags);
+ v->counter = i;
+ spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_set);
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+
+ spin_lock_irqsave(lock, flags);
+ v->counter += a;
+ spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_add);
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter += a;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_add_return);
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+
+ spin_lock_irqsave(lock, flags);
+ v->counter -= a;
+ spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_sub);
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter -= a;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_sub_return);
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter - 1;
+ if (val >= 0)
+ v->counter = val;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_dec_if_positive);
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ if (val == o)
+ v->counter = n;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_cmpxchg);
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ v->counter = new;
+ spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_xchg);
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+ unsigned long flags;
+ spinlock_t *lock = lock_addr(v);
+ int ret = 0;
+
+ spin_lock_irqsave(lock, flags);
+ if (v->counter != u) {
+ v->counter += a;
+ ret = 1;
+ }
+ spin_unlock_irqrestore(lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(atomic64_add_unless);
+
+static int init_atomic64_lock(void)
+{
+ int i;
+
+ for (i = 0; i < NR_LOCKS; ++i)
+ spin_lock_init(&atomic64_lock[i].lock);
+ return 0;
+}
+
+pure_initcall(init_atomic64_lock);
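A hedged usage sketch (hypothetical caller): the spinlock-backed fallback is a
drop-in for the regular atomic64_t API, so callers look the same on every
architecture:

	static atomic64_t total = ATOMIC64_INIT(0);

	static void account(long long nbytes)
	{
		/* each op takes one of the 16 hashed spinlocks internally */
		atomic64_add(nbytes, &total);
		if (atomic64_read(&total) < 0)	/* reads lock too: no torn values */
			atomic64_set(&total, 0);
	}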
diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c
new file mode 100644
index 00000000..44524cc8
--- /dev/null
+++ b/lib/atomic64_test.c
@@ -0,0 +1,166 @@
+/*
+ * Testsuite for atomic64_t functions
+ *
+ * Copyright © 2010 Luca Barbieri
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/atomic.h>
+
+#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0)
+static __init int test_atomic64(void)
+{
+ long long v0 = 0xaaa31337c001d00dLL;
+ long long v1 = 0xdeadbeefdeafcafeLL;
+ long long v2 = 0xfaceabadf00df001LL;
+ long long onestwos = 0x1111111122222222LL;
+ long long one = 1LL;
+
+ atomic64_t v = ATOMIC64_INIT(v0);
+ long long r = v0;
+ BUG_ON(v.counter != r);
+
+ atomic64_set(&v, v1);
+ r = v1;
+ BUG_ON(v.counter != r);
+ BUG_ON(atomic64_read(&v) != r);
+
+ INIT(v0);
+ atomic64_add(onestwos, &v);
+ r += onestwos;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ atomic64_add(-one, &v);
+ r += -one;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ r += onestwos;
+ BUG_ON(atomic64_add_return(onestwos, &v) != r);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ r += -one;
+ BUG_ON(atomic64_add_return(-one, &v) != r);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ atomic64_sub(onestwos, &v);
+ r -= onestwos;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ atomic64_sub(-one, &v);
+ r -= -one;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ r -= onestwos;
+ BUG_ON(atomic64_sub_return(onestwos, &v) != r);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ r -= -one;
+ BUG_ON(atomic64_sub_return(-one, &v) != r);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ atomic64_inc(&v);
+ r += one;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ r += one;
+ BUG_ON(atomic64_inc_return(&v) != r);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ atomic64_dec(&v);
+ r -= one;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ r -= one;
+ BUG_ON(atomic64_dec_return(&v) != r);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ BUG_ON(atomic64_xchg(&v, v1) != v0);
+ r = v1;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0);
+ r = v1;
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0);
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ BUG_ON(atomic64_add_unless(&v, one, v0));
+ BUG_ON(v.counter != r);
+
+ INIT(v0);
+ BUG_ON(!atomic64_add_unless(&v, one, v1));
+ r += one;
+ BUG_ON(v.counter != r);
+
+#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \
+ defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM)
+ INIT(onestwos);
+ BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1));
+ r -= one;
+ BUG_ON(v.counter != r);
+
+ INIT(0);
+ BUG_ON(atomic64_dec_if_positive(&v) != -one);
+ BUG_ON(v.counter != r);
+
+ INIT(-one);
+ BUG_ON(atomic64_dec_if_positive(&v) != (-one - one));
+ BUG_ON(v.counter != r);
+#else
+#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above
+#endif
+
+ INIT(onestwos);
+ BUG_ON(!atomic64_inc_not_zero(&v));
+ r += one;
+ BUG_ON(v.counter != r);
+
+ INIT(0);
+ BUG_ON(atomic64_inc_not_zero(&v));
+ BUG_ON(v.counter != r);
+
+ INIT(-one);
+ BUG_ON(!atomic64_inc_not_zero(&v));
+ r += one;
+ BUG_ON(v.counter != r);
+
+#ifdef CONFIG_X86
+ printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n",
+#ifdef CONFIG_X86_64
+ "x86-64",
+#elif defined(CONFIG_X86_CMPXCHG64)
+ "i586+",
+#else
+ "i386+",
+#endif
+ boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without",
+ boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without");
+#else
+ printk(KERN_INFO "atomic64 test passed\n");
+#endif
+
+ return 0;
+}
+
+core_initcall(test_atomic64);
diff --git a/lib/audit.c b/lib/audit.c
new file mode 100644
index 00000000..8e7dc1c6
--- /dev/null
+++ b/lib/audit.c
@@ -0,0 +1,66 @@
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/audit.h>
+#include <asm/unistd.h>
+
+static unsigned dir_class[] = {
+#include <asm-generic/audit_dir_write.h>
+~0U
+};
+
+static unsigned read_class[] = {
+#include <asm-generic/audit_read.h>
+~0U
+};
+
+static unsigned write_class[] = {
+#include <asm-generic/audit_write.h>
+~0U
+};
+
+static unsigned chattr_class[] = {
+#include <asm-generic/audit_change_attr.h>
+~0U
+};
+
+static unsigned signal_class[] = {
+#include <asm-generic/audit_signal.h>
+~0U
+};
+
+int audit_classify_arch(int arch)
+{
+ return 0;
+}
+
+int audit_classify_syscall(int abi, unsigned syscall)
+{
+ switch(syscall) {
+ case __NR_open:
+ return 2;
+#ifdef __NR_openat
+ case __NR_openat:
+ return 3;
+#endif
+#ifdef __NR_socketcall
+ case __NR_socketcall:
+ return 4;
+#endif
+ case __NR_execve:
+ return 5;
+ default:
+ return 0;
+ }
+}
+
+static int __init audit_classes_init(void)
+{
+ audit_register_class(AUDIT_CLASS_WRITE, write_class);
+ audit_register_class(AUDIT_CLASS_READ, read_class);
+ audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class);
+ audit_register_class(AUDIT_CLASS_CHATTR, chattr_class);
+ audit_register_class(AUDIT_CLASS_SIGNAL, signal_class);
+ return 0;
+}
+
+__initcall(audit_classes_init);
diff --git a/lib/bcd.c b/lib/bcd.c
new file mode 100644
index 00000000..d74257fd
--- /dev/null
+++ b/lib/bcd.c
@@ -0,0 +1,14 @@
+#include <linux/bcd.h>
+#include <linux/module.h>
+
+unsigned bcd2bin(unsigned char val)
+{
+ return (val & 0x0f) + (val >> 4) * 10;
+}
+EXPORT_SYMBOL(bcd2bin);
+
+unsigned char bin2bcd(unsigned val)
+{
+ return ((val / 10) << 4) + val % 10;
+}
+EXPORT_SYMBOL(bin2bcd);
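A worked example of the packed-BCD encoding these helpers convert (each nibble
holds one decimal digit):

	unsigned bin = bcd2bin(0x42);	 /* (0x42 & 0x0f) + (0x42 >> 4) * 10 == 42 */
	unsigned char bcd = bin2bcd(59); /* ((59 / 10) << 4) + 59 % 10 == 0x59 */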
diff --git a/lib/bitmap.c b/lib/bitmap.c
new file mode 100644
index 00000000..a02625a2
--- /dev/null
+++ b/lib/bitmap.c
@@ -0,0 +1,1102 @@
+/*
+ * lib/bitmap.c
+ * Helper functions for bitmap.h.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <asm/uaccess.h>
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs. The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'. The implementation makes
+ * no particular effort to keep them zero. It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures. See the big-endian headers
+ * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h
+ * for the best explanations of this ordering.
+ */
+
+int __bitmap_empty(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_empty);
+
+int __bitmap_full(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (~bitmap[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_full);
+
+int __bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] != bitmap2[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_equal);
+
+void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ dst[k] = ~src[k];
+
+ if (bits % BITS_PER_LONG)
+ dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+}
+EXPORT_SYMBOL(__bitmap_complement);
+
+/**
+ * __bitmap_shift_right - logical right shift of the bits in a bitmap
+ * @dst : destination bitmap
+ * @src : source bitmap
+ * @shift : shift by this many bits
+ * @bits : bitmap size, in bits
+ *
+ * Shifting right (dividing) means moving bits in the MS -> LS bit
+ * direction. Zeros are fed into the vacated MS positions and the
+ * LS bits shifted off the bottom are lost.
+ */
+void __bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits)
+{
+ int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
+ int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+ unsigned long mask = (1UL << left) - 1;
+ for (k = 0; off + k < lim; ++k) {
+ unsigned long upper, lower;
+
+ /*
+ * If shift is not word aligned, take lower rem bits of
+ * word above and make them the top rem bits of result.
+ */
+ if (!rem || off + k + 1 >= lim)
+ upper = 0;
+ else {
+ upper = src[off + k + 1];
+ if (off + k + 1 == lim - 1 && left)
+ upper &= mask;
+ }
+ lower = src[off + k];
+ if (left && off + k == lim - 1)
+ lower &= mask;
+ dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem;
+ if (left && k == lim - 1)
+ dst[k] &= mask;
+ }
+ if (off)
+ memset(&dst[lim - off], 0, off*sizeof(unsigned long));
+}
+EXPORT_SYMBOL(__bitmap_shift_right);
+
+
+/**
+ * __bitmap_shift_left - logical left shift of the bits in a bitmap
+ * @dst : destination bitmap
+ * @src : source bitmap
+ * @shift : shift by this many bits
+ * @bits : bitmap size, in bits
+ *
+ * Shifting left (multiplying) means moving bits in the LS -> MS
+ * direction. Zeros are fed into the vacated LS bit positions
+ * and those MS bits shifted off the top are lost.
+ */
+
+void __bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits)
+{
+ int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG;
+ int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
+ for (k = lim - off - 1; k >= 0; --k) {
+ unsigned long upper, lower;
+
+ /*
+ * If shift is not word aligned, take upper rem bits of
+ * word below and make them the bottom rem bits of result.
+ */
+ if (rem && k > 0)
+ lower = src[k - 1];
+ else
+ lower = 0;
+ upper = src[k];
+ if (left && k == lim - 1)
+ upper &= (1UL << left) - 1;
+ dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem;
+ if (left && k + off == lim - 1)
+ dst[k + off] &= (1UL << left) - 1;
+ }
+ if (off)
+ memset(dst, 0, off*sizeof(unsigned long));
+}
+EXPORT_SYMBOL(__bitmap_shift_left);
+
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ return result != 0;
+}
+EXPORT_SYMBOL(__bitmap_and);
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] | bitmap2[k];
+}
+EXPORT_SYMBOL(__bitmap_or);
+
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] ^ bitmap2[k];
+}
+EXPORT_SYMBOL(__bitmap_xor);
+
+int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++)
+ result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+ return result != 0;
+}
+EXPORT_SYMBOL(__bitmap_andnot);
+
+int __bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] & bitmap2[k])
+ return 1;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL(__bitmap_intersects);
+
+int __bitmap_subset(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+ for (k = 0; k < lim; ++k)
+ if (bitmap1[k] & ~bitmap2[k])
+ return 0;
+
+ if (bits % BITS_PER_LONG)
+ if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+ return 0;
+ return 1;
+}
+EXPORT_SYMBOL(__bitmap_subset);
+
+int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight_long(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+ return w;
+}
+EXPORT_SYMBOL(__bitmap_weight);
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ nr -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+EXPORT_SYMBOL(bitmap_set);
+
+void bitmap_clear(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+EXPORT_SYMBOL(bitmap_clear);
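A small sketch of the pair above (hypothetical caller): set a run of bits that
crosses a word boundary, then clear it again:

	DECLARE_BITMAP(map, 128);

	bitmap_zero(map, 128);
	bitmap_set(map, 60, 10);	/* bits 60..69; spans two longs on 64-bit */
	bitmap_clear(map, 60, 10);	/* map is all zero again */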
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ * @align_offset: Alignment offset for zero area.
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds plus @align_offset
+ * is a multiple of that power of 2.
+ */
+unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask,
+ unsigned long align_offset)
+{
+ unsigned long index, end, i;
+again:
+ index = find_next_zero_bit(map, size, start);
+
+ /* Align allocation */
+ index = __ALIGN_MASK(index + align_offset, align_mask) - align_offset;
+
+ end = index + nr;
+ if (end > size)
+ return end;
+ i = find_next_bit(map, end, index);
+ if (i < end) {
+ start = i + 1;
+ goto again;
+ }
+ return index;
+}
+EXPORT_SYMBOL(bitmap_find_next_zero_area_off);
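For illustration (hypothetical caller): with @align_mask == 3 and
@align_offset == 0, any region found starts on a multiple of 4:

	DECLARE_BITMAP(map, 128);
	unsigned long pos;

	bitmap_zero(map, 128);
	pos = bitmap_find_next_zero_area_off(map, 128, 0, 8, 3, 0);
	if (pos < 128)				/* a return beyond @size means no fit */
		bitmap_set(map, pos, 8);	/* pos is a multiple of 4 */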
+
+/*
+ * Bitmap printing & parsing functions: first version by Bill Irwin,
+ * second version by Paul Jackson, third by Joe Korty.
+ */
+
+#define CHUNKSZ 32
+#define nbits_to_hold_value(val) fls(val)
+#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10))
+#define BASEDEC 10 /* fancier cpuset lists input in decimal */
+
+/**
+ * bitmap_scnprintf - convert bitmap to an ASCII hex string.
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Exactly @nmaskbits bits are displayed. Hex digits are grouped into
+ * comma-separated sets of eight digits per set.
+ */
+int bitmap_scnprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, int nmaskbits)
+{
+ int i, word, bit, len = 0;
+ unsigned long val;
+ const char *sep = "";
+ int chunksz;
+ u32 chunkmask;
+
+ chunksz = nmaskbits & (CHUNKSZ - 1);
+ if (chunksz == 0)
+ chunksz = CHUNKSZ;
+
+ i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ;
+ for (; i >= 0; i -= CHUNKSZ) {
+ chunkmask = ((1ULL << chunksz) - 1);
+ word = i / BITS_PER_LONG;
+ bit = i % BITS_PER_LONG;
+ val = (maskp[word] >> bit) & chunkmask;
+ len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep,
+ (chunksz+3)/4, val);
+ chunksz = CHUNKSZ;
+ sep = ",";
+ }
+ return len;
+}
+EXPORT_SYMBOL(bitmap_scnprintf);
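A worked example (values assumed, not from the source): on a 64-bit machine, a
64-bit mask with bits 0 and 33 set prints as two comma-separated 32-bit chunks:

	DECLARE_BITMAP(mask, 64) = { (1UL << 33) | 1UL };
	char buf[32];

	bitmap_scnprintf(buf, sizeof(buf), mask, 64);
	/* buf now holds "00000002,00000001" */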
+
+/**
+ * __bitmap_parse - convert an ASCII hex string into a bitmap.
+ * @buf: pointer to buffer containing string.
+ * @buflen: buffer size in bytes. If string is smaller than this
+ * then it must be terminated with a \0.
+ * @is_user: location of buffer, 0 indicates kernel space
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Commas group hex digits into chunks. Each chunk defines exactly 32
+ * bits of the resultant bitmask. No chunk may specify a value larger
+ * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value
+ * then leading 0-bits are prepended. %-EINVAL is returned for illegal
+ * characters and for grouping errors such as "1,,5", ",44", "," and "".
+ * Leading and trailing whitespace accepted, but not embedded whitespace.
+ */
+int __bitmap_parse(const char *buf, unsigned int buflen,
+ int is_user, unsigned long *maskp,
+ int nmaskbits)
+{
+ int c, old_c, totaldigits, ndigits, nchunks, nbits;
+ u32 chunk;
+ const char __user *ubuf = buf;
+
+ bitmap_zero(maskp, nmaskbits);
+
+ nchunks = nbits = totaldigits = c = 0;
+ do {
+ chunk = ndigits = 0;
+
+ /* Get the next chunk of the bitmap */
+ while (buflen) {
+ old_c = c;
+ if (is_user) {
+ if (__get_user(c, ubuf++))
+ return -EFAULT;
+ }
+ else
+ c = *buf++;
+ buflen--;
+ if (isspace(c))
+ continue;
+
+ /*
+ * If the last character was a space and the current
+ * character isn't '\0', we've got embedded whitespace.
+ * This is a no-no, so throw an error.
+ */
+ if (totaldigits && c && isspace(old_c))
+ return -EINVAL;
+
+ /* A '\0' or a ',' signal the end of the chunk */
+ if (c == '\0' || c == ',')
+ break;
+
+ if (!isxdigit(c))
+ return -EINVAL;
+
+ /*
+ * Make sure there are at least 4 free bits in 'chunk'.
+ * If not, this hexdigit will overflow 'chunk', so
+ * throw an error.
+ */
+ if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1))
+ return -EOVERFLOW;
+
+ chunk = (chunk << 4) | unhex(c);
+ ndigits++; totaldigits++;
+ }
+ if (ndigits == 0)
+ return -EINVAL;
+ if (nchunks == 0 && chunk == 0)
+ continue;
+
+ __bitmap_shift_left(maskp, maskp, CHUNKSZ, nmaskbits);
+ *maskp |= chunk;
+ nchunks++;
+ nbits += (nchunks == 1) ? nbits_to_hold_value(chunk) : CHUNKSZ;
+ if (nbits > nmaskbits)
+ return -EOVERFLOW;
+ } while (buflen && c == ',');
+
+ return 0;
+}
+EXPORT_SYMBOL(__bitmap_parse);
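Conversely, a hedged parsing sketch: the same chunked hex format is accepted
back, with short chunks implying leading zero-bits:

	DECLARE_BITMAP(mask, 64);
	const char *str = "2,00000001";

	/* kernel-space buffer, so is_user == 0; sets bits 33 and 0 */
	__bitmap_parse(str, strlen(str), 0, mask, 64);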
+
+/**
+ * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap
+ *
+ * @ubuf: pointer to user buffer containing string.
+ * @ulen: buffer size in bytes. If string is smaller than this
+ * then it must be terminated with a \0.
+ * @maskp: pointer to bitmap array that will contain result.
+ * @nmaskbits: size of bitmap, in bits.
+ *
+ * Wrapper for __bitmap_parse(), providing it with a user buffer.
+ *
+ * We cannot have this as an inline function in bitmap.h because it needs
+ * linux/uaccess.h to get the access_ok() declaration and this causes
+ * cyclic dependencies.
+ */
+int bitmap_parse_user(const char __user *ubuf,
+ unsigned int ulen, unsigned long *maskp,
+ int nmaskbits)
+{
+ if (!access_ok(VERIFY_READ, ubuf, ulen))
+ return -EFAULT;
+ return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits);
+}
+EXPORT_SYMBOL(bitmap_parse_user);
+
+/*
+ * bscnl_emit(buf, buflen, rbot, rtop, bp)
+ *
+ * Helper routine for bitmap_scnlistprintf(). Write decimal number
+ * or range to buf, suppressing output past buf+buflen, with optional
+ * comma-prefix. Return len of what would be written to buf, if it
+ * all fit.
+ */
+static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len)
+{
+ if (len > 0)
+ len += scnprintf(buf + len, buflen - len, ",");
+ if (rbot == rtop)
+ len += scnprintf(buf + len, buflen - len, "%d", rbot);
+ else
+ len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop);
+ return len;
+}
+
+/**
+ * bitmap_scnlistprintf - convert bitmap to list format ASCII string
+ * @buf: byte buffer into which string is placed
+ * @buflen: reserved size of @buf, in bytes
+ * @maskp: pointer to bitmap to convert
+ * @nmaskbits: size of bitmap, in bits
+ *
+ * Output format is a comma-separated list of decimal numbers and
+ * ranges. Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range. Output format is compatible with the format
+ * accepted as input by bitmap_parselist().
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing '\0', as
+ * per ISO C99.
+ */
+int bitmap_scnlistprintf(char *buf, unsigned int buflen,
+ const unsigned long *maskp, int nmaskbits)
+{
+ int len = 0;
+ /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+ int cur, rbot, rtop;
+
+ if (buflen == 0)
+ return 0;
+ buf[0] = 0;
+
+ rbot = cur = find_first_bit(maskp, nmaskbits);
+ while (cur < nmaskbits) {
+ rtop = cur;
+ cur = find_next_bit(maskp, nmaskbits, cur+1);
+ if (cur >= nmaskbits || cur > rtop + 1) {
+ len = bscnl_emit(buf, buflen, rbot, rtop, len);
+ rbot = cur;
+ }
+ }
+ return len;
+}
+EXPORT_SYMBOL(bitmap_scnlistprintf);
+
+/**
+ * bitmap_parselist - convert list format ASCII string to bitmap
+ * @bp: read nul-terminated user string from this buffer
+ * @maskp: write resulting mask here
+ * @nmaskbits: number of bits in mask to be written
+ *
+ * Input format is a comma-separated list of decimal numbers and
+ * ranges. Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range.
+ *
+ * Returns 0 on success, -errno on invalid input strings.
+ * Error values:
+ * %-EINVAL: second number in range smaller than first
+ * %-EINVAL: invalid character in string
+ * %-ERANGE: bit number specified too large for mask
+ */
+int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+ unsigned a, b;
+
+ bitmap_zero(maskp, nmaskbits);
+ do {
+ if (!isdigit(*bp))
+ return -EINVAL;
+ b = a = simple_strtoul(bp, (char **)&bp, BASEDEC);
+ if (*bp == '-') {
+ bp++;
+ if (!isdigit(*bp))
+ return -EINVAL;
+ b = simple_strtoul(bp, (char **)&bp, BASEDEC);
+ }
+ if (!(a <= b))
+ return -EINVAL;
+ if (b >= nmaskbits)
+ return -ERANGE;
+ while (a <= b) {
+ set_bit(a, maskp);
+ a++;
+ }
+ if (*bp == ',')
+ bp++;
+ } while (*bp != '\0' && *bp != '\n');
+ return 0;
+}
+EXPORT_SYMBOL(bitmap_parselist);
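For example (a sketch of the list format): "0-3,7" sets bits 0 through 3 and
bit 7:

	DECLARE_BITMAP(mask, 16);

	bitmap_parselist("0-3,7", mask, 16);	/* mask == 0x8f */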
+
+/**
+ * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap
+ * @buf: pointer to a bitmap
+ * @pos: a bit position in @buf (0 <= @pos < @bits)
+ * @bits: number of valid bit positions in @buf
+ *
+ * Map the bit at position @pos in @buf (of length @bits) to the
+ * ordinal of which set bit it is. If it is not set or if @pos
+ * is not a valid bit position, map to -1.
+ *
+ * If for example, just bits 4 through 7 are set in @buf, then @pos
+ * values 4 through 7 will get mapped to 0 through 3, respectively,
+ * and other @pos values will get mapped to -1. When @pos value 7
+ * gets mapped to (returns) @ord value 3 in this example, that means
+ * that bit 7 is the 3rd (starting with 0th) set bit in @buf.
+ *
+ * The bit positions 0 through @bits - 1 are valid positions in @buf.
+ */
+static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
+{
+ int i, ord;
+
+ if (pos < 0 || pos >= bits || !test_bit(pos, buf))
+ return -1;
+
+ i = find_first_bit(buf, bits);
+ ord = 0;
+ while (i < pos) {
+ i = find_next_bit(buf, bits, i + 1);
+ ord++;
+ }
+ BUG_ON(i != pos);
+
+ return ord;
+}
+
+/**
+ * bitmap_ord_to_pos - find position of n-th set bit in bitmap
+ * @buf: pointer to bitmap
+ * @ord: ordinal bit position (n-th set bit, n >= 0)
+ * @bits: number of valid bit positions in @buf
+ *
+ * Map the ordinal offset of bit @ord in @buf to its position in @buf.
+ * Value of @ord should be in range 0 <= @ord < weight(buf), else
+ * results are undefined.
+ *
+ * If for example, just bits 4 through 7 are set in @buf, then @ord
+ * values 0 through 3 will get mapped to 4 through 7, respectively,
+ * and all other @ord values return undefined values. When @ord value 3
+ * gets mapped to (returns) @pos value 7 in this example, that means
+ * that the 3rd set bit (starting with 0th) is at position 7 in @buf.
+ *
+ * The bit positions 0 through @bits - 1 are valid positions in @buf.
+ */
+static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
+{
+ int pos = 0;
+
+ if (ord >= 0 && ord < bits) {
+ int i;
+
+ for (i = find_first_bit(buf, bits);
+ i < bits && ord > 0;
+ i = find_next_bit(buf, bits, i + 1))
+ ord--;
+ if (i < bits && ord == 0)
+ pos = i;
+ }
+
+ return pos;
+}
+
+/**
+ * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap
+ * @dst: remapped result
+ * @src: subset to be remapped
+ * @old: defines domain of map
+ * @new: defines range of map
+ * @bits: number of bits in each of these bitmaps
+ *
+ * Let @old and @new define a mapping of bit positions, such that
+ * whatever position is held by the n-th set bit in @old is mapped
+ * to the n-th set bit in @new. In the more general case, allowing
+ * for the possibility that the weight 'w' of @new is less than the
+ * weight of @old, map the position of the n-th set bit in @old to
+ * the position of the m-th set bit in @new, where m == n % w.
+ *
+ * If either of the @old and @new bitmaps are empty, or if @src and
+ * @dst point to the same location, then this routine copies @src
+ * to @dst.
+ *
+ * The positions of unset bits in @old are mapped to themselves
+ * (the identity map).
+ *
+ * Apply the above specified mapping to @src, placing the result in
+ * @dst, clearing any bits previously set in @dst.
+ *
+ * For example, let's say that @old has bits 4 through 7 set, and
+ * @new has bits 12 through 15 set. This defines the mapping of bit
+ * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
+ * bit positions unchanged. So if, say, @src comes into this routine
+ * with bits 1, 5 and 7 set, then @dst should leave with bits 1,
+ * 13 and 15 set.
+ */
+void bitmap_remap(unsigned long *dst, const unsigned long *src,
+ const unsigned long *old, const unsigned long *new,
+ int bits)
+{
+ int oldbit, w;
+
+ if (dst == src) /* following doesn't handle inplace remaps */
+ return;
+ bitmap_zero(dst, bits);
+
+ w = bitmap_weight(new, bits);
+ for_each_set_bit(oldbit, src, bits) {
+ int n = bitmap_pos_to_ord(old, oldbit, bits);
+
+ if (n < 0 || w == 0)
+ set_bit(oldbit, dst); /* identity map */
+ else
+ set_bit(bitmap_ord_to_pos(new, n % w, bits), dst);
+ }
+}
+EXPORT_SYMBOL(bitmap_remap);
+
+/**
+ * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit
+ * @oldbit: bit position to be mapped
+ * @old: defines domain of map
+ * @new: defines range of map
+ * @bits: number of bits in each of these bitmaps
+ *
+ * Let @old and @new define a mapping of bit positions, such that
+ * whatever position is held by the n-th set bit in @old is mapped
+ * to the n-th set bit in @new. In the more general case, allowing
+ * for the possibility that the weight 'w' of @new is less than the
+ * weight of @old, map the position of the n-th set bit in @old to
+ * the position of the m-th set bit in @new, where m == n % w.
+ *
+ * The positions of unset bits in @old are mapped to themselves
+ * (the identity map).
+ *
+ * Apply the above specified mapping to bit position @oldbit, returning
+ * the new bit position.
+ *
+ * For example, let's say that @old has bits 4 through 7 set, and
+ * @new has bits 12 through 15 set. This defines the mapping of bit
+ * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
+ * bit positions unchanged. So if, say, @oldbit is 5, then this routine
+ * returns 13.
+ */
+int bitmap_bitremap(int oldbit, const unsigned long *old,
+ const unsigned long *new, int bits)
+{
+ int w = bitmap_weight(new, bits);
+ int n = bitmap_pos_to_ord(old, oldbit, bits);
+ if (n < 0 || w == 0)
+ return oldbit;
+ else
+ return bitmap_ord_to_pos(new, n % w, bits);
+}
+EXPORT_SYMBOL(bitmap_bitremap);
+
+/**
+ * bitmap_onto - translate one bitmap relative to another
+ * @dst: resulting translated bitmap
+ * @orig: original untranslated bitmap
+ * @relmap: bitmap relative to which translated
+ * @bits: number of bits in each of these bitmaps
+ *
+ * Set the n-th bit of @dst iff there exists some m such that the
+ * n-th bit of @relmap is set, the m-th bit of @orig is set, and
+ * the n-th bit of @relmap is also the m-th _set_ bit of @relmap.
+ * (If you understood the previous sentence the first time you
+ * read it, you're overqualified for your current job.)
+ *
+ * In other words, @orig is mapped onto (surjectively) @dst,
+ * using the map { <n, m> | the n-th bit of @relmap is the
+ * m-th set bit of @relmap }.
+ *
+ * Any set bits in @orig above bit number W, where W is the
+ * weight of (number of set bits in) @relmap are mapped nowhere.
+ * In particular, if for all bits m set in @orig, m >= W, then
+ * @dst will end up empty. In situations where the possibility
+ * of such an empty result is not desired, one way to avoid it is
+ * to use the bitmap_fold() operator, below, to first fold the
+ * @orig bitmap over itself so that all its set bits x are in the
+ * range 0 <= x < W. The bitmap_fold() operator does this by
+ * setting the bit (m % W) in @dst, for each bit (m) set in @orig.
+ *
+ * Example [1] for bitmap_onto():
+ * Let's say @relmap has bits 30-39 set, and @orig has bits
+ * 1, 3, 5, 7, 9 and 11 set. Then on return from this routine,
+ * @dst will have bits 31, 33, 35, 37 and 39 set.
+ *
+ * When bit 0 is set in @orig, it means turn on the bit in
+ * @dst corresponding to whatever is the first bit (if any)
+ * that is turned on in @relmap. Since bit 0 was off in the
+ * above example, we leave off that bit (bit 30) in @dst.
+ *
+ * When bit 1 is set in @orig (as in the above example), it
+ * means turn on the bit in @dst corresponding to whatever
+ * is the second bit that is turned on in @relmap. The second
+ * bit in @relmap that was turned on in the above example was
+ * bit 31, so we turned on bit 31 in @dst.
+ *
+ * Similarly, we turned on bits 33, 35, 37 and 39 in @dst,
+ * because they were the 4th, 6th, 8th and 10th set bits
+ * set in @relmap, and the 4th, 6th, 8th and 10th bits of
+ * @orig (i.e. bits 3, 5, 7 and 9) were also set.
+ *
+ * When bit 11 is set in @orig, it means turn on the bit in
+ * @dst corresponding to whatever is the twelfth bit that is
+ * turned on in @relmap. In the above example, there were
+ * only ten bits turned on in @relmap (30..39), so the fact that
+ * bit 11 was set in @orig had no effect on @dst.
+ *
+ * Example [2] for bitmap_fold() + bitmap_onto():
+ * Let's say @relmap has these ten bits set:
+ * 40 41 42 43 45 48 53 61 74 95
+ * (for the curious, that's 40 plus the first ten terms of the
+ * Fibonacci sequence.)
+ *
+ * Further let's say we use the following code, invoking
+ * bitmap_fold() then bitmap_onto(), as suggested above to
+ * avoid the possibility of an empty @dst result:
+ *
+ * unsigned long *tmp; // a temporary bitmap's bits
+ *
+ * bitmap_fold(tmp, orig, bitmap_weight(relmap, bits), bits);
+ * bitmap_onto(dst, tmp, relmap, bits);
+ *
+ * Then this table shows what various values of @dst would be, for
+ * various @orig's. I list the zero-based positions of each set bit.
+ * The tmp column shows the intermediate result, as computed by
+ * using bitmap_fold() to fold the @orig bitmap modulo ten
+ * (the weight of @relmap).
+ *
+ * @orig tmp @dst
+ * 0 0 40
+ * 1 1 41
+ * 9 9 95
+ * 10 0 40 (*)
+ * 1 3 5 7 1 3 5 7 41 43 48 61
+ * 0 1 2 3 4 0 1 2 3 4 40 41 42 43 45
+ * 0 9 18 27 0 9 8 7 40 61 74 95
+ * 0 10 20 30 0 40
+ * 0 11 22 33 0 1 2 3 40 41 42 43
+ * 0 12 24 36 0 2 4 6 40 42 45 53
+ * 78 102 211 1 2 8 41 42 74 (*)
+ *
+ * (*) For these marked lines, if we hadn't first done bitmap_fold()
+ * into tmp, then the @dst result would have been empty.
+ *
+ * If either of @orig or @relmap is empty (no set bits), then @dst
+ * will be returned empty.
+ *
+ * If (as explained above) the only set bits in @orig are in positions
+ * m where m >= W, (where W is the weight of @relmap) then @dst will
+ * once again be returned empty.
+ *
+ * All bits in @dst not set by the above rule are cleared.
+ */
+void bitmap_onto(unsigned long *dst, const unsigned long *orig,
+ const unsigned long *relmap, int bits)
+{
+ int n, m; /* same meaning as in above comment */
+
+ if (dst == orig) /* following doesn't handle inplace mappings */
+ return;
+ bitmap_zero(dst, bits);
+
+ /*
+ * The following code is a more efficient, but less
+ * obvious, equivalent to the loop:
+ * for (m = 0; m < bitmap_weight(relmap, bits); m++) {
+ * n = bitmap_ord_to_pos(orig, m, bits);
+ * if (test_bit(m, orig))
+ * set_bit(n, dst);
+ * }
+ */
+
+ m = 0;
+ for_each_set_bit(n, relmap, bits) {
+ /* m == bitmap_pos_to_ord(relmap, n, bits) */
+ if (test_bit(m, orig))
+ set_bit(n, dst);
+ m++;
+ }
+}
+EXPORT_SYMBOL(bitmap_onto);
+
+/**
+ * bitmap_fold - fold larger bitmap into smaller, modulo specified size
+ * @dst: resulting smaller bitmap
+ * @orig: original larger bitmap
+ * @sz: specified size
+ * @bits: number of bits in each of these bitmaps
+ *
+ * For each bit oldbit in @orig, set bit oldbit mod @sz in @dst.
+ * Clear all other bits in @dst. See further the comment and
+ * Example [2] for bitmap_onto() for why and how to use this.
+ */
+void bitmap_fold(unsigned long *dst, const unsigned long *orig,
+ int sz, int bits)
+{
+ int oldbit;
+
+ if (dst == orig) /* following doesn't handle inplace mappings */
+ return;
+ bitmap_zero(dst, bits);
+
+ for_each_set_bit(oldbit, orig, bits)
+ set_bit(oldbit % sz, dst);
+}
+EXPORT_SYMBOL(bitmap_fold);
+
+/*
+ * Common code for bitmap_*_region() routines.
+ * bitmap: array of unsigned longs corresponding to the bitmap
+ * pos: the beginning of the region
+ * order: region size (log base 2 of number of bits)
+ * reg_op: operation(s) to perform on that region of bitmap
+ *
+ * Can set, verify and/or release a region of bits in a bitmap,
+ * depending on which combination of REG_OP_* flag bits is set.
+ *
+ * A region of a bitmap is a sequence of bits in the bitmap, of
+ * some size '1 << order' (a power of two), aligned to that same
+ * '1 << order' power of two.
+ *
+ * Returns 1 if REG_OP_ISFREE succeeds (region is all zero bits).
+ * Returns 0 in all other cases and for all other reg_op values.
+ */
+
+enum {
+ REG_OP_ISFREE, /* true if region is all zero bits */
+ REG_OP_ALLOC, /* set all bits in region */
+ REG_OP_RELEASE, /* clear all bits in region */
+};
+
+static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op)
+{
+ int nbits_reg; /* number of bits in region */
+ int index; /* index first long of region in bitmap */
+ int offset; /* bit offset region in bitmap[index] */
+ int nlongs_reg; /* num longs spanned by region in bitmap */
+ int nbitsinlong; /* num bits of region in each spanned long */
+ unsigned long mask; /* bitmask for one long of region */
+ int i; /* scans bitmap by longs */
+ int ret = 0; /* return value */
+
+ /*
+ * Either nlongs_reg == 1 (for small orders that fit in one long)
+ * or (offset == 0 && mask == ~0UL) (for larger multiword orders.)
+ */
+ nbits_reg = 1 << order;
+ index = pos / BITS_PER_LONG;
+ offset = pos - (index * BITS_PER_LONG);
+ nlongs_reg = BITS_TO_LONGS(nbits_reg);
+ nbitsinlong = min(nbits_reg, BITS_PER_LONG);
+
+ /*
+ * Can't do "mask = (1UL << nbitsinlong) - 1", as that
+ * overflows if nbitsinlong == BITS_PER_LONG.
+ */
+ mask = (1UL << (nbitsinlong - 1));
+ mask += mask - 1;
+ mask <<= offset;
+
+ switch (reg_op) {
+ case REG_OP_ISFREE:
+ for (i = 0; i < nlongs_reg; i++) {
+ if (bitmap[index + i] & mask)
+ goto done;
+ }
+ ret = 1; /* all bits in region free (zero) */
+ break;
+
+ case REG_OP_ALLOC:
+ for (i = 0; i < nlongs_reg; i++)
+ bitmap[index + i] |= mask;
+ break;
+
+ case REG_OP_RELEASE:
+ for (i = 0; i < nlongs_reg; i++)
+ bitmap[index + i] &= ~mask;
+ break;
+ }
+done:
+ return ret;
+}
+
+/**
+ * bitmap_find_free_region - find a contiguous aligned mem region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @bits: number of bits in the bitmap
+ * @order: region size (log base 2 of number of bits) to find
+ *
+ * Find a region of free (zero) bits in a @bitmap of @bits bits and
+ * allocate them (set them to one). Only consider regions of length
+ * a power (@order) of two, aligned to that power of two, which
+ * makes the search algorithm much faster.
+ *
+ * Return the bit offset in bitmap of the allocated region,
+ * or -errno on failure.
+ */
+int bitmap_find_free_region(unsigned long *bitmap, int bits, int order)
+{
+ int pos, end; /* scans bitmap by regions of size order */
+
+ for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) {
+ if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
+ continue;
+ __reg_op(bitmap, pos, order, REG_OP_ALLOC);
+ return pos;
+ }
+ return -ENOMEM;
+}
+EXPORT_SYMBOL(bitmap_find_free_region);
+
+/**
+ * bitmap_release_region - release allocated bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to release
+ * @order: region size (log base 2 of number of bits) to release
+ *
+ * This is the complement to bitmap_find_free_region() and releases
+ * the found region (by clearing it in the bitmap).
+ *
+ * No return value.
+ */
+void bitmap_release_region(unsigned long *bitmap, int pos, int order)
+{
+ __reg_op(bitmap, pos, order, REG_OP_RELEASE);
+}
+EXPORT_SYMBOL(bitmap_release_region);
+
+/**
+ * bitmap_allocate_region - allocate bitmap region
+ * @bitmap: array of unsigned longs corresponding to the bitmap
+ * @pos: beginning of bit region to allocate
+ * @order: region size (log base 2 of number of bits) to allocate
+ *
+ * Allocate (set bits in) a specified region of a bitmap.
+ *
+ * Return 0 on success, or %-EBUSY if specified region wasn't
+ * free (not all bits were zero).
+ */
+int bitmap_allocate_region(unsigned long *bitmap, int pos, int order)
+{
+ if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE))
+ return -EBUSY;
+ __reg_op(bitmap, pos, order, REG_OP_ALLOC);
+ return 0;
+}
+EXPORT_SYMBOL(bitmap_allocate_region);
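A hedged sketch combining the region helpers (hypothetical caller): grab an
aligned order-2 region (four bits), then release it:

	DECLARE_BITMAP(map, 64);
	int pos;

	bitmap_zero(map, 64);
	pos = bitmap_find_free_region(map, 64, 2);	/* always a multiple of 4 */
	if (pos >= 0)					/* negative means -ENOMEM */
		bitmap_release_region(map, pos, 2);	/* clears bits pos..pos+3 */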
+
+/**
+ * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order.
+ * @dst: destination buffer
+ * @src: bitmap to copy
+ * @nbits: number of bits in the bitmap
+ *
+ * Require nbits % BITS_PER_LONG == 0.
+ */
+void bitmap_copy_le(void *dst, const unsigned long *src, int nbits)
+{
+ unsigned long *d = dst;
+ int i;
+
+ for (i = 0; i < nbits/BITS_PER_LONG; i++) {
+ if (BITS_PER_LONG == 64)
+ d[i] = cpu_to_le64(src[i]);
+ else
+ d[i] = cpu_to_le32(src[i]);
+ }
+}
+EXPORT_SYMBOL(bitmap_copy_le);
diff --git a/lib/bitrev.c b/lib/bitrev.c
new file mode 100644
index 00000000..39562034
--- /dev/null
+++ b/lib/bitrev.c
@@ -0,0 +1,59 @@
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/bitrev.h>
+
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
+MODULE_DESCRIPTION("Bit ordering reversal functions");
+MODULE_LICENSE("GPL");
+
+const u8 byte_rev_table[256] = {
+ 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+ 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+ 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+ 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+ 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+ 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+ 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+ 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+ 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+ 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+ 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+ 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+ 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+ 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+ 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+ 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+ 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+ 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+ 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+ 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+ 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+ 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+ 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+ 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+ 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+ 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+ 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+ 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+ 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+ 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+ 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+ 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
+};
+EXPORT_SYMBOL_GPL(byte_rev_table);
+
+u16 bitrev16(u16 x)
+{
+ return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8);
+}
+EXPORT_SYMBOL(bitrev16);
+
+/**
+ * bitrev32 - reverse the order of bits in a u32 value
+ * @x: value to be bit-reversed
+ */
+u32 bitrev32(u32 x)
+{
+ return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16);
+}
+EXPORT_SYMBOL(bitrev32);
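A worked example of the table-driven reversal (values follow directly from
byte_rev_table above):

	u8 a = bitrev8(0x01);		/* a == 0x80 */
	u16 b = bitrev16(0x0001);	/* b == 0x8000 */
	u32 c = bitrev32(0x00000001);	/* c == 0x80000000 */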
diff --git a/lib/btree.c b/lib/btree.c
new file mode 100644
index 00000000..c9c6f035
--- /dev/null
+++ b/lib/btree.c
@@ -0,0 +1,798 @@
+/*
+ * lib/btree.c - Simple In-memory B+Tree
+ *
+ * As should be obvious for Linux kernel code, license is GPLv2
+ *
+ * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Bits and pieces stolen from Peter Zijlstra's code, which is
+ * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com>
+ * GPLv2
+ *
+ * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch
+ *
+ * A relatively simple B+Tree implementation. I have written it as a learning
+ * exercise to understand how B+Trees work. Turned out to be useful as well.
+ *
+ * B+Trees can be used similar to Linux radix trees (which don't have anything
+ * in common with textbook radix trees, beware). Prerequisite for them working
+ * well is that access to a random tree node is much faster than a large number
+ * of operations within each node.
+ *
+ * Disks have fulfilled the prerequisite for a long time. More recently DRAM
+ * has gained similar properties, as memory access times, when measured in cpu
+ * cycles, have increased. Cacheline sizes have increased as well, which also
+ * helps B+Trees.
+ *
+ * Compared to radix trees, B+Trees are more efficient when dealing with a
+ * sparsely populated address space. Between 25% and 50% of the memory is
+ * occupied with valid pointers. When densely populated, radix trees contain
+ * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2%
+ * pointers.
+ *
+ * This particular implementation stores pointers identified by a long value.
+ * Storing NULL pointers is illegal; lookup will return NULL when no entry
+ * was found.
+ *
+ * A trick was used that is not commonly found in textbooks. The lowest
+ * values are to the right, not to the left. All used slots within a node
+ * are on the left, all unused slots contain NUL values. Most operations
+ * simply loop once over all slots and terminate on the first NUL.
+ */
+
+#include <linux/btree.h>
+#include <linux/cache.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define NODESIZE MAX(L1_CACHE_BYTES, 128)
+
+struct btree_geo {
+ int keylen;
+ int no_pairs;
+ int no_longs;
+};
+
+struct btree_geo btree_geo32 = {
+ .keylen = 1,
+ .no_pairs = NODESIZE / sizeof(long) / 2,
+ .no_longs = NODESIZE / sizeof(long) / 2,
+};
+EXPORT_SYMBOL_GPL(btree_geo32);
+
+#define LONG_PER_U64 (64 / BITS_PER_LONG)
+struct btree_geo btree_geo64 = {
+ .keylen = LONG_PER_U64,
+ .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64),
+ .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo64);
+
+struct btree_geo btree_geo128 = {
+ .keylen = 2 * LONG_PER_U64,
+ .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64),
+ .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)),
+};
+EXPORT_SYMBOL_GPL(btree_geo128);
+
+static struct kmem_cache *btree_cachep;
+
+void *btree_alloc(gfp_t gfp_mask, void *pool_data)
+{
+ return kmem_cache_alloc(btree_cachep, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(btree_alloc);
+
+void btree_free(void *element, void *pool_data)
+{
+ kmem_cache_free(btree_cachep, element);
+}
+EXPORT_SYMBOL_GPL(btree_free);
+
+static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp)
+{
+ unsigned long *node;
+
+ node = mempool_alloc(head->mempool, gfp);
+ if (likely(node))
+ memset(node, 0, NODESIZE);
+ return node;
+}
+
+static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ if (l1[i] < l2[i])
+ return -1;
+ if (l1[i] > l2[i])
+ return 1;
+ }
+ return 0;
+}
+
+static unsigned long *longcpy(unsigned long *dest, const unsigned long *src,
+ size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ dest[i] = src[i];
+ return dest;
+}
+
+static unsigned long *longset(unsigned long *s, unsigned long c, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ s[i] = c;
+ return s;
+}
+
+static void dec_key(struct btree_geo *geo, unsigned long *key)
+{
+ unsigned long val;
+ int i;
+
+ for (i = geo->keylen - 1; i >= 0; i--) {
+ val = key[i];
+ key[i] = val - 1;
+ if (val)
+ break;
+ }
+}
+
+static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n)
+{
+ return &node[n * geo->keylen];
+}
+
+static void *bval(struct btree_geo *geo, unsigned long *node, int n)
+{
+ return (void *)node[geo->no_longs + n];
+}
+
+static void setkey(struct btree_geo *geo, unsigned long *node, int n,
+ unsigned long *key)
+{
+ longcpy(bkey(geo, node, n), key, geo->keylen);
+}
+
+static void setval(struct btree_geo *geo, unsigned long *node, int n,
+ void *val)
+{
+ node[geo->no_longs + n] = (unsigned long) val;
+}
+
+static void clearpair(struct btree_geo *geo, unsigned long *node, int n)
+{
+ longset(bkey(geo, node, n), 0, geo->keylen);
+ node[geo->no_longs + n] = 0;
+}
+
+static inline void __btree_init(struct btree_head *head)
+{
+ head->node = NULL;
+ head->height = 0;
+}
+
+void btree_init_mempool(struct btree_head *head, mempool_t *mempool)
+{
+ __btree_init(head);
+ head->mempool = mempool;
+}
+EXPORT_SYMBOL_GPL(btree_init_mempool);
+
+int btree_init(struct btree_head *head)
+{
+ __btree_init(head);
+ head->mempool = mempool_create(0, btree_alloc, btree_free, NULL);
+ if (!head->mempool)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btree_init);
+
+void btree_destroy(struct btree_head *head)
+{
+ mempool_destroy(head->mempool);
+ head->mempool = NULL;
+}
+EXPORT_SYMBOL_GPL(btree_destroy);
+
+void *btree_last(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key)
+{
+ int height = head->height;
+ unsigned long *node = head->node;
+
+ if (height == 0)
+ return NULL;
+
+ for ( ; height > 1; height--)
+ node = bval(geo, node, 0);
+
+ longcpy(key, bkey(geo, node, 0), geo->keylen);
+ return bval(geo, node, 0);
+}
+EXPORT_SYMBOL_GPL(btree_last);
+
+static int keycmp(struct btree_geo *geo, unsigned long *node, int pos,
+ unsigned long *key)
+{
+ return longcmp(bkey(geo, node, pos), key, geo->keylen);
+}
+
+static int keyzero(struct btree_geo *geo, unsigned long *key)
+{
+ int i;
+
+ for (i = 0; i < geo->keylen; i++)
+ if (key[i])
+ return 0;
+
+ return 1;
+}
+
+void *btree_lookup(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key)
+{
+ int i, height = head->height;
+ unsigned long *node = head->node;
+
+ if (height == 0)
+ return NULL;
+
+ for ( ; height > 1; height--) {
+ for (i = 0; i < geo->no_pairs; i++)
+ if (keycmp(geo, node, i, key) <= 0)
+ break;
+ if (i == geo->no_pairs)
+ return NULL;
+ node = bval(geo, node, i);
+ if (!node)
+ return NULL;
+ }
+
+ if (!node)
+ return NULL;
+
+ for (i = 0; i < geo->no_pairs; i++)
+ if (keycmp(geo, node, i, key) == 0)
+ return bval(geo, node, i);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(btree_lookup);
+
+int btree_update(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, void *val)
+{
+ int i, height = head->height;
+ unsigned long *node = head->node;
+
+ if (height == 0)
+ return -ENOENT;
+
+ for ( ; height > 1; height--) {
+ for (i = 0; i < geo->no_pairs; i++)
+ if (keycmp(geo, node, i, key) <= 0)
+ break;
+ if (i == geo->no_pairs)
+ return -ENOENT;
+ node = bval(geo, node, i);
+ if (!node)
+ return -ENOENT;
+ }
+
+ if (!node)
+ return -ENOENT;
+
+ for (i = 0; i < geo->no_pairs; i++)
+ if (keycmp(geo, node, i, key) == 0) {
+ setval(geo, node, i, val);
+ return 0;
+ }
+ return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(btree_update);
+
+/*
+ * Usually this function is quite similar to normal lookup. But the key of
+ * a parent node may be smaller than the smallest key of all its siblings.
+ * In such a case we cannot just return NULL, as we have only proven that no
+ * key smaller than __key but larger than this parent key exists.
+ * So we set __key to the parent key and retry. We have to use the smallest
+ * such parent key, which is the last parent key we encountered.
+ */
+void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *__key)
+{
+ int i, height;
+ unsigned long *node, *oldnode;
+ unsigned long *retry_key = NULL, key[geo->keylen];
+
+ if (keyzero(geo, __key))
+ return NULL;
+
+ if (head->height == 0)
+ return NULL;
+retry:
+ longcpy(key, __key, geo->keylen);
+ dec_key(geo, key);
+
+ node = head->node;
+ for (height = head->height ; height > 1; height--) {
+ for (i = 0; i < geo->no_pairs; i++)
+ if (keycmp(geo, node, i, key) <= 0)
+ break;
+ if (i == geo->no_pairs)
+ goto miss;
+ oldnode = node;
+ node = bval(geo, node, i);
+ if (!node)
+ goto miss;
+ retry_key = bkey(geo, oldnode, i);
+ }
+
+ if (!node)
+ goto miss;
+
+ for (i = 0; i < geo->no_pairs; i++) {
+ if (keycmp(geo, node, i, key) <= 0) {
+ if (bval(geo, node, i)) {
+ longcpy(__key, bkey(geo, node, i), geo->keylen);
+ return bval(geo, node, i);
+ } else
+ goto miss;
+ }
+ }
+miss:
+ if (retry_key) {
+ __key = retry_key;
+ retry_key = NULL;
+ goto retry;
+ }
+ return NULL;
+}
+
+static int getpos(struct btree_geo *geo, unsigned long *node,
+ unsigned long *key)
+{
+ int i;
+
+ for (i = 0; i < geo->no_pairs; i++) {
+ if (keycmp(geo, node, i, key) <= 0)
+ break;
+ }
+ return i;
+}
+
+static int getfill(struct btree_geo *geo, unsigned long *node, int start)
+{
+ int i;
+
+ for (i = start; i < geo->no_pairs; i++)
+ if (!bval(geo, node, i))
+ break;
+ return i;
+}
+
+/*
+ * locate the correct leaf node in the btree
+ */
+static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, int level)
+{
+ unsigned long *node = head->node;
+ int i, height;
+
+ for (height = head->height; height > level; height--) {
+ for (i = 0; i < geo->no_pairs; i++)
+ if (keycmp(geo, node, i, key) <= 0)
+ break;
+
+ if ((i == geo->no_pairs) || !bval(geo, node, i)) {
+ /* right-most key is too large, update it */
+ /* FIXME: If the right-most key on higher levels is
+ * always zero, this wouldn't be necessary. */
+ i--;
+ setkey(geo, node, i, key);
+ }
+ BUG_ON(i < 0);
+ node = bval(geo, node, i);
+ }
+ BUG_ON(!node);
+ return node;
+}
+
+static int btree_grow(struct btree_head *head, struct btree_geo *geo,
+ gfp_t gfp)
+{
+ unsigned long *node;
+ int fill;
+
+ node = btree_node_alloc(head, gfp);
+ if (!node)
+ return -ENOMEM;
+ if (head->node) {
+ fill = getfill(geo, head->node, 0);
+ setkey(geo, node, 0, bkey(geo, head->node, fill - 1));
+ setval(geo, node, 0, head->node);
+ }
+ head->node = node;
+ head->height++;
+ return 0;
+}
+
+static void btree_shrink(struct btree_head *head, struct btree_geo *geo)
+{
+ unsigned long *node;
+ int fill;
+
+ if (head->height <= 1)
+ return;
+
+ node = head->node;
+ fill = getfill(geo, node, 0);
+ BUG_ON(fill > 1);
+ head->node = bval(geo, node, 0);
+ head->height--;
+ mempool_free(node, head->mempool);
+}
+
+static int btree_insert_level(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, void *val, int level,
+ gfp_t gfp)
+{
+ unsigned long *node;
+ int i, pos, fill, err;
+
+ BUG_ON(!val);
+ if (head->height < level) {
+ err = btree_grow(head, geo, gfp);
+ if (err)
+ return err;
+ }
+
+retry:
+ node = find_level(head, geo, key, level);
+ pos = getpos(geo, node, key);
+ fill = getfill(geo, node, pos);
+ /* two identical keys are not allowed */
+ BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0);
+
+ if (fill == geo->no_pairs) {
+ /* need to split node */
+ unsigned long *new;
+
+ new = btree_node_alloc(head, gfp);
+ if (!new)
+ return -ENOMEM;
+ err = btree_insert_level(head, geo,
+ bkey(geo, node, fill / 2 - 1),
+ new, level + 1, gfp);
+ if (err) {
+ mempool_free(new, head->mempool);
+ return err;
+ }
+ for (i = 0; i < fill / 2; i++) {
+ setkey(geo, new, i, bkey(geo, node, i));
+ setval(geo, new, i, bval(geo, node, i));
+ setkey(geo, node, i, bkey(geo, node, i + fill / 2));
+ setval(geo, node, i, bval(geo, node, i + fill / 2));
+ clearpair(geo, node, i + fill / 2);
+ }
+ if (fill & 1) {
+ setkey(geo, node, i, bkey(geo, node, fill - 1));
+ setval(geo, node, i, bval(geo, node, fill - 1));
+ clearpair(geo, node, fill - 1);
+ }
+ goto retry;
+ }
+ BUG_ON(fill >= geo->no_pairs);
+
+ /* shift and insert */
+ for (i = fill; i > pos; i--) {
+ setkey(geo, node, i, bkey(geo, node, i - 1));
+ setval(geo, node, i, bval(geo, node, i - 1));
+ }
+ setkey(geo, node, pos, key);
+ setval(geo, node, pos, val);
+
+ return 0;
+}
+
+int btree_insert(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, void *val, gfp_t gfp)
+{
+ return btree_insert_level(head, geo, key, val, 1, gfp);
+}
+EXPORT_SYMBOL_GPL(btree_insert);
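+
+/*
+ * Usage sketch (illustrative, not part of the original file; some_ptr
+ * is a placeholder and error handling is omitted): storing and
+ * retrieving a value under a single unsigned long key with the geo32
+ * geometry:
+ *
+ *	struct btree_head head;
+ *	unsigned long key = 42;
+ *	void *val;
+ *
+ *	btree_init(&head);
+ *	btree_insert(&head, &btree_geo32, &key, some_ptr, GFP_KERNEL);
+ *	val = btree_lookup(&head, &btree_geo32, &key);
+ *	btree_destroy(&head);
+ */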
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, int level);
+static void merge(struct btree_head *head, struct btree_geo *geo, int level,
+ unsigned long *left, int lfill,
+ unsigned long *right, int rfill,
+ unsigned long *parent, int lpos)
+{
+ int i;
+
+ for (i = 0; i < rfill; i++) {
+ /* Move all keys to the left */
+ setkey(geo, left, lfill + i, bkey(geo, right, i));
+ setval(geo, left, lfill + i, bval(geo, right, i));
+ }
+ /* Exchange left and right child in parent */
+ setval(geo, parent, lpos, right);
+ setval(geo, parent, lpos + 1, left);
+ /* Remove left (formerly right) child from parent */
+ btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1);
+ mempool_free(right, head->mempool);
+}
+
+static void rebalance(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, int level, unsigned long *child, int fill)
+{
+ unsigned long *parent, *left = NULL, *right = NULL;
+ int i, no_left, no_right;
+
+ if (fill == 0) {
+		/* Because we don't steal entries from a neighbour, this case
+		 * can happen: the parent node contains only a single child,
+		 * this node, so merging with a sibling never happens.
+ */
+ btree_remove_level(head, geo, key, level + 1);
+ mempool_free(child, head->mempool);
+ return;
+ }
+
+ parent = find_level(head, geo, key, level + 1);
+ i = getpos(geo, parent, key);
+ BUG_ON(bval(geo, parent, i) != child);
+
+ if (i > 0) {
+ left = bval(geo, parent, i - 1);
+ no_left = getfill(geo, left, 0);
+ if (fill + no_left <= geo->no_pairs) {
+ merge(head, geo, level,
+ left, no_left,
+ child, fill,
+ parent, i - 1);
+ return;
+ }
+ }
+ if (i + 1 < getfill(geo, parent, i)) {
+ right = bval(geo, parent, i + 1);
+ no_right = getfill(geo, right, 0);
+ if (fill + no_right <= geo->no_pairs) {
+ merge(head, geo, level,
+ child, fill,
+ right, no_right,
+ parent, i);
+ return;
+ }
+ }
+ /*
+ * We could also try to steal one entry from the left or right
+ * neighbor. By not doing so we changed the invariant from
+ * "all nodes are at least half full" to "no two neighboring
+ * nodes can be merged". Which means that the average fill of
+ * all nodes is still half or better.
+ */
+}
+
+static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key, int level)
+{
+ unsigned long *node;
+ int i, pos, fill;
+ void *ret;
+
+ if (level > head->height) {
+ /* we recursed all the way up */
+ head->height = 0;
+ head->node = NULL;
+ return NULL;
+ }
+
+ node = find_level(head, geo, key, level);
+ pos = getpos(geo, node, key);
+ fill = getfill(geo, node, pos);
+ if ((level == 1) && (keycmp(geo, node, pos, key) != 0))
+ return NULL;
+ ret = bval(geo, node, pos);
+
+ /* remove and shift */
+ for (i = pos; i < fill - 1; i++) {
+ setkey(geo, node, i, bkey(geo, node, i + 1));
+ setval(geo, node, i, bval(geo, node, i + 1));
+ }
+ clearpair(geo, node, fill - 1);
+
+ if (fill - 1 < geo->no_pairs / 2) {
+ if (level < head->height)
+ rebalance(head, geo, key, level, node, fill - 1);
+ else if (fill - 1 == 1)
+ btree_shrink(head, geo);
+ }
+
+ return ret;
+}
+
+void *btree_remove(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *key)
+{
+ if (head->height == 0)
+ return NULL;
+
+ return btree_remove_level(head, geo, key, 1);
+}
+EXPORT_SYMBOL_GPL(btree_remove);
+
+int btree_merge(struct btree_head *target, struct btree_head *victim,
+ struct btree_geo *geo, gfp_t gfp)
+{
+ unsigned long key[geo->keylen];
+ unsigned long dup[geo->keylen];
+ void *val;
+ int err;
+
+ BUG_ON(target == victim);
+
+ if (!(target->node)) {
+ /* target is empty, just copy fields over */
+ target->node = victim->node;
+ target->height = victim->height;
+ __btree_init(victim);
+ return 0;
+ }
+
+ /* TODO: This needs some optimizations. Currently we do three tree
+ * walks to remove a single object from the victim.
+ */
+ for (;;) {
+ if (!btree_last(victim, geo, key))
+ break;
+ val = btree_lookup(victim, geo, key);
+ err = btree_insert(target, geo, key, val, gfp);
+ if (err)
+ return err;
+ /* We must make a copy of the key, as the original will get
+ * mangled inside btree_remove. */
+ longcpy(dup, key, geo->keylen);
+ btree_remove(victim, geo, dup);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btree_merge);
+
+static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo,
+ unsigned long *node, unsigned long opaque,
+ void (*func)(void *elem, unsigned long opaque,
+ unsigned long *key, size_t index,
+ void *func2),
+ void *func2, int reap, int height, size_t count)
+{
+ int i;
+ unsigned long *child;
+
+ for (i = 0; i < geo->no_pairs; i++) {
+ child = bval(geo, node, i);
+ if (!child)
+ break;
+ if (height > 1)
+ count = __btree_for_each(head, geo, child, opaque,
+ func, func2, reap, height - 1, count);
+ else
+ func(child, opaque, bkey(geo, node, i), count++,
+ func2);
+ }
+ if (reap)
+ mempool_free(node, head->mempool);
+ return count;
+}
+
+static void empty(void *elem, unsigned long opaque, unsigned long *key,
+ size_t index, void *func2)
+{
+}
+
+void visitorl(void *elem, unsigned long opaque, unsigned long *key,
+ size_t index, void *__func)
+{
+ visitorl_t func = __func;
+
+ func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitorl);
+
+void visitor32(void *elem, unsigned long opaque, unsigned long *__key,
+ size_t index, void *__func)
+{
+ visitor32_t func = __func;
+ u32 *key = (void *)__key;
+
+ func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor32);
+
+void visitor64(void *elem, unsigned long opaque, unsigned long *__key,
+ size_t index, void *__func)
+{
+ visitor64_t func = __func;
+ u64 *key = (void *)__key;
+
+ func(elem, opaque, *key, index);
+}
+EXPORT_SYMBOL_GPL(visitor64);
+
+void visitor128(void *elem, unsigned long opaque, unsigned long *__key,
+ size_t index, void *__func)
+{
+ visitor128_t func = __func;
+ u64 *key = (void *)__key;
+
+ func(elem, opaque, key[0], key[1], index);
+}
+EXPORT_SYMBOL_GPL(visitor128);
+
+size_t btree_visitor(struct btree_head *head, struct btree_geo *geo,
+ unsigned long opaque,
+ void (*func)(void *elem, unsigned long opaque,
+ unsigned long *key,
+ size_t index, void *func2),
+ void *func2)
+{
+ size_t count = 0;
+
+ if (!func2)
+ func = empty;
+ if (head->node)
+ count = __btree_for_each(head, geo, head->node, opaque, func,
+ func2, 0, head->height, 0);
+ return count;
+}
+EXPORT_SYMBOL_GPL(btree_visitor);
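+
+/*
+ * Usage sketch (illustrative): walking every element with a visitorl_t
+ * callback; print_elem() is a hypothetical example callback:
+ *
+ *	static void print_elem(void *elem, unsigned long opaque,
+ *			       unsigned long key, size_t index)
+ *	{
+ *		printk("%zu: key %lu -> %p\n", index, key, elem);
+ *	}
+ *
+ *	btree_visitor(&head, &btree_geo32, 0, visitorl, print_elem);
+ */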
+
+size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo,
+ unsigned long opaque,
+ void (*func)(void *elem, unsigned long opaque,
+ unsigned long *key,
+ size_t index, void *func2),
+ void *func2)
+{
+ size_t count = 0;
+
+ if (!func2)
+ func = empty;
+ if (head->node)
+ count = __btree_for_each(head, geo, head->node, opaque, func,
+ func2, 1, head->height, 0);
+ __btree_init(head);
+ return count;
+}
+EXPORT_SYMBOL_GPL(btree_grim_visitor);
+
+static int __init btree_module_init(void)
+{
+ btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ return 0;
+}
+
+static void __exit btree_module_exit(void)
+{
+ kmem_cache_destroy(btree_cachep);
+}
+
+/* If core code starts using btree, initialization should happen even earlier */
+module_init(btree_module_init);
+module_exit(btree_module_exit);
+
+MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
diff --git a/lib/bug.c b/lib/bug.c
new file mode 100644
index 00000000..19552096
--- /dev/null
+++ b/lib/bug.c
@@ -0,0 +1,183 @@
+/*
+ Generic support for BUG()
+
+ This respects the following config options:
+
+ CONFIG_BUG - emit BUG traps. Nothing happens without this.
+ CONFIG_GENERIC_BUG - enable this code.
+ CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to
+ the containing struct bug_entry for bug_addr and file.
+ CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG
+
+ CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable
+ (though they're generally always on).
+
+ CONFIG_GENERIC_BUG is set by each architecture using this code.
+
+ To use this, your architecture must:
+
+ 1. Set up the config options:
+ - Enable CONFIG_GENERIC_BUG if CONFIG_BUG
+
+ 2. Implement BUG (and optionally BUG_ON, WARN, WARN_ON)
+ - Define HAVE_ARCH_BUG
+ - Implement BUG() to generate a faulting instruction
+ - NOTE: struct bug_entry does not have "file" or "line" entries
+ when CONFIG_DEBUG_BUGVERBOSE is not enabled, so you must generate
+ the values accordingly.
+
+ 3. Implement the trap
+ - In the illegal instruction trap handler (typically), verify
+ that the fault was in kernel mode, and call report_bug()
+ - report_bug() will return whether it was a false alarm, a warning,
+ or an actual bug.
+ - You must implement the is_valid_bugaddr(bugaddr) callback which
+ returns true if the eip is a real kernel address, and it points
+ to the expected BUG trap instruction.
+
+ Jeremy Fitzhardinge <jeremy@goop.org> 2006
+ */
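+
+/*
+ * Illustrative sketch (hypothetical architecture, not part of this
+ * file): the steps above usually translate into something like
+ *
+ *	#define HAVE_ARCH_BUG
+ *	#define BUG() do { asm volatile("trap"); } while (0)
+ *
+ * and, in the illegal-instruction trap handler, skipping over the trap
+ * when report_bug() says it was only a warning (regs->pc and
+ * BUG_INSN_LEN are made-up names):
+ *
+ *	if (!user_mode(regs) &&
+ *	    report_bug(regs->pc, regs) == BUG_TRAP_TYPE_WARN) {
+ *		regs->pc += BUG_INSN_LEN;
+ *		return;
+ *	}
+ *	die("kernel BUG", regs);
+ */
+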
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/sched.h>
+
+extern const struct bug_entry __start___bug_table[], __stop___bug_table[];
+
+static inline unsigned long bug_addr(const struct bug_entry *bug)
+{
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+ return bug->bug_addr;
+#else
+ return (unsigned long)bug + bug->bug_addr_disp;
+#endif
+}
+
+#ifdef CONFIG_MODULES
+static LIST_HEAD(module_bug_list);
+
+static const struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+ struct module *mod;
+
+ list_for_each_entry(mod, &module_bug_list, bug_list) {
+ const struct bug_entry *bug = mod->bug_table;
+ unsigned i;
+
+ for (i = 0; i < mod->num_bugs; ++i, ++bug)
+ if (bugaddr == bug_addr(bug))
+ return bug;
+ }
+ return NULL;
+}
+
+void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+ struct module *mod)
+{
+ char *secstrings;
+ unsigned int i;
+
+ mod->bug_table = NULL;
+ mod->num_bugs = 0;
+
+ /* Find the __bug_table section, if present */
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table"))
+ continue;
+ mod->bug_table = (void *) sechdrs[i].sh_addr;
+ mod->num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry);
+ break;
+ }
+
+ /*
+ * Strictly speaking this should have a spinlock to protect against
+ * traversals, but since we only traverse on BUG()s, a spinlock
+ * could potentially lead to deadlock and thus be counter-productive.
+ */
+ list_add(&mod->bug_list, &module_bug_list);
+}
+
+void module_bug_cleanup(struct module *mod)
+{
+ list_del(&mod->bug_list);
+}
+
+#else
+
+static inline const struct bug_entry *module_find_bug(unsigned long bugaddr)
+{
+ return NULL;
+}
+#endif
+
+const struct bug_entry *find_bug(unsigned long bugaddr)
+{
+ const struct bug_entry *bug;
+
+ for (bug = __start___bug_table; bug < __stop___bug_table; ++bug)
+ if (bugaddr == bug_addr(bug))
+ return bug;
+
+ return module_find_bug(bugaddr);
+}
+
+enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
+{
+ const struct bug_entry *bug;
+ const char *file;
+ unsigned line, warning;
+
+ if (!is_valid_bugaddr(bugaddr))
+ return BUG_TRAP_TYPE_NONE;
+
+ bug = find_bug(bugaddr);
+
+ file = NULL;
+ line = 0;
+ warning = 0;
+
+ if (bug) {
+#ifdef CONFIG_DEBUG_BUGVERBOSE
+#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
+ file = bug->file;
+#else
+ file = (const char *)bug + bug->file_disp;
+#endif
+ line = bug->line;
+#endif
+ warning = (bug->flags & BUGFLAG_WARNING) != 0;
+ }
+
+ if (warning) {
+ /* this is a WARN_ON rather than BUG/BUG_ON */
+ printk(KERN_WARNING "------------[ cut here ]------------\n");
+
+ if (file)
+ printk(KERN_WARNING "WARNING: at %s:%u\n",
+ file, line);
+ else
+ printk(KERN_WARNING "WARNING: at %p "
+ "[verbose debug info unavailable]\n",
+ (void *)bugaddr);
+
+ print_modules();
+ show_regs(regs);
+ print_oops_end_marker();
+ add_taint(BUG_GET_TAINT(bug));
+ return BUG_TRAP_TYPE_WARN;
+ }
+
+ printk(KERN_EMERG "------------[ cut here ]------------\n");
+
+ if (file)
+ printk(KERN_CRIT "kernel BUG at %s:%u!\n",
+ file, line);
+ else
+ printk(KERN_CRIT "Kernel BUG at %p "
+ "[verbose debug info unavailable]\n",
+ (void *)bugaddr);
+
+ return BUG_TRAP_TYPE_BUG;
+}
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
new file mode 100644
index 00000000..9681d54b
--- /dev/null
+++ b/lib/bust_spinlocks.c
@@ -0,0 +1,32 @@
+/*
+ * lib/bust_spinlocks.c
+ *
+ * Provides a minimal bust_spinlocks for architectures which don't have one of their own.
+ *
+ * bust_spinlocks() clears any spinlocks which would prevent oops, die(), BUG()
+ * and panic() information from reaching the user.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/tty.h>
+#include <linux/wait.h>
+#include <linux/vt_kern.h>
+#include <linux/console.h>
+
+
+void __attribute__((weak)) bust_spinlocks(int yes)
+{
+ if (yes) {
+ ++oops_in_progress;
+ } else {
+#ifdef CONFIG_VT
+ unblank_screen();
+#endif
+ console_unblank();
+ if (--oops_in_progress == 0)
+ wake_up_klogd();
+ }
+}
+
+
diff --git a/lib/check_signature.c b/lib/check_signature.c
new file mode 100644
index 00000000..fd6af199
--- /dev/null
+++ b/lib/check_signature.c
@@ -0,0 +1,26 @@
+#include <linux/io.h>
+#include <linux/module.h>
+
+/**
+ * check_signature - find BIOS signatures
+ * @io_addr: mmio address to check
+ * @signature: signature block
+ * @length: length of signature
+ *
+ * Perform a signature comparison with the mmio address io_addr. This
+ * address should have been obtained by ioremap.
+ * Returns 1 on a match.
+ */
+
+int check_signature(const volatile void __iomem *io_addr,
+ const unsigned char *signature, int length)
+{
+ while (length--) {
+ if (readb(io_addr) != *signature)
+ return 0;
+ io_addr++;
+ signature++;
+ }
+ return 1;
+}
+EXPORT_SYMBOL(check_signature);
diff --git a/lib/checksum.c b/lib/checksum.c
new file mode 100644
index 00000000..09750873
--- /dev/null
+++ b/lib/checksum.c
@@ -0,0 +1,203 @@
+/*
+ *
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * IP/TCP/UDP checksumming routines
+ *
+ * Authors: Jorge Cwik, <jorge@laser.satlink.net>
+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ * Tom May, <ftom@netcom.com>
+ * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ * Lots of code moved from tcp.c and ip.c; see those files
+ * for more names.
+ *
+ * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek:
+ * Fixed some nasty bugs, causing some horrible crashes.
+ * A: At some points, the sum (%0) was used as
+ * length-counter instead of the length counter
+ * (%1). Thanks to Roman Hodek for pointing this out.
+ * B: GCC seems to mess up if one uses too many
+ * data-registers to hold input values and one tries to
+ * specify d0 and d1 as scratch registers. Letting gcc
+ * choose these registers itself solves the problem.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
+ kills, so most of the assembly has to go. */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+#ifndef do_csum
+static inline unsigned short from32to16(unsigned int x)
+{
+	/* add the two 16-bit halves; result fits in 16 bits plus a carry */
+	x = (x & 0xffff) + (x >> 16);
+	/* fold the carry from the first addition back in */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
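+
+/*
+ * Worked example (illustrative): from32to16(0x1fffe) first folds to
+ * 0xfffe + 0x1 = 0xffff; the second fold absorbs any carry produced
+ * by the first addition.
+ */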
+
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+ int odd, count;
+ unsigned int result = 0;
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long) buff;
+ if (odd) {
+#ifdef __LITTLE_ENDIAN
+ result += (*buff << 8);
+#else
+ result = *buff;
+#endif
+ len--;
+ buff++;
+ }
+ count = len >> 1; /* nr of 16-bit words.. */
+ if (count) {
+ if (2 & (unsigned long) buff) {
+ result += *(unsigned short *) buff;
+ count--;
+ len -= 2;
+ buff += 2;
+ }
+ count >>= 1; /* nr of 32-bit words.. */
+ if (count) {
+ unsigned int carry = 0;
+ do {
+ unsigned int w = *(unsigned int *) buff;
+ count--;
+ buff += 4;
+ result += carry;
+ result += w;
+ carry = (w > result);
+ } while (count);
+ result += carry;
+ result = (result & 0xffff) + (result >> 16);
+ }
+ if (len & 2) {
+ result += *(unsigned short *) buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+#ifdef __LITTLE_ENDIAN
+ result += *buff;
+#else
+ result += (*buff << 8);
+#endif
+ result = from32to16(result);
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+ return result;
+}
+#endif
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ return (__force __sum16)~do_csum(iph, ihl*4);
+}
+EXPORT_SYMBOL(ip_fast_csum);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
+{
+ unsigned int sum = (__force unsigned int)wsum;
+ unsigned int result = do_csum(buff, len);
+
+ /* add in old sum, and carry.. */
+ result += sum;
+ if (sum > result)
+ result += 1;
+ return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_partial);
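+
+/*
+ * Usage sketch (illustrative, not part of the original file): the
+ * 32-bit sum can be carried across calls and folded to 16 bits at the
+ * end with csum_fold() from the checksum headers:
+ *
+ *	__wsum sum = csum_partial(buf, 64, 0);
+ *	sum = csum_partial(buf + 64, len - 64, sum);
+ *	__sum16 check = csum_fold(sum);
+ */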
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+ return (__force __sum16)~do_csum(buff, len);
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+/*
+ * copy from fs while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+ __wsum sum, int *csum_err)
+{
+ int missing;
+
+ missing = __copy_from_user(dst, src, len);
+ if (missing) {
+ memset(dst + len - missing, 0, missing);
+ *csum_err = -EFAULT;
+ } else
+ *csum_err = 0;
+
+ return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+ memcpy(dst, src, len);
+ return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
+
+#ifndef csum_tcpudp_nofold
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ unsigned short len,
+ unsigned short proto,
+ __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ s += (s >> 32);
+ return (__force __wsum)s;
+}
+EXPORT_SYMBOL(csum_tcpudp_nofold);
+#endif
diff --git a/lib/cmdline.c b/lib/cmdline.c
new file mode 100644
index 00000000..f5f3ad8b
--- /dev/null
+++ b/lib/cmdline.c
@@ -0,0 +1,159 @@
+/*
+ * linux/lib/cmdline.c
+ * Helper functions generally used for parsing kernel command line
+ * and module options.
+ *
+ * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ *
+ * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+/*
+ * If a hyphen was found in get_option, this will handle the
+ * range of numbers, M-N. This will expand the range and insert
+ * the values [M, M+1, ..., N] into the ints array in get_options.
+ */
+
+static int get_range(char **str, int *pint)
+{
+ int x, inc_counter, upper_range;
+
+ (*str)++;
+ upper_range = simple_strtol((*str), NULL, 0);
+ inc_counter = upper_range - *pint;
+ for (x = *pint; x < upper_range; x++)
+ *pint++ = x;
+ return inc_counter;
+}
+
+/**
+ * get_option - Parse integer from an option string
+ * @str: option string
+ * @pint: (output) integer value parsed from @str
+ *
+ * Read an int from an option string; if available accept a subsequent
+ * comma as well.
+ *
+ * Return values:
+ * 0 - no int in string
+ * 1 - int found, no subsequent comma
+ * 2 - int found including a subsequent comma
+ * 3 - hyphen found to denote a range
+ */
+
+int get_option (char **str, int *pint)
+{
+ char *cur = *str;
+
+ if (!cur || !(*cur))
+ return 0;
+ *pint = simple_strtol (cur, str, 0);
+ if (cur == *str)
+ return 0;
+ if (**str == ',') {
+ (*str)++;
+ return 2;
+ }
+ if (**str == '-')
+ return 3;
+
+ return 1;
+}
+
+/**
+ * get_options - Parse a string into a list of integers
+ * @str: String to be parsed
+ * @nints: size of integer array
+ * @ints: integer array
+ *
+ * This function parses a string containing a comma-separated
+ * list of integers, a hyphen-separated range of _positive_ integers,
+ * or a combination of both. The parse halts when the array is
+ * full, or when no more numbers can be retrieved from the
+ * string.
+ *
+ * The return value is a pointer to the character which caused
+ * the parse to end (typically a null terminator, if @str is
+ * completely parseable).
+ */
+
+char *get_options(const char *str, int nints, int *ints)
+{
+ int res, i = 1;
+
+ while (i < nints) {
+ res = get_option ((char **)&str, ints + i);
+ if (res == 0)
+ break;
+ if (res == 3) {
+ int range_nums;
+ range_nums = get_range((char **)&str, ints + i);
+ if (range_nums < 0)
+ break;
+ /*
+ * Decrement the result by one to leave out the
+ * last number in the range. The next iteration
+ * will handle the upper number in the range
+ */
+ i += (range_nums - 1);
+ }
+ i++;
+ if (res == 1)
+ break;
+ }
+ ints[0] = i - 1;
+ return (char *)str;
+}
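+
+/*
+ * Worked example (illustrative): get_options("1-3,5", 8, ints) fills
+ * the array as ints[] = { 4, 1, 2, 3, 5 }, where ints[0] holds the
+ * number of integers parsed.
+ */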
+
+/**
+ * memparse - parse a string with mem suffixes into a number
+ * @ptr: Where parse begins
+ * @retptr: (output) Optional pointer to next char after parse completes
+ *
+ * Parses a string into a number. The number stored at @ptr is
+ * potentially suffixed with %K (for kilobytes, or 1024 bytes),
+ * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or
+ * 1073741824). If the number is suffixed with K, M, or G, then
+ * the return value is the number multiplied by one kilobyte, one
+ * megabyte, or one gigabyte, respectively.
+ */
+
+unsigned long long memparse(const char *ptr, char **retptr)
+{
+ char *endptr; /* local pointer to end of parsed string */
+
+ unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
+
+ switch (*endptr) {
+	case 'G':
+	case 'g':
+		ret <<= 10;
+		/* fall through: each suffix accumulates another shift */
+	case 'M':
+	case 'm':
+		ret <<= 10;
+		/* fall through */
+	case 'K':
+	case 'k':
+		ret <<= 10;
+		endptr++;
+		/* fall through */
+	default:
+ break;
+ }
+
+ if (retptr)
+ *retptr = endptr;
+
+ return ret;
+}
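+
+/*
+ * Worked examples (illustrative): memparse("64K", NULL) returns 65536
+ * and memparse("1G", NULL) returns 1073741824, via the fall-through
+ * shifts above.
+ */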
+
+
+EXPORT_SYMBOL(memparse);
+EXPORT_SYMBOL(get_option);
+EXPORT_SYMBOL(get_options);
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
new file mode 100644
index 00000000..4dc20321
--- /dev/null
+++ b/lib/cpu-notifier-error-inject.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+
+static int priority;
+static int cpu_up_prepare_error;
+static int cpu_down_prepare_error;
+
+module_param(priority, int, 0);
+MODULE_PARM_DESC(priority, "specify cpu notifier priority");
+
+module_param(cpu_up_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_up_prepare_error,
+ "specify error code to inject CPU_UP_PREPARE action");
+
+module_param(cpu_down_prepare_error, int, 0644);
+MODULE_PARM_DESC(cpu_down_prepare_error,
+ "specify error code to inject CPU_DOWN_PREPARE action");
+
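+/*
+ * Usage sketch (illustrative): injecting -ENOMEM (-12) into the
+ * CPU_UP_PREPARE notification via the module parameter declared above:
+ *
+ *	modprobe cpu-notifier-error-inject cpu_up_prepare_error=-12
+ */
+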
+static int err_inject_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ int err = 0;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = cpu_up_prepare_error;
+ break;
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ err = cpu_down_prepare_error;
+ break;
+ }
+ if (err)
+ printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err);
+
+ return notifier_from_errno(err);
+}
+
+static struct notifier_block err_inject_cpu_notifier = {
+ .notifier_call = err_inject_cpu_callback,
+};
+
+static int err_inject_init(void)
+{
+ err_inject_cpu_notifier.priority = priority;
+
+ return register_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+static void err_inject_exit(void)
+{
+ unregister_hotcpu_notifier(&err_inject_cpu_notifier);
+}
+
+module_init(err_inject_init);
+module_exit(err_inject_exit);
+
+MODULE_DESCRIPTION("CPU notifier error injection module");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>");
diff --git a/lib/cpumask.c b/lib/cpumask.c
new file mode 100644
index 00000000..05d6aca7
--- /dev/null
+++ b/lib/cpumask.c
@@ -0,0 +1,178 @@
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/bootmem.h>
+
+int __first_cpu(const cpumask_t *srcp)
+{
+ return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS));
+}
+EXPORT_SYMBOL(__first_cpu);
+
+int __next_cpu(int n, const cpumask_t *srcp)
+{
+ return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1));
+}
+EXPORT_SYMBOL(__next_cpu);
+
+#if NR_CPUS > 64
+int __next_cpu_nr(int n, const cpumask_t *srcp)
+{
+ return min_t(int, nr_cpu_ids,
+ find_next_bit(srcp->bits, nr_cpu_ids, n+1));
+}
+EXPORT_SYMBOL(__next_cpu_nr);
+#endif
+
+int __any_online_cpu(const cpumask_t *mask)
+{
+ int cpu;
+
+ for_each_cpu_mask(cpu, *mask) {
+ if (cpu_online(cpu))
+ break;
+ }
+ return cpu;
+}
+EXPORT_SYMBOL(__any_online_cpu);
+
+/**
+ * cpumask_next_and - get the next cpu in *src1p & *src2p
+ * @n: the cpu prior to the place to search (i.e. the return will be > @n)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
+ *
+ * Returns >= nr_cpu_ids if no further cpus set in both.
+ */
+int cpumask_next_and(int n, const struct cpumask *src1p,
+ const struct cpumask *src2p)
+{
+ while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
+ if (cpumask_test_cpu(n, src2p))
+ break;
+ return n;
+}
+EXPORT_SYMBOL(cpumask_next_and);
+
+/**
+ * cpumask_any_but - return a "random" cpu in a cpumask, but not this one.
+ * @mask: the cpumask to search
+ * @cpu: the cpu to ignore.
+ *
+ * Often used to find any cpu but smp_processor_id() in a mask.
+ * Returns >= nr_cpu_ids if no cpus set.
+ */
+int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
+{
+ unsigned int i;
+
+ cpumask_check(cpu);
+ for_each_cpu(i, mask)
+ if (i != cpu)
+ break;
+ return i;
+}
+
+/* These are not inline because of header tangles. */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+/**
+ * alloc_cpumask_var_node - allocate a struct cpumask on a given node
+ * @mask: pointer to cpumask_var_t where the cpumask is returned
+ * @flags: GFP_ flags
+ * @node: memory node from which to allocate the cpumask
+ *
+ * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
+ * a nop returning a constant 1 (in <linux/cpumask.h>)
+ * Returns TRUE if memory allocation succeeded, FALSE otherwise.
+ *
+ * In addition, mask will be NULL if this fails. Note that gcc is
+ * usually smart enough to know that mask can never be NULL if
+ * CONFIG_CPUMASK_OFFSTACK=n, so does code elimination in that case
+ * too.
+ */
+bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
+{
+ *mask = kmalloc_node(cpumask_size(), flags, node);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+ if (!*mask) {
+ printk(KERN_ERR "=> alloc_cpumask_var: failed!\n");
+ dump_stack();
+ }
+#endif
+ /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */
+ if (*mask) {
+ unsigned char *ptr = (unsigned char *)cpumask_bits(*mask);
+ unsigned int tail;
+ tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long);
+ memset(ptr + cpumask_size() - tail, 0, tail);
+ }
+
+ return *mask != NULL;
+}
+EXPORT_SYMBOL(alloc_cpumask_var_node);
+
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
+{
+ return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var_node);
+
+/**
+ * alloc_cpumask_var - allocate a struct cpumask
+ * @mask: pointer to cpumask_var_t where the cpumask is returned
+ * @flags: GFP_ flags
+ *
+ * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
+ * a nop returning a constant 1 (in <linux/cpumask.h>).
+ *
+ * See alloc_cpumask_var_node.
+ */
+bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+ return alloc_cpumask_var_node(mask, flags, numa_node_id());
+}
+EXPORT_SYMBOL(alloc_cpumask_var);
+
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+ return alloc_cpumask_var(mask, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var);
+
+/**
+ * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.
+ * @mask: pointer to cpumask_var_t where the cpumask is returned
+ *
+ * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
+ * a nop (in <linux/cpumask.h>).
+ * Either returns an allocated (zero-filled) cpumask, or causes the
+ * system to panic.
+ */
+void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
+{
+ *mask = alloc_bootmem(cpumask_size());
+}
+
+/**
+ * free_cpumask_var - frees memory allocated for a struct cpumask.
+ * @mask: cpumask to free
+ *
+ * This is safe on a NULL mask.
+ */
+void free_cpumask_var(cpumask_var_t mask)
+{
+ kfree(mask);
+}
+EXPORT_SYMBOL(free_cpumask_var);
+
+/**
+ * free_bootmem_cpumask_var - frees result of alloc_bootmem_cpumask_var
+ * @mask: cpumask to free
+ */
+void __init free_bootmem_cpumask_var(cpumask_var_t mask)
+{
+ free_bootmem((unsigned long)mask, cpumask_size());
+}
+#endif
diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c
new file mode 100644
index 00000000..7f6dd68d
--- /dev/null
+++ b/lib/crc-ccitt.c
@@ -0,0 +1,69 @@
+/*
+ * linux/lib/crc-ccitt.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc-ccitt.h>
+
+/*
+ * This mysterious table is just the CRC of each possible byte. It can be
+ * computed using the standard bit-at-a-time methods. The polynomial can
+ * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
+ * Add the implicit x^16, and you have the standard CRC-CCITT.
+ */
+u16 const crc_ccitt_table[256] = {
+ 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+ 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+ 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+ 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+ 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+ 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+ 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+ 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+ 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+ 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+ 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+ 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+ 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+ 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+ 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+ 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+ 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+ 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+ 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+ 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+ 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+ 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+ 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+ 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+ 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+ 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+ 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+ 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+ 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+ 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+ 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+ 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+EXPORT_SYMBOL(crc_ccitt_table);
+
+/**
+ * crc_ccitt - recompute the CRC for the data buffer
+ * @crc: previous CRC value
+ * @buffer: data pointer
+ * @len: number of bytes in the buffer
+ */
+u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len)
+{
+ while (len--)
+ crc = crc_ccitt_byte(crc, *buffer++);
+ return crc;
+}
+EXPORT_SYMBOL(crc_ccitt);
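+
+/*
+ * Usage sketch (illustrative): CCITT users such as HDLC framing
+ * typically seed the computation with 0xffff:
+ *
+ *	u16 fcs = crc_ccitt(0xffff, data, len);
+ */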
+
+MODULE_DESCRIPTION("CRC-CCITT calculations");
+MODULE_LICENSE("GPL");
diff --git a/lib/crc-itu-t.c b/lib/crc-itu-t.c
new file mode 100644
index 00000000..a63472b8
--- /dev/null
+++ b/lib/crc-itu-t.c
@@ -0,0 +1,69 @@
+/*
+ * crc-itu-t.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc-itu-t.h>
+
+/** CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^5 + 1) */
+const u16 crc_itu_t_table[256] = {
+ 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
+ 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
+ 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
+ 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
+ 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
+ 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
+ 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
+ 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
+ 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
+ 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
+ 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
+ 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
+ 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
+ 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
+ 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
+ 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
+ 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
+ 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
+ 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
+ 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
+ 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
+ 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
+ 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
+ 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
+ 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
+ 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
+ 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
+ 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
+ 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
+ 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
+ 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
+ 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0
+};
+
+EXPORT_SYMBOL(crc_itu_t_table);
+
+/**
+ * crc_itu_t - Compute the CRC-ITU-T for the data buffer
+ *
+ * @crc: previous CRC value
+ * @buffer: data pointer
+ * @len: number of bytes in the buffer
+ *
+ * Returns the updated CRC value
+ */
+u16 crc_itu_t(u16 crc, const u8 *buffer, size_t len)
+{
+ while (len--)
+ crc = crc_itu_t_byte(crc, *buffer++);
+ return crc;
+}
+EXPORT_SYMBOL(crc_itu_t);
+
+MODULE_DESCRIPTION("CRC ITU-T V.41 calculations");
+MODULE_LICENSE("GPL");
+
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
new file mode 100644
index 00000000..fbbd66ed
--- /dev/null
+++ b/lib/crc-t10dif.c
@@ -0,0 +1,67 @@
+/*
+ * T10 Data Integrity Field CRC16 calculation
+ *
+ * Copyright (c) 2007 Oracle Corporation. All rights reserved.
+ * Written by Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc-t10dif.h>
+
+/* Table generated using the following polynomial:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * generator: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+ 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+ 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+ 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+ 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+ 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+ 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+ 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+ 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+ 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+ 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+ 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+ 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+ 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+ 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+ 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+ 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+ 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+ 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+ 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+ 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+ 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+ 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+ 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+ 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+ 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+ 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+ 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+ 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+ 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+ 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+ 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+ 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
+__u16 crc_t10dif(const unsigned char *buffer, size_t len)
+{
+ __u16 crc = 0;
+ unsigned int i;
+
+ for (i = 0 ; i < len ; i++)
+ crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
+
+ return crc;
+}
+EXPORT_SYMBOL(crc_t10dif);
+
+MODULE_DESCRIPTION("T10 DIF CRC calculation");
+MODULE_LICENSE("GPL");
diff --git a/lib/crc16.c b/lib/crc16.c
new file mode 100644
index 00000000..8737b084
--- /dev/null
+++ b/lib/crc16.c
@@ -0,0 +1,67 @@
+/*
+ * crc16.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc16.h>
+
+/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */
+u16 const crc16_table[256] = {
+ 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
+ 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
+ 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
+ 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
+ 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
+ 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
+ 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
+ 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
+ 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
+ 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
+ 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
+ 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
+ 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
+ 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
+ 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
+ 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
+ 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
+ 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
+ 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
+ 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
+ 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
+ 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
+ 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
+ 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
+ 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
+ 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
+ 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
+ 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
+ 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
+ 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
+ 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
+ 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
+};
+EXPORT_SYMBOL(crc16_table);
+
+/**
+ * crc16 - compute the CRC-16 for the data buffer
+ * @crc: previous CRC value
+ * @buffer: data pointer
+ * @len: number of bytes in the buffer
+ *
+ * Returns the updated CRC value.
+ */
+u16 crc16(u16 crc, u8 const *buffer, size_t len)
+{
+ while (len--)
+ crc = crc16_byte(crc, *buffer++);
+ return crc;
+}
+EXPORT_SYMBOL(crc16);
+
+MODULE_DESCRIPTION("CRC16 calculations");
+MODULE_LICENSE("GPL");
+
diff --git a/lib/crc32.c b/lib/crc32.c
new file mode 100644
index 00000000..4855995f
--- /dev/null
+++ b/lib/crc32.c
@@ -0,0 +1,471 @@
+/*
+ * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com>
+ * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks!
+ * Code was from the public domain, copyright abandoned. Code was
+ * subsequently included in the kernel, thus was re-licensed under the
+ * GNU GPL v2.
+ *
+ * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com>
+ * Same crc32 function was used in 5 other places in the kernel.
+ * I made one version, and deleted the others.
+ * There are various incantations of crc32(). Some use a seed of 0 or ~0.
+ * Some xor at the end with ~0. The generic crc32() function takes
+ * seed as an argument, and doesn't xor at the end. Then individual
+ * users can do whatever they need.
+ * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0.
+ * fs/jffs2 uses seed 0, doesn't xor with ~0.
+ * fs/partitions/efi.c uses seed ~0, xor's with ~0.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/crc32.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <asm/atomic.h>
+#include "crc32defs.h"
+#if CRC_LE_BITS == 8
+# define tole(x) __constant_cpu_to_le32(x)
+#else
+# define tole(x) (x)
+#endif
+
+#if CRC_BE_BITS == 8
+# define tobe(x) __constant_cpu_to_be32(x)
+#else
+# define tobe(x) (x)
+#endif
+#include "crc32table.h"
+
+MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
+MODULE_DESCRIPTION("Ethernet CRC32 calculations");
+MODULE_LICENSE("GPL");
+
+#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8
+
+static inline u32
+crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
+{
+# ifdef __LITTLE_ENDIAN
+# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8)
+# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \
+ tab[2][(crc >> 8) & 255] ^ \
+ tab[1][(crc >> 16) & 255] ^ \
+ tab[0][(crc >> 24) & 255]
+# else
+# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8)
+# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \
+ tab[1][(crc >> 8) & 255] ^ \
+ tab[2][(crc >> 16) & 255] ^ \
+ tab[3][(crc >> 24) & 255]
+# endif
+ const u32 *b;
+ size_t rem_len;
+
+ /* Align it */
+ if (unlikely((long)buf & 3 && len)) {
+ do {
+ DO_CRC(*buf++);
+ } while ((--len) && ((long)buf)&3);
+ }
+ rem_len = len & 3;
+ /* load data 32 bits wide, xor data 32 bits wide. */
+ len = len >> 2;
+ b = (const u32 *)buf;
+ for (--b; len; --len) {
+ crc ^= *++b; /* use pre increment for speed */
+ DO_CRC4;
+ }
+ len = rem_len;
+ /* And the last few bytes */
+ if (len) {
+ u8 *p = (u8 *)(b + 1) - 1;
+ do {
+ DO_CRC(*++p); /* use pre increment for speed */
+ } while (--len);
+ }
+ return crc;
+#undef DO_CRC
+#undef DO_CRC4
+}
+#endif
+/**
+ * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
+ * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
+ * other uses, or the previous crc32 value if computing incrementally.
+ * @p: pointer to buffer over which CRC is run
+ * @len: length of buffer @p
+ */
+u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
+
+#if CRC_LE_BITS == 1
+/*
+ * In fact, the table-based code will work in this case, but it can be
+ * simplified by inlining the table in ?: form.
+ */
+
+u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+{
+ int i;
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+ }
+ return crc;
+}
+#else /* Table-based approach */
+
+u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+{
+# if CRC_LE_BITS == 8
+ const u32 (*tab)[] = crc32table_le;
+
+ crc = __cpu_to_le32(crc);
+ crc = crc32_body(crc, p, len, tab);
+ return __le32_to_cpu(crc);
+# elif CRC_LE_BITS == 4
+ while (len--) {
+ crc ^= *p++;
+ crc = (crc >> 4) ^ crc32table_le[crc & 15];
+ crc = (crc >> 4) ^ crc32table_le[crc & 15];
+ }
+ return crc;
+# elif CRC_LE_BITS == 2
+ while (len--) {
+ crc ^= *p++;
+ crc = (crc >> 2) ^ crc32table_le[crc & 3];
+ crc = (crc >> 2) ^ crc32table_le[crc & 3];
+ crc = (crc >> 2) ^ crc32table_le[crc & 3];
+ crc = (crc >> 2) ^ crc32table_le[crc & 3];
+ }
+ return crc;
+# endif
+}
+#endif
+
+/**
+ * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
+ * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for
+ * other uses, or the previous crc32 value if computing incrementally.
+ * @p: pointer to buffer over which CRC is run
+ * @len: length of buffer @p
+ */
+u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
+
+#if CRC_BE_BITS == 1
+/*
+ * In fact, the table-based code will work in this case, but it can be
+ * simplified by inlining the table in ?: form.
+ */
+
+u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+{
+ int i;
+ while (len--) {
+ crc ^= *p++ << 24;
+ for (i = 0; i < 8; i++)
+ crc =
+ (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE :
+ 0);
+ }
+ return crc;
+}
+
+#else /* Table-based approach */
+u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+{
+# if CRC_BE_BITS == 8
+ const u32 (*tab)[] = crc32table_be;
+
+ crc = __cpu_to_be32(crc);
+ crc = crc32_body(crc, p, len, tab);
+ return __be32_to_cpu(crc);
+# elif CRC_BE_BITS == 4
+ while (len--) {
+ crc ^= *p++ << 24;
+ crc = (crc << 4) ^ crc32table_be[crc >> 28];
+ crc = (crc << 4) ^ crc32table_be[crc >> 28];
+ }
+ return crc;
+# elif CRC_BE_BITS == 2
+ while (len--) {
+ crc ^= *p++ << 24;
+ crc = (crc << 2) ^ crc32table_be[crc >> 30];
+ crc = (crc << 2) ^ crc32table_be[crc >> 30];
+ crc = (crc << 2) ^ crc32table_be[crc >> 30];
+ crc = (crc << 2) ^ crc32table_be[crc >> 30];
+ }
+ return crc;
+# endif
+}
+#endif
+
+EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(crc32_be);
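+
+/*
+ * Usage sketch (illustrative): the conventional Ethernet CRC of a
+ * buffer seeds with ~0 and inverts the result, matching the seed/xor
+ * conventions described in the header comment above:
+ *
+ *	u32 crc = crc32_le(~0U, buf, len) ^ ~0U;
+ */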
+
+/*
+ * A brief CRC tutorial.
+ *
+ * A CRC is a long-division remainder. You add the CRC to the message,
+ * and the whole thing (message+CRC) is a multiple of the given
+ * CRC polynomial. To check the CRC, you can either check that the
+ * CRC matches the recomputed value, *or* you can check that the
+ * remainder computed on the message+CRC is 0. This latter approach
+ * is used by a lot of hardware implementations, and is why so many
+ * protocols put the end-of-frame flag after the CRC.
+ *
+ * It's actually the same long division you learned in school, except that
+ * - We're working in binary, so the digits are only 0 and 1, and
+ * - When dividing polynomials, there are no carries. Rather than add and
+ * subtract, we just xor. Thus, we tend to get a bit sloppy about
+ * the difference between adding and subtracting.
+ *
+ * A 32-bit CRC polynomial is actually 33 bits long. But since it's
+ * 33 bits long, bit 32 is always going to be set, so usually the CRC
+ * is written in hex with the most significant bit omitted. (If you're
+ * familiar with the IEEE 754 floating-point format, it's the same idea.)
+ *
+ * Note that a CRC is computed over a string of *bits*, so you have
+ * to decide on the endianness of the bits within each byte. To get
+ * the best error-detecting properties, this should correspond to the
+ * order they're actually sent. For example, standard RS-232 serial is
+ * little-endian; the most significant bit (sometimes used for parity)
+ * is sent last. And when appending a CRC word to a message, you should
+ * do it in the right order, matching the endianness.
+ *
+ * Just like with ordinary division, the remainder is always smaller than
+ * the divisor (the CRC polynomial) you're dividing by. Each step of the
+ * division, you take one more digit (bit) of the dividend and append it
+ * to the current remainder. Then you figure out the appropriate multiple
+ * of the divisor to subtract to bring the remainder back into range.
+ * In binary, it's easy - it has to be either 0 or 1, and to make the
+ * XOR cancel, it's just a copy of bit 32 of the remainder.
+ *
+ * When computing a CRC, we don't care about the quotient, so we can
+ * throw the quotient bit away, but subtract the appropriate multiple of
+ * the polynomial from the remainder and we're back to where we started,
+ * ready to process the next bit.
+ *
+ * A big-endian CRC written this way would be coded like:
+ * for (i = 0; i < input_bits; i++) {
+ * multiple = remainder & 0x80000000 ? CRCPOLY : 0;
+ * remainder = (remainder << 1 | next_input_bit()) ^ multiple;
+ * }
+ * Notice how, to get at bit 32 of the shifted remainder, we look
+ * at bit 31 of the remainder *before* shifting it.
+ *
+ * But also notice how the next_input_bit() bits we're shifting into
+ * the remainder don't actually affect any decision-making until
+ * 32 bits later. Thus, the first 32 cycles of this are pretty boring.
+ * Also, to add the CRC to a message, we need a 32-bit-long hole for it at
+ * the end, so we have to add 32 extra cycles shifting in zeros at the
+ * end of every message.
+ *
+ * So the standard trick is to rearrange merging in the next_input_bit()
+ * until the moment it's needed. Then the first 32 cycles can be precomputed,
+ * and merging in the final 32 zero bits to make room for the CRC can be
+ * skipped entirely.
+ * This changes the code to:
+ * for (i = 0; i < input_bits; i++) {
+ * remainder ^= next_input_bit() << 31;
+ * multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+ * remainder = (remainder << 1) ^ multiple;
+ * }
+ * With this optimization, the little-endian code is simpler:
+ * for (i = 0; i < input_bits; i++) {
+ * remainder ^= next_input_bit();
+ * multiple = (remainder & 1) ? CRCPOLY : 0;
+ * remainder = (remainder >> 1) ^ multiple;
+ * }
+ *
+ * Note that the other details of endianness have been hidden in CRCPOLY
+ * (which must be bit-reversed) and next_input_bit().
+ *
+ * However, as long as next_input_bit is returning the bits in a sensible
+ * order, we can actually do the merging 8 or more bits at a time rather
+ * than one bit at a time:
+ * for (i = 0; i < input_bytes; i++) {
+ * remainder ^= next_input_byte() << 24;
+ * for (j = 0; j < 8; j++) {
+ * multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+ * remainder = (remainder << 1) ^ multiple;
+ * }
+ * }
+ * Or in little-endian:
+ * for (i = 0; i < input_bytes; i++) {
+ * remainder ^= next_input_byte();
+ * for (j = 0; j < 8; j++) {
+ * multiple = (remainder & 1) ? CRCPOLY : 0;
+ * remainder = (remainder >> 1) ^ multiple;
+ * }
+ * }
+ * If the input is a multiple of 32 bits, you can even XOR in a 32-bit
+ * word at a time and increase the inner loop count to 32.
+ *
+ * You can also mix and match the two loop styles, for example doing the
+ * bulk of a message byte-at-a-time and adding bit-at-a-time processing
+ * for any fractional bytes at the end.
+ *
+ * The only remaining optimization is to the byte-at-a-time table method.
+ * Here, rather than just shifting one bit of the remainder to decide
+ * on the correct multiple to subtract, we can shift a byte at a time.
+ * This produces a 40-bit (rather than a 33-bit) intermediate remainder,
+ * but again the multiple of the polynomial to subtract depends only on
+ * the high bits, the high 8 bits in this case.
+ *
+ * The multiple we need in that case is the low 32 bits of a 40-bit
+ * value whose high 8 bits are given, and which is a multiple of the
+ * generator polynomial. This is simply the CRC-32 of the given
+ * one-byte message.
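+ *
+ * As a sketch (not the exact code in this file; table[] is an
+ * illustrative 256-entry array of such one-byte CRCs), the
+ * little-endian byte-at-a-time loop then collapses to:
+ * for (i = 0; i < input_bytes; i++) {
+ * remainder = (remainder >> 8) ^
+ * table[(remainder ^ next_input_byte()) & 0xff];
+ * }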
+ *
+ * Two more details: normally, appending zero bits to a message which
+ * is already a multiple of a polynomial produces a larger multiple of that
+ * polynomial. To enable a CRC to detect this condition, it's common to
+ * invert the CRC before appending it. This makes the remainder of the
+ * message+crc come out not as zero, but some fixed non-zero value.
+ *
+ * The same problem applies to zero bits prepended to the message, and
+ * a similar solution is used. Instead of starting with a remainder of
+ * 0, an initial remainder of all ones is used. As long as you start
+ * the same way on decoding, it doesn't make a difference.
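+ *
+ * Putting both conventions together: the common CRC-32 used by
+ * Ethernet and zlib applies the all-ones initial remainder and the
+ * final inversion, i.e. roughly crc32_le(~0, buf, len) ^ ~0 in terms
+ * of the functions above.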
+ */
+
+#ifdef UNITTEST
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#if 0 /* Not used at present */
+static void
+buf_dump(char const *prefix, unsigned char const *buf, size_t len)
+{
+ fputs(prefix, stdout);
+ while (len--)
+ printf(" %02x", *buf++);
+ putchar('\n');
+}
+#endif
+
+static void bytereverse(unsigned char *buf, size_t len)
+{
+ while (len--) {
+ unsigned char x = bitrev8(*buf);
+ *buf++ = x;
+ }
+}
+
+static void random_garbage(unsigned char *buf, size_t len)
+{
+ while (len--)
+ *buf++ = (unsigned char) random();
+}
+
+#if 0 /* Not used at present */
+static void store_le(u32 x, unsigned char *buf)
+{
+ buf[0] = (unsigned char) x;
+ buf[1] = (unsigned char) (x >> 8);
+ buf[2] = (unsigned char) (x >> 16);
+ buf[3] = (unsigned char) (x >> 24);
+}
+#endif
+
+static void store_be(u32 x, unsigned char *buf)
+{
+ buf[0] = (unsigned char) (x >> 24);
+ buf[1] = (unsigned char) (x >> 16);
+ buf[2] = (unsigned char) (x >> 8);
+ buf[3] = (unsigned char) x;
+}
+
+/*
+ * This checks that CRC(buf + CRC(buf)) = 0, and that
+ * CRC commutes with bit-reversal. This has the side effect
+ * of bytewise bit-reversing the input buffer, and returns
+ * the CRC of the reversed buffer.
+ */
+static u32 test_step(u32 init, unsigned char *buf, size_t len)
+{
+ u32 crc1, crc2;
+ size_t i;
+
+ crc1 = crc32_be(init, buf, len);
+ store_be(crc1, buf + len);
+ crc2 = crc32_be(init, buf, len + 4);
+ if (crc2)
+ printf("\nCRC cancellation fail: 0x%08x should be 0\n",
+ crc2);
+
+ for (i = 0; i <= len + 4; i++) {
+ crc2 = crc32_be(init, buf, i);
+ crc2 = crc32_be(crc2, buf + i, len + 4 - i);
+ if (crc2)
+ printf("\nCRC split fail: 0x%08x\n", crc2);
+ }
+
+ /* Now swap it around for the other test */
+
+ bytereverse(buf, len + 4);
+ init = bitrev32(init);
+ crc2 = bitrev32(crc1);
+ if (crc1 != bitrev32(crc2))
+ printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n",
+ crc1, crc2, bitrev32(crc2));
+ crc1 = crc32_le(init, buf, len);
+ if (crc1 != crc2)
+ printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1,
+ crc2);
+ crc2 = crc32_le(init, buf, len + 4);
+ if (crc2)
+ printf("\nCRC cancellation fail: 0x%08x should be 0\n",
+ crc2);
+
+ for (i = 0; i <= len + 4; i++) {
+ crc2 = crc32_le(init, buf, i);
+ crc2 = crc32_le(crc2, buf + i, len + 4 - i);
+ if (crc2)
+ printf("\nCRC split fail: 0x%08x\n", crc2);
+ }
+
+ return crc1;
+}
+
+#define SIZE 64
+#define INIT1 0
+#define INIT2 0
+
+int main(void)
+{
+ unsigned char buf1[SIZE + 4];
+ unsigned char buf2[SIZE + 4];
+ unsigned char buf3[SIZE + 4];
+ int i, j;
+ u32 crc1, crc2, crc3;
+
+ for (i = 0; i <= SIZE; i++) {
+ printf("\rTesting length %d...", i);
+ fflush(stdout);
+ random_garbage(buf1, i);
+ random_garbage(buf2, i);
+ for (j = 0; j < i; j++)
+ buf3[j] = buf1[j] ^ buf2[j];
+
+ crc1 = test_step(INIT1, buf1, i);
+ crc2 = test_step(INIT2, buf2, i);
+ /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */
+ crc3 = test_step(INIT1 ^ INIT2, buf3, i);
+ if (crc3 != (crc1 ^ crc2))
+ printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n",
+ crc3, crc1, crc2);
+ }
+ printf("\nAll test complete. No failures expected.\n");
+ return 0;
+}
+
+#endif /* UNITTEST */
diff --git a/lib/crc32defs.h b/lib/crc32defs.h
new file mode 100644
index 00000000..9b6773d7
--- /dev/null
+++ b/lib/crc32defs.h
@@ -0,0 +1,32 @@
+/*
+ * There are multiple 16-bit CRC polynomials in common use, but this is
+ * *the* standard CRC-32 polynomial, first popularized by Ethernet.
+ * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0
+ */
+#define CRCPOLY_LE 0xedb88320
+#define CRCPOLY_BE 0x04c11db7
+
+/* How many bits at a time to use. Requires a table of 4<<CRC_xx_BITS bytes. */
+/* For less performance-sensitive code, use 4 */
+#ifndef CRC_LE_BITS
+# define CRC_LE_BITS 8
+#endif
+#ifndef CRC_BE_BITS
+# define CRC_BE_BITS 8
+#endif
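+
+/*
+ * For reference (plain arithmetic, not a config value): with
+ * CRC_xx_BITS == 8 each table is 4 << 8 = 1024 bytes; with
+ * CRC_xx_BITS == 4 it shrinks to 4 << 4 = 64 bytes.
+ */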
+
+/*
+ * Little-endian CRC computation. Used with serial bit streams sent
+ * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC.
+ */
+#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || (CRC_LE_BITS & (CRC_LE_BITS-1))
+# error CRC_LE_BITS must be a power of 2 between 1 and 8
+#endif
+
+/*
+ * Big-endian CRC computation. Used with serial bit streams sent
+ * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC.
+ */
+#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || (CRC_BE_BITS & (CRC_BE_BITS-1))
+# error CRC_BE_BITS must be a power of 2 between 1 and 8
+#endif
diff --git a/lib/crc7.c b/lib/crc7.c
new file mode 100644
index 00000000..f1c3a144
--- /dev/null
+++ b/lib/crc7.c
@@ -0,0 +1,68 @@
+/*
+ * crc7.c
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/crc7.h>
+
+
+/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */
+const u8 crc7_syndrome_table[256] = {
+ 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
+ 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
+ 0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26,
+ 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
+ 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
+ 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
+ 0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14,
+ 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
+ 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
+ 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
+ 0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42,
+ 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
+ 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
+ 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
+ 0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70,
+ 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
+ 0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e,
+ 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
+ 0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67,
+ 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
+ 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
+ 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
+ 0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55,
+ 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
+ 0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a,
+ 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
+ 0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03,
+ 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
+ 0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28,
+ 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
+ 0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31,
+ 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79
+};
+EXPORT_SYMBOL(crc7_syndrome_table);
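+
+/*
+ * For context: the per-byte step used below is a static inline in
+ * <linux/crc7.h>; its body is essentially
+ *
+ * static inline u8 crc7_byte(u8 crc, u8 data)
+ * {
+ * return crc7_syndrome_table[(crc << 1) ^ data];
+ * }
+ */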
+
+/**
+ * crc7 - update the CRC7 for the data buffer
+ * @crc: previous CRC7 value
+ * @buffer: data pointer
+ * @len: number of bytes in the buffer
+ * Context: any
+ *
+ * Returns the updated CRC7 value.
+ */
+u8 crc7(u8 crc, const u8 *buffer, size_t len)
+{
+ while (len--)
+ crc = crc7_byte(crc, *buffer++);
+ return crc;
+}
+EXPORT_SYMBOL(crc7);
+
+MODULE_DESCRIPTION("CRC7 calculations");
+MODULE_LICENSE("GPL");
diff --git a/lib/ctype.c b/lib/ctype.c
new file mode 100644
index 00000000..26baa620
--- /dev/null
+++ b/lib/ctype.c
@@ -0,0 +1,36 @@
+/*
+ * linux/lib/ctype.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <linux/ctype.h>
+#include <linux/module.h>
+
+const unsigned char _ctype[] = {
+_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */
+_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */
+_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */
+_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */
+_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */
+_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */
+_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */
+_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */
+_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */
+_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */
+_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */
+_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */
+_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */
+_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */
+_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
+_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
+_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
+_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
+_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
+_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
+_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */
+
+EXPORT_SYMBOL(_ctype);
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
new file mode 100644
index 00000000..5bf0020b
--- /dev/null
+++ b/lib/debug_locks.c
@@ -0,0 +1,48 @@
+/*
+ * lib/debug_locks.c
+ *
+ * Generic place for common debugging facilities for various locks:
+ * spinlocks, rwlocks, mutexes and rwsems.
+ *
+ * Started by Ingo Molnar:
+ *
+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/kernel.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/debug_locks.h>
+
+/*
+ * We want to turn all lock-debugging facilities on/off at once,
+ * via a global flag. The reason is that once a single bug has been
+ * detected and reported, there might be cascade of followup bugs
+ * that would just muddy the log. So we report the first one and
+ * shut up after that.
+ */
+int debug_locks = 1;
+EXPORT_SYMBOL_GPL(debug_locks);
+
+/*
+ * The locking-testsuite uses <debug_locks_silent> to get a
+ * 'silent failure': nothing is printed to the console when
+ * a locking bug is detected.
+ */
+int debug_locks_silent;
+
+/*
+ * Generic 'turn off all lock debugging' function:
+ */
+int debug_locks_off(void)
+{
+ if (__debug_locks_off()) {
+ if (!debug_locks_silent) {
+ oops_in_progress = 1;
+ console_verbose();
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
new file mode 100644
index 00000000..deebcc57
--- /dev/null
+++ b/lib/debugobjects.c
@@ -0,0 +1,1049 @@
+/*
+ * Generic infrastructure for lifetime debugging of objects.
+ *
+ * Started by Thomas Gleixner
+ *
+ * Copyright (C) 2008, Thomas Gleixner <tglx@linutronix.de>
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#include <linux/debugobjects.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+
+#define ODEBUG_HASH_BITS 14
+#define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS)
+
+#define ODEBUG_POOL_SIZE 512
+#define ODEBUG_POOL_MIN_LEVEL 256
+
+#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT
+#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT)
+#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1))
+
+struct debug_bucket {
+ struct hlist_head list;
+ raw_spinlock_t lock;
+};
+
+static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
+
+static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
+
+static DEFINE_RAW_SPINLOCK(pool_lock);
+
+static HLIST_HEAD(obj_pool);
+
+static int obj_pool_min_free = ODEBUG_POOL_SIZE;
+static int obj_pool_free = ODEBUG_POOL_SIZE;
+static int obj_pool_used;
+static int obj_pool_max_used;
+static struct kmem_cache *obj_cache;
+
+static int debug_objects_maxchain __read_mostly;
+static int debug_objects_fixups __read_mostly;
+static int debug_objects_warnings __read_mostly;
+static int debug_objects_enabled __read_mostly
+ = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
+
+static struct debug_obj_descr *descr_test __read_mostly;
+
+static void free_obj_work(struct work_struct *work);
+static DECLARE_WORK(debug_obj_work, free_obj_work);
+
+static int __init enable_object_debug(char *str)
+{
+ debug_objects_enabled = 1;
+ return 0;
+}
+
+static int __init disable_object_debug(char *str)
+{
+ debug_objects_enabled = 0;
+ return 0;
+}
+
+early_param("debug_objects", enable_object_debug);
+early_param("no_debug_objects", disable_object_debug);
+
+static const char *obj_states[ODEBUG_STATE_MAX] = {
+ [ODEBUG_STATE_NONE] = "none",
+ [ODEBUG_STATE_INIT] = "initialized",
+ [ODEBUG_STATE_INACTIVE] = "inactive",
+ [ODEBUG_STATE_ACTIVE] = "active",
+ [ODEBUG_STATE_DESTROYED] = "destroyed",
+ [ODEBUG_STATE_NOTAVAILABLE] = "not available",
+};
+
+static int fill_pool(void)
+{
+ gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+ struct debug_obj *new;
+ unsigned long flags;
+
+ if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL))
+ return obj_pool_free;
+
+ if (unlikely(!obj_cache))
+ return obj_pool_free;
+
+ while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) {
+
+ new = kmem_cache_zalloc(obj_cache, gfp);
+ if (!new)
+ return obj_pool_free;
+
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ hlist_add_head(&new->node, &obj_pool);
+ obj_pool_free++;
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+ }
+ return obj_pool_free;
+}
+
+/*
+ * Lookup an object in the hash bucket.
+ */
+static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
+{
+ struct hlist_node *node;
+ struct debug_obj *obj;
+ int cnt = 0;
+
+ hlist_for_each_entry(obj, node, &b->list, node) {
+ cnt++;
+ if (obj->object == addr)
+ return obj;
+ }
+ if (cnt > debug_objects_maxchain)
+ debug_objects_maxchain = cnt;
+
+ return NULL;
+}
+
+/*
+ * Allocate a new object. If the pool is empty, switch off the debugger.
+ * Must be called with interrupts disabled.
+ */
+static struct debug_obj *
+alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
+{
+ struct debug_obj *obj = NULL;
+
+ raw_spin_lock(&pool_lock);
+ if (obj_pool.first) {
+ obj = hlist_entry(obj_pool.first, typeof(*obj), node);
+
+ obj->object = addr;
+ obj->descr = descr;
+ obj->state = ODEBUG_STATE_NONE;
+ obj->astate = 0;
+ hlist_del(&obj->node);
+
+ hlist_add_head(&obj->node, &b->list);
+
+ obj_pool_used++;
+ if (obj_pool_used > obj_pool_max_used)
+ obj_pool_max_used = obj_pool_used;
+
+ obj_pool_free--;
+ if (obj_pool_free < obj_pool_min_free)
+ obj_pool_min_free = obj_pool_free;
+ }
+ raw_spin_unlock(&pool_lock);
+
+ return obj;
+}
+
+/*
+ * workqueue function to free objects.
+ */
+static void free_obj_work(struct work_struct *work)
+{
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ while (obj_pool_free > ODEBUG_POOL_SIZE) {
+ obj = hlist_entry(obj_pool.first, typeof(*obj), node);
+ hlist_del(&obj->node);
+ obj_pool_free--;
+ /*
+ * We release pool_lock across kmem_cache_free() to
+ * avoid contention on pool_lock.
+ */
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+ kmem_cache_free(obj_cache, obj);
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ }
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+}
+
+/*
+ * Put the object back into the pool and schedule work to free objects
+ * if necessary.
+ */
+static void free_object(struct debug_obj *obj)
+{
+ unsigned long flags;
+ int sched = 0;
+
+ raw_spin_lock_irqsave(&pool_lock, flags);
+ /*
+ * schedule work when the pool is filled and the cache is
+ * initialized:
+ */
+ if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
+ sched = !work_pending(&debug_obj_work);
+ hlist_add_head(&obj->node, &obj_pool);
+ obj_pool_free++;
+ obj_pool_used--;
+ raw_spin_unlock_irqrestore(&pool_lock, flags);
+ if (sched)
+ schedule_work(&debug_obj_work);
+}
+
+/*
+ * We ran out of memory. That means we probably have tons of objects
+ * allocated.
+ */
+static void debug_objects_oom(void)
+{
+ struct debug_bucket *db = obj_hash;
+ struct hlist_node *node, *tmp;
+ HLIST_HEAD(freelist);
+ struct debug_obj *obj;
+ unsigned long flags;
+ int i;
+
+ printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n");
+
+ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
+ raw_spin_lock_irqsave(&db->lock, flags);
+ hlist_move_list(&db->list, &freelist);
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+
+ /* Now free them */
+ hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+ hlist_del(&obj->node);
+ free_object(obj);
+ }
+ }
+}
+
+/*
+ * We use the pfn of the address for the hash. That way we can check
+ * for freed objects simply by checking the affected bucket.
+ */
+static struct debug_bucket *get_bucket(unsigned long addr)
+{
+ unsigned long hash;
+
+ hash = hash_long((addr >> ODEBUG_CHUNK_SHIFT), ODEBUG_HASH_BITS);
+ return &obj_hash[hash];
+}
+
+static void debug_print_object(struct debug_obj *obj, char *msg)
+{
+ static int limit;
+
+ if (limit < 5 && obj->descr != descr_test) {
+ limit++;
+ WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
+ "object type: %s\n",
+ msg, obj_states[obj->state], obj->astate,
+ obj->descr->name);
+ }
+ debug_objects_warnings++;
+}
+
+/*
+ * Try to repair the damage, so we have a better chance to get useful
+ * debug output.
+ */
+static void
+debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state),
+ void * addr, enum debug_obj_state state)
+{
+ if (fixup)
+ debug_objects_fixups += fixup(addr, state);
+}
+
+static void debug_object_is_on_stack(void *addr, int onstack)
+{
+ int is_on_stack;
+ static int limit;
+
+ if (limit > 4)
+ return;
+
+ is_on_stack = object_is_on_stack(addr);
+ if (is_on_stack == onstack)
+ return;
+
+ limit++;
+ if (is_on_stack)
+ printk(KERN_WARNING
+ "ODEBUG: object is on stack, but not annotated\n");
+ else
+ printk(KERN_WARNING
+ "ODEBUG: object is not on stack, but annotated\n");
+ WARN_ON(1);
+}
+
+static void
+__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
+{
+ enum debug_obj_state state;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ fill_pool();
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (!obj) {
+ obj = alloc_object(addr, db, descr);
+ if (!obj) {
+ debug_objects_enabled = 0;
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_objects_oom();
+ return;
+ }
+ debug_object_is_on_stack(addr, onstack);
+ }
+
+ switch (obj->state) {
+ case ODEBUG_STATE_NONE:
+ case ODEBUG_STATE_INIT:
+ case ODEBUG_STATE_INACTIVE:
+ obj->state = ODEBUG_STATE_INIT;
+ break;
+
+ case ODEBUG_STATE_ACTIVE:
+ debug_print_object(obj, "init");
+ state = obj->state;
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_object_fixup(descr->fixup_init, addr, state);
+ return;
+
+ case ODEBUG_STATE_DESTROYED:
+ debug_print_object(obj, "init");
+ break;
+ default:
+ break;
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+/**
+ * debug_object_init - debug checks when an object is initialized
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_init(void *addr, struct debug_obj_descr *descr)
+{
+ if (!debug_objects_enabled)
+ return;
+
+ __debug_object_init(addr, descr, 0);
+}
+
+/**
+ * debug_object_init_on_stack - debug checks when an object on stack is
+ * initialized
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr)
+{
+ if (!debug_objects_enabled)
+ return;
+
+ __debug_object_init(addr, descr, 1);
+}
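+
+/*
+ * Usage sketch (names are illustrative, not part of this file): a
+ * subsystem describes its object type once and calls the hooks from
+ * its own init/activate paths, e.g.
+ *
+ * static struct debug_obj_descr my_timer_debug_descr = {
+ * .name = "my_timer",
+ * .fixup_activate = my_timer_fixup_activate,
+ * };
+ *
+ * void my_timer_init(struct my_timer *t)
+ * {
+ * debug_object_init(t, &my_timer_debug_descr);
+ * ...
+ * }
+ */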
+
+/**
+ * debug_object_activate - debug checks when an object is activated
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_activate(void *addr, struct debug_obj_descr *descr)
+{
+ enum debug_obj_state state;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ if (!debug_objects_enabled)
+ return;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (obj) {
+ switch (obj->state) {
+ case ODEBUG_STATE_INIT:
+ case ODEBUG_STATE_INACTIVE:
+ obj->state = ODEBUG_STATE_ACTIVE;
+ break;
+
+ case ODEBUG_STATE_ACTIVE:
+ debug_print_object(obj, "activate");
+ state = obj->state;
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_object_fixup(descr->fixup_activate, addr, state);
+ return;
+
+ case ODEBUG_STATE_DESTROYED:
+ debug_print_object(obj, "activate");
+ break;
+ default:
+ break;
+ }
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ return;
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ /*
+ * This happens when a static object is activated. We
+ * let the type specific code decide whether this is
+ * a legitimate activation or not.
+ */
+ debug_object_fixup(descr->fixup_activate, addr,
+ ODEBUG_STATE_NOTAVAILABLE);
+}
+
+/**
+ * debug_object_deactivate - debug checks when an object is deactivated
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
+{
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ if (!debug_objects_enabled)
+ return;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (obj) {
+ switch (obj->state) {
+ case ODEBUG_STATE_INIT:
+ case ODEBUG_STATE_INACTIVE:
+ case ODEBUG_STATE_ACTIVE:
+ if (!obj->astate)
+ obj->state = ODEBUG_STATE_INACTIVE;
+ else
+ debug_print_object(obj, "deactivate");
+ break;
+
+ case ODEBUG_STATE_DESTROYED:
+ debug_print_object(obj, "deactivate");
+ break;
+ default:
+ break;
+ }
+ } else {
+ struct debug_obj o = { .object = addr,
+ .state = ODEBUG_STATE_NOTAVAILABLE,
+ .descr = descr };
+
+ debug_print_object(&o, "deactivate");
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+/**
+ * debug_object_destroy - debug checks when an object is destroyed
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
+{
+ enum debug_obj_state state;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ if (!debug_objects_enabled)
+ return;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (!obj)
+ goto out_unlock;
+
+ switch (obj->state) {
+ case ODEBUG_STATE_NONE:
+ case ODEBUG_STATE_INIT:
+ case ODEBUG_STATE_INACTIVE:
+ obj->state = ODEBUG_STATE_DESTROYED;
+ break;
+ case ODEBUG_STATE_ACTIVE:
+ debug_print_object(obj, "destroy");
+ state = obj->state;
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_object_fixup(descr->fixup_destroy, addr, state);
+ return;
+
+ case ODEBUG_STATE_DESTROYED:
+ debug_print_object(obj, "destroy");
+ break;
+ default:
+ break;
+ }
+out_unlock:
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+/**
+ * debug_object_free - debug checks when an object is freed
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ */
+void debug_object_free(void *addr, struct debug_obj_descr *descr)
+{
+ enum debug_obj_state state;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ if (!debug_objects_enabled)
+ return;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (!obj)
+ goto out_unlock;
+
+ switch (obj->state) {
+ case ODEBUG_STATE_ACTIVE:
+ debug_print_object(obj, "free");
+ state = obj->state;
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_object_fixup(descr->fixup_free, addr, state);
+ return;
+ default:
+ hlist_del(&obj->node);
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ free_object(obj);
+ return;
+ }
+out_unlock:
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+/**
+ * debug_object_active_state - debug checks object usage state machine
+ * @addr: address of the object
+ * @descr: pointer to an object specific debug description structure
+ * @expect: expected state
+ * @next: state to move to if expected state is found
+ */
+void
+debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+ unsigned int expect, unsigned int next)
+{
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+
+ if (!debug_objects_enabled)
+ return;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (obj) {
+ switch (obj->state) {
+ case ODEBUG_STATE_ACTIVE:
+ if (obj->astate == expect)
+ obj->astate = next;
+ else
+ debug_print_object(obj, "active_state");
+ break;
+
+ default:
+ debug_print_object(obj, "active_state");
+ break;
+ }
+ } else {
+ struct debug_obj o = { .object = addr,
+ .state = ODEBUG_STATE_NOTAVAILABLE,
+ .descr = descr };
+
+ debug_print_object(&o, "active_state");
+ }
+
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
+#ifdef CONFIG_DEBUG_OBJECTS_FREE
+static void __debug_check_no_obj_freed(const void *address, unsigned long size)
+{
+ unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
+ struct hlist_node *node, *tmp;
+ HLIST_HEAD(freelist);
+ struct debug_obj_descr *descr;
+ enum debug_obj_state state;
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ int cnt;
+
+ saddr = (unsigned long) address;
+ eaddr = saddr + size;
+ paddr = saddr & ODEBUG_CHUNK_MASK;
+ chunks = ((eaddr - paddr) + (ODEBUG_CHUNK_SIZE - 1));
+ chunks >>= ODEBUG_CHUNK_SHIFT;
+
+ for (; chunks > 0; chunks--, paddr += ODEBUG_CHUNK_SIZE) {
+ db = get_bucket(paddr);
+
+repeat:
+ cnt = 0;
+ raw_spin_lock_irqsave(&db->lock, flags);
+ hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
+ cnt++;
+ oaddr = (unsigned long) obj->object;
+ if (oaddr < saddr || oaddr >= eaddr)
+ continue;
+
+ switch (obj->state) {
+ case ODEBUG_STATE_ACTIVE:
+ debug_print_object(obj, "free");
+ descr = obj->descr;
+ state = obj->state;
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ debug_object_fixup(descr->fixup_free,
+ (void *) oaddr, state);
+ goto repeat;
+ default:
+ hlist_del(&obj->node);
+ hlist_add_head(&obj->node, &freelist);
+ break;
+ }
+ }
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+
+ /* Now free them */
+ hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+ hlist_del(&obj->node);
+ free_object(obj);
+ }
+
+ if (cnt > debug_objects_maxchain)
+ debug_objects_maxchain = cnt;
+ }
+}
+
+void debug_check_no_obj_freed(const void *address, unsigned long size)
+{
+ if (debug_objects_enabled)
+ __debug_check_no_obj_freed(address, size);
+}
+#endif
+
+#ifdef CONFIG_DEBUG_FS
+
+static int debug_stats_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
+ seq_printf(m, "warnings :%d\n", debug_objects_warnings);
+ seq_printf(m, "fixups :%d\n", debug_objects_fixups);
+ seq_printf(m, "pool_free :%d\n", obj_pool_free);
+ seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
+ seq_printf(m, "pool_used :%d\n", obj_pool_used);
+ seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+ return 0;
+}
+
+static int debug_stats_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, debug_stats_show, NULL);
+}
+
+static const struct file_operations debug_stats_fops = {
+ .open = debug_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init debug_objects_init_debugfs(void)
+{
+ struct dentry *dbgdir, *dbgstats;
+
+ if (!debug_objects_enabled)
+ return 0;
+
+ dbgdir = debugfs_create_dir("debug_objects", NULL);
+ if (!dbgdir)
+ return -ENOMEM;
+
+ dbgstats = debugfs_create_file("stats", 0444, dbgdir, NULL,
+ &debug_stats_fops);
+ if (!dbgstats)
+ goto err;
+
+ return 0;
+
+err:
+ debugfs_remove(dbgdir);
+
+ return -ENOMEM;
+}
+__initcall(debug_objects_init_debugfs);
+
+#else
+static inline void debug_objects_init_debugfs(void) { }
+#endif
+
+#ifdef CONFIG_DEBUG_OBJECTS_SELFTEST
+
+/* Random data structure for the self test */
+struct self_test {
+ unsigned long dummy1[6];
+ int static_init;
+ unsigned long dummy2[3];
+};
+
+static __initdata struct debug_obj_descr descr_type_test;
+
+/*
+ * fixup_init is called when:
+ * - an active object is initialized
+ */
+static int __init fixup_init(void *addr, enum debug_obj_state state)
+{
+ struct self_test *obj = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ debug_object_deactivate(obj, &descr_type_test);
+ debug_object_init(obj, &descr_type_test);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_activate is called when:
+ * - an active object is activated
+ * - an unknown object is activated (might be a statically initialized object)
+ */
+static int __init fixup_activate(void *addr, enum debug_obj_state state)
+{
+ struct self_test *obj = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_NOTAVAILABLE:
+ if (obj->static_init == 1) {
+ debug_object_init(obj, &descr_type_test);
+ debug_object_activate(obj, &descr_type_test);
+ /*
+ * Real code should return 0 here! This is
+ * not a fixup of some bad behaviour. We
+ * merely call the debug_init function to keep
+ * track of the object.
+ */
+ return 1;
+ } else {
+ /* Real code needs to emit a warning here */
+ }
+ return 0;
+
+ case ODEBUG_STATE_ACTIVE:
+ debug_object_deactivate(obj, &descr_type_test);
+ debug_object_activate(obj, &descr_type_test);
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_destroy is called when:
+ * - an active object is destroyed
+ */
+static int __init fixup_destroy(void *addr, enum debug_obj_state state)
+{
+ struct self_test *obj = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ debug_object_deactivate(obj, &descr_type_test);
+ debug_object_destroy(obj, &descr_type_test);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * fixup_free is called when:
+ * - an active object is freed
+ */
+static int __init fixup_free(void *addr, enum debug_obj_state state)
+{
+ struct self_test *obj = addr;
+
+ switch (state) {
+ case ODEBUG_STATE_ACTIVE:
+ debug_object_deactivate(obj, &descr_type_test);
+ debug_object_free(obj, &descr_type_test);
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int __init
+check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
+{
+ struct debug_bucket *db;
+ struct debug_obj *obj;
+ unsigned long flags;
+ int res = -EINVAL;
+
+ db = get_bucket((unsigned long) addr);
+
+ raw_spin_lock_irqsave(&db->lock, flags);
+
+ obj = lookup_object(addr, db);
+ if (!obj && state != ODEBUG_STATE_NONE) {
+ WARN(1, KERN_ERR "ODEBUG: selftest object not found\n");
+ goto out;
+ }
+ if (obj && obj->state != state) {
+ WARN(1, KERN_ERR "ODEBUG: selftest wrong state: %d != %d\n",
+ obj->state, state);
+ goto out;
+ }
+ if (fixups != debug_objects_fixups) {
+ WARN(1, KERN_ERR "ODEBUG: selftest fixups failed %d != %d\n",
+ fixups, debug_objects_fixups);
+ goto out;
+ }
+ if (warnings != debug_objects_warnings) {
+ WARN(1, KERN_ERR "ODEBUG: selftest warnings failed %d != %d\n",
+ warnings, debug_objects_warnings);
+ goto out;
+ }
+ res = 0;
+out:
+ raw_spin_unlock_irqrestore(&db->lock, flags);
+ if (res)
+ debug_objects_enabled = 0;
+ return res;
+}
+
+static __initdata struct debug_obj_descr descr_type_test = {
+ .name = "selftest",
+ .fixup_init = fixup_init,
+ .fixup_activate = fixup_activate,
+ .fixup_destroy = fixup_destroy,
+ .fixup_free = fixup_free,
+};
+
+static __initdata struct self_test obj = { .static_init = 0 };
+
+static void __init debug_objects_selftest(void)
+{
+ int fixups, oldfixups, warnings, oldwarnings;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ fixups = oldfixups = debug_objects_fixups;
+ warnings = oldwarnings = debug_objects_warnings;
+ descr_test = &descr_type_test;
+
+ debug_object_init(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
+ goto out;
+ debug_object_activate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
+ goto out;
+ debug_object_activate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, ++warnings))
+ goto out;
+ debug_object_deactivate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_INACTIVE, fixups, warnings))
+ goto out;
+ debug_object_destroy(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, warnings))
+ goto out;
+ debug_object_init(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings))
+ goto out;
+ debug_object_activate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings))
+ goto out;
+ debug_object_deactivate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings))
+ goto out;
+ debug_object_free(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
+ goto out;
+
+ obj.static_init = 1;
+ debug_object_activate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings))
+ goto out;
+ debug_object_init(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings))
+ goto out;
+ debug_object_free(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings))
+ goto out;
+
+#ifdef CONFIG_DEBUG_OBJECTS_FREE
+ debug_object_init(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings))
+ goto out;
+ debug_object_activate(&obj, &descr_type_test);
+ if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings))
+ goto out;
+ __debug_check_no_obj_freed(&obj, sizeof(obj));
+ if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings))
+ goto out;
+#endif
+ printk(KERN_INFO "ODEBUG: selftest passed\n");
+
+out:
+ debug_objects_fixups = oldfixups;
+ debug_objects_warnings = oldwarnings;
+ descr_test = NULL;
+
+ local_irq_restore(flags);
+}
+#else
+static inline void debug_objects_selftest(void) { }
+#endif
+
+/*
+ * Called during early boot to initialize the hash buckets and link
+ * the static object pool objects into the pool list. After this call
+ * the object tracker is fully operational.
+ */
+void __init debug_objects_early_init(void)
+{
+ int i;
+
+ for (i = 0; i < ODEBUG_HASH_SIZE; i++)
+ raw_spin_lock_init(&obj_hash[i].lock);
+
+ for (i = 0; i < ODEBUG_POOL_SIZE; i++)
+ hlist_add_head(&obj_static_pool[i].node, &obj_pool);
+}
+
+/*
+ * Convert the statically allocated objects to dynamic ones:
+ */
+static int __init debug_objects_replace_static_objects(void)
+{
+ struct debug_bucket *db = obj_hash;
+ struct hlist_node *node, *tmp;
+ struct debug_obj *obj, *new;
+ HLIST_HEAD(objects);
+ int i, cnt = 0;
+
+ for (i = 0; i < ODEBUG_POOL_SIZE; i++) {
+ obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL);
+ if (!obj)
+ goto free;
+ hlist_add_head(&obj->node, &objects);
+ }
+
+ /*
+ * When debug_objects_mem_init() is called we know that only
+ * one CPU is up, so disabling interrupts is enough
+ * protection. This avoids the lockdep hell of lock ordering.
+ */
+ local_irq_disable();
+
+ /* Remove the statically allocated objects from the pool */
+ hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node)
+ hlist_del(&obj->node);
+ /* Move the allocated objects to the pool */
+ hlist_move_list(&objects, &obj_pool);
+
+ /* Replace the active object references */
+ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
+ hlist_move_list(&db->list, &objects);
+
+ hlist_for_each_entry(obj, node, &objects, node) {
+ new = hlist_entry(obj_pool.first, typeof(*obj), node);
+ hlist_del(&new->node);
+ /* copy object data */
+ *new = *obj;
+ hlist_add_head(&new->node, &db->list);
+ cnt++;
+ }
+ }
+
+ printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt,
+ obj_pool_used);
+ local_irq_enable();
+ return 0;
+free:
+ hlist_for_each_entry_safe(obj, node, tmp, &objects, node) {
+ hlist_del(&obj->node);
+ kmem_cache_free(obj_cache, obj);
+ }
+ return -ENOMEM;
+}
+
+/*
+ * Called after the kmem_caches are functional to set up a dedicated
+ * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag
+ * prevents the debug code from being called on kmem_cache_free() for
+ * the debug tracker objects themselves, avoiding recursive calls.
+ */
+void __init debug_objects_mem_init(void)
+{
+ if (!debug_objects_enabled)
+ return;
+
+ obj_cache = kmem_cache_create("debug_objects_cache",
+ sizeof (struct debug_obj), 0,
+ SLAB_DEBUG_OBJECTS, NULL);
+
+ if (!obj_cache || debug_objects_replace_static_objects()) {
+ debug_objects_enabled = 0;
+ if (obj_cache)
+ kmem_cache_destroy(obj_cache);
+ printk(KERN_WARNING "ODEBUG: out of memory.\n");
+ } else
+ debug_objects_selftest();
+}
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
new file mode 100644
index 00000000..e73822aa
--- /dev/null
+++ b/lib/dec_and_lock.c
@@ -0,0 +1,34 @@
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+/*
+ * This is an implementation of the notion of "decrement a
+ * reference count, and return locked if it decremented to zero".
+ *
+ * NOTE NOTE NOTE! This is _not_ equivalent to
+ *
+ * if (atomic_dec_and_test(&atomic)) {
+ * spin_lock(&lock);
+ * return 1;
+ * }
+ * return 0;
+ *
+ * because the spin-lock and the decrement must be
+ * "atomic".
+ */
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+ /* Subtract 1 from counter unless that drops it to 0 (i.e. it was 1) */
+ if (atomic_add_unless(atomic, -1, 1))
+ return 0;
+
+ /* Otherwise do it the slow way */
+ spin_lock(lock);
+ if (atomic_dec_and_test(atomic))
+ return 1;
+ spin_unlock(lock);
+ return 0;
+}
+
+EXPORT_SYMBOL(_atomic_dec_and_lock);
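+
+/*
+ * Usage sketch (illustrative names): drop a reference and, only if it
+ * was the last one, take the lock that protects the object's removal:
+ *
+ * if (atomic_dec_and_lock(&obj->refcount, &obj_list_lock)) {
+ * list_del(&obj->node);
+ * spin_unlock(&obj_list_lock);
+ * kfree(obj);
+ * }
+ */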
diff --git a/lib/decompress.c b/lib/decompress.c
new file mode 100644
index 00000000..a7606815
--- /dev/null
+++ b/lib/decompress.c
@@ -0,0 +1,59 @@
+/*
+ * decompress.c
+ *
+ * Detect the decompression method based on magic number
+ */
+
+#include <linux/decompress/generic.h>
+
+#include <linux/decompress/bunzip2.h>
+#include <linux/decompress/unlzma.h>
+#include <linux/decompress/inflate.h>
+#include <linux/decompress/unlzo.h>
+
+#include <linux/types.h>
+#include <linux/string.h>
+
+#ifndef CONFIG_DECOMPRESS_GZIP
+# define gunzip NULL
+#endif
+#ifndef CONFIG_DECOMPRESS_BZIP2
+# define bunzip2 NULL
+#endif
+#ifndef CONFIG_DECOMPRESS_LZMA
+# define unlzma NULL
+#endif
+#ifndef CONFIG_DECOMPRESS_LZO
+# define unlzo NULL
+#endif
+
+static const struct compress_format {
+ unsigned char magic[2];
+ const char *name;
+ decompress_fn decompressor;
+} compressed_formats[] = {
+ { {037, 0213}, "gzip", gunzip },
+ { {037, 0236}, "gzip", gunzip },
+ { {0x42, 0x5a}, "bzip2", bunzip2 },
+ { {0x5d, 0x00}, "lzma", unlzma },
+ { {0x89, 0x4c}, "lzo", unlzo },
+ { {0, 0}, NULL, NULL }
+};
+
+decompress_fn decompress_method(const unsigned char *inbuf, int len,
+ const char **name)
+{
+ const struct compress_format *cf;
+
+ if (len < 2)
+ return NULL; /* Need at least this much... */
+
+ for (cf = compressed_formats; cf->name; cf++) {
+ if (!memcmp(inbuf, cf->magic, 2))
+ break;
+
+ }
+ if (name)
+ *name = cf->name;
+ return cf->decompressor;
+}
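+
+/*
+ * Usage sketch (flush_cb, out_buf and error_fn are illustrative):
+ * callers probe the image header and, if a known magic is found,
+ * invoke the returned decompressor:
+ *
+ * const char *name;
+ * decompress_fn fn = decompress_method(image, image_len, &name);
+ * if (fn)
+ * fn(image, image_len, NULL, flush_cb, out_buf, NULL, error_fn);
+ * else
+ * error_fn("unknown compression format");
+ */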
diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
new file mode 100644
index 00000000..81c8bb1c
--- /dev/null
+++ b/lib/decompress_bunzip2.c
@@ -0,0 +1,758 @@
+/* vi: set sw=4 ts=4: */
+/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net).
+
+ Based on bzip2 decompression code by Julian R Seward (jseward@acm.org),
+ which also acknowledges contributions by Mike Burrows, David Wheeler,
+ Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten,
+ Robert Sedgewick, and Jon L. Bentley.
+
+ This code is licensed under the LGPLv2:
+ LGPL (http://www.gnu.org/copyleft/lgpl.html)
+*/
+
+/*
+ Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org).
+
+ More efficient reading of Huffman codes, a streamlined read_bunzip()
+ function, and various other tweaks. In (limited) tests, approximately
+ 20% faster than bzcat on x86 and about 10% faster on arm.
+
+ Note that about 2/3 of the time is spent in read_bunzip() reversing
+ the Burrows-Wheeler transformation. Much of that time is delay
+ resulting from cache misses.
+
+ I would ask that anyone benefiting from this work, especially those
+ using it in commercial products, consider making a donation to my local
+ non-profit hospice organization in the name of the woman I loved, who
+ passed away Feb. 12, 2003.
+
+ In memory of Toni W. Hagan
+
+ Hospice of Acadiana, Inc.
+ 2600 Johnston St., Suite 200
+ Lafayette, LA 70503-3240
+
+ Phone (337) 232-1234 or 1-800-738-2226
+ Fax (337) 232-1297
+
+ http://www.hospiceacadiana.com/
+
+ Manuel
+ */
+
+/*
+ Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu)
+*/
+
+
+#ifdef STATIC
+#define PREBOOT
+#else
+#include <linux/decompress/bunzip2.h>
+#include <linux/slab.h>
+#endif /* STATIC */
+
+#include <linux/decompress/mm.h>
+
+#ifndef INT_MAX
+#define INT_MAX 0x7fffffff
+#endif
+
+/* Constants for Huffman coding */
+#define MAX_GROUPS 6
+#define GROUP_SIZE 50 /* 64 would have been more efficient */
+#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */
+#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */
+#define SYMBOL_RUNA 0
+#define SYMBOL_RUNB 1
+
+/* Status return values */
+#define RETVAL_OK 0
+#define RETVAL_LAST_BLOCK (-1)
+#define RETVAL_NOT_BZIP_DATA (-2)
+#define RETVAL_UNEXPECTED_INPUT_EOF (-3)
+#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4)
+#define RETVAL_DATA_ERROR (-5)
+#define RETVAL_OUT_OF_MEMORY (-6)
+#define RETVAL_OBSOLETE_INPUT (-7)
+
+/* Other housekeeping constants */
+#define BZIP2_IOBUF_SIZE 4096
+
+/* This is what we know about each Huffman coding group */
+struct group_data {
+ /* We have an extra slot at the end of limit[] for a sentinel value. */
+ int limit[MAX_HUFCODE_BITS+1];
+ int base[MAX_HUFCODE_BITS];
+ int permute[MAX_SYMBOLS];
+ int minLen, maxLen;
+};
+
+/* Structure holding all the housekeeping data, including IO buffers and
+ memory that persists between calls to bunzip */
+struct bunzip_data {
+ /* State for interrupting output loop */
+ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
+ /* I/O tracking data (file handles, buffers, positions, etc.) */
+ int (*fill)(void*, unsigned int);
+ int inbufCount, inbufPos /*, outbufPos*/;
+ unsigned char *inbuf /*,*outbuf*/;
+ unsigned int inbufBitCount, inbufBits;
+ /* The CRC values stored in the block header and calculated from the
+ data */
+ unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
+ /* Intermediate buffer and its size (in bytes) */
+ unsigned int *dbuf, dbufSize;
+ /* These things are a bit too big to go on the stack */
+ unsigned char selectors[32768]; /* nSelectors = 15 bits */
+ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
+ int io_error; /* non-zero if we have IO error */
+ int byteCount[256];
+ unsigned char symToByte[256], mtfSymbol[256];
+};
+
+
+/* Return the next nnn bits of input. All reads from the compressed input
+ are done through this function. All reads are big endian */
+static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
+{
+ unsigned int bits = 0;
+
+ /* If we need to get more data from the byte buffer, do so.
+ (Loop getting one byte at a time to enforce endianness and avoid
+ unaligned access.) */
+ while (bd->inbufBitCount < bits_wanted) {
+ /* If we need to read more data from file into byte buffer, do
+ so */
+ if (bd->inbufPos == bd->inbufCount) {
+ if (bd->io_error)
+ return 0;
+ bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
+ if (bd->inbufCount <= 0) {
+ bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
+ return 0;
+ }
+ bd->inbufPos = 0;
+ }
+ /* Avoid 32-bit overflow (dump bit buffer to top of output) */
+ if (bd->inbufBitCount >= 24) {
+ bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
+ bits_wanted -= bd->inbufBitCount;
+ bits <<= bits_wanted;
+ bd->inbufBitCount = 0;
+ }
+ /* Grab next 8 bits of input from buffer. */
+ bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
+ bd->inbufBitCount += 8;
+ }
+ /* Calculate result */
+ bd->inbufBitCount -= bits_wanted;
+ bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
+
+ return bits;
+}
+
+/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
+
+static int INIT get_next_block(struct bunzip_data *bd)
+{
+ struct group_data *hufGroup = NULL;
+ int *base = NULL;
+ int *limit = NULL;
+ int dbufCount, nextSym, dbufSize, groupCount, selector,
+ i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount;
+ unsigned char uc, *symToByte, *mtfSymbol, *selectors;
+ unsigned int *dbuf, origPtr;
+
+ dbuf = bd->dbuf;
+ dbufSize = bd->dbufSize;
+ selectors = bd->selectors;
+ byteCount = bd->byteCount;
+ symToByte = bd->symToByte;
+ mtfSymbol = bd->mtfSymbol;
+
+ /* Read in header signature and CRC, then validate signature.
+ (last block signature means CRC is for whole file, return now) */
+ i = get_bits(bd, 24);
+ j = get_bits(bd, 24);
+ bd->headerCRC = get_bits(bd, 32);
+ if ((i == 0x177245) && (j == 0x385090))
+ return RETVAL_LAST_BLOCK;
+ if ((i != 0x314159) || (j != 0x265359))
+ return RETVAL_NOT_BZIP_DATA;
+ /* We can add support for blockRandomised if anybody complains.
+ There was some code for this in busybox 1.0.0-pre3, but nobody ever
+ noticed that it didn't actually work. */
+ if (get_bits(bd, 1))
+ return RETVAL_OBSOLETE_INPUT;
+ origPtr = get_bits(bd, 24);
+ if (origPtr > dbufSize)
+ return RETVAL_DATA_ERROR;
+ /* mapping table: if some byte values are never used (encoding things
+ like ascii text), the compression code removes the gaps to have fewer
+ symbols to deal with, and writes a sparse bitfield indicating which
+ values were present. We make a translation table to convert the
+ symbols back to the corresponding bytes. */
+ t = get_bits(bd, 16);
+ symTotal = 0;
+ for (i = 0; i < 16; i++) {
+ if (t&(1 << (15-i))) {
+ k = get_bits(bd, 16);
+ for (j = 0; j < 16; j++)
+ if (k&(1 << (15-j)))
+ symToByte[symTotal++] = (16*i)+j;
+ }
+ }
+ /* How many different Huffman coding groups does this block use? */
+ groupCount = get_bits(bd, 3);
+ if (groupCount < 2 || groupCount > MAX_GROUPS)
+ return RETVAL_DATA_ERROR;
+ /* nSelectors: Every GROUP_SIZE many symbols we select a new
+ Huffman coding group. Read in the group selector list,
+ which is stored as MTF encoded bit runs. (MTF = Move To
+ Front, as each value is used it's moved to the start of the
+ list.) */
+ nSelectors = get_bits(bd, 15);
+ if (!nSelectors)
+ return RETVAL_DATA_ERROR;
+ for (i = 0; i < groupCount; i++)
+ mtfSymbol[i] = i;
+ for (i = 0; i < nSelectors; i++) {
+ /* Get next value */
+ for (j = 0; get_bits(bd, 1); j++)
+ if (j >= groupCount)
+ return RETVAL_DATA_ERROR;
+ /* Decode MTF to get the next selector */
+ uc = mtfSymbol[j];
+ for (; j; j--)
+ mtfSymbol[j] = mtfSymbol[j-1];
+ mtfSymbol[0] = selectors[i] = uc;
+ }
+ /* Read the Huffman coding tables for each group, which code
+ for symTotal literal symbols, plus two run symbols (RUNA,
+ RUNB) */
+ symCount = symTotal+2;
+ for (j = 0; j < groupCount; j++) {
+ unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1];
+ int minLen, maxLen, pp;
+ /* Read Huffman code lengths for each symbol. They're
+ stored in a way similar to mtf; record a starting
+ value for the first symbol, and an offset from the
+ previous value for every symbol after that.
+ (Subtracting 1 before the loop and then adding it
+ back at the end is an optimization that makes the
+ test inside the loop simpler: symbol length 0
+ becomes negative, so an unsigned inequality catches
+ it.) */
+ t = get_bits(bd, 5)-1;
+ for (i = 0; i < symCount; i++) {
+ for (;;) {
+ if (((unsigned)t) > (MAX_HUFCODE_BITS-1))
+ return RETVAL_DATA_ERROR;
+
+ /* If first bit is 0, stop. Else
+ second bit indicates whether to
+ increment or decrement the value.
+ Optimization: grab 2 bits and unget
+ the second if the first was 0. */
+
+ k = get_bits(bd, 2);
+ if (k < 2) {
+ bd->inbufBitCount++;
+ break;
+ }
+ /* Add one if second bit 1, else
+ * subtract 1. Avoids if/else */
+ t += (((k+1)&2)-1);
+ }
+ /* Correct for the initial -1, to get the
+ * final symbol length */
+ length[i] = t+1;
+ }
+ /* Find largest and smallest lengths in this group */
+ minLen = maxLen = length[0];
+
+ for (i = 1; i < symCount; i++) {
+ if (length[i] > maxLen)
+ maxLen = length[i];
+ else if (length[i] < minLen)
+ minLen = length[i];
+ }
+
+ /* Calculate permute[], base[], and limit[] tables from
+ * length[].
+ *
+ * permute[] is the lookup table for converting
+ * Huffman coded symbols into decoded symbols. base[]
+ * is the amount to subtract from the value of a
+ * Huffman symbol of a given length when using
+ * permute[].
+ *
+ * limit[] indicates the largest numerical value a
+ * symbol with a given number of bits can have. This
+ * is how the Huffman codes can vary in length: each
+ * code with a value > limit[length] needs another
+ * bit.
+ */
+ hufGroup = bd->groups+j;
+ hufGroup->minLen = minLen;
+ hufGroup->maxLen = maxLen;
+ /* Note that minLen can't be smaller than 1, so we
+ adjust the base and limit array pointers so we're
+ not always wasting the first entry. We do this
+ again when using them (during symbol decoding). */
+ base = hufGroup->base-1;
+ limit = hufGroup->limit-1;
+ /* Calculate permute[]. Concurrently, initialize
+ * temp[] and limit[]. */
+ pp = 0;
+ for (i = minLen; i <= maxLen; i++) {
+ temp[i] = limit[i] = 0;
+ for (t = 0; t < symCount; t++)
+ if (length[t] == i)
+ hufGroup->permute[pp++] = t;
+ }
+ /* Count symbols coded for at each bit length */
+ for (i = 0; i < symCount; i++)
+ temp[length[i]]++;
+ /* Calculate limit[] (the largest symbol-coding value
+ * at each bit length, which is (previous limit <<
+ * 1)+symbols at this level), and base[] (number of
+ * symbols to ignore at each bit length, which is limit
+ * minus the cumulative count of symbols coded for
+ * already). */
+ pp = t = 0;
+ for (i = minLen; i < maxLen; i++) {
+ pp += temp[i];
+ /* We read the largest possible symbol size
+ and then unget bits after determining how
+ many we need, and those extra bits could be
+ set to anything. (They're noise from
+ future symbols.) At each level we're
+ really only interested in the first few
+ bits, so here we set all the trailing
+ to-be-ignored bits to 1 so they don't
+ affect the value > limit[length]
+ comparison. */
+ limit[i] = (pp << (maxLen - i)) - 1;
+ pp <<= 1;
+ base[i+1] = pp-(t += temp[i]);
+ }
+ limit[maxLen+1] = INT_MAX; /* Sentinel value for
+ * reading next sym. */
+ limit[maxLen] = pp+temp[maxLen]-1;
+ base[minLen] = 0;
+ }
+ /* We've finished reading and digesting the block header. Now
+ read this block's Huffman coded symbols from the file and
+ undo the Huffman coding and run length encoding, saving the
+ result into dbuf[dbufCount++] = uc */
+
+ /* Initialize symbol occurrence counters and symbol Move To
+ * Front table */
+ for (i = 0; i < 256; i++) {
+ byteCount[i] = 0;
+ mtfSymbol[i] = (unsigned char)i;
+ }
+ /* Loop through compressed symbols. */
+ runPos = dbufCount = symCount = selector = 0;
+ for (;;) {
+ /* Determine which Huffman coding group to use. */
+ if (!(symCount--)) {
+ symCount = GROUP_SIZE-1;
+ if (selector >= nSelectors)
+ return RETVAL_DATA_ERROR;
+ hufGroup = bd->groups+selectors[selector++];
+ base = hufGroup->base-1;
+ limit = hufGroup->limit-1;
+ }
+ /* Read next Huffman-coded symbol. */
+ /* Note: It is far cheaper to read maxLen bits and
+ back up than it is to read minLen bits and then an
+ additional bit at a time, testing as we go.
+ Because there is a trailing last block (with file
+ CRC), there is no danger of the overread causing an
+ unexpected EOF for a valid compressed file. As a
+ further optimization, we do the read inline
+ (falling back to a call to get_bits if the buffer
+ runs dry). The following (up to got_huff_bits:) is
+ equivalent to j = get_bits(bd, hufGroup->maxLen);
+ */
+ while (bd->inbufBitCount < hufGroup->maxLen) {
+ if (bd->inbufPos == bd->inbufCount) {
+ j = get_bits(bd, hufGroup->maxLen);
+ goto got_huff_bits;
+ }
+ bd->inbufBits =
+ (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
+ bd->inbufBitCount += 8;
+ }
+ bd->inbufBitCount -= hufGroup->maxLen;
+ j = (bd->inbufBits >> bd->inbufBitCount)&
+ ((1 << hufGroup->maxLen)-1);
+got_huff_bits:
+ /* Figure out how many bits are in the next symbol and
+ * unget extras */
+ i = hufGroup->minLen;
+ while (j > limit[i])
+ ++i;
+ bd->inbufBitCount += (hufGroup->maxLen - i);
+ /* Huffman decode value to get nextSym (with bounds checking) */
+ if ((i > hufGroup->maxLen)
+ || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i]))
+ >= MAX_SYMBOLS))
+ return RETVAL_DATA_ERROR;
+ nextSym = hufGroup->permute[j];
+ /* We have now decoded the symbol, which indicates
+ either a new literal byte, or a repeated run of the
+ most recent literal byte. First, check if nextSym
+ indicates a repeated run, and if so loop collecting
+ how many times to repeat the last literal. */
+ if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */
+ /* If this is the start of a new run, zero out
+ * counter */
+ if (!runPos) {
+ runPos = 1;
+ t = 0;
+ }
+ /* Neat trick that saves 1 symbol: instead of
+ or-ing 0 or 1 at each bit position, add 1
+ or 2 instead. For example, 1011 is 1 << 0
+ + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1
+ + 1 << 2. You can make any bit pattern
+ that way using 1 less symbol than the basic
+ or 0/1 method (except all bits 0, which
+ would use no symbols, but a run of length 0
+ doesn't mean anything in this context).
+ Thus space is saved. */
+ t += (runPos << nextSym);
+ /* +runPos if RUNA; +2*runPos if RUNB */
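+ /* (e.g. the symbol sequence RUNB, RUNA encodes
+ t = 2*1 + 1*2 = 4 repetitions.) */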
+
+ runPos <<= 1;
+ continue;
+ }
+ /* When we hit the first non-run symbol after a run,
+ we now know how many times to repeat the last
+ literal, so append that many copies to our buffer
+ of decoded symbols (dbuf) now. (The last literal
+ used is the one at the head of the mtfSymbol
+ array.) */
+ if (runPos) {
+ runPos = 0;
+ if (dbufCount+t >= dbufSize)
+ return RETVAL_DATA_ERROR;
+
+ uc = symToByte[mtfSymbol[0]];
+ byteCount[uc] += t;
+ while (t--)
+ dbuf[dbufCount++] = uc;
+ }
+ /* Is this the terminating symbol? */
+ if (nextSym > symTotal)
+ break;
+ /* At this point, nextSym indicates a new literal
+ character. Subtract one to get the position in the
+ MTF array at which this literal is currently to be
+ found. (Note that the result can't be -1 or 0,
+ because 0 and 1 are RUNA and RUNB. But another
+ instance of the first symbol in the mtf array,
+ position 0, would have been handled as part of a
+ run above. Therefore 1 unused mtf position minus 2
+ non-literal nextSym values equals -1.) */
+ if (dbufCount >= dbufSize)
+ return RETVAL_DATA_ERROR;
+ i = nextSym - 1;
+ uc = mtfSymbol[i];
+ /* Adjust the MTF array. Since we typically expect to
+ * move only a small number of symbols, and are bound
+ * by 256 in any case, using memmove here would
+ * typically be bigger and slower due to function call
+ * overhead and other assorted setup costs. */
+ do {
+ mtfSymbol[i] = mtfSymbol[i-1];
+ } while (--i);
+ mtfSymbol[0] = uc;
+ uc = symToByte[uc];
+ /* We have our literal byte. Save it into dbuf. */
+ byteCount[uc]++;
+ dbuf[dbufCount++] = (unsigned int)uc;
+ }
+ /* At this point, we've read all the Huffman-coded symbols
+ (and repeated runs) for this block from the input stream,
+ and decoded them into the intermediate buffer. There are
+ dbufCount many decoded bytes in dbuf[]. Now undo the
+ Burrows-Wheeler transform on dbuf. See
+ http://dogma.net/markn/articles/bwt/bwt.htm
+ */
+ /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
+ j = 0;
+ for (i = 0; i < 256; i++) {
+ k = j+byteCount[i];
+ byteCount[i] = j;
+ j = k;
+ }
+ /* Figure out what order dbuf would be in if we sorted it. */
+ for (i = 0; i < dbufCount; i++) {
+ uc = (unsigned char)(dbuf[i] & 0xff);
+ dbuf[byteCount[uc]] |= (i << 8);
+ byteCount[uc]++;
+ }
+ /* Decode first byte by hand to initialize "previous" byte.
+ Note that it doesn't get output, and if the first three
+ characters are identical it doesn't qualify as a run (hence
+ writeRunCountdown = 5). */
+ if (dbufCount) {
+ if (origPtr >= dbufCount)
+ return RETVAL_DATA_ERROR;
+ bd->writePos = dbuf[origPtr];
+ bd->writeCurrent = (unsigned char)(bd->writePos&0xff);
+ bd->writePos >>= 8;
+ bd->writeRunCountdown = 5;
+ }
+ bd->writeCount = dbufCount;
+
+ return RETVAL_OK;
+}
+
+/* Undo the Burrows-Wheeler transform on the intermediate buffer to produce
+ output. Up to len bytes of decompressed data are written to outbuf.
+ Return value is the number of bytes written, or an error (all errors
+ are negative numbers).
+*/
+
+static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
+{
+ const unsigned int *dbuf;
+ int pos, xcurrent, previous, gotcount;
+
+ /* If last read was short due to end of file, return last block now */
+ if (bd->writeCount < 0)
+ return bd->writeCount;
+
+ gotcount = 0;
+ dbuf = bd->dbuf;
+ pos = bd->writePos;
+ xcurrent = bd->writeCurrent;
+
+ /* We will always have pending decoded data to write into the output
+ buffer unless this is the very first call (in which case we haven't
+ Huffman-decoded a block into the intermediate buffer yet). */
+
+ if (bd->writeCopies) {
+ /* Inside the loop, writeCopies means extra copies (beyond 1) */
+ --bd->writeCopies;
+ /* Loop outputting bytes */
+ for (;;) {
+ /* If the output buffer is full, snapshot
+ * state and return */
+ if (gotcount >= len) {
+ bd->writePos = pos;
+ bd->writeCurrent = xcurrent;
+ bd->writeCopies++;
+ return len;
+ }
+ /* Write next byte into output buffer, updating CRC */
+ outbuf[gotcount++] = xcurrent;
+ bd->writeCRC = (((bd->writeCRC) << 8)
+ ^bd->crc32Table[((bd->writeCRC) >> 24)
+ ^xcurrent]);
+ /* Loop now if we're outputting multiple
+ * copies of this byte */
+ if (bd->writeCopies) {
+ --bd->writeCopies;
+ continue;
+ }
+decode_next_byte:
+ if (!bd->writeCount--)
+ break;
+ /* Follow sequence vector to undo
+ * Burrows-Wheeler transform */
+ previous = xcurrent;
+ pos = dbuf[pos];
+ xcurrent = pos&0xff;
+ pos >>= 8;
+ /* After 3 consecutive copies of the same
+ byte, the 4th is a repeat count. We count
+ down from 4 instead of counting up because
+ testing for non-zero is faster */
+ if (--bd->writeRunCountdown) {
+ if (xcurrent != previous)
+ bd->writeRunCountdown = 4;
+ } else {
+ /* We have a repeated run, this byte
+ * indicates the count */
+ bd->writeCopies = xcurrent;
+ xcurrent = previous;
+ bd->writeRunCountdown = 5;
+ /* Sometimes there are just 3 bytes
+ * (run length 0) */
+ if (!bd->writeCopies)
+ goto decode_next_byte;
+ /* Subtract the 1 copy we'd output
+ * anyway to get extras */
+ --bd->writeCopies;
+ }
+ }
+ /* Decompression of this block completed successfully */
+ bd->writeCRC = ~bd->writeCRC;
+ bd->totalCRC = ((bd->totalCRC << 1) |
+ (bd->totalCRC >> 31)) ^ bd->writeCRC;
+ /* If this block had a CRC error, force file level CRC error. */
+ if (bd->writeCRC != bd->headerCRC) {
+ bd->totalCRC = bd->headerCRC+1;
+ return RETVAL_LAST_BLOCK;
+ }
+ }
+
+ /* Refill the intermediate buffer by Huffman-decoding next
+ * block of input */
+ /* (previous is just a convenient unused temp variable here) */
+ previous = get_next_block(bd);
+ if (previous) {
+ bd->writeCount = previous;
+ return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
+ }
+ bd->writeCRC = 0xffffffffUL;
+ pos = bd->writePos;
+ xcurrent = bd->writeCurrent;
+ goto decode_next_byte;
+}
+
+static int INIT nofill(void *buf, unsigned int len)
+{
+ return -1;
+}
+
+/* Allocate the structure, read file header. If fill is NULL, inbuf must
+ contain a complete bzip2 stream (len bytes long); otherwise fill is called
+ to (re)load the input buffer as bits are consumed. */
+static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
+ int (*fill)(void*, unsigned int))
+{
+ struct bunzip_data *bd;
+ unsigned int i, j, c;
+ const unsigned int BZh0 =
+ (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
+ +(((unsigned int)'h') << 8)+(unsigned int)'0';
+
+ /* Figure out how much data to allocate */
+ i = sizeof(struct bunzip_data);
+
+ /* Allocate bunzip_data. Most fields initialize to zero. */
+ bd = *bdp = malloc(i);
+ if (!bd)
+ return RETVAL_OUT_OF_MEMORY;
+ memset(bd, 0, sizeof(struct bunzip_data));
+ /* Setup input buffer */
+ bd->inbuf = inbuf;
+ bd->inbufCount = len;
+ if (fill != NULL)
+ bd->fill = fill;
+ else
+ bd->fill = nofill;
+
+ /* Init the CRC32 table (big endian) */
+ for (i = 0; i < 256; i++) {
+ c = i << 24;
+ for (j = 8; j; j--)
+ c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
+ bd->crc32Table[i] = c;
+ }
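+ /* (This is the MSB-first CRC-32 that bzip2 uses, polynomial
+ * 0x04c11db7 -- not the bit-reflected CRC-32 of gzip/zlib.) */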
+
+ /* Ensure that file starts with "BZh['1'-'9']." */
+ i = get_bits(bd, 32);
+ if (((unsigned int)(i-BZh0-1)) >= 9)
+ return RETVAL_NOT_BZIP_DATA;
+
+ /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of
+ uncompressed data. Allocate intermediate buffer for block. */
+ bd->dbufSize = 100000*(i-BZh0);
+
+ bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
+ if (!bd->dbuf)
+ return RETVAL_OUT_OF_MEMORY;
+ return RETVAL_OK;
+}
+
+/* Decompress len bytes from buf (or from the fill callback) to outbuf (or
+ through the flush callback). Stops at end of bzip2 data, not end of file. */
+STATIC int INIT bunzip2(unsigned char *buf, int len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *outbuf,
+ int *pos,
+ void(*error_fn)(char *x))
+{
+ struct bunzip_data *bd;
+ int i = -1;
+ unsigned char *inbuf;
+
+ set_error_fn(error_fn);
+ if (flush)
+ outbuf = malloc(BZIP2_IOBUF_SIZE);
+
+ if (!outbuf) {
+ error("Could not allocate output bufer");
+ return RETVAL_OUT_OF_MEMORY;
+ }
+ if (buf)
+ inbuf = buf;
+ else
+ inbuf = malloc(BZIP2_IOBUF_SIZE);
+ if (!inbuf) {
+ error("Could not allocate input bufer");
+ i = RETVAL_OUT_OF_MEMORY;
+ goto exit_0;
+ }
+ i = start_bunzip(&bd, inbuf, len, fill);
+ if (!i) {
+ for (;;) {
+ i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
+ if (i <= 0)
+ break;
+ if (!flush)
+ outbuf += i;
+ else
+ if (i != flush(outbuf, i)) {
+ i = RETVAL_UNEXPECTED_OUTPUT_EOF;
+ break;
+ }
+ }
+ }
+ /* Check CRC and release memory */
+ if (i == RETVAL_LAST_BLOCK) {
+ if (bd->headerCRC != bd->totalCRC)
+ error("Data integrity error when decompressing.");
+ else
+ i = RETVAL_OK;
+ } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
+ error("Compressed file ends unexpectedly");
+ }
+ if (!bd)
+ goto exit_1;
+ if (bd->dbuf)
+ large_free(bd->dbuf);
+ if (pos)
+ *pos = bd->inbufPos;
+ free(bd);
+exit_1:
+ if (!buf)
+ free(inbuf);
+exit_0:
+ if (flush)
+ free(outbuf);
+ return i;
+}
+
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *outbuf,
+ int *pos,
+ void(*error_fn)(char *x))
+{
+ return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn);
+}
+#endif
diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c
new file mode 100644
index 00000000..fc686c7a
--- /dev/null
+++ b/lib/decompress_inflate.c
@@ -0,0 +1,176 @@
+#ifdef STATIC
+/* Pre-boot environment: included */
+
+/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots of
+ * errors about console_printk etc... on ARM */
+#define _LINUX_KERNEL_H
+
+#include "zlib_inflate/inftrees.c"
+#include "zlib_inflate/inffast.c"
+#include "zlib_inflate/inflate.c"
+
+#else /* STATIC */
+/* initramfs et al: linked */
+
+#include <linux/zutil.h>
+
+#include "zlib_inflate/inftrees.h"
+#include "zlib_inflate/inffast.h"
+#include "zlib_inflate/inflate.h"
+
+#include "zlib_inflate/infutil.h"
+#include <linux/slab.h>
+
+#endif /* STATIC */
+
+#include <linux/decompress/mm.h>
+
+#define GZIP_IOBUF_SIZE (16*1024)
+
+static int nofill(void *buffer, unsigned int len)
+{
+ return -1;
+}
+
+/* Included from initramfs et al code */
+STATIC int INIT gunzip(unsigned char *buf, int len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *out_buf,
+ int *pos,
+ void(*error_fn)(char *x)) {
+ u8 *zbuf;
+ struct z_stream_s *strm;
+ int rc;
+ size_t out_len;
+
+ set_error_fn(error_fn);
+ rc = -1;
+ if (flush) {
+ out_len = 0x8000; /* 32 K */
+ out_buf = malloc(out_len);
+ } else {
+ out_len = 0x7fffffff; /* no limit */
+ }
+ if (!out_buf) {
+ error("Out of memory while allocating output buffer");
+ goto gunzip_nomem1;
+ }
+
+ if (buf)
+ zbuf = buf;
+ else {
+ zbuf = malloc(GZIP_IOBUF_SIZE);
+ len = 0;
+ }
+ if (!zbuf) {
+ error("Out of memory while allocating input buffer");
+ goto gunzip_nomem2;
+ }
+
+ strm = malloc(sizeof(*strm));
+ if (strm == NULL) {
+ error("Out of memory while allocating z_stream");
+ goto gunzip_nomem3;
+ }
+
+ strm->workspace = malloc(flush ? zlib_inflate_workspacesize() :
+ sizeof(struct inflate_state));
+ if (strm->workspace == NULL) {
+ error("Out of memory while allocating workspace");
+ goto gunzip_nomem4;
+ }
+
+ if (!fill)
+ fill = nofill;
+
+ if (len == 0)
+ len = fill(zbuf, GZIP_IOBUF_SIZE);
+
+ /* verify the gzip magic (0x1f 0x8b) and deflate method byte (0x08) */
+ if (len < 10 ||
+ zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) {
+ if (pos)
+ *pos = 0;
+ error("Not a gzip file");
+ goto gunzip_5;
+ }
+
+ /* skip over gzip header (1f,8b,08... 10 bytes total +
+ * possible asciz filename)
+ */
+ strm->next_in = zbuf + 10;
+ /* skip over asciz filename */
+ if (zbuf[3] & 0x8) {
+ while (strm->next_in[0])
+ strm->next_in++;
+ strm->next_in++;
+ }
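+ /* (Other optional header fields -- FEXTRA, FHCRC, FCOMMENT --
+ * are not handled here; gzip'd kernel/initramfs images do not
+ * normally set those flags.) */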
+ strm->avail_in = len - (strm->next_in - zbuf);
+
+ strm->next_out = out_buf;
+ strm->avail_out = out_len;
+
+ rc = zlib_inflateInit2(strm, -MAX_WBITS);
+
+ if (!flush) {
+ WS(strm)->inflate_state.wsize = 0;
+ WS(strm)->inflate_state.window = NULL;
+ }
+
+ while (rc == Z_OK) {
+ if (strm->avail_in == 0) {
+ /* TODO: handle case where both pos and fill are set */
+ len = fill(zbuf, GZIP_IOBUF_SIZE);
+ if (len < 0) {
+ rc = -1;
+ error("read error");
+ break;
+ }
+ strm->next_in = zbuf;
+ strm->avail_in = len;
+ }
+ rc = zlib_inflate(strm, 0);
+
+ /* Write any data generated */
+ if (flush && strm->next_out > out_buf) {
+ int l = strm->next_out - out_buf;
+ if (l != flush(out_buf, l)) {
+ rc = -1;
+ error("write error");
+ break;
+ }
+ strm->next_out = out_buf;
+ strm->avail_out = out_len;
+ }
+
+ /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */
+ if (rc == Z_STREAM_END) {
+ rc = 0;
+ break;
+ } else if (rc != Z_OK) {
+ error("uncompression error");
+ rc = -1;
+ }
+ }
+
+ zlib_inflateEnd(strm);
+ if (pos)
+ /* add 8 to skip over the gzip trailer (CRC32 + size) */
+ *pos = strm->next_in - zbuf+8;
+
+gunzip_5:
+ free(strm->workspace);
+gunzip_nomem4:
+ free(strm);
+gunzip_nomem3:
+ if (!buf)
+ free(zbuf);
+gunzip_nomem2:
+ if (flush)
+ free(out_buf);
+gunzip_nomem1:
+ return rc; /* returns Z_OK (0) if successful */
+}
+
+#define decompress gunzip
diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c
new file mode 100644
index 00000000..ca82fde8
--- /dev/null
+++ b/lib/decompress_unlzma.c
@@ -0,0 +1,667 @@
+/* Lzma decompressor for Linux kernel. Shamelessly snarfed
+ * from busybox 1.1.1
+ *
+ * Linux kernel adaptation
+ * Copyright (C) 2006 Alain < alain@knaff.lu >
+ *
+ * Based on small lzma deflate implementation/Small range coder
+ * implementation for lzma.
+ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
+ *
+ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ * Copyright (C) 1999-2005 Igor Pavlov
+ *
+ * Copyrights of the parts, see headers below.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef STATIC
+#define PREBOOT
+#else
+#include <linux/decompress/unlzma.h>
+#include <linux/slab.h>
+#endif /* STATIC */
+
+#include <linux/decompress/mm.h>
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+static long long INIT read_int(unsigned char *ptr, int size)
+{
+ int i;
+ long long ret = 0;
+
+ for (i = 0; i < size; i++)
+ ret = (ret << 8) | ptr[size-i-1];
+ return ret;
+}
+
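+/* read_int() makes the byte at the highest address the most significant,
+ * i.e. it reads a little-endian value regardless of host byte order;
+ * ENDIAN_CONVERT below uses it to fix up the raw LZMA header fields. */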
+#define ENDIAN_CONVERT(x) \
+ x = (typeof(x))read_int((unsigned char *)&x, sizeof(x))
+
+
+/* Small range coder implementation for lzma.
+ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
+ *
+ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ * Copyright (c) 1999-2005 Igor Pavlov
+ */
+
+#include <linux/compiler.h>
+
+#define LZMA_IOBUF_SIZE 0x10000
+
+struct rc {
+ int (*fill)(void*, unsigned int);
+ uint8_t *ptr;
+ uint8_t *buffer;
+ uint8_t *buffer_end;
+ int buffer_size;
+ uint32_t code;
+ uint32_t range;
+ uint32_t bound;
+};
+
+
+#define RC_TOP_BITS 24
+#define RC_MOVE_BITS 5
+#define RC_MODEL_TOTAL_BITS 11
+
+
+static int nofill(void *buffer, unsigned int len)
+{
+ return -1;
+}
+
+/* Called twice: once at startup and once in rc_normalize() */
+static void INIT rc_read(struct rc *rc)
+{
+ rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE);
+ if (rc->buffer_size <= 0)
+ error("unexpected EOF");
+ rc->ptr = rc->buffer;
+ rc->buffer_end = rc->buffer + rc->buffer_size;
+}
+
+/* Called once */
+static inline void INIT rc_init(struct rc *rc,
+ int (*fill)(void*, unsigned int),
+ char *buffer, int buffer_size)
+{
+ if (fill)
+ rc->fill = fill;
+ else
+ rc->fill = nofill;
+ rc->buffer = (uint8_t *)buffer;
+ rc->buffer_size = buffer_size;
+ rc->buffer_end = rc->buffer + rc->buffer_size;
+ rc->ptr = rc->buffer;
+
+ rc->code = 0;
+ rc->range = 0xFFFFFFFF;
+}
+
+static inline void INIT rc_init_code(struct rc *rc)
+{
+ int i;
+
+ for (i = 0; i < 5; i++) {
+ if (rc->ptr >= rc->buffer_end)
+ rc_read(rc);
+ rc->code = (rc->code << 8) | *rc->ptr++;
+ }
+}
+
+
+/* Called once. TODO: bb_maybe_free() */
+static inline void INIT rc_free(struct rc *rc)
+{
+ free(rc->buffer);
+}
+
+/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */
+static void INIT rc_do_normalize(struct rc *rc)
+{
+ if (rc->ptr >= rc->buffer_end)
+ rc_read(rc);
+ rc->range <<= 8;
+ rc->code = (rc->code << 8) | *rc->ptr++;
+}
+static inline void INIT rc_normalize(struct rc *rc)
+{
+ if (rc->range < (1 << RC_TOP_BITS))
+ rc_do_normalize(rc);
+}
+
+/* Called 9 times */
+/* Why does rc_is_bit_0_helper() exist?
+ * Because we always want to expose (rc->code < rc->bound) to the optimizer
+ */
+static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p)
+{
+ rc_normalize(rc);
+ rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS);
+ return rc->bound;
+}
+static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p)
+{
+ uint32_t t = rc_is_bit_0_helper(rc, p);
+ return rc->code < t;
+}
+
+/* Called ~10 times, but very small, thus inlined */
+static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p)
+{
+ rc->range = rc->bound;
+ *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
+}
+static inline void rc_update_bit_1(struct rc *rc, uint16_t *p)
+{
+ rc->range -= rc->bound;
+ rc->code -= rc->bound;
+ *p -= *p >> RC_MOVE_BITS;
+}
+
+/* Called 4 times in unlzma loop */
+static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol)
+{
+ if (rc_is_bit_0(rc, p)) {
+ rc_update_bit_0(rc, p);
+ *symbol *= 2;
+ return 0;
+ } else {
+ rc_update_bit_1(rc, p);
+ *symbol = *symbol * 2 + 1;
+ return 1;
+ }
+}
+
+/* Called once */
+static inline int INIT rc_direct_bit(struct rc *rc)
+{
+ rc_normalize(rc);
+ rc->range >>= 1;
+ if (rc->code >= rc->range) {
+ rc->code -= rc->range;
+ return 1;
+ }
+ return 0;
+}
+
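+/* (rc_bit_tree_decode() below walks num_levels bits of a probability
+ * tree: *symbol accumulates a 1-prefixed bit path, and the final
+ * subtraction strips that sentinel bit, leaving the decoded value.) */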
+/* Called twice */
+static inline void INIT
+rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol)
+{
+ int i = num_levels;
+
+ *symbol = 1;
+ while (i--)
+ rc_get_bit(rc, p + *symbol, symbol);
+ *symbol -= 1 << num_levels;
+}
+
+
+/*
+ * Small lzma deflate implementation.
+ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
+ *
+ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
+ * Copyright (C) 1999-2005 Igor Pavlov
+ */
+
+
+struct lzma_header {
+ uint8_t pos;
+ uint32_t dict_size;
+ uint64_t dst_size;
+} __attribute__((packed));
+
+
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+#define LZMA_NUM_POS_BITS_MAX 4
+
+#define LZMA_LEN_NUM_LOW_BITS 3
+#define LZMA_LEN_NUM_MID_BITS 3
+#define LZMA_LEN_NUM_HIGH_BITS 8
+
+#define LZMA_LEN_CHOICE 0
+#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
+#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
+#define LZMA_LEN_MID (LZMA_LEN_LOW \
+ + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
+#define LZMA_LEN_HIGH (LZMA_LEN_MID \
+ +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
+#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
+
+#define LZMA_NUM_STATES 12
+#define LZMA_NUM_LIT_STATES 7
+
+#define LZMA_START_POS_MODEL_INDEX 4
+#define LZMA_END_POS_MODEL_INDEX 14
+#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
+
+#define LZMA_NUM_POS_SLOT_BITS 6
+#define LZMA_NUM_LEN_TO_POS_STATES 4
+
+#define LZMA_NUM_ALIGN_BITS 4
+
+#define LZMA_MATCH_MIN_LEN 2
+
+#define LZMA_IS_MATCH 0
+#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
+#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
+#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
+#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
+#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
+#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
+ + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
+#define LZMA_SPEC_POS (LZMA_POS_SLOT \
+ +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
+#define LZMA_ALIGN (LZMA_SPEC_POS \
+ + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
+#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
+#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
+#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
+
+
+struct writer {
+ uint8_t *buffer;
+ uint8_t previous_byte;
+ size_t buffer_pos;
+ int bufsize;
+ size_t global_pos;
+ int(*flush)(void*, unsigned int);
+ struct lzma_header *header;
+};
+
+struct cstate {
+ int state;
+ uint32_t rep0, rep1, rep2, rep3;
+};
+
+static inline size_t INIT get_pos(struct writer *wr)
+{
+ return
+ wr->global_pos + wr->buffer_pos;
+}
+
+static inline uint8_t INIT peek_old_byte(struct writer *wr,
+ uint32_t offs)
+{
+ if (!wr->flush) {
+ int32_t pos;
+ while (offs > wr->header->dict_size)
+ offs -= wr->header->dict_size;
+ pos = wr->buffer_pos - offs;
+ return wr->buffer[pos];
+ } else {
+ uint32_t pos = wr->buffer_pos - offs;
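+ /* (buffer_pos - offs may wrap below zero here; the
+ unsigned modular arithmetic in the loop below brings
+ pos back into [0, dict_size).) */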
+ while (pos >= wr->header->dict_size)
+ pos += wr->header->dict_size;
+ return wr->buffer[pos];
+ }
+
+}
+
+static inline void INIT write_byte(struct writer *wr, uint8_t byte)
+{
+ wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte;
+ if (wr->flush && wr->buffer_pos == wr->header->dict_size) {
+ wr->buffer_pos = 0;
+ wr->global_pos += wr->header->dict_size;
+ wr->flush((char *)wr->buffer, wr->header->dict_size);
+ }
+}
+
+
+static inline void INIT copy_byte(struct writer *wr, uint32_t offs)
+{
+ write_byte(wr, peek_old_byte(wr, offs));
+}
+
+static inline void INIT copy_bytes(struct writer *wr,
+ uint32_t rep0, int len)
+{
+ do {
+ copy_byte(wr, rep0);
+ len--;
+ } while (len != 0 && wr->buffer_pos < wr->header->dst_size);
+}
+
+static inline void INIT process_bit0(struct writer *wr, struct rc *rc,
+ struct cstate *cst, uint16_t *p,
+ int pos_state, uint16_t *prob,
+ int lc, uint32_t literal_pos_mask) {
+ int mi = 1;
+ rc_update_bit_0(rc, prob);
+ prob = (p + LZMA_LITERAL +
+ (LZMA_LIT_SIZE
+ * (((get_pos(wr) & literal_pos_mask) << lc)
+ + (wr->previous_byte >> (8 - lc))))
+ );
+
+ if (cst->state >= LZMA_NUM_LIT_STATES) {
+ int match_byte = peek_old_byte(wr, cst->rep0);
+ do {
+ int bit;
+ uint16_t *prob_lit;
+
+ match_byte <<= 1;
+ bit = match_byte & 0x100;
+ prob_lit = prob + 0x100 + bit + mi;
+ if (rc_get_bit(rc, prob_lit, &mi)) {
+ if (!bit)
+ break;
+ } else {
+ if (bit)
+ break;
+ }
+ } while (mi < 0x100);
+ }
+ while (mi < 0x100) {
+ uint16_t *prob_lit = prob + mi;
+ rc_get_bit(rc, prob_lit, &mi);
+ }
+ write_byte(wr, mi);
+ if (cst->state < 4)
+ cst->state = 0;
+ else if (cst->state < 10)
+ cst->state -= 3;
+ else
+ cst->state -= 6;
+}
+
+static inline void INIT process_bit1(struct writer *wr, struct rc *rc,
+ struct cstate *cst, uint16_t *p,
+ int pos_state, uint16_t *prob) {
+ int offset;
+ uint16_t *prob_len;
+ int num_bits;
+ int len;
+
+ rc_update_bit_1(rc, prob);
+ prob = p + LZMA_IS_REP + cst->state;
+ if (rc_is_bit_0(rc, prob)) {
+ rc_update_bit_0(rc, prob);
+ cst->rep3 = cst->rep2;
+ cst->rep2 = cst->rep1;
+ cst->rep1 = cst->rep0;
+ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3;
+ prob = p + LZMA_LEN_CODER;
+ } else {
+ rc_update_bit_1(rc, prob);
+ prob = p + LZMA_IS_REP_G0 + cst->state;
+ if (rc_is_bit_0(rc, prob)) {
+ rc_update_bit_0(rc, prob);
+ prob = (p + LZMA_IS_REP_0_LONG
+ + (cst->state <<
+ LZMA_NUM_POS_BITS_MAX) +
+ pos_state);
+ if (rc_is_bit_0(rc, prob)) {
+ rc_update_bit_0(rc, prob);
+
+ cst->state = cst->state < LZMA_NUM_LIT_STATES ?
+ 9 : 11;
+ copy_byte(wr, cst->rep0);
+ return;
+ } else {
+ rc_update_bit_1(rc, prob);
+ }
+ } else {
+ uint32_t distance;
+
+ rc_update_bit_1(rc, prob);
+ prob = p + LZMA_IS_REP_G1 + cst->state;
+ if (rc_is_bit_0(rc, prob)) {
+ rc_update_bit_0(rc, prob);
+ distance = cst->rep1;
+ } else {
+ rc_update_bit_1(rc, prob);
+ prob = p + LZMA_IS_REP_G2 + cst->state;
+ if (rc_is_bit_0(rc, prob)) {
+ rc_update_bit_0(rc, prob);
+ distance = cst->rep2;
+ } else {
+ rc_update_bit_1(rc, prob);
+ distance = cst->rep3;
+ cst->rep3 = cst->rep2;
+ }
+ cst->rep2 = cst->rep1;
+ }
+ cst->rep1 = cst->rep0;
+ cst->rep0 = distance;
+ }
+ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11;
+ prob = p + LZMA_REP_LEN_CODER;
+ }
+
+ prob_len = prob + LZMA_LEN_CHOICE;
+ if (rc_is_bit_0(rc, prob_len)) {
+ rc_update_bit_0(rc, prob_len);
+ prob_len = (prob + LZMA_LEN_LOW
+ + (pos_state <<
+ LZMA_LEN_NUM_LOW_BITS));
+ offset = 0;
+ num_bits = LZMA_LEN_NUM_LOW_BITS;
+ } else {
+ rc_update_bit_1(rc, prob_len);
+ prob_len = prob + LZMA_LEN_CHOICE_2;
+ if (rc_is_bit_0(rc, prob_len)) {
+ rc_update_bit_0(rc, prob_len);
+ prob_len = (prob + LZMA_LEN_MID
+ + (pos_state <<
+ LZMA_LEN_NUM_MID_BITS));
+ offset = 1 << LZMA_LEN_NUM_LOW_BITS;
+ num_bits = LZMA_LEN_NUM_MID_BITS;
+ } else {
+ rc_update_bit_1(rc, prob_len);
+ prob_len = prob + LZMA_LEN_HIGH;
+ offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
+ + (1 << LZMA_LEN_NUM_MID_BITS));
+ num_bits = LZMA_LEN_NUM_HIGH_BITS;
+ }
+ }
+
+ rc_bit_tree_decode(rc, prob_len, num_bits, &len);
+ len += offset;
+
+ if (cst->state < 4) {
+ int pos_slot;
+
+ cst->state += LZMA_NUM_LIT_STATES;
+ prob =
+ p + LZMA_POS_SLOT +
+ ((len <
+ LZMA_NUM_LEN_TO_POS_STATES ? len :
+ LZMA_NUM_LEN_TO_POS_STATES - 1)
+ << LZMA_NUM_POS_SLOT_BITS);
+ rc_bit_tree_decode(rc, prob,
+ LZMA_NUM_POS_SLOT_BITS,
+ &pos_slot);
+ if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
+ int i, mi;
+ num_bits = (pos_slot >> 1) - 1;
+ cst->rep0 = 2 | (pos_slot & 1);
+ if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
+ cst->rep0 <<= num_bits;
+ prob = p + LZMA_SPEC_POS +
+ cst->rep0 - pos_slot - 1;
+ } else {
+ num_bits -= LZMA_NUM_ALIGN_BITS;
+ while (num_bits--)
+ cst->rep0 = (cst->rep0 << 1) |
+ rc_direct_bit(rc);
+ prob = p + LZMA_ALIGN;
+ cst->rep0 <<= LZMA_NUM_ALIGN_BITS;
+ num_bits = LZMA_NUM_ALIGN_BITS;
+ }
+ i = 1;
+ mi = 1;
+ while (num_bits--) {
+ if (rc_get_bit(rc, prob + mi, &mi))
+ cst->rep0 |= i;
+ i <<= 1;
+ }
+ } else
+ cst->rep0 = pos_slot;
+ if (++(cst->rep0) == 0)
+ return;
+ }
+
+ len += LZMA_MATCH_MIN_LEN;
+
+ copy_bytes(wr, cst->rep0, len);
+}
+
+
+
+STATIC inline int INIT unlzma(unsigned char *buf, int in_len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *output,
+ int *posp,
+ void(*error_fn)(char *x)
+ )
+{
+ struct lzma_header header;
+ int lc, pb, lp;
+ uint32_t pos_state_mask;
+ uint32_t literal_pos_mask;
+ uint16_t *p;
+ int num_probs;
+ struct rc rc;
+ int i, mi;
+ struct writer wr;
+ struct cstate cst;
+ unsigned char *inbuf;
+ int ret = -1;
+
+ set_error_fn(error_fn);
+
+ if (buf)
+ inbuf = buf;
+ else
+ inbuf = malloc(LZMA_IOBUF_SIZE);
+ if (!inbuf) {
+ error("Could not allocate input bufer");
+ goto exit_0;
+ }
+
+ cst.state = 0;
+ cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1;
+
+ wr.header = &header;
+ wr.flush = flush;
+ wr.global_pos = 0;
+ wr.previous_byte = 0;
+ wr.buffer_pos = 0;
+
+ rc_init(&rc, fill, inbuf, in_len);
+
+ for (i = 0; i < sizeof(header); i++) {
+ if (rc.ptr >= rc.buffer_end)
+ rc_read(&rc);
+ ((unsigned char *)&header)[i] = *rc.ptr++;
+ }
+
+ if (header.pos >= (9 * 5 * 5))
+ error("bad header");
+
+ mi = 0;
+ lc = header.pos;
+ while (lc >= 9) {
+ mi++;
+ lc -= 9;
+ }
+ pb = 0;
+ lp = mi;
+ while (lp >= 5) {
+ pb++;
+ lp -= 5;
+ }
+ pos_state_mask = (1 << pb) - 1;
+ literal_pos_mask = (1 << lp) - 1;
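+ /* (The properties byte encodes (pb*5 + lp)*9 + lc; e.g. the
+ common default 0x5d = 93 yields lc = 3, lp = 0, pb = 2.) */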
+
+ ENDIAN_CONVERT(header.dict_size);
+ ENDIAN_CONVERT(header.dst_size);
+
+ if (header.dict_size == 0)
+ header.dict_size = 1;
+
+ if (output)
+ wr.buffer = output;
+ else {
+ wr.bufsize = MIN(header.dst_size, header.dict_size);
+ wr.buffer = large_malloc(wr.bufsize);
+ }
+ if (wr.buffer == NULL)
+ goto exit_1;
+
+ num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
+ p = (uint16_t *) large_malloc(num_probs * sizeof(*p));
+ if (!p)
+ goto exit_2;
+ num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
+ for (i = 0; i < num_probs; i++)
+ p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
+
+ rc_init_code(&rc);
+
+ while (get_pos(&wr) < header.dst_size) {
+ int pos_state = get_pos(&wr) & pos_state_mask;
+ uint16_t *prob = p + LZMA_IS_MATCH +
+ (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state;
+ if (rc_is_bit_0(&rc, prob))
+ process_bit0(&wr, &rc, &cst, p, pos_state, prob,
+ lc, literal_pos_mask);
+ else {
+ process_bit1(&wr, &rc, &cst, p, pos_state, prob);
+ if (cst.rep0 == 0)
+ break;
+ }
+ }
+
+ if (posp)
+ *posp = rc.ptr-rc.buffer;
+ if (wr.flush)
+ wr.flush(wr.buffer, wr.buffer_pos);
+ ret = 0;
+ large_free(p);
+exit_2:
+ if (!output)
+ large_free(wr.buffer);
+exit_1:
+ if (!buf)
+ free(inbuf);
+exit_0:
+ return ret;
+}
+
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int in_len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *output,
+ int *posp,
+ void(*error_fn)(char *x)
+ )
+{
+ return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn);
+}
+#endif
diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c
new file mode 100644
index 00000000..bcb3a4bd
--- /dev/null
+++ b/lib/decompress_unlzo.c
@@ -0,0 +1,217 @@
+/*
+ * LZO decompressor for the Linux kernel. Code borrowed from the lzo
+ * implementation by Markus Franz Xaver Johannes Oberhumer.
+ *
+ * Linux kernel adaptation:
+ * Copyright (C) 2009
+ * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com>
+ *
+ * Original code:
+ * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer
+ * All Rights Reserved.
+ *
+ * lzop and the LZO library are free software; you can redistribute them
+ * and/or modify them under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.
+ * If not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Markus F.X.J. Oberhumer
+ * <markus@oberhumer.com>
+ * http://www.oberhumer.com/opensource/lzop/
+ */
+
+#ifdef STATIC
+#include "lzo/lzo1x_decompress.c"
+#else
+#include <linux/slab.h>
+#include <linux/decompress/unlzo.h>
+#endif
+
+#include <linux/types.h>
+#include <linux/lzo.h>
+#include <linux/decompress/mm.h>
+
+#include <linux/compiler.h>
+#include <asm/unaligned.h>
+
+static const unsigned char lzop_magic[] = {
+ 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a };
+
+#define LZO_BLOCK_SIZE (256*1024l)
+#define HEADER_HAS_FILTER 0x00000800L
+
+STATIC inline int INIT parse_header(u8 *input, u8 *skip)
+{
+ int l;
+ u8 *parse = input;
+ u8 level = 0;
+ u16 version;
+
+ /* read magic: first 9 bytes */
+ for (l = 0; l < 9; l++) {
+ if (*parse++ != lzop_magic[l])
+ return 0;
+ }
+ /* get version (2 bytes), skip library version (2),
+ * 'need to be extracted' version (2) and
+ * method (1) */
+ version = get_unaligned_be16(parse);
+ parse += 7;
+ if (version >= 0x0940)
+ level = *parse++;
+ if (get_unaligned_be32(parse) & HEADER_HAS_FILTER)
+ parse += 8; /* flags + filter info */
+ else
+ parse += 4; /* flags */
+
+ /* skip mode and mtime_low */
+ parse += 8;
+ if (version >= 0x0940)
+ parse += 4; /* skip mtime_high */
+
+ l = *parse++;
+ /* don't care about the file name, and skip checksum */
+ parse += l + 4;
+
+ *skip = parse - input;
+ return 1;
+}
+
+STATIC inline int INIT unlzo(u8 *input, int in_len,
+ int (*fill) (void *, unsigned int),
+ int (*flush) (void *, unsigned int),
+ u8 *output, int *posp,
+ void (*error_fn) (char *x))
+{
+ u8 skip = 0, r = 0;
+ u32 src_len, dst_len;
+ size_t tmp;
+ u8 *in_buf, *in_buf_save, *out_buf;
+ int ret = -1;
+
+ set_error_fn(error_fn);
+
+ if (output) {
+ out_buf = output;
+ } else if (!flush) {
+ error("NULL output pointer and no flush function provided");
+ goto exit;
+ } else {
+ out_buf = malloc(LZO_BLOCK_SIZE);
+ if (!out_buf) {
+ error("Could not allocate output buffer");
+ goto exit;
+ }
+ }
+
+ if (input && fill) {
+ error("Both input pointer and fill function provided, don't know what to do");
+ goto exit_1;
+ } else if (input) {
+ in_buf = input;
+ } else if (!fill || !posp) {
+ error("NULL input pointer and missing position pointer or fill function");
+ goto exit_1;
+ } else {
+ in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE));
+ if (!in_buf) {
+ error("Could not allocate input buffer");
+ goto exit_1;
+ }
+ }
+ in_buf_save = in_buf;
+
+ if (posp)
+ *posp = 0;
+
+ if (fill)
+ fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+
+ /* parse from in_buf, not input: input may be NULL when fill is used */
+ if (!parse_header(in_buf, &skip)) {
+ error("invalid header");
+ goto exit_2;
+ }
+ in_buf += skip;
+
+ if (posp)
+ *posp = skip;
+
+ for (;;) {
+ /* read uncompressed block size */
+ dst_len = get_unaligned_be32(in_buf);
+ in_buf += 4;
+
+ /* exit if last block */
+ if (dst_len == 0) {
+ if (posp)
+ *posp += 4;
+ break;
+ }
+
+ if (dst_len > LZO_BLOCK_SIZE) {
+ error("dest len longer than block size");
+ goto exit_2;
+ }
+
+ /* read compressed block size, and skip block checksum info */
+ src_len = get_unaligned_be32(in_buf);
+ in_buf += 8;
+
+ if (src_len <= 0 || src_len > dst_len) {
+ error("file corrupted");
+ goto exit_2;
+ }
+
+ /* decompress */
+ tmp = dst_len;
+
+ /* When the input data is not compressed at all,
+ * lzo1x_decompress_safe will fail, so call memcpy()
+ * instead */
+ if (unlikely(dst_len == src_len))
+ memcpy(out_buf, in_buf, src_len);
+ else {
+ r = lzo1x_decompress_safe((u8 *) in_buf, src_len,
+ out_buf, &tmp);
+
+ if (r != LZO_E_OK || dst_len != tmp) {
+ error("Compressed data violation");
+ goto exit_2;
+ }
+ }
+
+ if (flush)
+ flush(out_buf, dst_len);
+ if (output)
+ out_buf += dst_len;
+ if (posp)
+ *posp += src_len + 12;
+ if (fill) {
+ in_buf = in_buf_save;
+ fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE));
+ } else
+ in_buf += src_len;
+ }
+
+ ret = 0;
+exit_2:
+ if (!input)
+ free(in_buf);
+exit_1:
+ if (!output)
+ free(out_buf);
+exit:
+ return ret;
+}
+
+#define decompress unlzo
diff --git a/lib/devres.c b/lib/devres.c
new file mode 100644
index 00000000..6efddf53
--- /dev/null
+++ b/lib/devres.c
@@ -0,0 +1,352 @@
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/gfp.h>
+#include <linux/module.h>
+
+void devm_ioremap_release(struct device *dev, void *res)
+{
+ iounmap(*(void __iomem **)res);
+}
+
+static int devm_ioremap_match(struct device *dev, void *res, void *match_data)
+{
+ return *(void **)res == match_data;
+}
+
+/**
+ * devm_ioremap - Managed ioremap()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap(). Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap(struct device *dev, resource_size_t offset,
+ unsigned long size)
+{
+ void __iomem **ptr, *addr;
+
+ ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ addr = ioremap(offset, size);
+ if (addr) {
+ *ptr = addr;
+ devres_add(dev, ptr);
+ } else
+ devres_free(ptr);
+
+ return addr;
+}
+EXPORT_SYMBOL(devm_ioremap);
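+
+/* A minimal usage sketch (hypothetical pdev/res, not part of this file):
+ * from a driver's probe() routine one might write
+ *
+ *	void __iomem *base;
+ *
+ *	base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+ *	if (!base)
+ *		return -ENOMEM;
+ *
+ * and rely on devres to unmap automatically on driver detach. */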
+
+/**
+ * devm_ioremap_nocache - Managed ioremap_nocache()
+ * @dev: Generic device to remap IO address for
+ * @offset: BUS offset to map
+ * @size: Size of map
+ *
+ * Managed ioremap_nocache(). Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
+ unsigned long size)
+{
+ void __iomem **ptr, *addr;
+
+ ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ addr = ioremap_nocache(offset, size);
+ if (addr) {
+ *ptr = addr;
+ devres_add(dev, ptr);
+ } else
+ devres_free(ptr);
+
+ return addr;
+}
+EXPORT_SYMBOL(devm_ioremap_nocache);
+
+/**
+ * devm_iounmap - Managed iounmap()
+ * @dev: Generic device to unmap for
+ * @addr: Address to unmap
+ *
+ * Managed iounmap(). @addr must have been mapped using devm_ioremap*().
+ */
+void devm_iounmap(struct device *dev, void __iomem *addr)
+{
+ iounmap(addr);
+ WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match,
+ (void *)addr));
+}
+EXPORT_SYMBOL(devm_iounmap);
+
+#ifdef CONFIG_HAS_IOPORT
+/*
+ * Generic iomap devres
+ */
+static void devm_ioport_map_release(struct device *dev, void *res)
+{
+ ioport_unmap(*(void __iomem **)res);
+}
+
+static int devm_ioport_map_match(struct device *dev, void *res,
+ void *match_data)
+{
+ return *(void **)res == match_data;
+}
+
+/**
+ * devm_ioport_map - Managed ioport_map()
+ * @dev: Generic device to map ioport for
+ * @port: Port to map
+ * @nr: Number of ports to map
+ *
+ * Managed ioport_map(). Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
+ unsigned int nr)
+{
+ void __iomem **ptr, *addr;
+
+ ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return NULL;
+
+ addr = ioport_map(port, nr);
+ if (addr) {
+ *ptr = addr;
+ devres_add(dev, ptr);
+ } else
+ devres_free(ptr);
+
+ return addr;
+}
+EXPORT_SYMBOL(devm_ioport_map);
+
+/**
+ * devm_ioport_unmap - Managed ioport_unmap()
+ * @dev: Generic device to unmap for
+ * @addr: Address to unmap
+ *
+ * Managed ioport_unmap(). @addr must have been mapped using
+ * devm_ioport_map().
+ */
+void devm_ioport_unmap(struct device *dev, void __iomem *addr)
+{
+ ioport_unmap(addr);
+ WARN_ON(devres_destroy(dev, devm_ioport_map_release,
+ devm_ioport_map_match, (void *)addr));
+}
+EXPORT_SYMBOL(devm_ioport_unmap);
+
+#ifdef CONFIG_PCI
+/*
+ * PCI iomap devres
+ */
+#define PCIM_IOMAP_MAX PCI_ROM_RESOURCE
+
+struct pcim_iomap_devres {
+ void __iomem *table[PCIM_IOMAP_MAX];
+};
+
+static void pcim_iomap_release(struct device *gendev, void *res)
+{
+ struct pci_dev *dev = container_of(gendev, struct pci_dev, dev);
+ struct pcim_iomap_devres *this = res;
+ int i;
+
+ for (i = 0; i < PCIM_IOMAP_MAX; i++)
+ if (this->table[i])
+ pci_iounmap(dev, this->table[i]);
+}
+
+/**
+ * pcim_iomap_table - access iomap allocation table
+ * @pdev: PCI device to access iomap table for
+ *
+ * Access iomap allocation table for @pdev. If iomap table doesn't
+ * exist and @pdev is managed, it will be allocated. All iomaps
+ * recorded in the iomap table are automatically unmapped on driver
+ * detach.
+ *
+ * This function might sleep when the table is first allocated but can
+ * be safely called without context and guaranteed to succeed once
+ * allocated.
+ */
+void __iomem * const * pcim_iomap_table(struct pci_dev *pdev)
+{
+ struct pcim_iomap_devres *dr, *new_dr;
+
+ dr = devres_find(&pdev->dev, pcim_iomap_release, NULL, NULL);
+ if (dr)
+ return dr->table;
+
+ new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL);
+ if (!new_dr)
+ return NULL;
+ dr = devres_get(&pdev->dev, new_dr, NULL, NULL);
+ return dr->table;
+}
+EXPORT_SYMBOL(pcim_iomap_table);
+
+/**
+ * pcim_iomap - Managed pcim_iomap()
+ * @pdev: PCI device to iomap for
+ * @bar: BAR to iomap
+ * @maxlen: Maximum length of iomap
+ *
+ * Managed pci_iomap(). Map is automatically unmapped on driver
+ * detach.
+ */
+void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen)
+{
+ void __iomem **tbl;
+
+ BUG_ON(bar >= PCIM_IOMAP_MAX);
+
+ tbl = (void __iomem **)pcim_iomap_table(pdev);
+ if (!tbl || tbl[bar]) /* duplicate mappings not allowed */
+ return NULL;
+
+ tbl[bar] = pci_iomap(pdev, bar, maxlen);
+ return tbl[bar];
+}
+EXPORT_SYMBOL(pcim_iomap);
+
+/**
+ * pcim_iounmap - Managed pci_iounmap()
+ * @pdev: PCI device to iounmap for
+ * @addr: Address to unmap
+ *
+ * Managed pci_iounmap(). @addr must have been mapped using pcim_iomap().
+ */
+void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+ void __iomem **tbl;
+ int i;
+
+ pci_iounmap(pdev, addr);
+
+ tbl = (void __iomem **)pcim_iomap_table(pdev);
+ BUG_ON(!tbl);
+
+ for (i = 0; i < PCIM_IOMAP_MAX; i++)
+ if (tbl[i] == addr) {
+ tbl[i] = NULL;
+ return;
+ }
+ WARN_ON(1);
+}
+EXPORT_SYMBOL(pcim_iounmap);
+
+/**
+ * pcim_iomap_regions - Request and iomap PCI BARs
+ * @pdev: PCI device to map IO resources for
+ * @mask: Mask of BARs to request and iomap
+ * @name: Name used when requesting regions
+ *
+ * Request and iomap regions specified by @mask.
+ */
+int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name)
+{
+ void __iomem * const *iomap;
+ int i, rc;
+
+ iomap = pcim_iomap_table(pdev);
+ if (!iomap)
+ return -ENOMEM;
+
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ unsigned long len;
+
+ if (!(mask & (1 << i)))
+ continue;
+
+ rc = -EINVAL;
+ len = pci_resource_len(pdev, i);
+ if (!len)
+ goto err_inval;
+
+ rc = pci_request_region(pdev, i, name);
+ if (rc)
+ goto err_inval;
+
+ rc = -ENOMEM;
+ if (!pcim_iomap(pdev, i, 0))
+ goto err_region;
+ }
+
+ return 0;
+
+ err_region:
+ pci_release_region(pdev, i);
+ err_inval:
+ while (--i >= 0) {
+ if (!(mask & (1 << i)))
+ continue;
+ pcim_iounmap(pdev, iomap[i]);
+ pci_release_region(pdev, i);
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL(pcim_iomap_regions);
+
+/**
+ * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones
+ * @pdev: PCI device to map IO resources for
+ * @mask: Mask of BARs to iomap
+ * @name: Name used when requesting regions
+ *
+ * Request all PCI BARs and iomap regions specified by @mask.
+ */
+int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask,
+ const char *name)
+{
+ int request_mask = ((1 << 6) - 1) & ~mask;
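+ /* all six standard PCI BARs, minus those that will be iomapped */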
+ int rc;
+
+ rc = pci_request_selected_regions(pdev, request_mask, name);
+ if (rc)
+ return rc;
+
+ rc = pcim_iomap_regions(pdev, mask, name);
+ if (rc)
+ pci_release_selected_regions(pdev, request_mask);
+ return rc;
+}
+EXPORT_SYMBOL(pcim_iomap_regions_request_all);
+
+/**
+ * pcim_iounmap_regions - Unmap and release PCI BARs
+ * @pdev: PCI device to map IO resources for
+ * @mask: Mask of BARs to unmap and release
+ *
+ * Unmap and release regions specified by @mask.
+ */
+void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask)
+{
+ void __iomem * const *iomap;
+ int i;
+
+ iomap = pcim_iomap_table(pdev);
+ if (!iomap)
+ return;
+
+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+ if (!(mask & (1 << i)))
+ continue;
+
+ pcim_iounmap(pdev, iomap[i]);
+ pci_release_region(pdev, i);
+ }
+}
+EXPORT_SYMBOL(pcim_iounmap_regions);
+#endif
+#endif
diff --git a/lib/div64.c b/lib/div64.c
new file mode 100644
index 00000000..a111eb8d
--- /dev/null
+++ b/lib/div64.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
+ *
+ * Based on former do_div() implementation from asm-parisc/div64.h:
+ * Copyright (C) 1999 Hewlett-Packard Co
+ * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ *
+ * Generic C version of 64bit/32bit division and modulo, with
+ * 64bit result and 32bit remainder.
+ *
+ * The fast case for (n>>32 == 0) is handled inline by do_div().
+ *
+ * Code generated for this function might be very inefficient
+ * for some CPUs. __div64_32() can be overridden by linking arch-specific
+ * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S.
+ */
+
+#include <linux/module.h>
+#include <linux/math64.h>
+
+/* Not needed on 64bit architectures */
+#if BITS_PER_LONG == 32
+
+uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
+{
+ uint64_t rem = *n;
+ uint64_t b = base;
+ uint64_t res, d = 1;
+ uint32_t high = rem >> 32;
+
+ /* Reduce the thing a bit first */
+ res = 0;
+ if (high >= base) {
+ high /= base;
+ res = (uint64_t) high << 32;
+ rem -= (uint64_t) (high*base) << 32;
+ }
+
+ while ((int64_t)b > 0 && b < rem) {
+ b = b+b;
+ d = d+d;
+ }
+
+ do {
+ if (rem >= b) {
+ rem -= b;
+ res += d;
+ }
+ b >>= 1;
+ d >>= 1;
+ } while (d);
+
+ *n = res;
+ return rem;
+}
+
+EXPORT_SYMBOL(__div64_32);
+
+#ifndef div_s64_rem
+s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
+{
+ u64 quotient;
+
+ if (dividend < 0) {
+ quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
+ *remainder = -*remainder;
+ if (divisor > 0)
+ quotient = -quotient;
+ } else {
+ quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
+ if (divisor < 0)
+ quotient = -quotient;
+ }
+ return quotient;
+}
+EXPORT_SYMBOL(div_s64_rem);
+#endif
+
+/* 64bit divisor, dividend and result. dynamic precision */
+#ifndef div64_u64
+u64 div64_u64(u64 dividend, u64 divisor)
+{
+ u32 high, d;
+
+ high = divisor >> 32;
+ if (high) {
+ unsigned int shift = fls(high);
+
+ d = divisor >> shift;
+ dividend >>= shift;
+ } else
+ d = divisor;
+
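+ /* Note: truncating the low divisor bits above makes this an
+ approximation; the quotient can be off by a small amount
+ for some 64-bit divisors. */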
+ return div_u64(dividend, d);
+}
+EXPORT_SYMBOL(div64_u64);
+#endif
+
+#endif /* BITS_PER_LONG == 32 */
+
+/*
+ * Iterative div/mod for use when dividend is not expected to be much
+ * bigger than divisor.
+ */
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
+{
+ return __iter_div_u64_rem(dividend, divisor, remainder);
+}
+EXPORT_SYMBOL(iter_div_u64_rem);
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
new file mode 100644
index 00000000..01e64270
--- /dev/null
+++ b/lib/dma-debug.c
@@ -0,0 +1,1302 @@
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/stacktrace.h>
+#include <linux/dma-debug.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include <asm/sections.h>
+
+#define HASH_SIZE 1024ULL
+#define HASH_FN_SHIFT 13
+#define HASH_FN_MASK (HASH_SIZE - 1)
+
+enum {
+ dma_debug_single,
+ dma_debug_page,
+ dma_debug_sg,
+ dma_debug_coherent,
+};
+
+#define DMA_DEBUG_STACKTRACE_ENTRIES 5
+
+struct dma_debug_entry {
+ struct list_head list;
+ struct device *dev;
+ int type;
+ phys_addr_t paddr;
+ u64 dev_addr;
+ u64 size;
+ int direction;
+ int sg_call_ents;
+ int sg_mapped_ents;
+#ifdef CONFIG_STACKTRACE
+ struct stack_trace stacktrace;
+ unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
+#endif
+};
+
+struct hash_bucket {
+ struct list_head list;
+ spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
+/* Hash list to save the allocated dma addresses */
+static struct hash_bucket dma_entry_hash[HASH_SIZE];
+/* List of pre-allocated dma_debug_entry's */
+static LIST_HEAD(free_entries);
+/* Lock for the list above */
+static DEFINE_SPINLOCK(free_entries_lock);
+
+/* Global disable flag - will be set in case of an error */
+static bool global_disable __read_mostly;
+
+/* Global error count */
+static u32 error_count;
+
+/* Global error show enable */
+static u32 show_all_errors __read_mostly;
+/* Number of errors to show */
+static u32 show_num_errors = 1;
+
+static u32 num_free_entries;
+static u32 min_free_entries;
+static u32 nr_total_entries;
+
+/* number of preallocated entries requested by kernel cmdline */
+static u32 req_entries;
+
+/* debugfs dentry's for the stuff above */
+static struct dentry *dma_debug_dent __read_mostly;
+static struct dentry *global_disable_dent __read_mostly;
+static struct dentry *error_count_dent __read_mostly;
+static struct dentry *show_all_errors_dent __read_mostly;
+static struct dentry *show_num_errors_dent __read_mostly;
+static struct dentry *num_free_entries_dent __read_mostly;
+static struct dentry *min_free_entries_dent __read_mostly;
+static struct dentry *filter_dent __read_mostly;
+
+/* per-driver filter related state */
+
+#define NAME_MAX_LEN 64
+
+static char current_driver_name[NAME_MAX_LEN] __read_mostly;
+static struct device_driver *current_driver __read_mostly;
+
+static DEFINE_RWLOCK(driver_name_lock);
+
+static const char *type2name[4] = { "single", "page",
+ "scather-gather", "coherent" };
+
+static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+ "DMA_FROM_DEVICE", "DMA_NONE" };
+
+/* little merge helper - remove it after the merge window */
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
+
+/*
+ * The access to some variables in this macro is racy. We can't use atomic_t
+ * here because all these variables are exported to debugfs. Some of them are
+ * even writable. This is also the reason why a lock won't help much. But
+ * anyway, the races are no big deal. Here is why:
+ *
+ * error_count: the addition is racy, but the worst thing that can happen is
+ * that we don't count some errors
+ * show_num_errors: the subtraction is racy. Also no big deal because in
+ * worst case this will result in one warning more in the
+ * system log than the user configured. This variable is
+ * writeable via debugfs.
+ */
+static inline void dump_entry_trace(struct dma_debug_entry *entry)
+{
+#ifdef CONFIG_STACKTRACE
+ if (entry) {
+ pr_warning("Mapped at:\n");
+ print_stack_trace(&entry->stacktrace, 0);
+ }
+#endif
+}
+
+static bool driver_filter(struct device *dev)
+{
+ struct device_driver *drv;
+ unsigned long flags;
+ bool ret;
+
+ /* driver filter off */
+ if (likely(!current_driver_name[0]))
+ return true;
+
+ /* driver filter on and initialized */
+ if (current_driver && dev && dev->driver == current_driver)
+ return true;
+
+ /* driver filter on, but we can't filter on a NULL device... */
+ if (!dev)
+ return false;
+
+ if (current_driver || !current_driver_name[0])
+ return false;
+
+ /* driver filter on but not yet initialized */
+ drv = get_driver(dev->driver);
+ if (!drv)
+ return false;
+
+ /* lock to protect against change of current_driver_name */
+ read_lock_irqsave(&driver_name_lock, flags);
+
+ ret = false;
+ if (drv->name &&
+ strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
+ current_driver = drv;
+ ret = true;
+ }
+
+ read_unlock_irqrestore(&driver_name_lock, flags);
+ put_driver(drv);
+
+ return ret;
+}
+
+#define err_printk(dev, entry, format, arg...) do { \
+ error_count += 1; \
+ if (driver_filter(dev) && \
+ (show_all_errors || show_num_errors > 0)) { \
+ WARN(1, "%s %s: " format, \
+ dev ? dev_driver_string(dev) : "NULL", \
+ dev ? dev_name(dev) : "NULL", ## arg); \
+ dump_entry_trace(entry); \
+ } \
+ if (!show_all_errors && show_num_errors > 0) \
+ show_num_errors -= 1; \
+ } while (0)
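+/* (The do { ... } while (0) wrapper makes err_printk() expand to a single
+ * statement, so it nests safely inside unbraced if/else; note the macro
+ * itself must not end in a semicolon for that to work.) */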
+
+/*
+ * Hash related functions
+ *
+ * Every DMA-API request is saved into a struct dma_debug_entry. To
+ * have quick access to these structs they are stored into a hash.
+ */
+static int hash_fn(struct dma_debug_entry *entry)
+{
+ /*
+ * Hash function is based on the dma address.
+ * With HASH_FN_SHIFT = 13 and a 1024-entry table, bits 13-22 of
+ * the dma address are used as the index into the hash.
+ */
+ return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
+}
+
+/*
+ * Request exclusive access to a hash bucket for a given dma_debug_entry.
+ */
+static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
+ unsigned long *flags)
+{
+ int idx = hash_fn(entry);
+ unsigned long __flags;
+
+ spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
+ *flags = __flags;
+ return &dma_entry_hash[idx];
+}
+
+/*
+ * Give up exclusive access to the hash bucket
+ */
+static void put_hash_bucket(struct hash_bucket *bucket,
+ unsigned long *flags)
+{
+ unsigned long __flags = *flags;
+
+ spin_unlock_irqrestore(&bucket->lock, __flags);
+}
+
+/*
+ * Search a given entry in the hash bucket list
+ */
+static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
+ struct dma_debug_entry *ref)
+{
+ struct dma_debug_entry *entry, *ret = NULL;
+ int matches = 0, match_lvl, last_lvl = 0;
+
+ list_for_each_entry(entry, &bucket->list, list) {
+ if ((entry->dev_addr != ref->dev_addr) ||
+ (entry->dev != ref->dev))
+ continue;
+
+ /*
+ * Some drivers map the same physical address multiple
+ * times. Without a hardware IOMMU this results in the
+ * same device addresses being put into the dma-debug
+ * hash multiple times too. This can result in false
+ * positives being reported. Therefore we implement a
+ * best-fit algorithm here which returns the entry from
+ * the hash which fits best to the reference value
+ * instead of the first-fit.
+ */
+ matches += 1;
+ match_lvl = 0;
+ if (entry->size == ref->size)
+ match_lvl += 1;
+ if (entry->type == ref->type)
+ match_lvl += 1;
+ if (entry->direction == ref->direction)
+ match_lvl += 1;
+ if (entry->sg_call_ents == ref->sg_call_ents)
+ match_lvl += 1;
+
+ if (match_lvl == 4) {
+ /* perfect-fit - return the result */
+ return entry;
+ } else if (match_lvl > last_lvl) {
+ /*
+ * We found an entry that fits better than the
+ * previous one
+ */
+ last_lvl = match_lvl;
+ ret = entry;
+ }
+ }
+
+ /*
+ * If we have multiple matches but no perfect-fit, just return
+ * NULL.
+ */
+ ret = (matches == 1) ? ret : NULL;
+
+ return ret;
+}
+
+/*
+ * Add an entry to a hash bucket
+ */
+static void hash_bucket_add(struct hash_bucket *bucket,
+ struct dma_debug_entry *entry)
+{
+ list_add_tail(&entry->list, &bucket->list);
+}
+
+/*
+ * Remove entry from a hash bucket list
+ */
+static void hash_bucket_del(struct dma_debug_entry *entry)
+{
+ list_del(&entry->list);
+}
+
+/*
+ * Dump mapping entries for debugging purposes
+ */
+void debug_dma_dump_mappings(struct device *dev)
+{
+ int idx;
+
+ for (idx = 0; idx < HASH_SIZE; idx++) {
+ struct hash_bucket *bucket = &dma_entry_hash[idx];
+ struct dma_debug_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bucket->lock, flags);
+
+ list_for_each_entry(entry, &bucket->list, list) {
+ if (!dev || dev == entry->dev) {
+ dev_info(entry->dev,
+ "%s idx %d P=%Lx D=%Lx L=%Lx %s\n",
+ type2name[entry->type], idx,
+ (unsigned long long)entry->paddr,
+ entry->dev_addr, entry->size,
+ dir2name[entry->direction]);
+ }
+ }
+
+ spin_unlock_irqrestore(&bucket->lock, flags);
+ }
+}
+EXPORT_SYMBOL(debug_dma_dump_mappings);
+
+/*
+ * Wrapper function for adding an entry to the hash.
+ * This function takes care of locking itself.
+ */
+static void add_dma_entry(struct dma_debug_entry *entry)
+{
+ struct hash_bucket *bucket;
+ unsigned long flags;
+
+ bucket = get_hash_bucket(entry, &flags);
+ hash_bucket_add(bucket, entry);
+ put_hash_bucket(bucket, &flags);
+}
+
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+ struct dma_debug_entry *entry;
+
+ entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+ list_del(&entry->list);
+ memset(entry, 0, sizeof(*entry));
+
+ num_free_entries -= 1;
+ if (num_free_entries < min_free_entries)
+ min_free_entries = num_free_entries;
+
+ return entry;
+}
+
+/* struct dma_debug_entry allocator
+ *
+ * The next two functions implement the allocator for
+ * struct dma_debug_entry objects.
+ */
+static struct dma_debug_entry *dma_entry_alloc(void)
+{
+ struct dma_debug_entry *entry = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (list_empty(&free_entries)) {
+ pr_err("DMA-API: debugging out of memory - disabling\n");
+ global_disable = true;
+ goto out;
+ }
+
+ entry = __dma_entry_alloc();
+
+#ifdef CONFIG_STACKTRACE
+ entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
+ entry->stacktrace.entries = entry->st_entries;
+ entry->stacktrace.skip = 2;
+ save_stack_trace(&entry->stacktrace);
+#endif
+
+out:
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ return entry;
+}
+
+static void dma_entry_free(struct dma_debug_entry *entry)
+{
+ unsigned long flags;
+
+ /*
+ * add to the beginning of the list - this way the entries are
+ * more likely to be cache-hot when they are reallocated.
+ */
+ spin_lock_irqsave(&free_entries_lock, flags);
+ list_add(&entry->list, &free_entries);
+ num_free_entries += 1;
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+}
+
+int dma_debug_resize_entries(u32 num_entries)
+{
+ int i, delta, ret = 0;
+ unsigned long flags;
+ struct dma_debug_entry *entry;
+ LIST_HEAD(tmp);
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ if (nr_total_entries < num_entries) {
+ delta = num_entries - nr_total_entries;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ for (i = 0; i < delta; i++) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ break;
+
+ list_add_tail(&entry->list, &tmp);
+ }
+
+ spin_lock_irqsave(&free_entries_lock, flags);
+
+ list_splice(&tmp, &free_entries);
+ nr_total_entries += i;
+ num_free_entries += i;
+ } else {
+ delta = nr_total_entries - num_entries;
+
+ for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+ entry = __dma_entry_alloc();
+ kfree(entry);
+ }
+
+ nr_total_entries -= i;
+ }
+
+ if (nr_total_entries != num_entries)
+ ret = 1;
+
+ spin_unlock_irqrestore(&free_entries_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
+/*
+ * DMA-API debugging init code
+ *
+ * The init code does two things:
+ * 1. Initialize core data structures
+ * 2. Preallocate a given number of dma_debug_entry structs
+ */
+
+static int prealloc_memory(u32 num_entries)
+{
+ struct dma_debug_entry *entry, *next_entry;
+ int i;
+
+ for (i = 0; i < num_entries; ++i) {
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ goto out_err;
+
+ list_add_tail(&entry->list, &free_entries);
+ }
+
+ num_free_entries = num_entries;
+ min_free_entries = num_entries;
+
+ pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
+
+ return 0;
+
+out_err:
+
+ list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+
+ return -ENOMEM;
+}
+
+static ssize_t filter_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ char buf[NAME_MAX_LEN + 1];
+ unsigned long flags;
+ int len;
+
+ if (!current_driver_name[0])
+ return 0;
+
+ /*
+ * We can't copy to userspace directly because current_driver_name can
+ * only be read under the driver_name_lock with irqs disabled. So
+ * create a temporary copy first.
+ */
+ read_lock_irqsave(&driver_name_lock, flags);
+ len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
+ read_unlock_irqrestore(&driver_name_lock, flags);
+
+ return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static ssize_t filter_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ char buf[NAME_MAX_LEN];
+ unsigned long flags;
+ size_t len;
+ int i;
+
+ /*
+ * We can't copy from userspace directly. Access to
+ * current_driver_name is protected with a write_lock with irqs
+ * disabled. Since copy_from_user() can fault and may sleep, we
+ * need to copy to a temporary buffer first.
+ */
+ len = min(count, (size_t)(NAME_MAX_LEN - 1));
+ if (copy_from_user(buf, userbuf, len))
+ return -EFAULT;
+
+ buf[len] = 0;
+
+ write_lock_irqsave(&driver_name_lock, flags);
+
+ /*
+ * Now handle the string we got from userspace very carefully.
+ * The rules are:
+ * - only use the first token we got
+ * - token delimiter is everything looking like a space
+ * character (' ', '\n', '\t' ...)
+ */
+ if (!isalnum(buf[0])) {
+ /*
+ * If the first character userspace gave us is not
+ * alphanumerical then assume the filter should be
+ * switched off.
+ */
+ if (current_driver_name[0])
+ pr_info("DMA-API: switching off dma-debug driver filter\n");
+ current_driver_name[0] = 0;
+ current_driver = NULL;
+ goto out_unlock;
+ }
+
+ /*
+ * Now parse out the first token and use it as the name for the
+ * driver to filter for.
+ */
+ for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
+ current_driver_name[i] = buf[i];
+ if (isspace(buf[i]) || buf[i] == 0)
+ break;
+ }
+ current_driver_name[i] = 0;
+ current_driver = NULL;
+
+ pr_info("DMA-API: enable driver filter for driver [%s]\n",
+ current_driver_name);
+
+out_unlock:
+ write_unlock_irqrestore(&driver_name_lock, flags);
+
+ return count;
+}
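+
+/*
+ * Usage sketch for the file implemented above (assuming debugfs is
+ * mounted at /sys/kernel/debug; "e1000e" is just an example name):
+ *
+ *    echo e1000e > /sys/kernel/debug/dma-api/driver_filter  # filter on
+ *    echo ""     > /sys/kernel/debug/dma-api/driver_filter  # filter off
+ *
+ * Any non-alphanumeric first character, such as the bare newline from
+ * the second echo, switches the filter off.
+ */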
+
+static const struct file_operations filter_fops = {
+ .read = filter_read,
+ .write = filter_write,
+};
+
+static int dma_debug_fs_init(void)
+{
+ dma_debug_dent = debugfs_create_dir("dma-api", NULL);
+ if (!dma_debug_dent) {
+ pr_err("DMA-API: can not create debugfs directory\n");
+ return -ENOMEM;
+ }
+
+ global_disable_dent = debugfs_create_bool("disabled", 0444,
+ dma_debug_dent,
+ (u32 *)&global_disable);
+ if (!global_disable_dent)
+ goto out_err;
+
+ error_count_dent = debugfs_create_u32("error_count", 0444,
+ dma_debug_dent, &error_count);
+ if (!error_count_dent)
+ goto out_err;
+
+ show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
+ dma_debug_dent,
+ &show_all_errors);
+ if (!show_all_errors_dent)
+ goto out_err;
+
+ show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
+ dma_debug_dent,
+ &show_num_errors);
+ if (!show_num_errors_dent)
+ goto out_err;
+
+ num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
+ dma_debug_dent,
+ &num_free_entries);
+ if (!num_free_entries_dent)
+ goto out_err;
+
+ min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
+ dma_debug_dent,
+ &min_free_entries);
+ if (!min_free_entries_dent)
+ goto out_err;
+
+ filter_dent = debugfs_create_file("driver_filter", 0644,
+ dma_debug_dent, NULL, &filter_fops);
+ if (!filter_dent)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ debugfs_remove_recursive(dma_debug_dent);
+
+ return -ENOMEM;
+}
+
+static int device_dma_allocations(struct device *dev)
+{
+ struct dma_debug_entry *entry;
+ unsigned long flags;
+ int count = 0, i;
+
+ local_irq_save(flags);
+
+ for (i = 0; i < HASH_SIZE; ++i) {
+ spin_lock(&dma_entry_hash[i].lock);
+ list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
+ if (entry->dev == dev)
+ count += 1;
+ }
+ spin_unlock(&dma_entry_hash[i].lock);
+ }
+
+ local_irq_restore(flags);
+
+ return count;
+}
+
+static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct device *dev = data;
+ int count;
+
+ if (global_disable)
+ return 0;
+
+ switch (action) {
+ case BUS_NOTIFY_UNBOUND_DRIVER:
+ count = device_dma_allocations(dev);
+ if (count == 0)
+ break;
+ err_printk(dev, NULL, "DMA-API: device driver has pending "
+ "DMA allocations while released from device "
+ "[count=%d]\n", count);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+void dma_debug_add_bus(struct bus_type *bus)
+{
+ struct notifier_block *nb;
+
+ if (global_disable)
+ return;
+
+ nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+ if (nb == NULL) {
+ pr_err("dma_debug_add_bus: out of memory\n");
+ return;
+ }
+
+ nb->notifier_call = dma_debug_device_change;
+
+ bus_register_notifier(bus, nb);
+}
+
+/*
+ * Let the architectures decide how many entries should be preallocated.
+ */
+void dma_debug_init(u32 num_entries)
+{
+ int i;
+
+ if (global_disable)
+ return;
+
+ for (i = 0; i < HASH_SIZE; ++i) {
+ INIT_LIST_HEAD(&dma_entry_hash[i].list);
+ spin_lock_init(&dma_entry_hash[i].lock);
+ }
+
+ if (dma_debug_fs_init() != 0) {
+ pr_err("DMA-API: error creating debugfs entries - disabling\n");
+ global_disable = true;
+
+ return;
+ }
+
+ if (req_entries)
+ num_entries = req_entries;
+
+ if (prealloc_memory(num_entries) != 0) {
+ pr_err("DMA-API: debugging out of memory error - disabled\n");
+ global_disable = true;
+
+ return;
+ }
+
+ nr_total_entries = num_free_entries;
+
+ pr_info("DMA-API: debugging enabled by kernel config\n");
+}
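+
+/*
+ * Example call site (illustrative - the macro name and count are
+ * arch-specific, not defined here):
+ *
+ *    #define PREALLOC_DMA_DEBUG_ENTRIES 4096
+ *    dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
+ */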
+
+static __init int dma_debug_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (strncmp(str, "off", 3) == 0) {
+ pr_info("DMA-API: debugging disabled on kernel command line\n");
+ global_disable = true;
+ }
+
+ return 0;
+}
+
+static __init int dma_debug_entries_cmdline(char *str)
+{
+ int res;
+
+ if (!str)
+ return -EINVAL;
+
+ res = get_option(&str, &req_entries);
+
+ if (!res)
+ req_entries = 0;
+
+ return 0;
+}
+
+__setup("dma_debug=", dma_debug_cmdline);
+__setup("dma_debug_entries=", dma_debug_entries_cmdline);
+
+static void check_unmap(struct dma_debug_entry *ref)
+{
+ struct dma_debug_entry *entry;
+ struct hash_bucket *bucket;
+ unsigned long flags;
+
+ if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+ err_printk(ref->dev, NULL, "DMA-API: device driver tries "
+ "to free an invalid DMA memory address\n");
+ return;
+ }
+
+ bucket = get_hash_bucket(ref, &flags);
+ entry = hash_bucket_find(bucket, ref);
+
+ if (!entry) {
+ err_printk(ref->dev, NULL, "DMA-API: device driver tries "
+ "to free DMA memory it has not allocated "
+ "[device address=0x%016llx] [size=%llu bytes]\n",
+ ref->dev_addr, ref->size);
+ goto out;
+ }
+
+ if (ref->size != entry->size) {
+ err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ "DMA memory with different size "
+ "[device address=0x%016llx] [map size=%llu bytes] "
+ "[unmap size=%llu bytes]\n",
+ ref->dev_addr, entry->size, ref->size);
+ }
+
+ if (ref->type != entry->type) {
+ err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ "DMA memory with wrong function "
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped as %s] [unmapped as %s]\n",
+ ref->dev_addr, ref->size,
+ type2name[entry->type], type2name[ref->type]);
+ } else if ((entry->type == dma_debug_coherent) &&
+ (ref->paddr != entry->paddr)) {
+ err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ "DMA memory with different CPU address "
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[cpu alloc address=0x%016llx] "
+ "[cpu free address=0x%016llx]",
+ ref->dev_addr, ref->size,
+ (unsigned long long)entry->paddr,
+ (unsigned long long)ref->paddr);
+ }
+
+ if (ref->sg_call_ents && ref->type == dma_debug_sg &&
+ ref->sg_call_ents != entry->sg_call_ents) {
+ err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ "DMA sg list with different entry count "
+ "[map count=%d] [unmap count=%d]\n",
+ entry->sg_call_ents, ref->sg_call_ents);
+ }
+
+ /*
+ * This may not be a bug in reality - but most implementations of the
+ * DMA API don't handle this properly, so check for it here.
+ */
+ if (ref->direction != entry->direction) {
+ err_printk(ref->dev, entry, "DMA-API: device driver frees "
+ "DMA memory with different direction "
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped with %s] [unmapped with %s]\n",
+ ref->dev_addr, ref->size,
+ dir2name[entry->direction],
+ dir2name[ref->direction]);
+ }
+
+ hash_bucket_del(entry);
+ dma_entry_free(entry);
+
+out:
+ put_hash_bucket(bucket, &flags);
+}
+
+static void check_for_stack(struct device *dev, void *addr)
+{
+ if (object_is_on_stack(addr))
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from"
+ "stack [addr=%p]\n", addr);
+}
+
+static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
+{
+ unsigned long a1 = (unsigned long)addr;
+ unsigned long b1 = a1 + len;
+ unsigned long a2 = (unsigned long)start;
+ unsigned long b2 = (unsigned long)end;
+
+ return !(b1 <= a2 || a1 >= b2);
+}
+
+static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
+{
+ if (overlap(addr, len, _text, _etext) ||
+ overlap(addr, len, __start_rodata, __end_rodata))
+ err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
+}
+
+static void check_sync(struct device *dev,
+ struct dma_debug_entry *ref,
+ bool to_cpu)
+{
+ struct dma_debug_entry *entry;
+ struct hash_bucket *bucket;
+ unsigned long flags;
+
+ bucket = get_hash_bucket(ref, &flags);
+
+ entry = hash_bucket_find(bucket, ref);
+
+ if (!entry) {
+ err_printk(dev, NULL, "DMA-API: device driver tries "
+ "to sync DMA memory it has not allocated "
+ "[device address=0x%016llx] [size=%llu bytes]\n",
+ (unsigned long long)ref->dev_addr, ref->size);
+ goto out;
+ }
+
+ if (ref->size > entry->size) {
+ err_printk(dev, entry, "DMA-API: device driver syncs"
+ " DMA memory outside allocated range "
+ "[device address=0x%016llx] "
+ "[allocation size=%llu bytes] "
+ "[sync offset+size=%llu]\n",
+ entry->dev_addr, entry->size,
+ ref->size);
+ }
+
+ if (entry->direction == DMA_BIDIRECTIONAL)
+ goto out;
+
+ if (ref->direction != entry->direction) {
+ err_printk(dev, entry, "DMA-API: device driver syncs "
+ "DMA memory with different direction "
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped with %s] [synced with %s]\n",
+ (unsigned long long)ref->dev_addr, entry->size,
+ dir2name[entry->direction],
+ dir2name[ref->direction]);
+ }
+
+ if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
+ !(ref->direction == DMA_TO_DEVICE))
+ err_printk(dev, entry, "DMA-API: device driver syncs "
+ "device read-only DMA memory for cpu "
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped with %s] [synced with %s]\n",
+ (unsigned long long)ref->dev_addr, entry->size,
+ dir2name[entry->direction],
+ dir2name[ref->direction]);
+
+ if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
+ !(ref->direction == DMA_FROM_DEVICE))
+ err_printk(dev, entry, "DMA-API: device driver syncs "
+ "device write-only DMA memory to device "
+ "[device address=0x%016llx] [size=%llu bytes] "
+ "[mapped with %s] [synced with %s]\n",
+ (unsigned long long)ref->dev_addr, entry->size,
+ dir2name[entry->direction],
+ dir2name[ref->direction]);
+
+out:
+ put_hash_bucket(bucket, &flags);
+}
+
+void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
+ size_t size, int direction, dma_addr_t dma_addr,
+ bool map_single)
+{
+ struct dma_debug_entry *entry;
+
+ if (unlikely(global_disable))
+ return;
+
+ if (unlikely(dma_mapping_error(dev, dma_addr)))
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->dev = dev;
+ entry->type = dma_debug_page;
+ entry->paddr = page_to_phys(page) + offset;
+ entry->dev_addr = dma_addr;
+ entry->size = size;
+ entry->direction = direction;
+
+ if (map_single)
+ entry->type = dma_debug_single;
+
+ if (!PageHighMem(page)) {
+ void *addr = page_address(page) + offset;
+
+ check_for_stack(dev, addr);
+ check_for_illegal_area(dev, addr, size);
+ }
+
+ add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_map_page);
+
+void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, int direction, bool map_single)
+{
+ struct dma_debug_entry ref = {
+ .type = dma_debug_page,
+ .dev = dev,
+ .dev_addr = addr,
+ .size = size,
+ .direction = direction,
+ };
+
+ if (unlikely(global_disable))
+ return;
+
+ if (map_single)
+ ref.type = dma_debug_single;
+
+ check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_unmap_page);
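+
+/*
+ * Illustrative pairing (driver-side code, not part of this file):
+ *
+ *    dma_addr_t handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
+ *    ...
+ *    dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
+ *
+ * The map call reaches debug_dma_map_page() with map_single == true,
+ * the unmap call reaches debug_dma_unmap_page(), and check_unmap()
+ * compares size, type and direction against the stored entry.
+ */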
+
+void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int mapped_ents, int direction)
+{
+ struct dma_debug_entry *entry;
+ struct scatterlist *s;
+ int i;
+
+ if (unlikely(global_disable))
+ return;
+
+ for_each_sg(sg, s, mapped_ents, i) {
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->type = dma_debug_sg;
+ entry->dev = dev;
+ entry->paddr = sg_phys(s);
+ entry->size = sg_dma_len(s);
+ entry->dev_addr = sg_dma_address(s);
+ entry->direction = direction;
+ entry->sg_call_ents = nents;
+ entry->sg_mapped_ents = mapped_ents;
+
+ if (!PageHighMem(sg_page(s))) {
+ check_for_stack(dev, sg_virt(s));
+ check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
+ }
+
+ add_dma_entry(entry);
+ }
+}
+EXPORT_SYMBOL(debug_dma_map_sg);
+
+static int get_nr_mapped_entries(struct device *dev,
+ struct dma_debug_entry *ref)
+{
+ struct dma_debug_entry *entry;
+ struct hash_bucket *bucket;
+ unsigned long flags;
+ int mapped_ents;
+
+ bucket = get_hash_bucket(ref, &flags);
+ entry = hash_bucket_find(bucket, ref);
+ mapped_ents = 0;
+
+ if (entry)
+ mapped_ents = entry->sg_mapped_ents;
+ put_hash_bucket(bucket, &flags);
+
+ return mapped_ents;
+}
+
+void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir)
+{
+ struct scatterlist *s;
+ int mapped_ents = 0, i;
+
+ if (unlikely(global_disable))
+ return;
+
+ for_each_sg(sglist, s, nelems, i) {
+
+ struct dma_debug_entry ref = {
+ .type = dma_debug_sg,
+ .dev = dev,
+ .paddr = sg_phys(s),
+ .dev_addr = sg_dma_address(s),
+ .size = sg_dma_len(s),
+ .direction = dir,
+ .sg_call_ents = nelems,
+ };
+
+ if (mapped_ents && i >= mapped_ents)
+ break;
+
+ if (!i)
+ mapped_ents = get_nr_mapped_entries(dev, &ref);
+
+ check_unmap(&ref);
+ }
+}
+EXPORT_SYMBOL(debug_dma_unmap_sg);
+
+void debug_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt)
+{
+ struct dma_debug_entry *entry;
+
+ if (unlikely(global_disable))
+ return;
+
+ if (unlikely(virt == NULL))
+ return;
+
+ entry = dma_entry_alloc();
+ if (!entry)
+ return;
+
+ entry->type = dma_debug_coherent;
+ entry->dev = dev;
+ entry->paddr = virt_to_phys(virt);
+ entry->size = size;
+ entry->dev_addr = dma_addr;
+ entry->direction = DMA_BIDIRECTIONAL;
+
+ add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_alloc_coherent);
+
+void debug_dma_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr)
+{
+ struct dma_debug_entry ref = {
+ .type = dma_debug_coherent,
+ .dev = dev,
+ .paddr = virt_to_phys(virt),
+ .dev_addr = addr,
+ .size = size,
+ .direction = DMA_BIDIRECTIONAL,
+ };
+
+ if (unlikely(global_disable))
+ return;
+
+ check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_free_coherent);
+
+void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+ struct dma_debug_entry ref;
+
+ if (unlikely(global_disable))
+ return;
+
+ ref.type = dma_debug_single;
+ ref.dev = dev;
+ ref.dev_addr = dma_handle;
+ ref.size = size;
+ ref.direction = direction;
+ ref.sg_call_ents = 0;
+
+ check_sync(dev, &ref, true);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
+
+void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ int direction)
+{
+ struct dma_debug_entry ref;
+
+ if (unlikely(global_disable))
+ return;
+
+ ref.type = dma_debug_single;
+ ref.dev = dev;
+ ref.dev_addr = dma_handle;
+ ref.size = size;
+ ref.direction = direction;
+ ref.sg_call_ents = 0;
+
+ check_sync(dev, &ref, false);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_for_device);
+
+void debug_dma_sync_single_range_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset, size_t size,
+ int direction)
+{
+ struct dma_debug_entry ref;
+
+ if (unlikely(global_disable))
+ return;
+
+ ref.type = dma_debug_single;
+ ref.dev = dev;
+ ref.dev_addr = dma_handle;
+ ref.size = offset + size;
+ ref.direction = direction;
+ ref.sg_call_ents = 0;
+
+ check_sync(dev, &ref, true);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
+
+void debug_dma_sync_single_range_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ unsigned long offset,
+ size_t size, int direction)
+{
+ struct dma_debug_entry ref;
+
+ if (unlikely(global_disable))
+ return;
+
+ ref.type = dma_debug_single;
+ ref.dev = dev;
+ ref.dev_addr = dma_handle;
+ ref.size = offset + size;
+ ref.direction = direction;
+ ref.sg_call_ents = 0;
+
+ check_sync(dev, &ref, false);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
+
+void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction)
+{
+ struct scatterlist *s;
+ int mapped_ents = 0, i;
+
+ if (unlikely(global_disable))
+ return;
+
+ for_each_sg(sg, s, nelems, i) {
+
+ struct dma_debug_entry ref = {
+ .type = dma_debug_sg,
+ .dev = dev,
+ .paddr = sg_phys(s),
+ .dev_addr = sg_dma_address(s),
+ .size = sg_dma_len(s),
+ .direction = direction,
+ .sg_call_ents = nelems,
+ };
+
+ if (!i)
+ mapped_ents = get_nr_mapped_entries(dev, &ref);
+
+ if (i >= mapped_ents)
+ break;
+
+ check_sync(dev, &ref, true);
+ }
+}
+EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
+
+void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nelems, int direction)
+{
+ struct scatterlist *s;
+ int mapped_ents = 0, i;
+
+ if (unlikely(global_disable))
+ return;
+
+ for_each_sg(sg, s, nelems, i) {
+
+ struct dma_debug_entry ref = {
+ .type = dma_debug_sg,
+ .dev = dev,
+ .paddr = sg_phys(s),
+ .dev_addr = sg_dma_address(s),
+ .size = sg_dma_len(s),
+ .direction = direction,
+ .sg_call_ents = nelems,
+ };
+ if (!i)
+ mapped_ents = get_nr_mapped_entries(dev, &ref);
+
+ if (i >= mapped_ents)
+ break;
+
+ check_sync(dev, &ref, false);
+ }
+}
+EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
+
+static int __init dma_debug_driver_setup(char *str)
+{
+ int i;
+
+ for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
+ current_driver_name[i] = *str;
+ if (*str == 0)
+ break;
+ }
+
+ if (current_driver_name[0])
+ pr_info("DMA-API: enable driver filter for driver [%s]\n",
+ current_driver_name);
+
+ return 1;
+}
+__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
new file mode 100644
index 00000000..53bff4c8
--- /dev/null
+++ b/lib/dump_stack.c
@@ -0,0 +1,15 @@
+/*
+ * Provide a default dump_stack() function for architectures
+ * which don't implement their own.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+void dump_stack(void)
+{
+ printk(KERN_NOTICE
+ "This architecture does not implement dump_stack()\n");
+}
+
+EXPORT_SYMBOL(dump_stack);
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
new file mode 100644
index 00000000..02afc253
--- /dev/null
+++ b/lib/dynamic_debug.c
@@ -0,0 +1,770 @@
+/*
+ * lib/dynamic_debug.c
+ *
+ * make pr_debug()/dev_dbg() calls runtime configurable based upon their
+ * source module.
+ *
+ * Copyright (C) 2008 Jason Baron <jbaron@redhat.com>
+ * By Greg Banks <gnb@melbourne.sgi.com>
+ * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kallsyms.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/list.h>
+#include <linux/sysctl.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/dynamic_debug.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+extern struct _ddebug __start___verbose[];
+extern struct _ddebug __stop___verbose[];
+
+/* dynamic_debug_enabled and dynamic_debug_enabled2 are bitmasks in which
+ * bit n is set to 1 if any modname hashes into bucket n, 0 otherwise.  They
+ * use independent hash functions, to reduce the chance of false positives.
+ */
+long long dynamic_debug_enabled;
+EXPORT_SYMBOL_GPL(dynamic_debug_enabled);
+long long dynamic_debug_enabled2;
+EXPORT_SYMBOL_GPL(dynamic_debug_enabled2);
+
+struct ddebug_table {
+ struct list_head link;
+ char *mod_name;
+ unsigned int num_ddebugs;
+ unsigned int num_enabled;
+ struct _ddebug *ddebugs;
+};
+
+struct ddebug_query {
+ const char *filename;
+ const char *module;
+ const char *function;
+ const char *format;
+ unsigned int first_lineno, last_lineno;
+};
+
+struct ddebug_iter {
+ struct ddebug_table *table;
+ unsigned int idx;
+};
+
+static DEFINE_MUTEX(ddebug_lock);
+static LIST_HEAD(ddebug_tables);
+static int verbose = 0;
+
+/* Return the last part of a pathname */
+static inline const char *basename(const char *path)
+{
+ const char *tail = strrchr(path, '/');
+ return tail ? tail+1 : path;
+}
+
+/* format a string into buf[] which describes the _ddebug's flags */
+static char *ddebug_describe_flags(struct _ddebug *dp, char *buf,
+ size_t maxlen)
+{
+ char *p = buf;
+
+ BUG_ON(maxlen < 4);
+ if (dp->flags & _DPRINTK_FLAGS_PRINT)
+ *p++ = 'p';
+ if (p == buf)
+ *p++ = '-';
+ *p = '\0';
+
+ return buf;
+}
+
+/*
+ * must be called with ddebug_lock held
+ */
+
+static int disabled_hash(char hash, bool first_table)
+{
+ struct ddebug_table *dt;
+ char table_hash_value;
+
+ list_for_each_entry(dt, &ddebug_tables, link) {
+ if (first_table)
+ table_hash_value = dt->ddebugs->primary_hash;
+ else
+ table_hash_value = dt->ddebugs->secondary_hash;
+ if (dt->num_enabled && (hash == table_hash_value))
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Search the tables for _ddebug's which match the given
+ * `query' and apply the `flags' and `mask' to them. Tells
+ * the user which ddebug's were changed, or whether none
+ * were matched.
+ */
+static void ddebug_change(const struct ddebug_query *query,
+ unsigned int flags, unsigned int mask)
+{
+ int i;
+ struct ddebug_table *dt;
+ unsigned int newflags;
+ unsigned int nfound = 0;
+ char flagbuf[8];
+
+ /* search for matching ddebugs */
+ mutex_lock(&ddebug_lock);
+ list_for_each_entry(dt, &ddebug_tables, link) {
+
+ /* match against the module name */
+ if (query->module != NULL &&
+ strcmp(query->module, dt->mod_name))
+ continue;
+
+ for (i = 0 ; i < dt->num_ddebugs ; i++) {
+ struct _ddebug *dp = &dt->ddebugs[i];
+
+ /* match against the source filename */
+ if (query->filename != NULL &&
+ strcmp(query->filename, dp->filename) &&
+ strcmp(query->filename, basename(dp->filename)))
+ continue;
+
+ /* match against the function */
+ if (query->function != NULL &&
+ strcmp(query->function, dp->function))
+ continue;
+
+ /* match against the format */
+ if (query->format != NULL &&
+ strstr(dp->format, query->format) == NULL)
+ continue;
+
+ /* match against the line number range */
+ if (query->first_lineno &&
+ dp->lineno < query->first_lineno)
+ continue;
+ if (query->last_lineno &&
+ dp->lineno > query->last_lineno)
+ continue;
+
+ nfound++;
+
+ newflags = (dp->flags & mask) | flags;
+ if (newflags == dp->flags)
+ continue;
+
+ if (!newflags)
+ dt->num_enabled--;
+ else if (!dp->flags)
+ dt->num_enabled++;
+ dp->flags = newflags;
+ if (newflags) {
+ dynamic_debug_enabled |=
+ (1LL << dp->primary_hash);
+ dynamic_debug_enabled2 |=
+ (1LL << dp->secondary_hash);
+ } else {
+ if (disabled_hash(dp->primary_hash, true))
+ dynamic_debug_enabled &=
+ ~(1LL << dp->primary_hash);
+ if (disabled_hash(dp->secondary_hash, false))
+ dynamic_debug_enabled2 &=
+ ~(1LL << dp->secondary_hash);
+ }
+ if (verbose)
+ printk(KERN_INFO
+ "ddebug: changed %s:%d [%s]%s %s\n",
+ dp->filename, dp->lineno,
+ dt->mod_name, dp->function,
+ ddebug_describe_flags(dp, flagbuf,
+ sizeof(flagbuf)));
+ }
+ }
+ mutex_unlock(&ddebug_lock);
+
+ if (!nfound && verbose)
+ printk(KERN_INFO "ddebug: no matches for query\n");
+}
+
+/*
+ * Split the buffer `buf' into space-separated words.
+ * Handles simple " and ' quoting, i.e. without nested,
+ * embedded or escaped \". Return the number of words
+ * or <0 on error.
+ */
+static int ddebug_tokenize(char *buf, char *words[], int maxwords)
+{
+ int nwords = 0;
+
+ while (*buf) {
+ char *end;
+
+ /* Skip leading whitespace */
+ buf = skip_spaces(buf);
+ if (!*buf)
+ break; /* oh, it was trailing whitespace */
+
+ /* Run `end' over a word, either whitespace separated or quoted */
+ if (*buf == '"' || *buf == '\'') {
+ int quote = *buf++;
+ for (end = buf ; *end && *end != quote ; end++)
+ ;
+ if (!*end)
+ return -EINVAL; /* unclosed quote */
+ } else {
+ for (end = buf ; *end && !isspace(*end) ; end++)
+ ;
+ BUG_ON(end == buf);
+ }
+ /* Here `buf' is the start of the word, `end' is one past the end */
+
+ if (nwords == maxwords)
+ return -EINVAL; /* ran out of words[] before bytes */
+ if (*end)
+ *end++ = '\0'; /* terminate the word */
+ words[nwords++] = buf;
+ buf = end;
+ }
+
+ if (verbose) {
+ int i;
+ printk(KERN_INFO "%s: split into words:", __func__);
+ for (i = 0 ; i < nwords ; i++)
+ printk(" \"%s\"", words[i]);
+ printk("\n");
+ }
+
+ return nwords;
+}
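+
+/*
+ * Example (illustrative): the input
+ *
+ *    file svcsock.c line 1603 +p
+ *
+ * tokenizes into words[] = { "file", "svcsock.c", "line", "1603", "+p" }
+ * with nwords == 5; the last word is later handed to
+ * ddebug_parse_flags().
+ */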
+
+/*
+ * Parse a single line number. Note that the empty string ""
+ * is treated as a special case and converted to zero, which
+ * is later treated as a "don't care" value.
+ */
+static inline int parse_lineno(const char *str, unsigned int *val)
+{
+ char *end = NULL;
+ BUG_ON(str == NULL);
+ if (*str == '\0') {
+ *val = 0;
+ return 0;
+ }
+ *val = simple_strtoul(str, &end, 10);
+ return end == NULL || end == str || *end != '\0' ? -EINVAL : 0;
+}
+
+/*
+ * Undo octal escaping in a string, in place. This is useful to
+ * allow the user to express a query which matches a format
+ * containing embedded spaces.
+ */
+#define isodigit(c) ((c) >= '0' && (c) <= '7')
+static char *unescape(char *str)
+{
+ char *in = str;
+ char *out = str;
+
+ while (*in) {
+ if (*in == '\\') {
+ if (in[1] == '\\') {
+ *out++ = '\\';
+ in += 2;
+ continue;
+ } else if (in[1] == 't') {
+ *out++ = '\t';
+ in += 2;
+ continue;
+ } else if (in[1] == 'n') {
+ *out++ = '\n';
+ in += 2;
+ continue;
+ } else if (isodigit(in[1]) &&
+ isodigit(in[2]) &&
+ isodigit(in[3])) {
+ *out++ = ((in[1] - '0')<<6) |
+ ((in[2] - '0')<<3) |
+ (in[3] - '0');
+ in += 4;
+ continue;
+ }
+ }
+ *out++ = *in++;
+ }
+ *out = '\0';
+
+ return str;
+}
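+
+/*
+ * Example (illustrative): a query word written as "foo\040bar" is
+ * rewritten in place to "foo bar" (octal 040 == 0x20 == ' '), so the
+ * user can match a format string containing a space.
+ */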
+
+/*
+ * Parse words[] as a ddebug query specification, which is a series
+ * of (keyword, value) pairs chosen from these possibilities:
+ *
+ * func <function-name>
+ * file <full-pathname>
+ * file <base-filename>
+ * module <module-name>
+ * format <escaped-string-to-find-in-format>
+ * line <lineno>
+ * line <first-lineno>-<last-lineno> // where either may be empty
+ */
+static int ddebug_parse_query(char *words[], int nwords,
+ struct ddebug_query *query)
+{
+ unsigned int i;
+
+ /* check we have an even number of words */
+ if (nwords % 2 != 0)
+ return -EINVAL;
+ memset(query, 0, sizeof(*query));
+
+ for (i = 0 ; i < nwords ; i += 2) {
+ if (!strcmp(words[i], "func"))
+ query->function = words[i+1];
+ else if (!strcmp(words[i], "file"))
+ query->filename = words[i+1];
+ else if (!strcmp(words[i], "module"))
+ query->module = words[i+1];
+ else if (!strcmp(words[i], "format"))
+ query->format = unescape(words[i+1]);
+ else if (!strcmp(words[i], "line")) {
+ char *first = words[i+1];
+ char *last = strchr(first, '-');
+ if (last)
+ *last++ = '\0';
+ if (parse_lineno(first, &query->first_lineno) < 0)
+ return -EINVAL;
+ if (last != NULL) {
+ /* range <first>-<last> */
+ if (parse_lineno(last, &query->last_lineno) < 0)
+ return -EINVAL;
+ } else {
+ query->last_lineno = query->first_lineno;
+ }
+ } else {
+ if (verbose)
+ printk(KERN_ERR "%s: unknown keyword \"%s\"\n",
+ __func__, words[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (verbose)
+ printk(KERN_INFO "%s: q->function=\"%s\" q->filename=\"%s\" "
+ "q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n",
+ __func__, query->function, query->filename,
+ query->module, query->format, query->first_lineno,
+ query->last_lineno);
+
+ return 0;
+}
+
+/*
+ * Parse `str' as a flags specification, format [-+=][p]+.
+ * Sets up *maskp and *flagsp to be used when changing the
+ * flags fields of matched _ddebug's. Returns 0 on success
+ * or <0 on error.
+ */
+static int ddebug_parse_flags(const char *str, unsigned int *flagsp,
+ unsigned int *maskp)
+{
+ unsigned flags = 0;
+ int op = '=';
+
+ switch (*str) {
+ case '+':
+ case '-':
+ case '=':
+ op = *str++;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (verbose)
+ printk(KERN_INFO "%s: op='%c'\n", __func__, op);
+
+ for ( ; *str ; ++str) {
+ switch (*str) {
+ case 'p':
+ flags |= _DPRINTK_FLAGS_PRINT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ if (flags == 0)
+ return -EINVAL;
+ if (verbose)
+ printk(KERN_INFO "%s: flags=0x%x\n", __func__, flags);
+
+ /* calculate final *flagsp, *maskp according to mask and op */
+ switch (op) {
+ case '=':
+ *maskp = 0;
+ *flagsp = flags;
+ break;
+ case '+':
+ *maskp = ~0U;
+ *flagsp = flags;
+ break;
+ case '-':
+ *maskp = ~flags;
+ *flagsp = 0;
+ break;
+ }
+ if (verbose)
+ printk(KERN_INFO "%s: *flagsp=0x%x *maskp=0x%x\n",
+ __func__, *flagsp, *maskp);
+ return 0;
+}
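+
+/*
+ * Example (illustrative): "+p" parses to op == '+' and
+ * flags == _DPRINTK_FLAGS_PRINT, yielding *maskp == ~0U and
+ * *flagsp == _DPRINTK_FLAGS_PRINT - set the print flag, leave the
+ * rest untouched.  "-p" clears the same flag instead.
+ */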
+
+/*
+ * File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the
+ * command text from userspace, parses and executes it.
+ */
+static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf,
+ size_t len, loff_t *offp)
+{
+ unsigned int flags = 0, mask = 0;
+ struct ddebug_query query;
+#define MAXWORDS 9
+ int nwords;
+ char *words[MAXWORDS];
+ char tmpbuf[256];
+
+ if (len == 0)
+ return 0;
+ /* we don't check *offp -- multiple writes() are allowed */
+ if (len > sizeof(tmpbuf)-1)
+ return -E2BIG;
+ if (copy_from_user(tmpbuf, ubuf, len))
+ return -EFAULT;
+ tmpbuf[len] = '\0';
+ if (verbose)
+ printk(KERN_INFO "%s: read %d bytes from userspace\n",
+ __func__, (int)len);
+
+ nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS);
+ if (nwords <= 0)
+ return -EINVAL;
+ if (ddebug_parse_query(words, nwords-1, &query))
+ return -EINVAL;
+ if (ddebug_parse_flags(words[nwords-1], &flags, &mask))
+ return -EINVAL;
+
+ /* actually go and implement the change */
+ ddebug_change(&query, flags, mask);
+
+ *offp += len;
+ return len;
+}
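+
+/*
+ * Usage sketch (assuming debugfs is mounted at /sys/kernel/debug):
+ *
+ *    echo 'file svcsock.c line 1603 +p' \
+ *        > /sys/kernel/debug/dynamic_debug/control
+ *
+ * enables the debug statement at svcsock.c:1603; writing the same
+ * query with "-p" disables it again.
+ */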
+
+/*
+ * Set the iterator to point to the first _ddebug object
+ * and return a pointer to that first object. Returns
+ * NULL if there are no _ddebugs at all.
+ */
+static struct _ddebug *ddebug_iter_first(struct ddebug_iter *iter)
+{
+ if (list_empty(&ddebug_tables)) {
+ iter->table = NULL;
+ iter->idx = 0;
+ return NULL;
+ }
+ iter->table = list_entry(ddebug_tables.next,
+ struct ddebug_table, link);
+ iter->idx = 0;
+ return &iter->table->ddebugs[iter->idx];
+}
+
+/*
+ * Advance the iterator to point to the next _ddebug
+ * object from the one the iterator currently points at,
+ * and return a pointer to the new _ddebug. Returns
+ * NULL if the iterator has seen all the _ddebugs.
+ */
+static struct _ddebug *ddebug_iter_next(struct ddebug_iter *iter)
+{
+ if (iter->table == NULL)
+ return NULL;
+ if (++iter->idx == iter->table->num_ddebugs) {
+ /* iterate to next table */
+ iter->idx = 0;
+ if (list_is_last(&iter->table->link, &ddebug_tables)) {
+ iter->table = NULL;
+ return NULL;
+ }
+ iter->table = list_entry(iter->table->link.next,
+ struct ddebug_table, link);
+ }
+ return &iter->table->ddebugs[iter->idx];
+}
+
+/*
+ * Seq_ops start method. Called at the start of every
+ * read() call from userspace. Takes the ddebug_lock and
+ * seeks the seq_file's iterator to the given position.
+ */
+static void *ddebug_proc_start(struct seq_file *m, loff_t *pos)
+{
+ struct ddebug_iter *iter = m->private;
+ struct _ddebug *dp;
+ int n = *pos;
+
+ if (verbose)
+ printk(KERN_INFO "%s: called m=%p *pos=%lld\n",
+ __func__, m, (unsigned long long)*pos);
+
+ mutex_lock(&ddebug_lock);
+
+ if (!n)
+ return SEQ_START_TOKEN;
+ if (n < 0)
+ return NULL;
+ dp = ddebug_iter_first(iter);
+ while (dp != NULL && --n > 0)
+ dp = ddebug_iter_next(iter);
+ return dp;
+}
+
+/*
+ * Seq_ops next method. Called several times within a read()
+ * call from userspace, with ddebug_lock held. Walks to the
+ * next _ddebug object with a special case for the header line.
+ */
+static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct ddebug_iter *iter = m->private;
+ struct _ddebug *dp;
+
+ if (verbose)
+ printk(KERN_INFO "%s: called m=%p p=%p *pos=%lld\n",
+ __func__, m, p, (unsigned long long)*pos);
+
+ if (p == SEQ_START_TOKEN)
+ dp = ddebug_iter_first(iter);
+ else
+ dp = ddebug_iter_next(iter);
+ ++*pos;
+ return dp;
+}
+
+/*
+ * Seq_ops show method. Called several times within a read()
+ * call from userspace, with ddebug_lock held. Formats the
+ * current _ddebug as a single human-readable line, with a
+ * special case for the header line.
+ */
+static int ddebug_proc_show(struct seq_file *m, void *p)
+{
+ struct ddebug_iter *iter = m->private;
+ struct _ddebug *dp = p;
+ char flagsbuf[8];
+
+ if (verbose)
+ printk(KERN_INFO "%s: called m=%p p=%p\n",
+ __func__, m, p);
+
+ if (p == SEQ_START_TOKEN) {
+ seq_puts(m,
+ "# filename:lineno [module]function flags format\n");
+ return 0;
+ }
+
+ seq_printf(m, "%s:%u [%s]%s %s \"",
+ dp->filename, dp->lineno,
+ iter->table->mod_name, dp->function,
+ ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf)));
+ seq_escape(m, dp->format, "\t\r\n\"");
+ seq_puts(m, "\"\n");
+
+ return 0;
+}
+
+/*
+ * Seq_ops stop method. Called at the end of each read()
+ * call from userspace. Drops ddebug_lock.
+ */
+static void ddebug_proc_stop(struct seq_file *m, void *p)
+{
+ if (verbose)
+ printk(KERN_INFO "%s: called m=%p p=%p\n",
+ __func__, m, p);
+ mutex_unlock(&ddebug_lock);
+}
+
+static const struct seq_operations ddebug_proc_seqops = {
+ .start = ddebug_proc_start,
+ .next = ddebug_proc_next,
+ .show = ddebug_proc_show,
+ .stop = ddebug_proc_stop
+};
+
+/*
+ * File_ops->open method for <debugfs>/dynamic_debug/control. Does the seq_file
+ * setup dance, and also creates an iterator to walk the _ddebugs.
+ * Note that we create a seq_file always, even for O_WRONLY files
+ * where it's not needed, as doing so simplifies the ->release method.
+ */
+static int ddebug_proc_open(struct inode *inode, struct file *file)
+{
+ struct ddebug_iter *iter;
+ int err;
+
+ if (verbose)
+ printk(KERN_INFO "%s: called\n", __func__);
+
+ iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+ if (iter == NULL)
+ return -ENOMEM;
+
+ err = seq_open(file, &ddebug_proc_seqops);
+ if (err) {
+ kfree(iter);
+ return err;
+ }
+ ((struct seq_file *) file->private_data)->private = iter;
+ return 0;
+}
+
+static const struct file_operations ddebug_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = ddebug_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+ .write = ddebug_proc_write
+};
+
+/*
+ * Allocate a new ddebug_table for the given module
+ * and add it to the global list.
+ */
+int ddebug_add_module(struct _ddebug *tab, unsigned int n,
+ const char *name)
+{
+ struct ddebug_table *dt;
+ char *new_name;
+
+ dt = kzalloc(sizeof(*dt), GFP_KERNEL);
+ if (dt == NULL)
+ return -ENOMEM;
+ new_name = kstrdup(name, GFP_KERNEL);
+ if (new_name == NULL) {
+ kfree(dt);
+ return -ENOMEM;
+ }
+ dt->mod_name = new_name;
+ dt->num_ddebugs = n;
+ dt->num_enabled = 0;
+ dt->ddebugs = tab;
+
+ mutex_lock(&ddebug_lock);
+ list_add_tail(&dt->link, &ddebug_tables);
+ mutex_unlock(&ddebug_lock);
+
+ if (verbose)
+ printk(KERN_INFO "%u debug prints in module %s\n",
+ n, dt->mod_name);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ddebug_add_module);
+
+static void ddebug_table_free(struct ddebug_table *dt)
+{
+ list_del_init(&dt->link);
+ kfree(dt->mod_name);
+ kfree(dt);
+}
+
+/*
+ * Called in response to a module being unloaded. Removes
+ * any ddebug_table's which point at the module.
+ */
+int ddebug_remove_module(const char *mod_name)
+{
+ struct ddebug_table *dt, *nextdt;
+ int ret = -ENOENT;
+
+ if (verbose)
+ printk(KERN_INFO "%s: removing module \"%s\"\n",
+ __func__, mod_name);
+
+ mutex_lock(&ddebug_lock);
+ list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) {
+ if (!strcmp(dt->mod_name, mod_name)) {
+ ddebug_table_free(dt);
+ ret = 0;
+ }
+ }
+ mutex_unlock(&ddebug_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ddebug_remove_module);
+
+static void ddebug_remove_all_tables(void)
+{
+ mutex_lock(&ddebug_lock);
+ while (!list_empty(&ddebug_tables)) {
+ struct ddebug_table *dt = list_entry(ddebug_tables.next,
+ struct ddebug_table,
+ link);
+ ddebug_table_free(dt);
+ }
+ mutex_unlock(&ddebug_lock);
+}
+
+static int __init dynamic_debug_init(void)
+{
+ struct dentry *dir, *file;
+ struct _ddebug *iter, *iter_start;
+ const char *modname = NULL;
+ int ret = 0;
+ int n = 0;
+
+ dir = debugfs_create_dir("dynamic_debug", NULL);
+ if (!dir)
+ return -ENOMEM;
+ file = debugfs_create_file("control", 0644, dir, NULL,
+ &ddebug_proc_fops);
+ if (!file) {
+ debugfs_remove(dir);
+ return -ENOMEM;
+ }
+ if (__start___verbose != __stop___verbose) {
+ iter = __start___verbose;
+ modname = iter->modname;
+ iter_start = iter;
+ for (; iter < __stop___verbose; iter++) {
+ if (strcmp(modname, iter->modname)) {
+ ret = ddebug_add_module(iter_start, n, modname);
+ if (ret)
+ goto out_free;
+ n = 0;
+ modname = iter->modname;
+ iter_start = iter;
+ }
+ n++;
+ }
+ ret = ddebug_add_module(iter_start, n, modname);
+ }
+out_free:
+ if (ret) {
+ ddebug_remove_all_tables();
+ debugfs_remove(file);
+ debugfs_remove(dir);
+ }
+ return ret;
+}
+module_init(dynamic_debug_init);
diff --git a/lib/extable.c b/lib/extable.c
new file mode 100644
index 00000000..4cac81ec
--- /dev/null
+++ b/lib/extable.c
@@ -0,0 +1,93 @@
+/*
+ * Derived from arch/ppc/mm/extable.c and arch/i386/mm/extable.c.
+ *
+ * Copyright (C) 2004 Paul Mackerras, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/sort.h>
+#include <asm/uaccess.h>
+
+#ifndef ARCH_HAS_SORT_EXTABLE
+/*
+ * The exception table needs to be sorted so that the binary
+ * search that we use to find entries in it works properly.
+ * This is used both for the kernel exception table and for
+ * the exception tables of modules that get loaded.
+ */
+static int cmp_ex(const void *a, const void *b)
+{
+ const struct exception_table_entry *x = a, *y = b;
+
+ /* avoid overflow */
+ if (x->insn > y->insn)
+ return 1;
+ if (x->insn < y->insn)
+ return -1;
+ return 0;
+}
+
+void sort_extable(struct exception_table_entry *start,
+ struct exception_table_entry *finish)
+{
+ sort(start, finish - start, sizeof(struct exception_table_entry),
+ cmp_ex, NULL);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * If the exception table is sorted, any entries referring to the
+ * module init section will be at the beginning or the end.
+ */
+void trim_init_extable(struct module *m)
+{
+ /*trim the beginning*/
+ while (m->num_exentries && within_module_init(m->extable[0].insn, m)) {
+ m->extable++;
+ m->num_exentries--;
+ }
+ /*trim the end*/
+ while (m->num_exentries &&
+ within_module_init(m->extable[m->num_exentries-1].insn, m))
+ m->num_exentries--;
+}
+#endif /* CONFIG_MODULES */
+#endif /* !ARCH_HAS_SORT_EXTABLE */
+
+#ifndef ARCH_HAS_SEARCH_EXTABLE
+/*
+ * Search one exception table for an entry corresponding to the
+ * given instruction address, and return the address of the entry,
+ * or NULL if none is found.
+ * We use a binary search, and thus we assume that the table is
+ * already sorted.
+ */
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+ const struct exception_table_entry *last,
+ unsigned long value)
+{
+ while (first <= last) {
+ const struct exception_table_entry *mid;
+
+ mid = ((last - first) >> 1) + first;
+ /*
+ * careful, the distance between value and insn
+ * can be larger than LONG_MAX:
+ */
+ if (mid->insn < value)
+ first = mid + 1;
+ else if (mid->insn > value)
+ last = mid - 1;
+ else
+ return mid;
+ }
+ return NULL;
+}
+#endif
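+
+/*
+ * Worked example (illustrative): for a table with insn values
+ * {0x10, 0x20, 0x30} and value == 0x20, the first probe hits the
+ * middle entry and returns it; value == 0x28 narrows the range to
+ * empty and returns NULL, meaning no fixup exists for that address.
+ */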
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
new file mode 100644
index 00000000..7e65af70
--- /dev/null
+++ b/lib/fault-inject.c
@@ -0,0 +1,315 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/stacktrace.h>
+#include <linux/kallsyms.h>
+#include <linux/fault-inject.h>
+
+/*
+ * setup_fault_attr() is a helper function for various __setup handlers, so it
+ * returns 0 on error, because that is what __setup handlers do.
+ */
+int __init setup_fault_attr(struct fault_attr *attr, char *str)
+{
+ unsigned long probability;
+ unsigned long interval;
+ int times;
+ int space;
+
+ /* "<interval>,<probability>,<space>,<times>" */
+ if (sscanf(str, "%lu,%lu,%d,%d",
+ &interval, &probability, &space, &times) < 4) {
+ printk(KERN_WARNING
+ "FAULT_INJECTION: failed to parse arguments\n");
+ return 0;
+ }
+
+ attr->probability = probability;
+ attr->interval = interval;
+ atomic_set(&attr->times, times);
+ atomic_set(&attr->space, space);
+
+ return 1;
+}
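+
+/*
+ * Example boot parameter (illustrative), using the order parsed above,
+ * "<interval>,<probability>,<space>,<times>":
+ *
+ *    failslab=10,100,0,-1
+ *
+ * fails every 10th qualifying slab allocation with probability 100,
+ * applies no size-based exemption, and never runs out of failures
+ * (times == -1 means unlimited).
+ */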
+
+static void fail_dump(struct fault_attr *attr)
+{
+ if (attr->verbose > 0)
+ printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure\n");
+ if (attr->verbose > 1)
+ dump_stack();
+}
+
+#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0)
+
+static bool fail_task(struct fault_attr *attr, struct task_struct *task)
+{
+ return !in_interrupt() && task->make_it_fail;
+}
+
+#define MAX_STACK_TRACE_DEPTH 32
+
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
+
+static bool fail_stacktrace(struct fault_attr *attr)
+{
+ struct stack_trace trace;
+ int depth = attr->stacktrace_depth;
+ unsigned long entries[MAX_STACK_TRACE_DEPTH];
+ int n;
+ bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX);
+
+ if (depth == 0)
+ return found;
+
+ trace.nr_entries = 0;
+ trace.entries = entries;
+ trace.max_entries = depth;
+ trace.skip = 1;
+
+ save_stack_trace(&trace);
+ for (n = 0; n < trace.nr_entries; n++) {
+ if (attr->reject_start <= entries[n] &&
+ entries[n] < attr->reject_end)
+ return false;
+ if (attr->require_start <= entries[n] &&
+ entries[n] < attr->require_end)
+ found = true;
+ }
+ return found;
+}
+
+#else
+
+static inline bool fail_stacktrace(struct fault_attr *attr)
+{
+ return true;
+}
+
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
+
+/*
+ * This code is stolen from failmalloc-1.0
+ * http://www.nongnu.org/failmalloc/
+ */
+
+bool should_fail(struct fault_attr *attr, ssize_t size)
+{
+ if (attr->task_filter && !fail_task(attr, current))
+ return false;
+
+ if (atomic_read(&attr->times) == 0)
+ return false;
+
+ if (atomic_read(&attr->space) > size) {
+ atomic_sub(size, &attr->space);
+ return false;
+ }
+
+ if (attr->interval > 1) {
+ attr->count++;
+ if (attr->count % attr->interval)
+ return false;
+ }
+
+ if (attr->probability <= random32() % 100)
+ return false;
+
+ if (!fail_stacktrace(attr))
+ return false;
+
+ fail_dump(attr);
+
+ if (atomic_read(&attr->times) != -1)
+ atomic_dec_not_zero(&attr->times);
+
+ return true;
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int debugfs_ul_set(void *data, u64 val)
+{
+ *(unsigned long *)data = val;
+ return 0;
+}
+
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
+static int debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val)
+{
+ *(unsigned long *)data =
+ val < MAX_STACK_TRACE_DEPTH ?
+ val : MAX_STACK_TRACE_DEPTH;
+ return 0;
+}
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
+
+static int debugfs_ul_get(void *data, u64 *val)
+{
+ *val = *(unsigned long *)data;
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n");
+
+static struct dentry *debugfs_create_ul(const char *name, mode_t mode,
+ struct dentry *parent, unsigned long *value)
+{
+ return debugfs_create_file(name, mode, parent, value, &fops_ul);
+}
+
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
+DEFINE_SIMPLE_ATTRIBUTE(fops_ul_MAX_STACK_TRACE_DEPTH, debugfs_ul_get,
+ debugfs_ul_set_MAX_STACK_TRACE_DEPTH, "%llu\n");
+
+static struct dentry *debugfs_create_ul_MAX_STACK_TRACE_DEPTH(
+ const char *name, mode_t mode,
+ struct dentry *parent, unsigned long *value)
+{
+ return debugfs_create_file(name, mode, parent, value,
+ &fops_ul_MAX_STACK_TRACE_DEPTH);
+}
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
+
+static int debugfs_atomic_t_set(void *data, u64 val)
+{
+ atomic_set((atomic_t *)data, val);
+ return 0;
+}
+
+static int debugfs_atomic_t_get(void *data, u64 *val)
+{
+ *val = atomic_read((atomic_t *)data);
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get,
+ debugfs_atomic_t_set, "%lld\n");
+
+static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode,
+ struct dentry *parent, atomic_t *value)
+{
+ return debugfs_create_file(name, mode, parent, value, &fops_atomic_t);
+}
+
+void cleanup_fault_attr_dentries(struct fault_attr *attr)
+{
+ debugfs_remove(attr->dentries.probability_file);
+ attr->dentries.probability_file = NULL;
+
+ debugfs_remove(attr->dentries.interval_file);
+ attr->dentries.interval_file = NULL;
+
+ debugfs_remove(attr->dentries.times_file);
+ attr->dentries.times_file = NULL;
+
+ debugfs_remove(attr->dentries.space_file);
+ attr->dentries.space_file = NULL;
+
+ debugfs_remove(attr->dentries.verbose_file);
+ attr->dentries.verbose_file = NULL;
+
+ debugfs_remove(attr->dentries.task_filter_file);
+ attr->dentries.task_filter_file = NULL;
+
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
+
+ debugfs_remove(attr->dentries.stacktrace_depth_file);
+ attr->dentries.stacktrace_depth_file = NULL;
+
+ debugfs_remove(attr->dentries.require_start_file);
+ attr->dentries.require_start_file = NULL;
+
+ debugfs_remove(attr->dentries.require_end_file);
+ attr->dentries.require_end_file = NULL;
+
+ debugfs_remove(attr->dentries.reject_start_file);
+ attr->dentries.reject_start_file = NULL;
+
+ debugfs_remove(attr->dentries.reject_end_file);
+ attr->dentries.reject_end_file = NULL;
+
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
+
+ if (attr->dentries.dir)
+ WARN_ON(!simple_empty(attr->dentries.dir));
+
+ debugfs_remove(attr->dentries.dir);
+ attr->dentries.dir = NULL;
+}
+
+int init_fault_attr_dentries(struct fault_attr *attr, const char *name)
+{
+ mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+ struct dentry *dir;
+
+ memset(&attr->dentries, 0, sizeof(attr->dentries));
+
+ dir = debugfs_create_dir(name, NULL);
+ if (!dir)
+ goto fail;
+ attr->dentries.dir = dir;
+
+ attr->dentries.probability_file =
+ debugfs_create_ul("probability", mode, dir, &attr->probability);
+
+ attr->dentries.interval_file =
+ debugfs_create_ul("interval", mode, dir, &attr->interval);
+
+ attr->dentries.times_file =
+ debugfs_create_atomic_t("times", mode, dir, &attr->times);
+
+ attr->dentries.space_file =
+ debugfs_create_atomic_t("space", mode, dir, &attr->space);
+
+ attr->dentries.verbose_file =
+ debugfs_create_ul("verbose", mode, dir, &attr->verbose);
+
+ attr->dentries.task_filter_file = debugfs_create_bool("task-filter",
+ mode, dir, &attr->task_filter);
+
+ if (!attr->dentries.probability_file || !attr->dentries.interval_file ||
+ !attr->dentries.times_file || !attr->dentries.space_file ||
+ !attr->dentries.verbose_file || !attr->dentries.task_filter_file)
+ goto fail;
+
+#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
+
+ attr->dentries.stacktrace_depth_file =
+ debugfs_create_ul_MAX_STACK_TRACE_DEPTH(
+ "stacktrace-depth", mode, dir, &attr->stacktrace_depth);
+
+ attr->dentries.require_start_file =
+ debugfs_create_ul("require-start", mode, dir, &attr->require_start);
+
+ attr->dentries.require_end_file =
+ debugfs_create_ul("require-end", mode, dir, &attr->require_end);
+
+ attr->dentries.reject_start_file =
+ debugfs_create_ul("reject-start", mode, dir, &attr->reject_start);
+
+ attr->dentries.reject_end_file =
+ debugfs_create_ul("reject-end", mode, dir, &attr->reject_end);
+
+ if (!attr->dentries.stacktrace_depth_file ||
+ !attr->dentries.require_start_file ||
+ !attr->dentries.require_end_file ||
+ !attr->dentries.reject_start_file ||
+ !attr->dentries.reject_end_file)
+ goto fail;
+
+#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
+
+ return 0;
+fail:
+ cleanup_fault_attr_dentries(attr);
+ return -ENOMEM;
+}
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c
new file mode 100644
index 00000000..5d202e36
--- /dev/null
+++ b/lib/find_last_bit.c
@@ -0,0 +1,45 @@
+/* find_last_bit.c: fallback find_last_bit implementation
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long words;
+ unsigned long tmp;
+
+ /* Start at final word. */
+ words = size / BITS_PER_LONG;
+
+ /* Partial final word? */
+ if (size & (BITS_PER_LONG-1)) {
+ tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+ - (size & (BITS_PER_LONG-1)))));
+ if (tmp)
+ goto found;
+ }
+
+ while (words) {
+ tmp = addr[--words];
+ if (tmp) {
+found:
+ return words * BITS_PER_LONG + __fls(tmp);
+ }
+ }
+
+ /* Not found */
+ return size;
+}
+EXPORT_SYMBOL(find_last_bit);
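+
+/*
+ * Worked example (illustrative): for a single word containing 0x24
+ * (bits 2 and 5 set) and size == 8, the partial-word mask keeps bits
+ * 0..7, tmp is non-zero and __fls(tmp) == 5, so find_last_bit()
+ * returns 5.  With no bits set it would return size (8) as the
+ * "not found" value.
+ */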
diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c
new file mode 100644
index 00000000..24c59ded
--- /dev/null
+++ b/lib/find_next_bit.c
@@ -0,0 +1,275 @@
+/* find_next_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+#ifdef CONFIG_GENERIC_FIND_NEXT_BIT
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(find_next_bit);
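+
+/*
+ * Worked example (illustrative): for a single word containing 0xa
+ * (bits 1 and 3 set), size == 4 and offset == 2, the word is masked
+ * with ~0UL << 2, leaving only bit 3, so find_next_bit() returns 3.
+ * With offset == 4 it returns size (4), the conventional "no further
+ * bit set" result.
+ */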
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + ffz(tmp);
+}
+EXPORT_SYMBOL(find_next_zero_bit);
+#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */
+
+#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+
+ tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found:
+ return result + __ffs(tmp);
+}
+EXPORT_SYMBOL(find_first_bit);
+
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ const unsigned long *p = addr;
+ unsigned long result = 0;
+ unsigned long tmp;
+
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++)))
+ goto found;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+
+ tmp = (*p) | (~0UL << size);
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found:
+ return result + ffz(tmp);
+}
+EXPORT_SYMBOL(find_first_zero_bit);
+#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+
+#ifdef __BIG_ENDIAN
+
+/* include/linux/byteorder does not support "unsigned long" type */
+static inline unsigned long ext2_swabp(const unsigned long * x)
+{
+#if BITS_PER_LONG == 64
+ return (unsigned long) __swab64p((u64 *) x);
+#elif BITS_PER_LONG == 32
+ return (unsigned long) __swab32p((u32 *) x);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+/* include/linux/byteorder doesn't support "unsigned long" type */
+static inline unsigned long ext2_swab(const unsigned long y)
+{
+#if BITS_PER_LONG == 64
+ return (unsigned long) __swab64((u64) y);
+#elif BITS_PER_LONG == 32
+ return (unsigned long) __swab32((u32) y);
+#else
+#error BITS_PER_LONG not defined
+#endif
+}
+
+unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset &= (BITS_PER_LONG - 1UL);
+ if (offset) {
+ tmp = ext2_swabp(p++);
+ tmp |= (~0UL >> (BITS_PER_LONG - offset));
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+
+ while (size & ~(BITS_PER_LONG - 1)) {
+ if (~(tmp = *(p++)))
+ goto found_middle_swap;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = ext2_swabp(p);
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. Skip ffz */
+found_middle:
+ return result + ffz(tmp);
+
+found_middle_swap:
+ return result + ffz(ext2_swab(tmp));
+}
+
+EXPORT_SYMBOL(generic_find_next_zero_le_bit);
+
+unsigned long generic_find_next_le_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG - 1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset &= (BITS_PER_LONG - 1UL);
+ if (offset) {
+ tmp = ext2_swabp(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+
+ while (size & ~(BITS_PER_LONG - 1)) {
+ tmp = *(p++);
+ if (tmp)
+ goto found_middle_swap;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = ext2_swabp(p);
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+
+found_middle_swap:
+ return result + __ffs(ext2_swab(tmp));
+}
+EXPORT_SYMBOL(generic_find_next_le_bit);
+#endif /* __BIG_ENDIAN */
diff --git a/lib/flex_array.c b/lib/flex_array.c
new file mode 100644
index 00000000..77a6fea7
--- /dev/null
+++ b/lib/flex_array.c
@@ -0,0 +1,350 @@
+/*
+ * Flexible array managed in PAGE_SIZE parts
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Dave Hansen <dave@linux.vnet.ibm.com>
+ */
+
+#include <linux/flex_array.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+
+struct flex_array_part {
+ char elements[FLEX_ARRAY_PART_SIZE];
+};
+
+/*
+ * If a user requests an allocation which is small
+ * enough, we may simply use the space in the
+ * flex_array->parts[] array to store the user
+ * data.
+ */
+static inline int elements_fit_in_base(struct flex_array *fa)
+{
+ int data_size = fa->element_size * fa->total_nr_elements;
+ if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
+ return 1;
+ return 0;
+}
+
+/**
+ * flex_array_alloc - allocate a new flexible array
+ * @element_size: the size of individual elements in the array
+ * @total: total number of elements that this should hold
+ * @flags: page allocation flags to use for base array
+ *
+ * Note: all locking must be provided by the caller.
+ *
+ * @total is used to size internal structures. If the user ever
+ * accesses any array index >= @total, it will produce errors.
+ *
+ * The maximum number of elements is defined as: the number of
+ * elements that can be stored in a page times the number of
+ * page pointers that we can fit in the base structure or (using
+ * integer math):
+ *
+ * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
+ *
+ * Here's a table showing example capacities. Note that the maximum
+ * index that the get/put() functions can handle is just nr_objects-1.
+ * This basically means that you get 4MB of storage on 32-bit and 2MB on
+ * 64-bit.
+ *
+ *
+ * Element size | Objects | Objects |
+ * PAGE_SIZE=4k | 32-bit | 64-bit |
+ * ---------------------------------|
+ * 1 bytes | 4186112 | 2093056 |
+ * 2 bytes | 2093056 | 1046528 |
+ * 3 bytes | 1395030 | 697515 |
+ * 4 bytes | 1046528 | 523264 |
+ * 32 bytes | 130816 | 65408 |
+ * 33 bytes | 126728 | 63364 |
+ * 2048 bytes | 2044 | 1022 |
+ * 2049 bytes | 1022 | 511 |
+ * void * | 1046528 | 261632 |
+ *
+ * Since 64-bit pointers are twice the size, we lose half the
+ * capacity in the base structure. Also note that no effort is made
+ * to efficiently pack objects across page boundaries.
+ */
+struct flex_array *flex_array_alloc(int element_size, unsigned int total,
+ gfp_t flags)
+{
+ struct flex_array *ret;
+ int max_size = FLEX_ARRAY_NR_BASE_PTRS *
+ FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
+
+ /* max_size will end up 0 if element_size > PAGE_SIZE */
+ if (total > max_size)
+ return NULL;
+ ret = kzalloc(sizeof(struct flex_array), flags);
+ if (!ret)
+ return NULL;
+ ret->element_size = element_size;
+ ret->total_nr_elements = total;
+ if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
+ memset(&ret->parts[0], FLEX_ARRAY_FREE,
+ FLEX_ARRAY_BASE_BYTES_LEFT);
+ return ret;
+}
+
+static int fa_element_to_part_nr(struct flex_array *fa,
+ unsigned int element_nr)
+{
+ return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
+}
+
+/**
+ * flex_array_free_parts - just free the second-level pages
+ * @fa: the flex array from which to free parts
+ *
+ * This is to be used in cases where the base 'struct flex_array'
+ * has been statically allocated and should not be freed.
+ */
+void flex_array_free_parts(struct flex_array *fa)
+{
+ int part_nr;
+
+ if (elements_fit_in_base(fa))
+ return;
+ for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
+ kfree(fa->parts[part_nr]);
+}
+
+void flex_array_free(struct flex_array *fa)
+{
+ flex_array_free_parts(fa);
+ kfree(fa);
+}
+
+static unsigned int index_inside_part(struct flex_array *fa,
+ unsigned int element_nr)
+{
+ unsigned int part_offset;
+
+ part_offset = element_nr %
+ FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size);
+ return part_offset * fa->element_size;
+}
+
+static struct flex_array_part *
+__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
+{
+ struct flex_array_part *part = fa->parts[part_nr];
+ if (!part) {
+ part = kmalloc(sizeof(struct flex_array_part), flags);
+ if (!part)
+ return NULL;
+ if (!(flags & __GFP_ZERO))
+ memset(part, FLEX_ARRAY_FREE,
+ sizeof(struct flex_array_part));
+ fa->parts[part_nr] = part;
+ }
+ return part;
+}
+
+/**
+ * flex_array_put - copy data into the array at @element_nr
+ * @fa: the flex array to copy data into
+ * @element_nr: index of the position in which to insert
+ * the new element.
+ * @src: address of data to copy into the array
+ * @flags: page allocation flags to use for array expansion
+ *
+ * Note that this *copies* the contents of @src into
+ * the array. If you are trying to store an array of
+ * pointers, make sure to pass in &ptr instead of ptr.
+ * You may instead wish to use the flex_array_put_ptr()
+ * helper function.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
+ gfp_t flags)
+{
+ int part_nr = fa_element_to_part_nr(fa, element_nr);
+ struct flex_array_part *part;
+ void *dst;
+
+ if (element_nr >= fa->total_nr_elements)
+ return -ENOSPC;
+ if (elements_fit_in_base(fa))
+ part = (struct flex_array_part *)&fa->parts[0];
+ else {
+ part = __fa_get_part(fa, part_nr, flags);
+ if (!part)
+ return -ENOMEM;
+ }
+ dst = &part->elements[index_inside_part(fa, element_nr)];
+ memcpy(dst, src, fa->element_size);
+ return 0;
+}
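+
+/*
+ * A brief usage sketch (illustrative only; "struct foo", "i" and
+ * "nr_elements" are hypothetical names). Remember that put() copies
+ * the element in and get() returns a pointer to the stored copy:
+ *
+ * struct flex_array *fa;
+ * struct foo f, *p;
+ *
+ * fa = flex_array_alloc(sizeof(struct foo), nr_elements, GFP_KERNEL);
+ * if (!fa)
+ * return -ENOMEM;
+ * if (flex_array_put(fa, i, &f, GFP_KERNEL))
+ * goto out;
+ * p = flex_array_get(fa, i);
+ */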
+
+/**
+ * flex_array_clear - clear element in array at @element_nr
+ * @fa: the flex array of the element.
+ * @element_nr: index of the position to clear.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
+{
+ int part_nr = fa_element_to_part_nr(fa, element_nr);
+ struct flex_array_part *part;
+ void *dst;
+
+ if (element_nr >= fa->total_nr_elements)
+ return -ENOSPC;
+ if (elements_fit_in_base(fa))
+ part = (struct flex_array_part *)&fa->parts[0];
+ else {
+ part = fa->parts[part_nr];
+ if (!part)
+ return -EINVAL;
+ }
+ dst = &part->elements[index_inside_part(fa, element_nr)];
+ memset(dst, FLEX_ARRAY_FREE, fa->element_size);
+ return 0;
+}
+
+/**
+ * flex_array_prealloc - guarantee that array space exists
+ * @fa: the flex array for which to preallocate parts
+ * @start: index of first array element for which space is allocated
+ * @end: index of last (inclusive) element for which space is allocated
+ * @flags: page allocation flags
+ *
+ * This will guarantee that no future calls to flex_array_put()
+ * will allocate memory. It can be used if you are expecting to
+ * be holding a lock or in some atomic context while writing
+ * data into the array.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_prealloc(struct flex_array *fa, unsigned int start,
+ unsigned int end, gfp_t flags)
+{
+ int start_part;
+ int end_part;
+ int part_nr;
+ struct flex_array_part *part;
+
+ if (start >= fa->total_nr_elements || end >= fa->total_nr_elements)
+ return -ENOSPC;
+ if (elements_fit_in_base(fa))
+ return 0;
+ start_part = fa_element_to_part_nr(fa, start);
+ end_part = fa_element_to_part_nr(fa, end);
+ for (part_nr = start_part; part_nr <= end_part; part_nr++) {
+ part = __fa_get_part(fa, part_nr, flags);
+ if (!part)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/**
+ * flex_array_get - pull data back out of the array
+ * @fa: the flex array from which to extract data
+ * @element_nr: index of the element to fetch from the array
+ *
+ * Returns a pointer to the data at index @element_nr. Note
+ * that this is a copy of the data that was passed in. If you
+ * are using this to store pointers, you'll get back &ptr. You
+ * may instead wish to use the flex_array_get_ptr helper.
+ *
+ * Locking must be provided by the caller.
+ */
+void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
+{
+ int part_nr = fa_element_to_part_nr(fa, element_nr);
+ struct flex_array_part *part;
+
+ if (element_nr >= fa->total_nr_elements)
+ return NULL;
+ if (elements_fit_in_base(fa))
+ part = (struct flex_array_part *)&fa->parts[0];
+ else {
+ part = fa->parts[part_nr];
+ if (!part)
+ return NULL;
+ }
+ return &part->elements[index_inside_part(fa, element_nr)];
+}
+
+/**
+ * flex_array_get_ptr - pull a ptr back out of the array
+ * @fa: the flex array from which to extract data
+ * @element_nr: index of the element to fetch from the array
+ *
+ * Returns the pointer placed in the flex array at element_nr using
+ * flex_array_put_ptr(). This function should not be called if the
+ * element in question was not set using the _put_ptr() helper.
+ */
+void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
+{
+ void **tmp;
+
+ tmp = flex_array_get(fa, element_nr);
+ if (!tmp)
+ return NULL;
+
+ return *tmp;
+}
+
+static int part_is_free(struct flex_array_part *part)
+{
+ int i;
+
+ for (i = 0; i < sizeof(struct flex_array_part); i++)
+ if (part->elements[i] != FLEX_ARRAY_FREE)
+ return 0;
+ return 1;
+}
+
+/**
+ * flex_array_shrink - free unused second-level pages
+ * @fa: the flex array to shrink
+ *
+ * Frees all second-level pages that consist solely of unused
+ * elements. Returns the number of pages freed.
+ *
+ * Locking must be provided by the caller.
+ */
+int flex_array_shrink(struct flex_array *fa)
+{
+ struct flex_array_part *part;
+ int part_nr;
+ int ret = 0;
+
+ if (elements_fit_in_base(fa))
+ return ret;
+ for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
+ part = fa->parts[part_nr];
+ if (!part)
+ continue;
+ if (part_is_free(part)) {
+ fa->parts[part_nr] = NULL;
+ kfree(part);
+ ret++;
+ }
+ }
+ return ret;
+}
diff --git a/lib/gcd.c b/lib/gcd.c
new file mode 100644
index 00000000..f879033d
--- /dev/null
+++ b/lib/gcd.c
@@ -0,0 +1,18 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Greatest common divisor */
+unsigned long gcd(unsigned long a, unsigned long b)
+{
+ unsigned long r;
+
+ if (a < b)
+ swap(a, b);
+ if (!b) /* gcd(a, 0) is a; also avoids division by zero below */
+ return a;
+ while ((r = a % b) != 0) {
+ a = b;
+ b = r;
+ }
+ return b;
+}
+EXPORT_SYMBOL_GPL(gcd);
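+
+/*
+ * Worked example (illustrative): gcd(12, 18) first swaps to a=18, b=12,
+ * then iterates 18 % 12 = 6 and 12 % 6 = 0, so the result is 6.
+ */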
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c
new file mode 100644
index 00000000..85d0e412
--- /dev/null
+++ b/lib/gen_crc32table.c
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include "crc32defs.h"
+#include <inttypes.h>
+
+#define ENTRIES_PER_LINE 4
+
+#define LE_TABLE_SIZE (1 << CRC_LE_BITS)
+#define BE_TABLE_SIZE (1 << CRC_BE_BITS)
+
+static uint32_t crc32table_le[4][LE_TABLE_SIZE];
+static uint32_t crc32table_be[4][BE_TABLE_SIZE];
+
+/**
+ * crc32init_le() - allocate and initialize LE table data
+ *
+ * crc is the crc of the byte i; other entries are filled in based on the
+ * fact that crctable[i^j] = crctable[i] ^ crctable[j].
+ *
+ */
+static void crc32init_le(void)
+{
+ unsigned i, j;
+ uint32_t crc = 1;
+
+ crc32table_le[0][0] = 0;
+
+ for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) {
+ crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
+ for (j = 0; j < LE_TABLE_SIZE; j += 2 * i)
+ crc32table_le[0][i + j] = crc ^ crc32table_le[0][j];
+ }
+ for (i = 0; i < LE_TABLE_SIZE; i++) {
+ crc = crc32table_le[0][i];
+ for (j = 1; j < 4; j++) {
+ crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8);
+ crc32table_le[j][i] = crc;
+ }
+ }
+}
+
+/**
+ * crc32init_be() - allocate and initialize BE table data
+ */
+static void crc32init_be(void)
+{
+ unsigned i, j;
+ uint32_t crc = 0x80000000;
+
+ crc32table_be[0][0] = 0;
+
+ for (i = 1; i < BE_TABLE_SIZE; i <<= 1) {
+ crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0);
+ for (j = 0; j < i; j++)
+ crc32table_be[0][i + j] = crc ^ crc32table_be[0][j];
+ }
+ for (i = 0; i < BE_TABLE_SIZE; i++) {
+ crc = crc32table_be[0][i];
+ for (j = 1; j < 4; j++) {
+ crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8);
+ crc32table_be[j][i] = crc;
+ }
+ }
+}
+
+static void output_table(uint32_t table[4][256], int len, char *trans)
+{
+ int i, j;
+
+ for (j = 0 ; j < 4; j++) {
+ printf("{");
+ for (i = 0; i < len - 1; i++) {
+ if (i % ENTRIES_PER_LINE == 0)
+ printf("\n");
+ printf("%s(0x%8.8xL), ", trans, table[j][i]);
+ }
+ printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]);
+ }
+}
+
+int main(int argc, char** argv)
+{
+ printf("/* this file is generated - do not edit */\n\n");
+
+ if (CRC_LE_BITS > 1) {
+ crc32init_le();
+ printf("static const u32 crc32table_le[4][256] = {");
+ output_table(crc32table_le, LE_TABLE_SIZE, "tole");
+ printf("};\n");
+ }
+
+ if (CRC_BE_BITS > 1) {
+ crc32init_be();
+ printf("static const u32 crc32table_be[4][256] = {");
+ output_table(crc32table_be, BE_TABLE_SIZE, "tobe");
+ printf("};\n");
+ }
+
+ return 0;
+}
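+
+/*
+ * Illustrative consumer of the generated LE table (a sketch, not part
+ * of this generator): the classic byte-at-a-time update uses only row
+ * 0; the extra rows exist for the slice-by-4 variant in lib/crc32.c.
+ *
+ * while (len--)
+ * crc = crc32table_le[0][(crc ^ *p++) & 0xff] ^ (crc >> 8);
+ */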
diff --git a/lib/genalloc.c b/lib/genalloc.c
new file mode 100644
index 00000000..dc6d8331
--- /dev/null
+++ b/lib/genalloc.c
@@ -0,0 +1,215 @@
+/*
+ * Basic general purpose allocator for managing special purpose memory
+ * not managed by the regular kmalloc/kfree interface.
+ * Uses for this includes on-device special memory, uncached memory
+ * etc.
+ *
+ * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/bitmap.h>
+#include <linux/genalloc.h>
+
+/* General purpose special memory pool descriptor. */
+struct gen_pool {
+ rwlock_t lock; /* protects chunks list */
+ struct list_head chunks; /* list of chunks in this pool */
+ unsigned order; /* minimum allocation order */
+};
+
+/* General purpose special memory pool chunk descriptor. */
+struct gen_pool_chunk {
+ spinlock_t lock; /* protects bits */
+ struct list_head next_chunk; /* next chunk in pool */
+ unsigned long start; /* start of memory chunk */
+ unsigned long size; /* number of bits */
+ unsigned long bits[0]; /* bitmap for allocating memory chunk */
+};
+
+/**
+ * gen_pool_create() - create a new special memory pool
+ * @order: Log base 2 of number of bytes each bitmap bit
+ * represents.
+ * @nid: Node id of the node the pool structure should be allocated
+ * on, or -1.
+ *
+ * Create a new special memory pool that can be used to manage special purpose
+ * memory not managed by the regular kmalloc/kfree interface.
+ */
+struct gen_pool *__must_check gen_pool_create(unsigned order, int nid)
+{
+ struct gen_pool *pool;
+
+ if (WARN_ON(order >= BITS_PER_LONG))
+ return NULL;
+
+ pool = kmalloc_node(sizeof *pool, GFP_KERNEL, nid);
+ if (pool) {
+ rwlock_init(&pool->lock);
+ INIT_LIST_HEAD(&pool->chunks);
+ pool->order = order;
+ }
+ return pool;
+}
+EXPORT_SYMBOL(gen_pool_create);
+
+/**
+ * gen_pool_add() - add a new chunk of special memory to the pool
+ * @pool: Pool to add new memory chunk to.
+ * @addr: Starting address of memory chunk to add to pool.
+ * @size: Size in bytes of the memory chunk to add to pool.
+ * @nid: Node id of the node the chunk structure and bitmap should be
+ * allocated on, or -1.
+ *
+ * Add a new chunk of special memory to the specified pool.
+ */
+int __must_check
+gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, int nid)
+{
+ struct gen_pool_chunk *chunk;
+ size_t nbytes;
+
+ if (WARN_ON(!addr || addr + size < addr ||
+ (addr & ((1 << pool->order) - 1))))
+ return -EINVAL;
+
+ size = size >> pool->order;
+ if (WARN_ON(!size))
+ return -EINVAL;
+
+ nbytes = sizeof *chunk + BITS_TO_LONGS(size) * sizeof *chunk->bits;
+ chunk = kzalloc_node(nbytes, GFP_KERNEL, nid);
+ if (!chunk)
+ return -ENOMEM;
+
+ spin_lock_init(&chunk->lock);
+ chunk->start = addr >> pool->order;
+ chunk->size = size;
+
+ write_lock(&pool->lock);
+ list_add(&chunk->next_chunk, &pool->chunks);
+ write_unlock(&pool->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(gen_pool_add);
+
+/**
+ * gen_pool_destroy() - destroy a special memory pool
+ * @pool: Pool to destroy.
+ *
+ * Destroy the specified special memory pool. Verifies that there are no
+ * outstanding allocations.
+ */
+void gen_pool_destroy(struct gen_pool *pool)
+{
+ struct gen_pool_chunk *chunk;
+ int bit;
+
+ while (!list_empty(&pool->chunks)) {
+ chunk = list_entry(pool->chunks.next, struct gen_pool_chunk,
+ next_chunk);
+ list_del(&chunk->next_chunk);
+
+ bit = find_next_bit(chunk->bits, chunk->size, 0);
+ BUG_ON(bit < chunk->size);
+
+ kfree(chunk);
+ }
+ kfree(pool);
+}
+EXPORT_SYMBOL(gen_pool_destroy);
+
+/**
+ * gen_pool_alloc_aligned() - allocate special memory from the pool
+ * @pool: Pool to allocate from.
+ * @size: Number of bytes to allocate from the pool.
+ * @alignment_order: Order the allocated space should be
+ * aligned to (eg. 20 means allocated space
+ * must be aligned to 1MiB).
+ *
+ * Allocate the requested number of bytes from the specified pool.
+ * Uses a first-fit algorithm.
+ */
+unsigned long __must_check
+gen_pool_alloc_aligned(struct gen_pool *pool, size_t size,
+ unsigned alignment_order)
+{
+ unsigned long addr, align_mask = 0, flags, start;
+ struct gen_pool_chunk *chunk;
+
+ if (size == 0)
+ return 0;
+
+ if (alignment_order > pool->order)
+ align_mask = (1 << (alignment_order - pool->order)) - 1;
+
+ size = (size + (1UL << pool->order) - 1) >> pool->order;
+
+ read_lock(&pool->lock);
+ list_for_each_entry(chunk, &pool->chunks, next_chunk) {
+ if (chunk->size < size)
+ continue;
+
+ spin_lock_irqsave(&chunk->lock, flags);
+ start = bitmap_find_next_zero_area_off(chunk->bits, chunk->size,
+ 0, size, align_mask,
+ chunk->start);
+ if (start >= chunk->size) {
+ spin_unlock_irqrestore(&chunk->lock, flags);
+ continue;
+ }
+
+ bitmap_set(chunk->bits, start, size);
+ spin_unlock_irqrestore(&chunk->lock, flags);
+ addr = (chunk->start + start) << pool->order;
+ goto done;
+ }
+
+ addr = 0;
+done:
+ read_unlock(&pool->lock);
+ return addr;
+}
+EXPORT_SYMBOL(gen_pool_alloc_aligned);
+
+/**
+ * gen_pool_free() - free allocated special memory back to the pool
+ * @pool: Pool to free to.
+ * @addr: Starting address of memory to free back to pool.
+ * @size: Size in bytes of memory to free.
+ *
+ * Free previously allocated special memory back to the specified pool.
+ */
+void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
+{
+ struct gen_pool_chunk *chunk;
+ unsigned long flags;
+
+ if (!size)
+ return;
+
+ addr = addr >> pool->order;
+ size = (size + (1UL << pool->order) - 1) >> pool->order;
+
+ BUG_ON(addr + size < addr);
+
+ read_lock(&pool->lock);
+ list_for_each_entry(chunk, &pool->chunks, next_chunk)
+ if (addr >= chunk->start &&
+ addr + size <= chunk->start + chunk->size) {
+ spin_lock_irqsave(&chunk->lock, flags);
+ bitmap_clear(chunk->bits, addr - chunk->start, size);
+ spin_unlock_irqrestore(&chunk->lock, flags);
+ goto done;
+ }
+ BUG_ON(1);
+done:
+ read_unlock(&pool->lock);
+}
+EXPORT_SYMBOL(gen_pool_free);
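+
+/*
+ * A minimal usage sketch (illustrative only; "uncached_base" and
+ * "SZ_64K" are hypothetical names): manage a 64KiB region of special
+ * memory with a 256-byte allocation granularity (order 8).
+ *
+ * struct gen_pool *pool = gen_pool_create(8, -1);
+ * unsigned long va;
+ *
+ * if (!pool || gen_pool_add(pool, uncached_base, SZ_64K, -1))
+ * goto fail;
+ * va = gen_pool_alloc_aligned(pool, 4096, 0);
+ * ...
+ * gen_pool_free(pool, va, 4096);
+ */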
diff --git a/lib/halfmd4.c b/lib/halfmd4.c
new file mode 100644
index 00000000..e11db26f
--- /dev/null
+++ b/lib/halfmd4.c
@@ -0,0 +1,66 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cryptohash.h>
+
+/* F, G and H are basic MD4 functions: selection, majority, parity */
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+
+/*
+ * The generic round function. The application is so specific that
+ * we don't bother protecting all the arguments with parens, as is generally
+ * good macro practice, in favor of extra legibility.
+ * Rotation is separate from addition to prevent recomputation
+ */
+#define ROUND(f, a, b, c, d, x, s) \
+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s)))
+#define K1 0
+#define K2 013240474631UL
+#define K3 015666365641UL
+
+/*
+ * Basic cut-down MD4 transform. Returns only 32 bits of result.
+ */
+__u32 half_md4_transform(__u32 buf[4], __u32 const in[8])
+{
+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3];
+
+ /* Round 1 */
+ ROUND(F, a, b, c, d, in[0] + K1, 3);
+ ROUND(F, d, a, b, c, in[1] + K1, 7);
+ ROUND(F, c, d, a, b, in[2] + K1, 11);
+ ROUND(F, b, c, d, a, in[3] + K1, 19);
+ ROUND(F, a, b, c, d, in[4] + K1, 3);
+ ROUND(F, d, a, b, c, in[5] + K1, 7);
+ ROUND(F, c, d, a, b, in[6] + K1, 11);
+ ROUND(F, b, c, d, a, in[7] + K1, 19);
+
+ /* Round 2 */
+ ROUND(G, a, b, c, d, in[1] + K2, 3);
+ ROUND(G, d, a, b, c, in[3] + K2, 5);
+ ROUND(G, c, d, a, b, in[5] + K2, 9);
+ ROUND(G, b, c, d, a, in[7] + K2, 13);
+ ROUND(G, a, b, c, d, in[0] + K2, 3);
+ ROUND(G, d, a, b, c, in[2] + K2, 5);
+ ROUND(G, c, d, a, b, in[4] + K2, 9);
+ ROUND(G, b, c, d, a, in[6] + K2, 13);
+
+ /* Round 3 */
+ ROUND(H, a, b, c, d, in[3] + K3, 3);
+ ROUND(H, d, a, b, c, in[7] + K3, 9);
+ ROUND(H, c, d, a, b, in[2] + K3, 11);
+ ROUND(H, b, c, d, a, in[6] + K3, 15);
+ ROUND(H, a, b, c, d, in[1] + K3, 3);
+ ROUND(H, d, a, b, c, in[5] + K3, 9);
+ ROUND(H, c, d, a, b, in[0] + K3, 11);
+ ROUND(H, b, c, d, a, in[4] + K3, 15);
+
+ buf[0] += a;
+ buf[1] += b;
+ buf[2] += c;
+ buf[3] += d;
+
+ return buf[1]; /* "most hashed" word */
+}
+EXPORT_SYMBOL(half_md4_transform);
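+
+/*
+ * A usage sketch (illustrative): a caller keeps a 4-word running state,
+ * here seeded with the standard MD4 initial values, fills in[] with
+ * eight message words, and consumes only the returned 32 bits.
+ *
+ * __u32 buf[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
+ * __u32 in[8];
+ * __u32 hash = half_md4_transform(buf, in);
+ */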
diff --git a/lib/hexdump.c b/lib/hexdump.c
new file mode 100644
index 00000000..5d7a4802
--- /dev/null
+++ b/lib/hexdump.c
@@ -0,0 +1,224 @@
+/*
+ * lib/hexdump.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation. See README and COPYING for
+ * more details.
+ */
+
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+const char hex_asc[] = "0123456789abcdef";
+EXPORT_SYMBOL(hex_asc);
+
+/**
+ * hex_to_bin - convert a hex digit to its real value
+ * @ch: ascii character represents hex digit
+ *
+ * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad
+ * input.
+ */
+int hex_to_bin(char ch)
+{
+ if ((ch >= '0') && (ch <= '9'))
+ return ch - '0';
+ ch = tolower(ch);
+ if ((ch >= 'a') && (ch <= 'f'))
+ return ch - 'a' + 10;
+ return -1;
+}
+EXPORT_SYMBOL(hex_to_bin);
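+
+/*
+ * Example (illustrative; "str" and "byte" are hypothetical names):
+ * assembling one byte from two hex digits, checking each conversion
+ * for failure.
+ *
+ * int hi = hex_to_bin(str[0]);
+ * int lo = hex_to_bin(str[1]);
+ * if (hi < 0 || lo < 0)
+ * return -EINVAL;
+ * byte = (hi << 4) | lo;
+ */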
+
+/**
+ * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory
+ * @buf: data blob to dump
+ * @len: number of bytes in the @buf
+ * @rowsize: number of bytes to print per line; must be 16 or 32
+ * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1)
+ * @linebuf: where to put the converted data
+ * @linebuflen: total size of @linebuf, including space for terminating NUL
+ * @ascii: include ASCII after the hex output
+ *
+ * hex_dump_to_buffer() works on one "line" of output at a time, i.e.,
+ * 16 or 32 bytes of input data converted to hex + ASCII output.
+ *
+ * Given a buffer of u8 data, hex_dump_to_buffer() converts the input data
+ * to a hex + ASCII dump at the supplied memory location.
+ * The converted output is always NUL-terminated.
+ *
+ * E.g.:
+ * hex_dump_to_buffer(frame->data, frame->len, 16, 1,
+ * linebuf, sizeof(linebuf), true);
+ *
+ * example output buffer:
+ * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
+ */
+void hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
+ int groupsize, char *linebuf, size_t linebuflen,
+ bool ascii)
+{
+ const u8 *ptr = buf;
+ u8 ch;
+ int j, lx = 0;
+ int ascii_column;
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ if (!len)
+ goto nil;
+ if (len > rowsize) /* limit to one line at a time */
+ len = rowsize;
+ if ((len % groupsize) != 0) /* no mixed size output */
+ groupsize = 1;
+
+ switch (groupsize) {
+ case 8: {
+ const u64 *ptr8 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf(linebuf + lx, linebuflen - lx,
+ "%s%16.16llx", j ? " " : "",
+ (unsigned long long)*(ptr8 + j));
+ ascii_column = 17 * ngroups + 2;
+ break;
+ }
+
+ case 4: {
+ const u32 *ptr4 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf(linebuf + lx, linebuflen - lx,
+ "%s%8.8x", j ? " " : "", *(ptr4 + j));
+ ascii_column = 9 * ngroups + 2;
+ break;
+ }
+
+ case 2: {
+ const u16 *ptr2 = buf;
+ int ngroups = len / groupsize;
+
+ for (j = 0; j < ngroups; j++)
+ lx += scnprintf(linebuf + lx, linebuflen - lx,
+ "%s%4.4x", j ? " " : "", *(ptr2 + j));
+ ascii_column = 5 * ngroups + 2;
+ break;
+ }
+
+ default:
+ for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) {
+ ch = ptr[j];
+ linebuf[lx++] = hex_asc_hi(ch);
+ linebuf[lx++] = hex_asc_lo(ch);
+ linebuf[lx++] = ' ';
+ }
+ if (j)
+ lx--;
+
+ ascii_column = 3 * rowsize + 2;
+ break;
+ }
+ if (!ascii)
+ goto nil;
+
+ while (lx < (linebuflen - 1) && lx < (ascii_column - 1))
+ linebuf[lx++] = ' ';
+ for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) {
+ ch = ptr[j];
+ linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.';
+ }
+nil:
+ linebuf[lx++] = '\0';
+}
+EXPORT_SYMBOL(hex_dump_to_buffer);
+
+/**
+ * print_hex_dump - print a text hex dump to syslog for a binary blob of data
+ * @level: kernel log level (e.g. KERN_DEBUG)
+ * @prefix_str: string to prefix each line with;
+ * caller supplies trailing spaces for alignment if desired
+ * @prefix_type: controls whether prefix of an offset, address, or none
+ * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
+ * @rowsize: number of bytes to print per line; must be 16 or 32
+ * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1)
+ * @buf: data blob to dump
+ * @len: number of bytes in the @buf
+ * @ascii: include ASCII after the hex output
+ *
+ * Given a buffer of u8 data, print_hex_dump() prints a hex + ASCII dump
+ * to the kernel log at the specified kernel log level, with an optional
+ * leading prefix.
+ *
+ * print_hex_dump() works on one "line" of output at a time, i.e.,
+ * 16 or 32 bytes of input data converted to hex + ASCII output.
+ * print_hex_dump() iterates over the entire input @buf, breaking it into
+ * "line size" chunks to format and print.
+ *
+ * E.g.:
+ * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS,
+ * 16, 1, frame->data, frame->len, true);
+ *
+ * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode:
+ * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO
+ * Example output using %DUMP_PREFIX_ADDRESS and 4-byte mode:
+ * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~.
+ */
+void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize,
+ const void *buf, size_t len, bool ascii)
+{
+ const u8 *ptr = buf;
+ int i, linelen, remaining = len;
+ unsigned char linebuf[32 * 3 + 2 + 32 + 1];
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ for (i = 0; i < len; i += rowsize) {
+ linelen = min(remaining, rowsize);
+ remaining -= rowsize;
+
+ hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+ linebuf, sizeof(linebuf), ascii);
+
+ switch (prefix_type) {
+ case DUMP_PREFIX_ADDRESS:
+ printk("%s%s%p: %s\n",
+ level, prefix_str, ptr + i, linebuf);
+ break;
+ case DUMP_PREFIX_OFFSET:
+ printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf);
+ break;
+ default:
+ printk("%s%s%s\n", level, prefix_str, linebuf);
+ break;
+ }
+ }
+}
+EXPORT_SYMBOL(print_hex_dump);
+
+/**
+ * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
+ * @prefix_str: string to prefix each line with;
+ * caller supplies trailing spaces for alignment if desired
+ * @prefix_type: controls whether prefix of an offset, address, or none
+ * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
+ * @buf: data blob to dump
+ * @len: number of bytes in the @buf
+ *
+ * Calls print_hex_dump(), with log level of KERN_DEBUG,
+ * rowsize of 16, groupsize of 1, and ASCII output included.
+ */
+void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
+ const void *buf, size_t len)
+{
+ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
+ buf, len, true);
+}
+EXPORT_SYMBOL(print_hex_dump_bytes);
diff --git a/lib/hweight.c b/lib/hweight.c
new file mode 100644
index 00000000..3c79d508
--- /dev/null
+++ b/lib/hweight.c
@@ -0,0 +1,67 @@
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+unsigned int __sw_hweight32(unsigned int w)
+{
+#ifdef ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x55555555;
+ w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+ w = (w + (w >> 4)) & 0x0f0f0f0f;
+ return (w * 0x01010101) >> 24;
+#else
+ unsigned int res = w - ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res + (res >> 4)) & 0x0F0F0F0F;
+ res = res + (res >> 8);
+ return (res + (res >> 16)) & 0x000000FF;
+#endif
+}
+EXPORT_SYMBOL(__sw_hweight32);
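+
+/*
+ * Worked example (illustrative) of the parallel counting above for
+ * w = 0xff: the first step leaves 2 in each 2-bit field (0xaa), the
+ * second leaves 4 in each 4-bit field (0x44), and the remaining steps
+ * sum the fields to 8, the number of set bits.
+ */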
+
+unsigned int __sw_hweight16(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res + (res >> 4)) & 0x0F0F;
+ return (res + (res >> 8)) & 0x00FF;
+}
+EXPORT_SYMBOL(__sw_hweight16);
+
+unsigned int __sw_hweight8(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x55);
+ res = (res & 0x33) + ((res >> 2) & 0x33);
+ return (res + (res >> 4)) & 0x0F;
+}
+EXPORT_SYMBOL(__sw_hweight8);
+
+unsigned long __sw_hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+ return __sw_hweight32((unsigned int)(w >> 32)) +
+ __sw_hweight32((unsigned int)w);
+#elif BITS_PER_LONG == 64
+#ifdef ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x5555555555555555ul;
+ w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
+ w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
+ return (w * 0x0101010101010101ul) >> 56;
+#else
+ __u64 res = w - ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
+ res = res + (res >> 8);
+ res = res + (res >> 16);
+ return (res + (res >> 32)) & 0x00000000000000FFul;
+#endif
+#endif
+}
+EXPORT_SYMBOL(__sw_hweight64);
diff --git a/lib/idr.c b/lib/idr.c
new file mode 100644
index 00000000..7f1a4f0a
--- /dev/null
+++ b/lib/idr.c
@@ -0,0 +1,937 @@
+/*
+ * 2002-10-18 written by Jim Houston jim.houston@ccur.com
+ * Copyright (C) 2002 by Concurrent Computer Corporation
+ * Distributed under the GNU GPL license version 2.
+ *
+ * Modified by George Anzinger to reuse immediately and to use
+ * find bit instructions. Also removed _irq on spinlocks.
+ *
+ * Modified by Nadia Derbey to make it RCU safe.
+ *
+ * Small id to pointer translation service.
+ *
+ * It uses a radix tree like structure as a sparse array indexed
+ * by the id to obtain the pointer. The bitmap makes allocating
+ * a new id quick.
+ *
+ * You call it to allocate an id (an int) and associate with that id a
+ * pointer or whatever; we treat it as a (void *). You can pass this
+ * id to a user for them to pass back at a later time. You then pass
+ * that id to this code and it returns your pointer.
+ *
+ * You can release ids at any time. When all ids are released, most of
+ * the memory is returned (we keep IDR_FREE_MAX layers in a local pool)
+ * so we don't need to go to the memory "store" during an id allocation;
+ * this way you don't need to be too concerned about locking and
+ * conflicts with the slab allocator.
+ */
+
+#ifndef TEST /* to test in user space... */
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#endif
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/idr.h>
+
+static struct kmem_cache *idr_layer_cache;
+
+static struct idr_layer *get_from_free_list(struct idr *idp)
+{
+ struct idr_layer *p;
+ unsigned long flags;
+
+ spin_lock_irqsave(&idp->lock, flags);
+ if ((p = idp->id_free)) {
+ idp->id_free = p->ary[0];
+ idp->id_free_cnt--;
+ p->ary[0] = NULL;
+ }
+ spin_unlock_irqrestore(&idp->lock, flags);
+ return(p);
+}
+
+static void idr_layer_rcu_free(struct rcu_head *head)
+{
+ struct idr_layer *layer;
+
+ layer = container_of(head, struct idr_layer, rcu_head);
+ kmem_cache_free(idr_layer_cache, layer);
+}
+
+static inline void free_layer(struct idr_layer *p)
+{
+ call_rcu(&p->rcu_head, idr_layer_rcu_free);
+}
+
+/* only called when idp->lock is held */
+static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
+{
+ p->ary[0] = idp->id_free;
+ idp->id_free = p;
+ idp->id_free_cnt++;
+}
+
+static void move_to_free_list(struct idr *idp, struct idr_layer *p)
+{
+ unsigned long flags;
+
+ /*
+ * Depends on the return element being zeroed.
+ */
+ spin_lock_irqsave(&idp->lock, flags);
+ __move_to_free_list(idp, p);
+ spin_unlock_irqrestore(&idp->lock, flags);
+}
+
+static void idr_mark_full(struct idr_layer **pa, int id)
+{
+ struct idr_layer *p = pa[0];
+ int l = 0;
+
+ __set_bit(id & IDR_MASK, &p->bitmap);
+ /*
+ * If this layer is full mark the bit in the layer above to
+ * show that this part of the radix tree is full. This may
+ * complete the layer above and require walking up the radix
+ * tree.
+ */
+ while (p->bitmap == IDR_FULL) {
+ if (!(p = pa[++l]))
+ break;
+ id = id >> IDR_BITS;
+ __set_bit((id & IDR_MASK), &p->bitmap);
+ }
+}
+
+/**
+ * idr_pre_get - reserve resources for idr allocation
+ * @idp: idr handle
+ * @gfp_mask: memory allocation flags
+ *
+ * This function should be called prior to locking and calling the
+ * idr_get_new* functions. It preallocates enough memory to satisfy
+ * the worst possible allocation.
+ *
+ * If the system is REALLY out of memory this function returns 0,
+ * otherwise 1.
+ */
+int idr_pre_get(struct idr *idp, gfp_t gfp_mask)
+{
+ while (idp->id_free_cnt < IDR_FREE_MAX) {
+ struct idr_layer *new;
+ new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
+ if (new == NULL)
+ return (0);
+ move_to_free_list(idp, new);
+ }
+ return 1;
+}
+EXPORT_SYMBOL(idr_pre_get);
+
+static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
+{
+ int n, m, sh;
+ struct idr_layer *p, *new;
+ int l, id, oid;
+ unsigned long bm;
+
+ id = *starting_id;
+ restart:
+ p = idp->top;
+ l = idp->layers;
+ pa[l--] = NULL;
+ while (1) {
+ /*
+ * We run around this while until we reach the leaf node...
+ */
+ n = (id >> (IDR_BITS*l)) & IDR_MASK;
+ bm = ~p->bitmap;
+ m = find_next_bit(&bm, IDR_SIZE, n);
+ if (m == IDR_SIZE) {
+ /* no space available, go back to the previous layer */
+ l++;
+ oid = id;
+ id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
+
+ /* if already at the top layer, we need to grow */
+ if (id >= 1 << (idp->layers * IDR_BITS)) {
+ *starting_id = id;
+ return IDR_NEED_TO_GROW;
+ }
+ p = pa[l];
+ BUG_ON(!p);
+
+ /* If we need to go up one layer, continue the
+ * loop; otherwise, restart from the top.
+ */
+ sh = IDR_BITS * (l + 1);
+ if (oid >> sh == id >> sh)
+ continue;
+ else
+ goto restart;
+ }
+ if (m != n) {
+ sh = IDR_BITS*l;
+ id = ((id >> sh) ^ n ^ m) << sh;
+ }
+ if ((id >= MAX_ID_BIT) || (id < 0))
+ return IDR_NOMORE_SPACE;
+ if (l == 0)
+ break;
+ /*
+ * Create the layer below if it is missing.
+ */
+ if (!p->ary[m]) {
+ new = get_from_free_list(idp);
+ if (!new)
+ return -1;
+ new->layer = l-1;
+ rcu_assign_pointer(p->ary[m], new);
+ p->count++;
+ }
+ pa[l--] = p;
+ p = p->ary[m];
+ }
+
+ pa[l] = p;
+ return id;
+}
+
+static int idr_get_empty_slot(struct idr *idp, int starting_id,
+ struct idr_layer **pa)
+{
+ struct idr_layer *p, *new;
+ int layers, v, id;
+ unsigned long flags;
+
+ id = starting_id;
+build_up:
+ p = idp->top;
+ layers = idp->layers;
+ if (unlikely(!p)) {
+ if (!(p = get_from_free_list(idp)))
+ return -1;
+ p->layer = 0;
+ layers = 1;
+ }
+ /*
+ * Add a new layer to the top of the tree if the requested
+ * id is larger than the currently allocated space.
+ */
+ while ((layers < (MAX_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) {
+ layers++;
+ if (!p->count) {
+ /* special case: if the tree is currently empty,
+ * then we grow the tree by moving the top node
+ * upwards.
+ */
+ p->layer++;
+ continue;
+ }
+ if (!(new = get_from_free_list(idp))) {
+ /*
+ * The allocation failed. If we built part of
+ * the structure tear it down.
+ */
+ spin_lock_irqsave(&idp->lock, flags);
+ for (new = p; p && p != idp->top; new = p) {
+ p = p->ary[0];
+ new->ary[0] = NULL;
+ new->bitmap = new->count = 0;
+ __move_to_free_list(idp, new);
+ }
+ spin_unlock_irqrestore(&idp->lock, flags);
+ return -1;
+ }
+ new->ary[0] = p;
+ new->count = 1;
+ new->layer = layers-1;
+ if (p->bitmap == IDR_FULL)
+ __set_bit(0, &new->bitmap);
+ p = new;
+ }
+ rcu_assign_pointer(idp->top, p);
+ idp->layers = layers;
+ v = sub_alloc(idp, &id, pa);
+ if (v == IDR_NEED_TO_GROW)
+ goto build_up;
+ return(v);
+}
+
+static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
+{
+ struct idr_layer *pa[MAX_LEVEL];
+ int id;
+
+ id = idr_get_empty_slot(idp, starting_id, pa);
+ if (id >= 0) {
+ /*
+ * Successfully found an empty slot. Install the user
+ * pointer and mark the slot full.
+ */
+ rcu_assign_pointer(pa[0]->ary[id & IDR_MASK],
+ (struct idr_layer *)ptr);
+ pa[0]->count++;
+ idr_mark_full(pa, id);
+ }
+
+ return id;
+}
+
+/**
+ * idr_get_new_above - allocate new idr entry above or equal to a start id
+ * @idp: idr handle
+ * @ptr: pointer you want associated with the id
+ * @starting_id: id to start search at
+ * @id: pointer to the allocated handle
+ *
+ * This is the allocate id function. It should be called with any
+ * required locks.
+ *
+ * If memory is required, it will return -EAGAIN, you should unlock
+ * and go back to the idr_pre_get() call. If the idr is full, it will
+ * return -ENOSPC.
+ *
+ * @id returns a value in the range @starting_id ... 0x7fffffff
+ */
+int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id)
+{
+ int rv;
+
+ rv = idr_get_new_above_int(idp, ptr, starting_id);
+ /*
+ * This is a cheap hack until the IDR code can be fixed to
+ * return proper error values.
+ */
+ if (rv < 0)
+ return _idr_rc_to_errno(rv);
+ *id = rv;
+ return 0;
+}
+EXPORT_SYMBOL(idr_get_new_above);
+
+/**
+ * idr_get_new - allocate new idr entry
+ * @idp: idr handle
+ * @ptr: pointer you want associated with the id
+ * @id: pointer to the allocated handle
+ *
+ * This is the allocate id function. It should be called with any
+ * required locks.
+ *
+ * If memory is required, it will return -EAGAIN, you should unlock
+ * and go back to the idr_pre_get() call. If the idr is full, it will
+ * return -ENOSPC.
+ *
+ * @id returns a value in the range 0 ... 0x7fffffff
+ */
+int idr_get_new(struct idr *idp, void *ptr, int *id)
+{
+ int rv;
+
+ rv = idr_get_new_above_int(idp, ptr, 0);
+ /*
+ * This is a cheap hack until the IDR code can be fixed to
+ * return proper error values.
+ */
+ if (rv < 0)
+ return _idr_rc_to_errno(rv);
+ *id = rv;
+ return 0;
+}
+EXPORT_SYMBOL(idr_get_new);
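+
+/*
+ * The allocation pattern the comments above describe, as a sketch
+ * ("my_idr", "my_lock" and "obj" are hypothetical names; callers
+ * supply their own locking):
+ *
+ * int id, err;
+ * again:
+ * if (!idr_pre_get(&my_idr, GFP_KERNEL))
+ * return -ENOMEM;
+ * spin_lock(&my_lock);
+ * err = idr_get_new(&my_idr, obj, &id);
+ * spin_unlock(&my_lock);
+ * if (err == -EAGAIN)
+ * goto again;
+ */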
+
+static void idr_remove_warning(int id)
+{
+ printk(KERN_WARNING
+ "idr_remove called for id=%d which is not allocated.\n", id);
+ dump_stack();
+}
+
+static void sub_remove(struct idr *idp, int shift, int id)
+{
+ struct idr_layer *p = idp->top;
+ struct idr_layer **pa[MAX_LEVEL];
+ struct idr_layer ***paa = &pa[0];
+ struct idr_layer *to_free;
+ int n;
+
+ *paa = NULL;
+ *++paa = &idp->top;
+
+ while ((shift > 0) && p) {
+ n = (id >> shift) & IDR_MASK;
+ __clear_bit(n, &p->bitmap);
+ *++paa = &p->ary[n];
+ p = p->ary[n];
+ shift -= IDR_BITS;
+ }
+ n = id & IDR_MASK;
+ if (likely(p != NULL && test_bit(n, &p->bitmap))) {
+ __clear_bit(n, &p->bitmap);
+ rcu_assign_pointer(p->ary[n], NULL);
+ to_free = NULL;
+ while (*paa && !--((**paa)->count)) {
+ if (to_free)
+ free_layer(to_free);
+ to_free = **paa;
+ **paa-- = NULL;
+ }
+ if (!*paa)
+ idp->layers = 0;
+ if (to_free)
+ free_layer(to_free);
+ } else
+ idr_remove_warning(id);
+}
+
+/**
+ * idr_remove - remove the given id and free its slot
+ * @idp: idr handle
+ * @id: unique key
+ */
+void idr_remove(struct idr *idp, int id)
+{
+ struct idr_layer *p;
+ struct idr_layer *to_free;
+
+ /* Mask off upper bits we don't use for the search. */
+ id &= MAX_ID_MASK;
+
+ sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
+ if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
+ idp->top->ary[0]) {
+ /*
+ * Single child at leftmost slot: we can shrink the tree.
+ * This level is not needed anymore since when layers are
+ * inserted, they are inserted at the top of the existing
+ * tree.
+ */
+ to_free = idp->top;
+ p = idp->top->ary[0];
+ rcu_assign_pointer(idp->top, p);
+ --idp->layers;
+ to_free->bitmap = to_free->count = 0;
+ free_layer(to_free);
+ }
+ while (idp->id_free_cnt >= IDR_FREE_MAX) {
+ p = get_from_free_list(idp);
+ /*
+ * Note: we don't call the rcu callback here, since the only
+ * layers that fall into the freelist are those that have been
+ * preallocated.
+ */
+ kmem_cache_free(idr_layer_cache, p);
+ }
+ return;
+}
+EXPORT_SYMBOL(idr_remove);
+
+/**
+ * idr_remove_all - remove all ids from the given idr tree
+ * @idp: idr handle
+ *
+ * idr_destroy() only frees up unused, cached idp_layers, but this
+ * function will remove all id mappings and leave all idp_layers
+ * unused.
+ *
+ * A typical clean-up sequence for objects stored in an idr tree will
+ * use idr_for_each() to free all objects, if necessary, then
+ * idr_remove_all() to remove all ids, and idr_destroy() to free
+ * up the cached idr_layers.
+ */
+void idr_remove_all(struct idr *idp)
+{
+ int n, id, max;
+ int bt_mask;
+ struct idr_layer *p;
+ struct idr_layer *pa[MAX_LEVEL];
+ struct idr_layer **paa = &pa[0];
+
+ n = idp->layers * IDR_BITS;
+ p = idp->top;
+ rcu_assign_pointer(idp->top, NULL);
+ max = 1 << n;
+
+ id = 0;
+ while (id < max) {
+ while (n > IDR_BITS && p) {
+ n -= IDR_BITS;
+ *paa++ = p;
+ p = p->ary[(id >> n) & IDR_MASK];
+ }
+
+ bt_mask = id;
+ id += 1 << n;
+ /* Get the highest bit that the above add changed from 0->1. */
+ while (n < fls(id ^ bt_mask)) {
+ if (p)
+ free_layer(p);
+ n += IDR_BITS;
+ p = *--paa;
+ }
+ }
+ idp->layers = 0;
+}
+EXPORT_SYMBOL(idr_remove_all);
+
+/**
+ * idr_destroy - release all cached layers within an idr tree
+ * @idp: idr handle
+ */
+void idr_destroy(struct idr *idp)
+{
+ while (idp->id_free_cnt) {
+ struct idr_layer *p = get_from_free_list(idp);
+ kmem_cache_free(idr_layer_cache, p);
+ }
+}
+EXPORT_SYMBOL(idr_destroy);
+
+/**
+ * idr_find - return pointer for given id
+ * @idp: idr handle
+ * @id: lookup key
+ *
+ * Return the pointer given the id it has been registered with. A %NULL
+ * return indicates that @id is not valid or you passed %NULL in
+ * idr_get_new().
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers' lifetimes are correctly managed.
+ */
+void *idr_find(struct idr *idp, int id)
+{
+ int n;
+ struct idr_layer *p;
+
+ p = rcu_dereference_raw(idp->top);
+ if (!p)
+ return NULL;
+ n = (p->layer+1) * IDR_BITS;
+
+ /* Mask off upper bits we don't use for the search. */
+ id &= MAX_ID_MASK;
+
+ if (id >= (1 << n))
+ return NULL;
+ BUG_ON(n == 0);
+
+ while (n > 0 && p) {
+ n -= IDR_BITS;
+ BUG_ON(n != p->layer*IDR_BITS);
+ p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ }
+ return((void *)p);
+}
+EXPORT_SYMBOL(idr_find);
+
+/**
+ * idr_for_each - iterate through all stored pointers
+ * @idp: idr handle
+ * @fn: function to be called for each pointer
+ * @data: data passed back to callback function
+ *
+ * Iterate over the pointers registered with the given idr. The
+ * callback function will be called for each pointer currently
+ * registered, passing the id, the pointer and the data pointer passed
+ * to this function. It is not safe to modify the idr tree while in
+ * the callback, so functions such as idr_get_new and idr_remove are
+ * not allowed.
+ *
+ * We check the return of @fn each time. If it returns anything other
+ * than 0, we break out and return that value.
+ *
+ * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
+ */
+int idr_for_each(struct idr *idp,
+ int (*fn)(int id, void *p, void *data), void *data)
+{
+ int n, id, max, error = 0;
+ struct idr_layer *p;
+ struct idr_layer *pa[MAX_LEVEL];
+ struct idr_layer **paa = &pa[0];
+
+ n = idp->layers * IDR_BITS;
+ p = rcu_dereference_raw(idp->top);
+ max = 1 << n;
+
+ id = 0;
+ while (id < max) {
+ while (n > 0 && p) {
+ n -= IDR_BITS;
+ *paa++ = p;
+ p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ }
+
+ if (p) {
+ error = fn(id, (void *)p, data);
+ if (error)
+ break;
+ }
+
+ id += 1 << n;
+ while (n < fls(id)) {
+ n += IDR_BITS;
+ p = *--paa;
+ }
+ }
+
+ return error;
+}
+EXPORT_SYMBOL(idr_for_each);
+
+/**
+ * idr_get_next - lookup next object above or equal to the given id
+ * @idp: idr handle
+ * @nextidp: pointer to the lookup key; updated to the id that was found
+ *
+ * Returns a pointer to the registered object with the smallest id
+ * greater than or equal to *nextidp, or NULL if there is none.
+ */
+
+void *idr_get_next(struct idr *idp, int *nextidp)
+{
+ struct idr_layer *p, *pa[MAX_LEVEL];
+ struct idr_layer **paa = &pa[0];
+ int id = *nextidp;
+ int n, max;
+
+ /* find first ent */
+ n = idp->layers * IDR_BITS;
+ max = 1 << n;
+ p = rcu_dereference_raw(idp->top);
+ if (!p)
+ return NULL;
+
+ while (id < max) {
+ while (n > 0 && p) {
+ n -= IDR_BITS;
+ *paa++ = p;
+ p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
+ }
+
+ if (p) {
+ *nextidp = id;
+ return p;
+ }
+
+ id += 1 << n;
+ while (n < fls(id)) {
+ n += IDR_BITS;
+ p = *--paa;
+ }
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(idr_get_next);
+
+
+/**
+ * idr_replace - replace pointer for given id
+ * @idp: idr handle
+ * @ptr: pointer you want associated with the id
+ * @id: lookup key
+ *
+ * Replace the pointer registered with an id and return the old value.
+ * A -ENOENT return indicates that @id was not found.
+ * A -EINVAL return indicates that @id was not within valid constraints.
+ *
+ * The caller must serialize with writers.
+ */
+void *idr_replace(struct idr *idp, void *ptr, int id)
+{
+ int n;
+ struct idr_layer *p, *old_p;
+
+ p = idp->top;
+ if (!p)
+ return ERR_PTR(-EINVAL);
+
+ n = (p->layer+1) * IDR_BITS;
+
+ id &= MAX_ID_MASK;
+
+ if (id >= (1 << n))
+ return ERR_PTR(-EINVAL);
+
+ n -= IDR_BITS;
+ while ((n > 0) && p) {
+ p = p->ary[(id >> n) & IDR_MASK];
+ n -= IDR_BITS;
+ }
+
+ n = id & IDR_MASK;
+ if (unlikely(p == NULL || !test_bit(n, &p->bitmap)))
+ return ERR_PTR(-ENOENT);
+
+ old_p = p->ary[n];
+ rcu_assign_pointer(p->ary[n], ptr);
+
+ return old_p;
+}
+EXPORT_SYMBOL(idr_replace);
+
+void __init idr_init_cache(void)
+{
+ idr_layer_cache = kmem_cache_create("idr_layer_cache",
+ sizeof(struct idr_layer), 0, SLAB_PANIC, NULL);
+}
+
+/**
+ * idr_init - initialize idr handle
+ * @idp: idr handle
+ *
+ * This function is used to set up the handle (@idp) that you will pass
+ * to the rest of the functions.
+ */
+void idr_init(struct idr *idp)
+{
+ memset(idp, 0, sizeof(struct idr));
+ spin_lock_init(&idp->lock);
+}
+EXPORT_SYMBOL(idr_init);
+
+
+/*
+ * IDA - IDR based ID allocator
+ *
+ * This is an id allocator without id -> pointer translation. Memory
+ * usage is much lower than a full-blown idr because each id only
+ * occupies a bit. ida uses a custom leaf node which contains
+ * IDA_BITMAP_BITS slots.
+ *
+ * 2007-04-25 written by Tejun Heo <htejun@gmail.com>
+ */
+
+static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
+{
+ unsigned long flags;
+
+ if (!ida->free_bitmap) {
+ spin_lock_irqsave(&ida->idr.lock, flags);
+ if (!ida->free_bitmap) {
+ ida->free_bitmap = bitmap;
+ bitmap = NULL;
+ }
+ spin_unlock_irqrestore(&ida->idr.lock, flags);
+ }
+
+ kfree(bitmap);
+}
+
+/**
+ * ida_pre_get - reserve resources for ida allocation
+ * @ida: ida handle
+ * @gfp_mask: memory allocation flag
+ *
+ * This function should be called prior to locking and calling the
+ * following function. It preallocates enough memory to satisfy the
+ * worst possible allocation.
+ *
+ * If the system is REALLY out of memory this function returns 0,
+ * otherwise 1.
+ */
+int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
+{
+ /* allocate idr_layers */
+ if (!idr_pre_get(&ida->idr, gfp_mask))
+ return 0;
+
+ /* allocate free_bitmap */
+ if (!ida->free_bitmap) {
+ struct ida_bitmap *bitmap;
+
+ bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask);
+ if (!bitmap)
+ return 0;
+
+ free_bitmap(ida, bitmap);
+ }
+
+ return 1;
+}
+EXPORT_SYMBOL(ida_pre_get);
+
+/**
+ * ida_get_new_above - allocate new ID above or equal to a start id
+ * @ida: ida handle
+ * @starting_id: id to start search at
+ * @p_id: pointer to the allocated handle
+ *
+ * Allocate new ID above or equal to @starting_id. It should be called with
+ * any required locks.
+ *
+ * If memory is required, it will return -EAGAIN, you should unlock
+ * and go back to the ida_pre_get() call. If the ida is full, it will
+ * return -ENOSPC.
+ *
+ * @p_id returns a value in the range @starting_id ... 0x7fffffff.
+ */
+int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
+{
+ struct idr_layer *pa[MAX_LEVEL];
+ struct ida_bitmap *bitmap;
+ unsigned long flags;
+ int idr_id = starting_id / IDA_BITMAP_BITS;
+ int offset = starting_id % IDA_BITMAP_BITS;
+ int t, id;
+
+ restart:
+ /* get vacant slot */
+ t = idr_get_empty_slot(&ida->idr, idr_id, pa);
+ if (t < 0)
+ return _idr_rc_to_errno(t);
+
+ if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
+ return -ENOSPC;
+
+ if (t != idr_id)
+ offset = 0;
+ idr_id = t;
+
+ /* if bitmap isn't there, create a new one */
+ bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK];
+ if (!bitmap) {
+ spin_lock_irqsave(&ida->idr.lock, flags);
+ bitmap = ida->free_bitmap;
+ ida->free_bitmap = NULL;
+ spin_unlock_irqrestore(&ida->idr.lock, flags);
+
+ if (!bitmap)
+ return -EAGAIN;
+
+ memset(bitmap, 0, sizeof(struct ida_bitmap));
+ rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
+ (void *)bitmap);
+ pa[0]->count++;
+ }
+
+ /* lookup for empty slot */
+ t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
+ if (t == IDA_BITMAP_BITS) {
+ /* no empty slot after offset, continue to the next chunk */
+ idr_id++;
+ offset = 0;
+ goto restart;
+ }
+
+ id = idr_id * IDA_BITMAP_BITS + t;
+ if (id >= MAX_ID_BIT)
+ return -ENOSPC;
+
+ __set_bit(t, bitmap->bitmap);
+ if (++bitmap->nr_busy == IDA_BITMAP_BITS)
+ idr_mark_full(pa, idr_id);
+
+ *p_id = id;
+
+ /* Each leaf node can handle nearly a thousand slots and the
+ * whole idea of ida is to have small memory foot print.
+ * Throw away extra resources one by one after each successful
+ * allocation.
+ */
+ if (ida->idr.id_free_cnt || ida->free_bitmap) {
+ struct idr_layer *p = get_from_free_list(&ida->idr);
+ if (p)
+ kmem_cache_free(idr_layer_cache, p);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ida_get_new_above);
+
+/**
+ * ida_get_new - allocate new ID
+ * @ida: ida handle
+ * @p_id: pointer to the allocated handle
+ *
+ * Allocate new ID. It should be called with any required locks.
+ *
+ * If memory is required, it will return -EAGAIN, you should unlock
+ * and go back to the idr_pre_get() call. If the idr is full, it will
+ * return -ENOSPC.
+ *
+ * @p_id returns a value in the range 0 ... 0x7fffffff.
+ */
+int ida_get_new(struct ida *ida, int *p_id)
+{
+ return ida_get_new_above(ida, 0, p_id);
+}
+EXPORT_SYMBOL(ida_get_new);
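+
+/*
+ * The same pre-get/retry pattern applies here, sketched with a
+ * hypothetical "my_ida":
+ *
+ * int id, err;
+ * retry:
+ * if (!ida_pre_get(&my_ida, GFP_KERNEL))
+ * return -ENOMEM;
+ * err = ida_get_new(&my_ida, &id);
+ * if (err == -EAGAIN)
+ * goto retry;
+ */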
+
+/**
+ * ida_remove - remove the given ID
+ * @ida: ida handle
+ * @id: ID to free
+ */
+void ida_remove(struct ida *ida, int id)
+{
+ struct idr_layer *p = ida->idr.top;
+ int shift = (ida->idr.layers - 1) * IDR_BITS;
+ int idr_id = id / IDA_BITMAP_BITS;
+ int offset = id % IDA_BITMAP_BITS;
+ int n;
+ struct ida_bitmap *bitmap;
+
+ /* clear full bits while looking up the leaf idr_layer */
+ while ((shift > 0) && p) {
+ n = (idr_id >> shift) & IDR_MASK;
+ __clear_bit(n, &p->bitmap);
+ p = p->ary[n];
+ shift -= IDR_BITS;
+ }
+
+ if (p == NULL)
+ goto err;
+
+ n = idr_id & IDR_MASK;
+ __clear_bit(n, &p->bitmap);
+
+ bitmap = (void *)p->ary[n];
+ if (!test_bit(offset, bitmap->bitmap))
+ goto err;
+
+ /* update bitmap and remove it if empty */
+ __clear_bit(offset, bitmap->bitmap);
+ if (--bitmap->nr_busy == 0) {
+ __set_bit(n, &p->bitmap); /* to please idr_remove() */
+ idr_remove(&ida->idr, idr_id);
+ free_bitmap(ida, bitmap);
+ }
+
+ return;
+
+ err:
+ printk(KERN_WARNING
+ "ida_remove called for id=%d which is not allocated.\n", id);
+}
+EXPORT_SYMBOL(ida_remove);
+
+/**
+ * ida_destroy - release all cached layers within an ida tree
+ * @ida: ida handle
+ */
+void ida_destroy(struct ida *ida)
+{
+ idr_destroy(&ida->idr);
+ kfree(ida->free_bitmap);
+}
+EXPORT_SYMBOL(ida_destroy);
+
+/**
+ * ida_init - initialize ida handle
+ * @ida: ida handle
+ *
+ * This function is used to set up the handle (@ida) that you will pass
+ * to the rest of the functions.
+ */
+void ida_init(struct ida *ida)
+{
+ memset(ida, 0, sizeof(struct ida));
+ idr_init(&ida->idr);
+}
+EXPORT_SYMBOL(ida_init);
diff --git a/lib/inflate.c b/lib/inflate.c
new file mode 100644
index 00000000..013a7619
--- /dev/null
+++ b/lib/inflate.c
@@ -0,0 +1,1309 @@
+#define DEBG(x)
+#define DEBG1(x)
+/* inflate.c -- Not copyrighted 1992 by Mark Adler
+ version c10p1, 10 January 1993 */
+
+/*
+ * Adapted for booting Linux by Hannu Savolainen 1993
+ * based on gzip-1.0.3
+ *
+ * Nicolas Pitre <nico@fluxnic.net>, 1999/04/14 :
+ * Little mods for all variable to reside either into rodata or bss segments
+ * by marking constant variables with 'const' and initializing all the others
+ * at run-time only. This allows for the kernel uncompressor to run
+ * directly from Flash or ROM memory on embedded systems.
+ */
+
+/*
+ Inflate deflated (PKZIP's method 8 compressed) data. The compression
+ method searches for as much of the current string of bytes (up to a
+ length of 258) in the previous 32 K bytes. If it doesn't find any
+ matches (of at least length 3), it codes the next byte. Otherwise, it
+ codes the length of the matched string and its distance backwards from
+ the current position. There is a single Huffman code that codes both
+ single bytes (called "literals") and match lengths. A second Huffman
+ code codes the distance information, which follows a length code. Each
+ length or distance code actually represents a base value and a number
+ of "extra" (sometimes zero) bits to get to add to the base value. At
+ the end of each deflated block is a special end-of-block (EOB) literal/
+ length code. The decoding process is basically: get a literal/length
+ code; if EOB then done; if a literal, emit the decoded byte; if a
+ length then get the distance and emit the referred-to bytes from the
+ sliding window of previously emitted data.
+
+ There are (currently) three kinds of inflate blocks: stored, fixed, and
+ dynamic. The compressor deals with some chunk of data at a time, and
+ decides which method to use on a chunk-by-chunk basis. A chunk might
+ typically be 32 K or 64 K. If the chunk is incompressible, then the
+ "stored" method is used. In this case, the bytes are simply stored as
+ is, eight bits per byte, with none of the above coding. The bytes are
+ preceded by a count, since there is no longer an EOB code.
+
+ If the data is compressible, then either the fixed or dynamic methods
+ are used. In the dynamic method, the compressed data is preceded by
+ an encoding of the literal/length and distance Huffman codes that are
+ to be used to decode this block. The representation is itself Huffman
+ coded, and so is preceded by a description of that code. These code
+ descriptions take up a little space, and so for small blocks, there is
+ a predefined set of codes, called the fixed codes. The fixed method is
+ used if the block codes up smaller that way (usually for quite small
+ chunks), otherwise the dynamic method is used. In the latter case, the
+ codes are customized to the probabilities in the current block, and so
+ can code it much better than the pre-determined fixed codes.
+
+ The Huffman codes themselves are decoded using a multi-level table
+ lookup, in order to maximize the speed of decoding plus the speed of
+ building the decoding tables. See the comments below that precede the
+ lbits and dbits tuning parameters.
+ */
+
+
+/*
+ Notes beyond the 1.93a appnote.txt:
+
+ 1. Distance pointers never point before the beginning of the output
+ stream.
+ 2. Distance pointers can point back across blocks, up to 32k away.
+ 3. There is an implied maximum of 7 bits for the bit length table and
+ 15 bits for the actual data.
+ 4. If only one code exists, then it is encoded using one bit. (Zero
+ would be more efficient, but perhaps a little confusing.) If two
+ codes exist, they are coded using one bit each (0 and 1).
+ 5. There is no way of sending zero distance codes--a dummy must be
+ sent if there are none. (History: a pre 2.0 version of PKZIP would
+ store blocks with no distance codes, but this was discovered to be
+ too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
+ zero distance codes, which is sent as one code of zero bits in
+ length.
+ 6. There are up to 286 literal/length codes. Code 256 represents the
+ end-of-block. Note however that the static length tree defines
+ 288 codes just to fill out the Huffman codes. Codes 286 and 287
+ cannot be used though, since there is no length base or extra bits
+ defined for them. Similarly, there are up to 30 distance codes.
+ However, static trees define 32 codes (all 5 bits) to fill out the
+ Huffman codes, but the last two had better not show up in the data.
+ 7. Unzip can check dynamic Huffman blocks for complete code sets.
+ The exception is that a single code would not be complete (see #4).
+ 8. The five bits following the block type is really the number of
+ literal codes sent minus 257.
+ 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
+ (1+6+6). Therefore, to output three times the length, you output
+ three codes (1+1+1), whereas to output four times the same length,
+ you only need two codes (1+3). Hmm.
+ 10. In the tree reconstruction algorithm, Code = Code + Increment
+ only if BitLength(i) is not zero. (Pretty obvious.)
+ 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
+ 12. Note: length code 284 can represent 227-258, but length code 285
+ really is 258. The last length deserves its own, short code
+ since it gets used a lot in very redundant files. The length
+ 258 is special since 258 - 3 (the min match length) is 255.
+ 13. The literal/length and distance code bit lengths are read as a
+ single stream of lengths. It is possible (and advantageous) for
+ a repeat code (16, 17, or 18) to go across the boundary between
+ the two sets of lengths.
+ */
+#include <linux/compiler.h>
+#ifdef NO_INFLATE_MALLOC
+#include <linux/slab.h>
+#endif
+
+#ifdef RCSID
+static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
+#endif
+
+#ifndef STATIC
+
+#if defined(STDC_HEADERS) || defined(HAVE_STDLIB_H)
+# include <sys/types.h>
+# include <stdlib.h>
+#endif
+
+#include "gzip.h"
+#define STATIC
+#endif /* !STATIC */
+
+#ifndef INIT
+#define INIT
+#endif
+
+#define slide window
+
+/* Huffman code lookup table entry--this entry is four bytes for machines
+ that have 16-bit pointers (e.g. PC's in the small or medium model).
+ Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16
+ means that v is a literal, 16 < e < 32 means that v is a pointer to
+ the next table, which codes e - 16 bits, and lastly e == 99 indicates
+ an unused code. If a code with e == 99 is looked up, this implies an
+ error in the data. */
+struct huft {
+ uch e; /* number of extra bits or operation */
+ uch b; /* number of bits in this code or subcode */
+ union {
+ ush n; /* literal, length base, or distance base */
+ struct huft *t; /* pointer to next level of table */
+ } v;
+};
+
+
+/* Function prototypes */
+STATIC int INIT huft_build OF((unsigned *, unsigned, unsigned,
+ const ush *, const ush *, struct huft **, int *));
+STATIC int INIT huft_free OF((struct huft *));
+STATIC int INIT inflate_codes OF((struct huft *, struct huft *, int, int));
+STATIC int INIT inflate_stored OF((void));
+STATIC int INIT inflate_fixed OF((void));
+STATIC int INIT inflate_dynamic OF((void));
+STATIC int INIT inflate_block OF((int *));
+STATIC int INIT inflate OF((void));
+
+
+/* The inflate algorithm uses a sliding 32 K byte window on the uncompressed
+ stream to find repeated byte strings. This is implemented here as a
+ circular buffer. The index is updated simply by incrementing and then
+ ANDing with 0x7fff (32K-1). */
+/* It is left to other modules to supply the 32 K area. It is assumed
+ to be usable as if it were declared "uch slide[32768];" or as just
+ "uch *slide;" and then malloc'ed in the latter case. The definition
+ must be in unzip.h, included above. */
+/* unsigned wp; current position in slide */
+#define wp outcnt
+#define flush_output(w) (wp=(w),flush_window())
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+static const unsigned border[] = { /* Order of the bit length code lengths */
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+ /* note: see note #13 above about the 258 in this list. */
+static const ush cplext[] = { /* Extra bits for literal codes 257..285 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
+static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577};
+static const ush cpdext[] = { /* Extra bits for distance codes */
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13};
+
+
+
+/* Macros for inflate() bit peeking and grabbing.
+ The usage is:
+
+ NEEDBITS(j)
+ x = b & mask_bits[j];
+ DUMPBITS(j)
+
+ where NEEDBITS makes sure that b has at least j bits in it, and
+ DUMPBITS removes the bits from b. The macros use the variable k
+ for the number of bits in b. Normally, b and k are register
+ variables for speed, and are initialized at the beginning of a
+ routine that uses these macros from a global bit buffer and count.
+
+ If we assume that EOB will be the longest code, then we will never
+ ask for bits with NEEDBITS that are beyond the end of the stream.
+ So, NEEDBITS should not read any more bytes than are needed to
+ meet the request. Then no bytes need to be "returned" to the buffer
+ at the end of the last block.
+
+ However, this assumption is not true for fixed blocks--the EOB code
+ is 7 bits, but the other literal/length codes can be 8 or 9 bits.
+ (The EOB code is shorter than other codes because fixed blocks are
+ generally short. So, while a block always has an EOB, many other
+ literal/length codes have a significantly lower probability of
+ showing up at all.) However, by making the first table have a
+ lookup of seven bits, the EOB code will be found in that first
+ lookup, and so will not require that too many bits be pulled from
+ the stream.
+ */
+
+STATIC ulg bb; /* bit buffer */
+STATIC unsigned bk; /* bits in bit buffer */
+
+STATIC const ush mask_bits[] = {
+ 0x0000,
+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
+
+#define NEXTBYTE() ({ int v = get_byte(); if (v < 0) goto underrun; (uch)v; })
+#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
+#define DUMPBITS(n) {b>>=(n);k-=(n);}
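+/*
+ * Concrete instance of the pattern documented above, as used by
+ * inflate_block() later in this file to read the 2-bit block type:
+ *
+ *	NEEDBITS(2)
+ *	t = (unsigned)b & 3;
+ *	DUMPBITS(2)
+ */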
+
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+ void *p;
+
+ if (size < 0)
+ error("Malloc error");
+ if (!malloc_ptr)
+ malloc_ptr = free_mem_ptr;
+
+ malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
+
+ p = (void *)malloc_ptr;
+ malloc_ptr += size;
+
+ if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+ error("Out of memory");
+
+ malloc_count++;
+ return p;
+}
+
+static void free(void *where)
+{
+ malloc_count--;
+ if (!malloc_count)
+ malloc_ptr = free_mem_ptr;
+}
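+/*
+ * Note that this is a bump allocator: free() does not release
+ * individual blocks, it only rewinds the whole arena once every
+ * outstanding allocation has been returned. Illustrative sequence:
+ *
+ *	a = malloc(16);
+ *	b = malloc(16);
+ *	free(a);	arena still in use (malloc_count == 1)
+ *	free(b);	malloc_count reaches 0, malloc_ptr resets
+ */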
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
+
+/*
+ Huffman code decoding is performed using a multi-level table lookup.
+ The fastest way to decode is to simply build a lookup table whose
+ size is determined by the longest code. However, the time it takes
+ to build this table can also be a factor if the data being decoded
+ is not very long. The most common codes are necessarily the
+ shortest codes, so those codes dominate the decoding time, and hence
+ the speed. The idea is you can have a shorter table that decodes the
+ shorter, more probable codes, and then point to subsidiary tables for
+ the longer codes. The time it costs to decode the longer codes is
+ then traded against the time it takes to make longer tables.
+
+ The results of this trade-off are in the variables lbits and dbits
+ below. lbits is the number of bits the first level table for literal/
+ length codes can decode in one step, and dbits is the same thing for
+ the distance codes. Subsequent tables are also less than or equal to
+ those sizes. These values may be adjusted either when all of the
+ codes are shorter than that, in which case the longest code length in
+ bits is used, or when the shortest code is *longer* than the requested
+ table size, in which case the length of the shortest code in bits is
+ used.
+
+ There are two different values for the two tables, since they code a
+ different number of possibilities each. The literal/length table
+ codes 286 possible values, or in a flat code, a little over eight
+ bits. The distance table codes 30 possible values, or a little less
+ than five bits, flat. The optimum values for speed end up being
+ about one bit more than those, so lbits is 8+1 and dbits is 5+1.
+ The optimum values may differ though from machine to machine, and
+ possibly even between compilers. Your mileage may vary.
+ */
+
+
+STATIC const int lbits = 9; /* bits in base literal/length lookup table */
+STATIC const int dbits = 6; /* bits in base distance lookup table */
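+/*
+ * Worked example (illustrative): with lbits == 9, a 12-bit
+ * literal/length code is decoded in two steps. The low 9 bits index
+ * the first-level table; the entry found there has e == 16 + 3, i.e.
+ * it points at a second-level table indexed by the remaining 3 bits
+ * (see the description of struct huft above).
+ */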
+
+
+/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
+#define BMAX 16 /* maximum bit length of any code (16 for explode) */
+#define N_MAX 288 /* maximum number of codes in any set */
+
+
+STATIC unsigned hufts; /* track memory usage */
+
+
+STATIC int INIT huft_build(
+ unsigned *b, /* code lengths in bits (all assumed <= BMAX) */
+ unsigned n, /* number of codes (assumed <= N_MAX) */
+ unsigned s, /* number of simple-valued codes (0..s-1) */
+ const ush *d, /* list of base values for non-simple codes */
+ const ush *e, /* list of extra bits for non-simple codes */
+ struct huft **t, /* result: starting table */
+ int *m /* maximum lookup bits, returns actual */
+ )
+/* Given a list of code lengths and a maximum table size, make a set of
+ tables to decode that set of codes. Return zero on success, one if
+ the given code set is incomplete (the tables are still built in this
+ case), two if the input is invalid (all zero length codes or an
+ oversubscribed set of lengths), and three if not enough memory. */
+{
+ unsigned a; /* counter for codes of length k */
+ unsigned f; /* i repeats in table every f entries */
+ int g; /* maximum code length */
+ int h; /* table level */
+ register unsigned i; /* counter, current code */
+ register unsigned j; /* counter */
+ register int k; /* number of bits in current code */
+ int l; /* bits per table (returned in m) */
+ register unsigned *p; /* pointer into c[], b[], or v[] */
+ register struct huft *q; /* points to current table */
+ struct huft r; /* table entry for structure assignment */
+ register int w; /* bits before this table == (l * h) */
+ unsigned *xp; /* pointer into x */
+ int y; /* number of dummy codes added */
+ unsigned z; /* number of entries in current table */
+ struct {
+ unsigned c[BMAX+1]; /* bit length count table */
+ struct huft *u[BMAX]; /* table stack */
+ unsigned v[N_MAX]; /* values in order of bit length */
+ unsigned x[BMAX+1]; /* bit offsets, then code stack */
+ } *stk;
+ unsigned *c, *v, *x;
+ struct huft **u;
+ int ret;
+
+DEBG("huft1 ");
+
+ stk = malloc(sizeof(*stk));
+ if (stk == NULL)
+ return 3; /* out of memory */
+
+ c = stk->c;
+ v = stk->v;
+ x = stk->x;
+ u = stk->u;
+
+ /* Generate counts for each bit length */
+ memzero(stk->c, sizeof(stk->c));
+ p = b; i = n;
+ do {
+ Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"),
+ n-i, *p));
+ c[*p]++; /* assume all entries <= BMAX */
+ p++; /* Can't combine with above line (Solaris bug) */
+ } while (--i);
+ if (c[0] == n) /* null input--all zero length codes */
+ {
+ *t = (struct huft *)NULL;
+ *m = 0;
+ ret = 2;
+ goto out;
+ }
+
+DEBG("huft2 ");
+
+ /* Find minimum and maximum length, bound *m by those */
+ l = *m;
+ for (j = 1; j <= BMAX; j++)
+ if (c[j])
+ break;
+ k = j; /* minimum code length */
+ if ((unsigned)l < j)
+ l = j;
+ for (i = BMAX; i; i--)
+ if (c[i])
+ break;
+ g = i; /* maximum code length */
+ if ((unsigned)l > i)
+ l = i;
+ *m = l;
+
+DEBG("huft3 ");
+
+ /* Adjust last length count to fill out codes, if needed */
+ for (y = 1 << j; j < i; j++, y <<= 1)
+ if ((y -= c[j]) < 0) {
+ ret = 2; /* bad input: more codes than bits */
+ goto out;
+ }
+ if ((y -= c[i]) < 0) {
+ ret = 2;
+ goto out;
+ }
+ c[i] += y;
+
+DEBG("huft4 ");
+
+ /* Generate starting offsets into the value table for each length */
+ x[1] = j = 0;
+ p = c + 1; xp = x + 2;
+ while (--i) { /* note that i == g from above */
+ *xp++ = (j += *p++);
+ }
+
+DEBG("huft5 ");
+
+ /* Make a table of values in order of bit lengths */
+ p = b; i = 0;
+ do {
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
+ n = x[g]; /* set n to length of v */
+
+DEBG("h6 ");
+
+ /* Generate the Huffman codes and for each, make the table entries */
+ x[0] = i = 0; /* first Huffman code is zero */
+ p = v; /* grab values in bit order */
+ h = -1; /* no tables yet--level -1 */
+ w = -l; /* bits decoded == (l * h) */
+ u[0] = (struct huft *)NULL; /* just to keep compilers happy */
+ q = (struct huft *)NULL; /* ditto */
+ z = 0; /* ditto */
+DEBG("h6a ");
+
+ /* go through the bit lengths (k already is bits in shortest code) */
+ for (; k <= g; k++)
+ {
+DEBG("h6b ");
+ a = c[k];
+ while (a--)
+ {
+DEBG("h6b1 ");
+ /* here i is the Huffman code of length k bits for value *p */
+ /* make tables up to required level */
+ while (k > w + l)
+ {
+DEBG1("1 ");
+ h++;
+ w += l; /* previous table always l bits */
+
+ /* compute minimum size table less than or equal to l bits */
+ z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */
+ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
+ { /* too few codes for k-w bit table */
+DEBG1("2 ");
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+ if (j < z)
+ while (++j < z) /* try smaller tables up to z bits */
+ {
+ if ((f <<= 1) <= *++xp)
+ break; /* enough codes to use up j bits */
+ f -= *xp; /* else deduct codes from patterns */
+ }
+ }
+DEBG1("3 ");
+ z = 1 << j; /* table entries for j-bit table */
+
+ /* allocate and link in new table */
+ if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) ==
+ (struct huft *)NULL)
+ {
+ if (h)
+ huft_free(u[0]);
+ ret = 3; /* not enough memory */
+ goto out;
+ }
+DEBG1("4 ");
+ hufts += z + 1; /* track memory usage */
+ *t = q + 1; /* link to list for huft_free() */
+ *(t = &(q->v.t)) = (struct huft *)NULL;
+ u[h] = ++q; /* table starts after link */
+
+DEBG1("5 ");
+ /* connect to last table, if there is one */
+ if (h)
+ {
+ x[h] = i; /* save pattern for backing up */
+ r.b = (uch)l; /* bits to dump before this table */
+ r.e = (uch)(16 + j); /* bits in this table */
+ r.v.t = q; /* pointer to this table */
+ j = i >> (w - l); /* (get around Turbo C bug) */
+ u[h-1][j] = r; /* connect to last table */
+ }
+DEBG1("6 ");
+ }
+DEBG("h6c ");
+
+ /* set up table entry in r */
+ r.b = (uch)(k - w);
+ if (p >= v + n)
+ r.e = 99; /* out of values--invalid code */
+ else if (*p < s)
+ {
+ r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */
+ r.v.n = (ush)(*p); /* simple code is just the value */
+ p++; /* one compiler does not like *p++ */
+ }
+ else
+ {
+ r.e = (uch)e[*p - s]; /* non-simple--look up in lists */
+ r.v.n = d[*p++ - s];
+ }
+DEBG("h6d ");
+
+ /* fill code-like entries with r */
+ f = 1 << (k - w);
+ for (j = i >> w; j < z; j += f)
+ q[j] = r;
+
+ /* backwards increment the k-bit code i */
+ for (j = 1 << (k - 1); i & j; j >>= 1)
+ i ^= j;
+ i ^= j;
+
+ /* backup over finished tables */
+ while ((i & ((1 << w) - 1)) != x[h])
+ {
+ h--; /* don't need to update q */
+ w -= l;
+ }
+DEBG("h6e ");
+ }
+DEBG("h6f ");
+ }
+
+DEBG("huft7 ");
+
+ /* Return true (1) if we were given an incomplete table */
+ ret = y != 0 && g != 1;
+
+ out:
+ free(stk);
+ return ret;
+}
+
+
+
+STATIC int INIT huft_free(
+ struct huft *t /* table to free */
+ )
+/* Free the malloc'ed tables built by huft_build(), which makes a linked
+ list of the tables it made, with the links in a dummy first entry of
+ each table. */
+{
+ register struct huft *p, *q;
+
+
+ /* Go through linked list, freeing from the malloced (t[-1]) address. */
+ p = t;
+ while (p != (struct huft *)NULL)
+ {
+ q = (--p)->v.t;
+ free((char*)p);
+ p = q;
+ }
+ return 0;
+}
+
+
+STATIC int INIT inflate_codes(
+ struct huft *tl, /* literal/length decoder tables */
+ struct huft *td, /* distance decoder tables */
+ int bl, /* number of bits decoded by tl[] */
+ int bd /* number of bits decoded by td[] */
+ )
+/* inflate (decompress) the codes in a deflated (compressed) block.
+ Return an error code or zero if it all goes ok. */
+{
+ register unsigned e; /* table entry flag/number of extra bits */
+ unsigned n, d; /* length and index for copy */
+ unsigned w; /* current window position */
+ struct huft *t; /* pointer to table entry */
+ unsigned ml, md; /* masks for bl and bd bits */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+
+
+ /* make local copies of globals */
+ b = bb; /* initialize bit buffer */
+ k = bk;
+ w = wp; /* initialize window position */
+
+ /* inflate the coded data */
+ ml = mask_bits[bl]; /* precompute masks for speed */
+ md = mask_bits[bd];
+ for (;;) /* do until end of block */
+ {
+ NEEDBITS((unsigned)bl)
+ if ((e = (t = tl + ((unsigned)b & ml))->e) > 16)
+ do {
+ if (e == 99)
+ return 1;
+ DUMPBITS(t->b)
+ e -= 16;
+ NEEDBITS(e)
+ } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16);
+ DUMPBITS(t->b)
+ if (e == 16) /* then it's a literal */
+ {
+ slide[w++] = (uch)t->v.n;
+ Tracevv((stderr, "%c", slide[w-1]));
+ if (w == WSIZE)
+ {
+ flush_output(w);
+ w = 0;
+ }
+ }
+ else /* it's an EOB or a length */
+ {
+ /* exit if end of block */
+ if (e == 15)
+ break;
+
+ /* get length of block to copy */
+ NEEDBITS(e)
+ n = t->v.n + ((unsigned)b & mask_bits[e]);
+ DUMPBITS(e);
+
+ /* decode distance of block to copy */
+ NEEDBITS((unsigned)bd)
+ if ((e = (t = td + ((unsigned)b & md))->e) > 16)
+ do {
+ if (e == 99)
+ return 1;
+ DUMPBITS(t->b)
+ e -= 16;
+ NEEDBITS(e)
+ } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16);
+ DUMPBITS(t->b)
+ NEEDBITS(e)
+ d = w - t->v.n - ((unsigned)b & mask_bits[e]);
+ DUMPBITS(e)
+ Tracevv((stderr,"\\[%d,%d]", w-d, n));
+
+ /* do the copy */
+ do {
+ n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
+#if !defined(NOMEMCPY) && !defined(DEBUG)
+ if (w - d >= e) /* (this test assumes unsigned comparison) */
+ {
+ memcpy(slide + w, slide + d, e);
+ w += e;
+ d += e;
+ }
+ else /* do it slow to avoid memcpy() overlap */
+#endif /* !NOMEMCPY */
+ do {
+ slide[w++] = slide[d++];
+ Tracevv((stderr, "%c", slide[w-1]));
+ } while (--e);
+ if (w == WSIZE)
+ {
+ flush_output(w);
+ w = 0;
+ }
+ } while (n);
+ }
+ }
+
+
+ /* restore the globals from the locals */
+ wp = w; /* restore global window pointer */
+ bb = b; /* restore global bit buffer */
+ bk = k;
+
+ /* done */
+ return 0;
+
+ underrun:
+ return 4; /* Input underrun */
+}
+
+
+
+STATIC int INIT inflate_stored(void)
+/* "decompress" an inflated type 0 (stored) block. */
+{
+ unsigned n; /* number of bytes in block */
+ unsigned w; /* current window position */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+
+DEBG("<stor");
+
+ /* make local copies of globals */
+ b = bb; /* initialize bit buffer */
+ k = bk;
+ w = wp; /* initialize window position */
+
+
+ /* go to byte boundary */
+ n = k & 7;
+ DUMPBITS(n);
+
+
+ /* get the length and its complement */
+ NEEDBITS(16)
+ n = ((unsigned)b & 0xffff);
+ DUMPBITS(16)
+ NEEDBITS(16)
+ if (n != (unsigned)((~b) & 0xffff))
+ return 1; /* error in compressed data */
+ DUMPBITS(16)
+
+
+ /* read and output the compressed data */
+ while (n--)
+ {
+ NEEDBITS(8)
+ slide[w++] = (uch)b;
+ if (w == WSIZE)
+ {
+ flush_output(w);
+ w = 0;
+ }
+ DUMPBITS(8)
+ }
+
+
+ /* restore the globals from the locals */
+ wp = w; /* restore global window pointer */
+ bb = b; /* restore global bit buffer */
+ bk = k;
+
+ DEBG(">");
+ return 0;
+
+ underrun:
+ return 4; /* Input underrun */
+}
+
+
+/*
+ * We use `noinline' here to prevent gcc-3.5 from using too much stack space
+ */
+STATIC int noinline INIT inflate_fixed(void)
+/* decompress an inflated type 1 (fixed Huffman codes) block. We should
+ either replace this with a custom decoder, or at least precompute the
+ Huffman tables. */
+{
+ int i; /* temporary variable */
+ struct huft *tl; /* literal/length code table */
+ struct huft *td; /* distance code table */
+ int bl; /* lookup bits for tl */
+ int bd; /* lookup bits for td */
+ unsigned *l; /* length list for huft_build */
+
+DEBG("<fix");
+
+ l = malloc(sizeof(*l) * 288);
+ if (l == NULL)
+ return 3; /* out of memory */
+
+ /* set up literal table */
+ for (i = 0; i < 144; i++)
+ l[i] = 8;
+ for (; i < 256; i++)
+ l[i] = 9;
+ for (; i < 280; i++)
+ l[i] = 7;
+ for (; i < 288; i++) /* make a complete, but wrong code set */
+ l[i] = 8;
+ bl = 7;
+ if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) {
+ free(l);
+ return i;
+ }
+
+ /* set up distance table */
+ for (i = 0; i < 30; i++) /* make an incomplete code set */
+ l[i] = 5;
+ bd = 5;
+ if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1)
+ {
+ huft_free(tl);
+ free(l);
+
+ DEBG(">");
+ return i;
+ }
+
+
+ /* decompress until an end-of-block code */
+	if (inflate_codes(tl, td, bl, bd)) {
+		huft_free(tl);
+		huft_free(td);
+		free(l);
+		return 1;
+	}
+
+ /* free the decoding tables, return */
+ free(l);
+ huft_free(tl);
+ huft_free(td);
+ return 0;
+}
+
+
+/*
+ * We use `noinline' here to prevent gcc-3.5 from using too much stack space
+ */
+STATIC int noinline INIT inflate_dynamic(void)
+/* decompress an inflated type 2 (dynamic Huffman codes) block. */
+{
+ int i; /* temporary variables */
+ unsigned j;
+ unsigned l; /* last length */
+ unsigned m; /* mask for bit lengths table */
+ unsigned n; /* number of lengths to get */
+ struct huft *tl; /* literal/length code table */
+ struct huft *td; /* distance code table */
+ int bl; /* lookup bits for tl */
+ int bd; /* lookup bits for td */
+ unsigned nb; /* number of bit length codes */
+ unsigned nl; /* number of literal/length codes */
+ unsigned nd; /* number of distance codes */
+ unsigned *ll; /* literal/length and distance code lengths */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+ int ret;
+
+DEBG("<dyn");
+
+#ifdef PKZIP_BUG_WORKAROUND
+ ll = malloc(sizeof(*ll) * (288+32)); /* literal/length and distance code lengths */
+#else
+ ll = malloc(sizeof(*ll) * (286+30)); /* literal/length and distance code lengths */
+#endif
+
+ if (ll == NULL)
+ return 1;
+
+ /* make local bit buffer */
+ b = bb;
+ k = bk;
+
+
+ /* read in table lengths */
+ NEEDBITS(5)
+ nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */
+ DUMPBITS(5)
+ NEEDBITS(5)
+ nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */
+ DUMPBITS(5)
+ NEEDBITS(4)
+ nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */
+ DUMPBITS(4)
+#ifdef PKZIP_BUG_WORKAROUND
+ if (nl > 288 || nd > 32)
+#else
+ if (nl > 286 || nd > 30)
+#endif
+ {
+ ret = 1; /* bad lengths */
+ goto out;
+ }
+
+DEBG("dyn1 ");
+
+ /* read in bit-length-code lengths */
+ for (j = 0; j < nb; j++)
+ {
+ NEEDBITS(3)
+ ll[border[j]] = (unsigned)b & 7;
+ DUMPBITS(3)
+ }
+ for (; j < 19; j++)
+ ll[border[j]] = 0;
+
+DEBG("dyn2 ");
+
+ /* build decoding table for trees--single level, 7 bit lookup */
+ bl = 7;
+ if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0)
+ {
+ if (i == 1)
+ huft_free(tl);
+ ret = i; /* incomplete code set */
+ goto out;
+ }
+
+DEBG("dyn3 ");
+
+ /* read in literal and distance code lengths */
+ n = nl + nd;
+ m = mask_bits[bl];
+ i = l = 0;
+ while ((unsigned)i < n)
+ {
+ NEEDBITS((unsigned)bl)
+ j = (td = tl + ((unsigned)b & m))->b;
+ DUMPBITS(j)
+ j = td->v.n;
+ if (j < 16) /* length of code in bits (0..15) */
+ ll[i++] = l = j; /* save last length in l */
+ else if (j == 16) /* repeat last length 3 to 6 times */
+ {
+ NEEDBITS(2)
+ j = 3 + ((unsigned)b & 3);
+ DUMPBITS(2)
+ if ((unsigned)i + j > n) {
+ ret = 1;
+ goto out;
+ }
+ while (j--)
+ ll[i++] = l;
+ }
+ else if (j == 17) /* 3 to 10 zero length codes */
+ {
+ NEEDBITS(3)
+ j = 3 + ((unsigned)b & 7);
+ DUMPBITS(3)
+ if ((unsigned)i + j > n) {
+ ret = 1;
+ goto out;
+ }
+ while (j--)
+ ll[i++] = 0;
+ l = 0;
+ }
+ else /* j == 18: 11 to 138 zero length codes */
+ {
+ NEEDBITS(7)
+ j = 11 + ((unsigned)b & 0x7f);
+ DUMPBITS(7)
+ if ((unsigned)i + j > n) {
+ ret = 1;
+ goto out;
+ }
+ while (j--)
+ ll[i++] = 0;
+ l = 0;
+ }
+ }
+
+DEBG("dyn4 ");
+
+ /* free decoding table for trees */
+ huft_free(tl);
+
+DEBG("dyn5 ");
+
+ /* restore the global bit buffer */
+ bb = b;
+ bk = k;
+
+DEBG("dyn5a ");
+
+ /* build the decoding tables for literal/length and distance codes */
+ bl = lbits;
+ if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0)
+ {
+DEBG("dyn5b ");
+ if (i == 1) {
+ error("incomplete literal tree");
+ huft_free(tl);
+ }
+ ret = i; /* incomplete code set */
+ goto out;
+ }
+DEBG("dyn5c ");
+ bd = dbits;
+ if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0)
+ {
+DEBG("dyn5d ");
+ if (i == 1) {
+ error("incomplete distance tree");
+#ifdef PKZIP_BUG_WORKAROUND
+ i = 0;
+ }
+#else
+ huft_free(td);
+ }
+ huft_free(tl);
+ ret = i; /* incomplete code set */
+ goto out;
+#endif
+ }
+
+DEBG("dyn6 ");
+
+ /* decompress until an end-of-block code */
+ if (inflate_codes(tl, td, bl, bd)) {
+ ret = 1;
+ goto out;
+ }
+
+DEBG("dyn7 ");
+
+ /* free the decoding tables, return */
+ huft_free(tl);
+ huft_free(td);
+
+ DEBG(">");
+ ret = 0;
+out:
+ free(ll);
+ return ret;
+
+underrun:
+ ret = 4; /* Input underrun */
+ goto out;
+}
+
+
+
+STATIC int INIT inflate_block(
+ int *e /* last block flag */
+ )
+/* decompress an inflated block */
+{
+ unsigned t; /* block type */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+
+ DEBG("<blk");
+
+ /* make local bit buffer */
+ b = bb;
+ k = bk;
+
+
+ /* read in last block bit */
+ NEEDBITS(1)
+ *e = (int)b & 1;
+ DUMPBITS(1)
+
+
+ /* read in block type */
+ NEEDBITS(2)
+ t = (unsigned)b & 3;
+ DUMPBITS(2)
+
+
+ /* restore the global bit buffer */
+ bb = b;
+ bk = k;
+
+ /* inflate that block type */
+ if (t == 2)
+ return inflate_dynamic();
+ if (t == 0)
+ return inflate_stored();
+ if (t == 1)
+ return inflate_fixed();
+
+ DEBG(">");
+
+ /* bad block type */
+ return 2;
+
+ underrun:
+ return 4; /* Input underrun */
+}
+
+
+
+STATIC int INIT inflate(void)
+/* decompress an inflated entry */
+{
+ int e; /* last block flag */
+ int r; /* result code */
+ unsigned h; /* maximum struct huft's malloc'ed */
+
+ /* initialize window, bit buffer */
+ wp = 0;
+ bk = 0;
+ bb = 0;
+
+
+ /* decompress until the last block */
+ h = 0;
+ do {
+ hufts = 0;
+#ifdef ARCH_HAS_DECOMP_WDOG
+ arch_decomp_wdog();
+#endif
+ r = inflate_block(&e);
+ if (r)
+ return r;
+ if (hufts > h)
+ h = hufts;
+ } while (!e);
+
+ /* Undo too much lookahead. The next read will be byte aligned so we
+ * can discard unused bits in the last meaningful byte.
+ */
+ while (bk >= 8) {
+ bk -= 8;
+ inptr--;
+ }
+
+ /* flush out slide */
+ flush_output(wp);
+
+
+ /* return success */
+#ifdef DEBUG
+ fprintf(stderr, "<%u> ", h);
+#endif /* DEBUG */
+ return 0;
+}
+
+/**********************************************************************
+ *
+ * The following are support routines for inflate.c
+ *
+ **********************************************************************/
+
+static ulg crc_32_tab[256];
+static ulg crc; /* initialized in makecrc() so it'll reside in bss */
+#define CRC_VALUE (crc ^ 0xffffffffUL)
+
+/*
+ * Code to compute the CRC-32 table. Borrowed from
+ * gzip-1.0.3/makecrc.c.
+ */
+
+static void INIT
+makecrc(void)
+{
+/* Not copyrighted 1990 Mark Adler */
+
+ unsigned long c; /* crc shift register */
+ unsigned long e; /* polynomial exclusive-or pattern */
+ int i; /* counter for all possible eight bit values */
+ int k; /* byte being shifted into crc apparatus */
+
+ /* terms of polynomial defining this crc (except x^32): */
+ static const int p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+ /* Make exclusive-or pattern from polynomial */
+ e = 0;
+ for (i = 0; i < sizeof(p)/sizeof(int); i++)
+ e |= 1L << (31 - p[i]);
+
+ crc_32_tab[0] = 0;
+
+ for (i = 1; i < 256; i++)
+ {
+ c = 0;
+ for (k = i | 256; k != 1; k >>= 1)
+ {
+ c = c & 1 ? (c >> 1) ^ e : c >> 1;
+ if (k & 1)
+ c ^= e;
+ }
+ crc_32_tab[i] = c;
+ }
+
+ /* this is initialized here so this code could reside in ROM */
+ crc = (ulg)0xffffffffUL; /* shift register contents */
+}
+
+/* gzip flag byte */
+#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
+#define COMMENT 0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
+#define RESERVED 0xC0 /* bit 6,7: reserved */
+
+/*
+ * Do the uncompression!
+ */
+static int INIT gunzip(void)
+{
+ uch flags;
+ unsigned char magic[2]; /* magic header */
+ char method;
+ ulg orig_crc = 0; /* original crc */
+ ulg orig_len = 0; /* original uncompressed length */
+ int res;
+
+ magic[0] = NEXTBYTE();
+ magic[1] = NEXTBYTE();
+ method = NEXTBYTE();
+
+ if (magic[0] != 037 ||
+ ((magic[1] != 0213) && (magic[1] != 0236))) {
+ error("bad gzip magic numbers");
+ return -1;
+ }
+
+ /* We only support method #8, DEFLATED */
+ if (method != 8) {
+ error("internal error, invalid method");
+ return -1;
+ }
+
+ flags = (uch)get_byte();
+ if ((flags & ENCRYPTED) != 0) {
+ error("Input is encrypted");
+ return -1;
+ }
+ if ((flags & CONTINUATION) != 0) {
+ error("Multi part input");
+ return -1;
+ }
+ if ((flags & RESERVED) != 0) {
+ error("Input has invalid flags");
+ return -1;
+ }
+ NEXTBYTE(); /* Get timestamp */
+ NEXTBYTE();
+ NEXTBYTE();
+ NEXTBYTE();
+
+ (void)NEXTBYTE(); /* Ignore extra flags for the moment */
+ (void)NEXTBYTE(); /* Ignore OS type for the moment */
+
+ if ((flags & EXTRA_FIELD) != 0) {
+ unsigned len = (unsigned)NEXTBYTE();
+ len |= ((unsigned)NEXTBYTE())<<8;
+ while (len--) (void)NEXTBYTE();
+ }
+
+	/* Discard the original file name if present */
+	if ((flags & ORIG_NAME) != 0) {
+		while (NEXTBYTE() != 0) /* null */ ;
+	}
+
+ /* Discard file comment if any */
+ if ((flags & COMMENT) != 0) {
+ while (NEXTBYTE() != 0) /* null */ ;
+ }
+
+ /* Decompress */
+ if ((res = inflate())) {
+ switch (res) {
+ case 0:
+ break;
+ case 1:
+ error("invalid compressed format (err=1)");
+ break;
+ case 2:
+ error("invalid compressed format (err=2)");
+ break;
+ case 3:
+ error("out of memory");
+ break;
+ case 4:
+ error("out of input data");
+ break;
+ default:
+ error("invalid compressed format (other)");
+ }
+ return -1;
+ }
+
+ /* Get the crc and original length */
+ /* crc32 (see algorithm.doc)
+ * uncompressed input size modulo 2^32
+ */
+ orig_crc = (ulg) NEXTBYTE();
+ orig_crc |= (ulg) NEXTBYTE() << 8;
+ orig_crc |= (ulg) NEXTBYTE() << 16;
+ orig_crc |= (ulg) NEXTBYTE() << 24;
+
+ orig_len = (ulg) NEXTBYTE();
+ orig_len |= (ulg) NEXTBYTE() << 8;
+ orig_len |= (ulg) NEXTBYTE() << 16;
+ orig_len |= (ulg) NEXTBYTE() << 24;
+
+ /* Validate decompression */
+ if (orig_crc != CRC_VALUE) {
+ error("crc error");
+ return -1;
+ }
+ if (orig_len != bytes_out) {
+ error("length error");
+ return -1;
+ }
+ return 0;
+
+ underrun: /* NEXTBYTE() goto's here if needed */
+ error("out of input data");
+ return -1;
+}
+
+
diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c
new file mode 100644
index 00000000..fd355a99
--- /dev/null
+++ b/lib/int_sqrt.c
@@ -0,0 +1,32 @@
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+/**
+ * int_sqrt - compute the integer square root
+ * @x: integer of which to calculate the sqrt
+ *
+ * Computes floor(sqrt(x)) exactly, one result bit at a time.
+ */
+unsigned long int_sqrt(unsigned long x)
+{
+ unsigned long op, res, one;
+
+ op = x;
+ res = 0;
+
+ one = 1UL << (BITS_PER_LONG - 2);
+ while (one > op)
+ one >>= 2;
+
+ while (one != 0) {
+ if (op >= res + one) {
+ op = op - (res + one);
+ res = res + 2 * one;
+ }
+ res /= 2;
+ one /= 4;
+ }
+ return res;
+}
+EXPORT_SYMBOL(int_sqrt);
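+/*
+ * Worked example (illustrative): int_sqrt(17) starts with one == 16,
+ * subtracts res + one == 16 from op (res becomes 32), then halves res
+ * and quarters one on each round (res: 16, 8, 4), returning res == 4,
+ * the floor of sqrt(17).
+ */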
diff --git a/lib/iomap.c b/lib/iomap.c
new file mode 100644
index 00000000..d3222938
--- /dev/null
+++ b/lib/iomap.c
@@ -0,0 +1,282 @@
+/*
+ * Implement the default iomap interfaces
+ *
+ * (C) Copyright 2004 Linus Torvalds
+ */
+#include <linux/pci.h>
+#include <linux/io.h>
+
+#include <linux/module.h>
+
+/*
+ * Read/write from/to an (offsettable) iomem cookie. It might be a PIO
+ * access or a MMIO access, these functions don't care. The info is
+ * encoded in the hardware mapping set up by the mapping functions
+ * (or the cookie itself, depending on implementation and hw).
+ *
+ * The generic routines don't assume any hardware mappings, and just
+ * encode the PIO/MMIO as part of the cookie. They coldly assume that
+ * the MMIO IO mappings are not in the low address range.
+ *
+ * Architectures for which this is not true can't use this generic
+ * implementation and should do their own copy.
+ */
+
+#ifndef HAVE_ARCH_PIO_SIZE
+/*
+ * We encode the physical PIO addresses (0-0xffff) into the
+ * pointer by offsetting them with a constant (0x10000) and
+ * assuming that all the low addresses are always PIO. That means
+ * we can do some sanity checks on the low bits, and don't
+ * need to just take things for granted.
+ */
+#define PIO_OFFSET 0x10000UL
+#define PIO_MASK 0x0ffffUL
+#define PIO_RESERVED 0x40000UL
+#endif
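+/*
+ * Worked example under the default encoding above: ioport_map(0x3f8, 8)
+ * returns the cookie 0x103f8. The IO_COND() macro below sees that
+ * 0x103f8 is below PIO_RESERVED but above PIO_OFFSET, masks with
+ * PIO_MASK to recover port 0x3f8, and takes the PIO (inb/outb) path.
+ * Addresses returned by ioremap() are assumed to be >= PIO_RESERVED
+ * and take the MMIO path.
+ */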
+
+static void bad_io_access(unsigned long port, const char *access)
+{
+ static int count = 10;
+ if (count) {
+ count--;
+ WARN(1, KERN_ERR "Bad IO access at port %#lx (%s)\n", port, access);
+ }
+}
+
+/*
+ * Ugly macros are a way of life.
+ */
+#define IO_COND(addr, is_pio, is_mmio) do { \
+ unsigned long port = (unsigned long __force)addr; \
+ if (port >= PIO_RESERVED) { \
+ is_mmio; \
+ } else if (port > PIO_OFFSET) { \
+ port &= PIO_MASK; \
+ is_pio; \
+ } else \
+ bad_io_access(port, #is_pio ); \
+} while (0)
+
+#ifndef pio_read16be
+#define pio_read16be(port) swab16(inw(port))
+#define pio_read32be(port) swab32(inl(port))
+#endif
+
+#ifndef mmio_read16be
+#define mmio_read16be(addr) be16_to_cpu(__raw_readw(addr))
+#define mmio_read32be(addr) be32_to_cpu(__raw_readl(addr))
+#endif
+
+unsigned int ioread8(void __iomem *addr)
+{
+ IO_COND(addr, return inb(port), return readb(addr));
+ return 0xff;
+}
+unsigned int ioread16(void __iomem *addr)
+{
+ IO_COND(addr, return inw(port), return readw(addr));
+ return 0xffff;
+}
+unsigned int ioread16be(void __iomem *addr)
+{
+ IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr));
+ return 0xffff;
+}
+unsigned int ioread32(void __iomem *addr)
+{
+ IO_COND(addr, return inl(port), return readl(addr));
+ return 0xffffffff;
+}
+unsigned int ioread32be(void __iomem *addr)
+{
+ IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr));
+ return 0xffffffff;
+}
+EXPORT_SYMBOL(ioread8);
+EXPORT_SYMBOL(ioread16);
+EXPORT_SYMBOL(ioread16be);
+EXPORT_SYMBOL(ioread32);
+EXPORT_SYMBOL(ioread32be);
+
+#ifndef pio_write16be
+#define pio_write16be(val,port) outw(swab16(val),port)
+#define pio_write32be(val,port) outl(swab32(val),port)
+#endif
+
+#ifndef mmio_write16be
+#define mmio_write16be(val,port) __raw_writew(be16_to_cpu(val),port)
+#define mmio_write32be(val,port) __raw_writel(be32_to_cpu(val),port)
+#endif
+
+void iowrite8(u8 val, void __iomem *addr)
+{
+ IO_COND(addr, outb(val,port), writeb(val, addr));
+}
+void iowrite16(u16 val, void __iomem *addr)
+{
+ IO_COND(addr, outw(val,port), writew(val, addr));
+}
+void iowrite16be(u16 val, void __iomem *addr)
+{
+ IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr));
+}
+void iowrite32(u32 val, void __iomem *addr)
+{
+ IO_COND(addr, outl(val,port), writel(val, addr));
+}
+void iowrite32be(u32 val, void __iomem *addr)
+{
+ IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr));
+}
+EXPORT_SYMBOL(iowrite8);
+EXPORT_SYMBOL(iowrite16);
+EXPORT_SYMBOL(iowrite16be);
+EXPORT_SYMBOL(iowrite32);
+EXPORT_SYMBOL(iowrite32be);
+
+/*
+ * These are the "repeat MMIO read/write" functions.
+ * Note the "__raw" accesses, since we don't want to
+ * convert to CPU byte order. We write in "IO byte
+ * order" (we also don't have IO barriers).
+ */
+#ifndef mmio_insb
+static inline void mmio_insb(void __iomem *addr, u8 *dst, int count)
+{
+ while (--count >= 0) {
+ u8 data = __raw_readb(addr);
+ *dst = data;
+ dst++;
+ }
+}
+static inline void mmio_insw(void __iomem *addr, u16 *dst, int count)
+{
+ while (--count >= 0) {
+ u16 data = __raw_readw(addr);
+ *dst = data;
+ dst++;
+ }
+}
+static inline void mmio_insl(void __iomem *addr, u32 *dst, int count)
+{
+ while (--count >= 0) {
+ u32 data = __raw_readl(addr);
+ *dst = data;
+ dst++;
+ }
+}
+#endif
+
+#ifndef mmio_outsb
+static inline void mmio_outsb(void __iomem *addr, const u8 *src, int count)
+{
+ while (--count >= 0) {
+ __raw_writeb(*src, addr);
+ src++;
+ }
+}
+static inline void mmio_outsw(void __iomem *addr, const u16 *src, int count)
+{
+ while (--count >= 0) {
+ __raw_writew(*src, addr);
+ src++;
+ }
+}
+static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count)
+{
+ while (--count >= 0) {
+ __raw_writel(*src, addr);
+ src++;
+ }
+}
+#endif
+
+void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count));
+}
+void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count));
+}
+void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
+{
+ IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count));
+}
+EXPORT_SYMBOL(ioread8_rep);
+EXPORT_SYMBOL(ioread16_rep);
+EXPORT_SYMBOL(ioread32_rep);
+
+void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ IO_COND(addr, outsb(port, src, count), mmio_outsb(addr, src, count));
+}
+void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ IO_COND(addr, outsw(port, src, count), mmio_outsw(addr, src, count));
+}
+void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
+{
+ IO_COND(addr, outsl(port, src,count), mmio_outsl(addr, src, count));
+}
+EXPORT_SYMBOL(iowrite8_rep);
+EXPORT_SYMBOL(iowrite16_rep);
+EXPORT_SYMBOL(iowrite32_rep);
+
+/* Create a virtual mapping cookie for an IO port range */
+void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+ if (port > PIO_MASK)
+ return NULL;
+ return (void __iomem *) (unsigned long) (port + PIO_OFFSET);
+}
+
+void ioport_unmap(void __iomem *addr)
+{
+ /* Nothing to do */
+}
+EXPORT_SYMBOL(ioport_map);
+EXPORT_SYMBOL(ioport_unmap);
+
+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ resource_size_t start = pci_resource_start(dev, bar);
+ resource_size_t len = pci_resource_len(dev, bar);
+ unsigned long flags = pci_resource_flags(dev, bar);
+
+ if (!len || !start)
+ return NULL;
+ if (maxlen && len > maxlen)
+ len = maxlen;
+ if (flags & IORESOURCE_IO)
+ return ioport_map(start, len);
+ if (flags & IORESOURCE_MEM) {
+ if (flags & IORESOURCE_CACHEABLE)
+ return ioremap(start, len);
+ return ioremap_nocache(start, len);
+ }
+ /* What? */
+ return NULL;
+}
+
+void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
+{
+ IO_COND(addr, /* nothing */, iounmap(addr));
+}
+EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iounmap);
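+/*
+ * Illustrative driver usage (pdev, REG_CTRL and REG_STATUS are
+ * hypothetical):
+ *
+ *	void __iomem *regs = pci_iomap(pdev, 0, 0);
+ *
+ *	if (!regs)
+ *		return -ENOMEM;
+ *	iowrite32(1, regs + REG_CTRL);
+ *	status = ioread32(regs + REG_STATUS);
+ *	pci_iounmap(pdev, regs);
+ */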
diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c
new file mode 100644
index 00000000..864fc5ea
--- /dev/null
+++ b/lib/iomap_copy.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2006 PathScale, Inc. All Rights Reserved.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/io.h>
+
+/**
+ * __iowrite32_copy - copy data to MMIO space, in 32-bit units
+ * @to: destination, in MMIO space (must be 32-bit aligned)
+ * @from: source (must be 32-bit aligned)
+ * @count: number of 32-bit quantities to copy
+ *
+ * Copy data from kernel space to MMIO space, in units of 32 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ */
+void __attribute__((weak)) __iowrite32_copy(void __iomem *to,
+ const void *from,
+ size_t count)
+{
+ u32 __iomem *dst = to;
+ const u32 *src = from;
+ const u32 *end = src + count;
+
+ while (src < end)
+ __raw_writel(*src++, dst++);
+}
+EXPORT_SYMBOL_GPL(__iowrite32_copy);
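+/*
+ * Illustrative use (dev->db_base and desc are hypothetical): posting a
+ * 64-byte descriptor to a device doorbell region, 32 bits at a time:
+ *
+ *	__iowrite32_copy(dev->db_base, desc, 64 / 4);
+ */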
+
+/**
+ * __iowrite64_copy - copy data to MMIO space, in 64-bit or 32-bit units
+ * @to: destination, in MMIO space (must be 64-bit aligned)
+ * @from: source (must be 64-bit aligned)
+ * @count: number of 64-bit quantities to copy
+ *
+ * Copy data from kernel space to MMIO space, in units of 32 or 64 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ */
+void __attribute__((weak)) __iowrite64_copy(void __iomem *to,
+ const void *from,
+ size_t count)
+{
+#ifdef CONFIG_64BIT
+ u64 __iomem *dst = to;
+ const u64 *src = from;
+ const u64 *end = src + count;
+
+ while (src < end)
+ __raw_writeq(*src++, dst++);
+#else
+ __iowrite32_copy(to, from, count * 2);
+#endif
+}
+EXPORT_SYMBOL_GPL(__iowrite64_copy);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
new file mode 100644
index 00000000..da053313
--- /dev/null
+++ b/lib/iommu-helper.c
@@ -0,0 +1,40 @@
+/*
+ * IOMMU helper functions for the free area management
+ */
+
+#include <linux/module.h>
+#include <linux/bitmap.h>
+
+int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+ unsigned long shift,
+ unsigned long boundary_size)
+{
+ BUG_ON(!is_power_of_2(boundary_size));
+
+ shift = (shift + index) & (boundary_size - 1);
+ return shift + nr > boundary_size;
+}
+
+unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
+ unsigned long start, unsigned int nr,
+ unsigned long shift, unsigned long boundary_size,
+ unsigned long align_mask)
+{
+ unsigned long index;
+
+ /* We don't want the last of the limit */
+ size -= 1;
+again:
+ index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
+ if (index < size) {
+ if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
+			/* we could do this more efficiently */
+ start = index + 1;
+ goto again;
+ }
+ bitmap_set(map, index, nr);
+ return index;
+ }
+ return -1;
+}
+EXPORT_SYMBOL(iommu_area_alloc);
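+/*
+ * Sketch of a caller (tbl, its fields and bad_dma_address are
+ * hypothetical): allocating npages entries from a per-device bitmap
+ * without crossing a 64KB boundary, assuming 4K pages so the boundary
+ * is a power-of-2 number of entries:
+ *
+ *	idx = iommu_area_alloc(tbl->map, tbl->entries, tbl->hint, npages,
+ *			       tbl->offset, 65536 >> PAGE_SHIFT, 0);
+ *	if (idx == -1)
+ *		return bad_dma_address;
+ */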
diff --git a/lib/ioremap.c b/lib/ioremap.c
new file mode 100644
index 00000000..5730ecd3
--- /dev/null
+++ b/lib/ioremap.c
@@ -0,0 +1,92 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PC's
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+
+static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+ pte_t *pte;
+ u64 pfn;
+
+ pfn = phys_addr >> PAGE_SHIFT;
+ pte = pte_alloc_kernel(pmd, addr);
+ if (!pte)
+ return -ENOMEM;
+ do {
+ BUG_ON(!pte_none(*pte));
+ set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
+ pfn++;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ return 0;
+}
+
+static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ phys_addr -= addr;
+ pmd = pmd_alloc(&init_mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+ if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
+ return -ENOMEM;
+ } while (pmd++, addr = next, addr != end);
+ return 0;
+}
+
+static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ phys_addr -= addr;
+ pud = pud_alloc(&init_mm, pgd, addr);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+ if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
+ return -ENOMEM;
+ } while (pud++, addr = next, addr != end);
+ return 0;
+}
+
+int ioremap_page_range(unsigned long addr,
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+ pgd_t *pgd;
+ unsigned long start;
+ unsigned long next;
+ int err;
+
+ BUG_ON(addr >= end);
+
+ start = addr;
+ phys_addr -= addr;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
+ if (err)
+ break;
+ } while (pgd++, addr = next, addr != end);
+
+ flush_cache_vmap(start, end);
+
+ return err;
+}
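+/*
+ * Illustrative caller, roughly what an arch ioremap() does on top of
+ * this helper (area, size, phys_addr and prot come from the caller;
+ * get_vm_area() is the usual source of the virtual range):
+ *
+ *	addr = (unsigned long)area->addr;
+ *	if (ioremap_page_range(addr, addr + size, phys_addr, prot)) {
+ *		vunmap(area->addr);
+ *		return NULL;
+ *	}
+ *	return (void __iomem *)addr;
+ */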
diff --git a/lib/irq_regs.c b/lib/irq_regs.c
new file mode 100644
index 00000000..753880a5
--- /dev/null
+++ b/lib/irq_regs.c
@@ -0,0 +1,17 @@
+/* saved per-CPU IRQ register pointer
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/module.h>
+#include <asm/irq_regs.h>
+
+#ifndef ARCH_HAS_OWN_IRQ_REGS
+DEFINE_PER_CPU(struct pt_regs *, __irq_regs);
+EXPORT_PER_CPU_SYMBOL(__irq_regs);
+#endif
diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c
new file mode 100644
index 00000000..bd2bea96
--- /dev/null
+++ b/lib/is_single_threaded.c
@@ -0,0 +1,58 @@
+/* Function to determine if a thread group is single threaded or not
+ *
+ * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from security/selinux/hooks.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sched.h>
+
+/*
+ * Returns true if the task does not share ->mm with another thread/process.
+ */
+bool current_is_single_threaded(void)
+{
+ struct task_struct *task = current;
+ struct mm_struct *mm = task->mm;
+ struct task_struct *p, *t;
+ bool ret;
+
+ if (atomic_read(&task->signal->live) != 1)
+ return false;
+
+ if (atomic_read(&mm->mm_users) == 1)
+ return true;
+
+ ret = false;
+ rcu_read_lock();
+ for_each_process(p) {
+ if (unlikely(p->flags & PF_KTHREAD))
+ continue;
+ if (unlikely(p == task->group_leader))
+ continue;
+
+ t = p;
+ do {
+ if (unlikely(t->mm == mm))
+ goto found;
+ if (likely(t->mm))
+ break;
+ /*
+ * t->mm == NULL. Make sure next_thread/next_task
+ * will see other CLONE_VM tasks which might be
+ * forked before exiting.
+ */
+ smp_rmb();
+ } while_each_thread(p, t);
+ }
+ ret = true;
+found:
+ rcu_read_unlock();
+
+ return ret;
+}
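+/*
+ * Illustrative use (fast_path/locked_path are hypothetical): a caller
+ * may skip locking when no other task can be using current->mm:
+ *
+ *	if (current_is_single_threaded())
+ *		fast_path();
+ *	else
+ *		locked_path();
+ */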
diff --git a/lib/kasprintf.c b/lib/kasprintf.c
new file mode 100644
index 00000000..9c4233b2
--- /dev/null
+++ b/lib/kasprintf.c
@@ -0,0 +1,45 @@
+/*
+ * linux/lib/kasprintf.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+/* Simplified asprintf. */
+char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
+{
+ unsigned int len;
+ char *p;
+ va_list aq;
+
+ va_copy(aq, ap);
+ len = vsnprintf(NULL, 0, fmt, aq);
+ va_end(aq);
+
+ p = kmalloc(len+1, gfp);
+ if (!p)
+ return NULL;
+
+ vsnprintf(p, len+1, fmt, ap);
+
+ return p;
+}
+EXPORT_SYMBOL(kvasprintf);
+
+char *kasprintf(gfp_t gfp, const char *fmt, ...)
+{
+ va_list ap;
+ char *p;
+
+ va_start(ap, fmt);
+ p = kvasprintf(gfp, fmt, ap);
+ va_end(ap);
+
+ return p;
+}
+EXPORT_SYMBOL(kasprintf);
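+/*
+ * Illustrative use (base and idx are hypothetical): building a
+ * dynamically sized string without guessing a buffer length:
+ *
+ *	char *name = kasprintf(GFP_KERNEL, "%s-%d", base, idx);
+ *
+ *	if (!name)
+ *		return -ENOMEM;
+ *	...
+ *	kfree(name);
+ */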
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
new file mode 100644
index 00000000..b135d04a
--- /dev/null
+++ b/lib/kernel_lock.c
@@ -0,0 +1,143 @@
+/*
+ * lib/kernel_lock.c
+ *
+ * This is the traditional BKL - big kernel lock. Largely
+ * relegated to obsolescence, but used by various less
+ * important (or lazy) subsystems.
+ */
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/semaphore.h>
+#include <linux/smp_lock.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/bkl.h>
+
+/*
+ * The 'big kernel lock'
+ *
+ * This spinlock is taken and released recursively by lock_kernel()
+ * and unlock_kernel(). It is transparently dropped and reacquired
+ * over schedule(). It is used to protect legacy code that hasn't
+ * been migrated to a proper locking design yet.
+ *
+ * Don't use in new code.
+ */
+static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
+
+/*
+ * Acquire/release the underlying lock from the scheduler.
+ *
+ * This is called with preemption disabled, and should
+ * return an error value if it cannot get the lock and
+ * TIF_NEED_RESCHED gets set.
+ *
+ * If it successfully gets the lock, it should increment
+ * the preemption count like any spinlock does.
+ *
+ * (This works on UP too - do_raw_spin_trylock will never
+ * return false in that case)
+ */
+int __lockfunc __reacquire_kernel_lock(void)
+{
+ while (!do_raw_spin_trylock(&kernel_flag)) {
+ if (need_resched())
+ return -EAGAIN;
+ cpu_relax();
+ }
+ preempt_disable();
+ return 0;
+}
+
+void __lockfunc __release_kernel_lock(void)
+{
+ do_raw_spin_unlock(&kernel_flag);
+ preempt_enable_no_resched();
+}
+
+/*
+ * These are the BKL spinlocks - we try to be polite about preemption.
+ * If SMP is not on (ie UP preemption), this all goes away because the
+ * do_raw_spin_trylock() will always succeed.
+ */
+#ifdef CONFIG_PREEMPT
+static inline void __lock_kernel(void)
+{
+ preempt_disable();
+ if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
+ /*
+ * If preemption was disabled even before this
+ * was called, there's nothing we can be polite
+ * about - just spin.
+ */
+ if (preempt_count() > 1) {
+ do_raw_spin_lock(&kernel_flag);
+ return;
+ }
+
+ /*
+ * Otherwise, let's wait for the kernel lock
+ * with preemption enabled..
+ */
+ do {
+ preempt_enable();
+ while (raw_spin_is_locked(&kernel_flag))
+ cpu_relax();
+ preempt_disable();
+ } while (!do_raw_spin_trylock(&kernel_flag));
+ }
+}
+
+#else
+
+/*
+ * Non-preemption case - just get the spinlock
+ */
+static inline void __lock_kernel(void)
+{
+ do_raw_spin_lock(&kernel_flag);
+}
+#endif
+
+static inline void __unlock_kernel(void)
+{
+ /*
+ * the BKL is not covered by lockdep, so we open-code the
+ * unlocking sequence (and thus avoid the dep-chain ops):
+ */
+ do_raw_spin_unlock(&kernel_flag);
+ preempt_enable();
+}
+
+/*
+ * Getting the big kernel lock.
+ *
+ * This cannot happen asynchronously, so we only need to
+ * worry about other CPUs.
+ */
+void __lockfunc _lock_kernel(const char *func, const char *file, int line)
+{
+ int depth = current->lock_depth + 1;
+
+ trace_lock_kernel(func, file, line);
+
+ if (likely(!depth)) {
+ might_sleep();
+ __lock_kernel();
+ }
+ current->lock_depth = depth;
+}
+
+void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
+{
+ BUG_ON(current->lock_depth < 0);
+ if (likely(--current->lock_depth < 0))
+ __unlock_kernel();
+
+ trace_unlock_kernel(func, file, line);
+}
+
+EXPORT_SYMBOL(_lock_kernel);
+EXPORT_SYMBOL(_unlock_kernel);
+
diff --git a/lib/klist.c b/lib/klist.c
new file mode 100644
index 00000000..573d6068
--- /dev/null
+++ b/lib/klist.c
@@ -0,0 +1,365 @@
+/*
+ * klist.c - Routines for manipulating klists.
+ *
+ * Copyright (C) 2005 Patrick Mochel
+ *
+ * This file is released under the GPL v2.
+ *
+ * This klist interface provides a couple of structures that wrap around
+ * struct list_head to provide explicit list "head" (struct klist) and list
+ * "node" (struct klist_node) objects. For struct klist, a spinlock is
+ * included that protects access to the actual list itself. struct
+ * klist_node provides a pointer to the klist that owns it and a kref
+ * reference count that indicates the number of current users of that node
+ * in the list.
+ *
+ * The entire point is to provide an interface for iterating over a list
+ * that is safe and allows for modification of the list during the
+ * iteration (e.g. insertion and removal), including modification of the
+ * current node on the list.
+ *
+ * It works using a third object type - struct klist_iter - that is declared
+ * and initialized before an iteration. klist_next() is used to acquire the
+ * next element in the list. It returns NULL if there are no more items.
+ * Internally, that routine takes the klist's lock, decrements the
+ * reference count of the previous klist_node and increments the count of
+ * the next klist_node. It then drops the lock and returns.
+ *
+ * There are primitives for adding and removing nodes to/from a klist.
+ * When deleting, klist_del() will simply decrement the reference count.
+ * Only when the count goes to 0 is the node removed from the list.
+ * klist_remove() will try to delete the node from the list and block until
+ * it is actually removed. This is useful for objects (like devices) that
+ * have been removed from the system and must be freed (but must wait until
+ * all accessors have finished).
+ */
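+
+/*
+ * A minimal iteration sketch of the interface described above; mylist
+ * is assumed to be an initialized klist and handle_node() is a
+ * hypothetical helper:
+ *
+ *   struct klist_iter iter;
+ *   struct klist_node *n;
+ *
+ *   klist_iter_init(&mylist, &iter);
+ *   while ((n = klist_next(&iter)))
+ *           handle_node(n);
+ *   klist_iter_exit(&iter);
+ */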
+
+#include <linux/klist.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+
+/*
+ * Use the lowest bit of n_klist to mark deleted nodes and exclude
+ * dead ones from iteration.
+ */
+#define KNODE_DEAD 1LU
+#define KNODE_KLIST_MASK ~KNODE_DEAD
+
+static struct klist *knode_klist(struct klist_node *knode)
+{
+ return (struct klist *)
+ ((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
+}
+
+static bool knode_dead(struct klist_node *knode)
+{
+ return (unsigned long)knode->n_klist & KNODE_DEAD;
+}
+
+static void knode_set_klist(struct klist_node *knode, struct klist *klist)
+{
+ knode->n_klist = klist;
+ /* no knode deserves to start its life dead */
+ WARN_ON(knode_dead(knode));
+}
+
+static void knode_kill(struct klist_node *knode)
+{
+ /* and no knode should ever die twice, either; see, we're very humane */
+ WARN_ON(knode_dead(knode));
+ *(unsigned long *)&knode->n_klist |= KNODE_DEAD;
+}
+
+/**
+ * klist_init - Initialize a klist structure.
+ * @k: The klist we're initializing.
+ * @get: The get function for the embedding object (NULL if none)
+ * @put: The put function for the embedding object (NULL if none)
+ *
+ * Initialises the klist structure. If the klist_node structures are
+ * going to be embedded in refcounted objects (necessary for safe
+ * deletion) then the get/put arguments are used to initialise
+ * functions that take and release references on the embedding
+ * objects.
+ */
+void klist_init(struct klist *k, void (*get)(struct klist_node *),
+ void (*put)(struct klist_node *))
+{
+ INIT_LIST_HEAD(&k->k_list);
+ spin_lock_init(&k->k_lock);
+ k->get = get;
+ k->put = put;
+}
+EXPORT_SYMBOL_GPL(klist_init);
+
+static void add_head(struct klist *k, struct klist_node *n)
+{
+ spin_lock(&k->k_lock);
+ list_add(&n->n_node, &k->k_list);
+ spin_unlock(&k->k_lock);
+}
+
+static void add_tail(struct klist *k, struct klist_node *n)
+{
+ spin_lock(&k->k_lock);
+ list_add_tail(&n->n_node, &k->k_list);
+ spin_unlock(&k->k_lock);
+}
+
+static void klist_node_init(struct klist *k, struct klist_node *n)
+{
+ INIT_LIST_HEAD(&n->n_node);
+ kref_init(&n->n_ref);
+ knode_set_klist(n, k);
+ if (k->get)
+ k->get(n);
+}
+
+/**
+ * klist_add_head - Initialize a klist_node and add it to front.
+ * @n: node we're adding.
+ * @k: klist it's going on.
+ */
+void klist_add_head(struct klist_node *n, struct klist *k)
+{
+ klist_node_init(k, n);
+ add_head(k, n);
+}
+EXPORT_SYMBOL_GPL(klist_add_head);
+
+/**
+ * klist_add_tail - Initialize a klist_node and add it to back.
+ * @n: node we're adding.
+ * @k: klist it's going on.
+ */
+void klist_add_tail(struct klist_node *n, struct klist *k)
+{
+ klist_node_init(k, n);
+ add_tail(k, n);
+}
+EXPORT_SYMBOL_GPL(klist_add_tail);
+
+/**
+ * klist_add_after - Init a klist_node and add it after an existing node
+ * @n: node we're adding.
+ * @pos: node to put @n after
+ */
+void klist_add_after(struct klist_node *n, struct klist_node *pos)
+{
+ struct klist *k = knode_klist(pos);
+
+ klist_node_init(k, n);
+ spin_lock(&k->k_lock);
+ list_add(&n->n_node, &pos->n_node);
+ spin_unlock(&k->k_lock);
+}
+EXPORT_SYMBOL_GPL(klist_add_after);
+
+/**
+ * klist_add_before - Init a klist_node and add it before an existing node
+ * @n: node we're adding.
+ * @pos: node to put @n before
+ */
+void klist_add_before(struct klist_node *n, struct klist_node *pos)
+{
+ struct klist *k = knode_klist(pos);
+
+ klist_node_init(k, n);
+ spin_lock(&k->k_lock);
+ list_add_tail(&n->n_node, &pos->n_node);
+ spin_unlock(&k->k_lock);
+}
+EXPORT_SYMBOL_GPL(klist_add_before);
+
+struct klist_waiter {
+ struct list_head list;
+ struct klist_node *node;
+ struct task_struct *process;
+ int woken;
+};
+
+static DEFINE_SPINLOCK(klist_remove_lock);
+static LIST_HEAD(klist_remove_waiters);
+
+static void klist_release(struct kref *kref)
+{
+ struct klist_waiter *waiter, *tmp;
+ struct klist_node *n = container_of(kref, struct klist_node, n_ref);
+
+ WARN_ON(!knode_dead(n));
+ list_del(&n->n_node);
+ spin_lock(&klist_remove_lock);
+ list_for_each_entry_safe(waiter, tmp, &klist_remove_waiters, list) {
+ if (waiter->node != n)
+ continue;
+
+ waiter->woken = 1;
+ mb();
+ wake_up_process(waiter->process);
+ list_del(&waiter->list);
+ }
+ spin_unlock(&klist_remove_lock);
+ knode_set_klist(n, NULL);
+}
+
+static int klist_dec_and_del(struct klist_node *n)
+{
+ return kref_put(&n->n_ref, klist_release);
+}
+
+static void klist_put(struct klist_node *n, bool kill)
+{
+ struct klist *k = knode_klist(n);
+ void (*put)(struct klist_node *) = k->put;
+
+ spin_lock(&k->k_lock);
+ if (kill)
+ knode_kill(n);
+ if (!klist_dec_and_del(n))
+ put = NULL;
+ spin_unlock(&k->k_lock);
+ if (put)
+ put(n);
+}
+
+/**
+ * klist_del - Decrement the reference count of node and try to remove.
+ * @n: node we're deleting.
+ */
+void klist_del(struct klist_node *n)
+{
+ klist_put(n, true);
+}
+EXPORT_SYMBOL_GPL(klist_del);
+
+/**
+ * klist_remove - Decrement the refcount of node and wait for it to go away.
+ * @n: node we're removing.
+ */
+void klist_remove(struct klist_node *n)
+{
+ struct klist_waiter waiter;
+
+ waiter.node = n;
+ waiter.process = current;
+ waiter.woken = 0;
+ spin_lock(&klist_remove_lock);
+ list_add(&waiter.list, &klist_remove_waiters);
+ spin_unlock(&klist_remove_lock);
+
+ klist_del(n);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (waiter.woken)
+ break;
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+}
+EXPORT_SYMBOL_GPL(klist_remove);
+
+/**
+ * klist_node_attached - Say whether a node is bound to a list or not.
+ * @n: Node that we're testing.
+ */
+int klist_node_attached(struct klist_node *n)
+{
+ return (n->n_klist != NULL);
+}
+EXPORT_SYMBOL_GPL(klist_node_attached);
+
+/**
+ * klist_iter_init_node - Initialize a klist_iter structure.
+ * @k: klist we're iterating.
+ * @i: klist_iter we're filling.
+ * @n: node to start with.
+ *
+ * Similar to klist_iter_init(), but starts the action off with @n,
+ * instead of with the list head.
+ */
+void klist_iter_init_node(struct klist *k, struct klist_iter *i,
+ struct klist_node *n)
+{
+ i->i_klist = k;
+ i->i_cur = n;
+ if (n)
+ kref_get(&n->n_ref);
+}
+EXPORT_SYMBOL_GPL(klist_iter_init_node);
+
+/**
+ * klist_iter_init - Initialize a klist_iter structure.
+ * @k: klist we're iterating.
+ * @i: klist_iter structure we're filling.
+ *
+ * Similar to klist_iter_init_node(), but starts with the list head.
+ */
+void klist_iter_init(struct klist *k, struct klist_iter *i)
+{
+ klist_iter_init_node(k, i, NULL);
+}
+EXPORT_SYMBOL_GPL(klist_iter_init);
+
+/**
+ * klist_iter_exit - Finish a list iteration.
+ * @i: Iterator structure.
+ *
+ * Must be called when done iterating over list, as it decrements the
+ * refcount of the current node. Necessary in case iteration exited before
+ * the end of the list was reached, and always good form.
+ */
+void klist_iter_exit(struct klist_iter *i)
+{
+ if (i->i_cur) {
+ klist_put(i->i_cur, false);
+ i->i_cur = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(klist_iter_exit);
+
+static struct klist_node *to_klist_node(struct list_head *n)
+{
+ return container_of(n, struct klist_node, n_node);
+}
+
+/**
+ * klist_next - Ante up next node in list.
+ * @i: Iterator structure.
+ *
+ * First grab list lock. Decrement the reference count of the previous
+ * node, if there was one. Grab the next node, increment its reference
+ * count, drop the lock, and return that next node.
+ */
+struct klist_node *klist_next(struct klist_iter *i)
+{
+ void (*put)(struct klist_node *) = i->i_klist->put;
+ struct klist_node *last = i->i_cur;
+ struct klist_node *next;
+
+ spin_lock(&i->i_klist->k_lock);
+
+ if (last) {
+ next = to_klist_node(last->n_node.next);
+ if (!klist_dec_and_del(last))
+ put = NULL;
+ } else
+ next = to_klist_node(i->i_klist->k_list.next);
+
+ i->i_cur = NULL;
+ while (next != to_klist_node(&i->i_klist->k_list)) {
+ if (likely(!knode_dead(next))) {
+ kref_get(&next->n_ref);
+ i->i_cur = next;
+ break;
+ }
+ next = to_klist_node(next->n_node.next);
+ }
+
+ spin_unlock(&i->i_klist->k_lock);
+
+ if (put && last)
+ put(last);
+ return i->i_cur;
+}
+EXPORT_SYMBOL_GPL(klist_next);
diff --git a/lib/kobject.c b/lib/kobject.c
new file mode 100644
index 00000000..f07c5725
--- /dev/null
+++ b/lib/kobject.c
@@ -0,0 +1,973 @@
+/*
+ * kobject.c - library routines for handling generic kernel objects
+ *
+ * Copyright (c) 2002-2003 Patrick Mochel <mochel@osdl.org>
+ * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (c) 2006-2007 Novell Inc.
+ *
+ * This file is released under the GPLv2.
+ *
+ *
+ * Please see the file Documentation/kobject.txt for critical information
+ * about using the kobject interface.
+ */
+
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+
+/*
+ * populate_dir - populate directory with attributes.
+ * @kobj: object we're working on.
+ *
+ * Most subsystems have a set of default attributes that are associated
+ * with an object that registers with them. This is a helper called during
+ * object registration that loops through the default attributes of the
+ * subsystem and creates attribute files for them in sysfs.
+ */
+static int populate_dir(struct kobject *kobj)
+{
+ struct kobj_type *t = get_ktype(kobj);
+ struct attribute *attr;
+ int error = 0;
+ int i;
+
+ if (t && t->default_attrs) {
+ for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) {
+ error = sysfs_create_file(kobj, attr);
+ if (error)
+ break;
+ }
+ }
+ return error;
+}
+
+static int create_dir(struct kobject *kobj)
+{
+ int error = 0;
+ if (kobject_name(kobj)) {
+ error = sysfs_create_dir(kobj);
+ if (!error) {
+ error = populate_dir(kobj);
+ if (error)
+ sysfs_remove_dir(kobj);
+ }
+ }
+ return error;
+}
+
+static int get_kobj_path_length(struct kobject *kobj)
+{
+ int length = 1;
+ struct kobject *parent = kobj;
+
+ /* walk up the ancestors until we hit the one pointing to the
+ * root.
+ * Add 1 to strlen for leading '/' of each level.
+ */
+ do {
+ if (kobject_name(parent) == NULL)
+ return 0;
+ length += strlen(kobject_name(parent)) + 1;
+ parent = parent->parent;
+ } while (parent);
+ return length;
+}
+
+static void fill_kobj_path(struct kobject *kobj, char *path, int length)
+{
+ struct kobject *parent;
+
+ --length;
+ for (parent = kobj; parent; parent = parent->parent) {
+ int cur = strlen(kobject_name(parent));
+ /* back up enough to print this name with '/' */
+ length -= cur;
+ strncpy(path + length, kobject_name(parent), cur);
+ *(path + --length) = '/';
+ }
+
+ pr_debug("kobject: '%s' (%p): %s: path = '%s'\n", kobject_name(kobj),
+ kobj, __func__, path);
+}
+
+/**
+ * kobject_get_path - generate and return the path associated with a given kobj and kset pair.
+ *
+ * @kobj: kobject in question, with which to build the path
+ * @gfp_mask: the allocation type used to allocate the path
+ *
+ * The result must be freed by the caller with kfree().
+ */
+char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
+{
+ char *path;
+ int len;
+
+ len = get_kobj_path_length(kobj);
+ if (len == 0)
+ return NULL;
+ path = kzalloc(len, gfp_mask);
+ if (!path)
+ return NULL;
+ fill_kobj_path(kobj, path, len);
+
+ return path;
+}
+EXPORT_SYMBOL_GPL(kobject_get_path);
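+
+/*
+ * Usage sketch, assuming kobj is a registered kobject; the returned
+ * buffer is allocated with the given mask and owned by the caller:
+ *
+ *   char *path = kobject_get_path(kobj, GFP_KERNEL);
+ *   if (path) {
+ *           pr_debug("object lives at %s\n", path);
+ *           kfree(path);
+ *   }
+ */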
+
+/* add the kobject to its kset's list */
+static void kobj_kset_join(struct kobject *kobj)
+{
+ if (!kobj->kset)
+ return;
+
+ kset_get(kobj->kset);
+ spin_lock(&kobj->kset->list_lock);
+ list_add_tail(&kobj->entry, &kobj->kset->list);
+ spin_unlock(&kobj->kset->list_lock);
+}
+
+/* remove the kobject from its kset's list */
+static void kobj_kset_leave(struct kobject *kobj)
+{
+ if (!kobj->kset)
+ return;
+
+ spin_lock(&kobj->kset->list_lock);
+ list_del_init(&kobj->entry);
+ spin_unlock(&kobj->kset->list_lock);
+ kset_put(kobj->kset);
+}
+
+static void kobject_init_internal(struct kobject *kobj)
+{
+ if (!kobj)
+ return;
+ kref_init(&kobj->kref);
+ INIT_LIST_HEAD(&kobj->entry);
+ kobj->state_in_sysfs = 0;
+ kobj->state_add_uevent_sent = 0;
+ kobj->state_remove_uevent_sent = 0;
+ kobj->state_initialized = 1;
+}
+
+
+static int kobject_add_internal(struct kobject *kobj)
+{
+ int error = 0;
+ struct kobject *parent;
+
+ if (!kobj)
+ return -ENOENT;
+
+ if (!kobj->name || !kobj->name[0]) {
+ WARN(1, "kobject: (%p): attempted to be registered with empty "
+ "name!\n", kobj);
+ return -EINVAL;
+ }
+
+ parent = kobject_get(kobj->parent);
+
+ /* join kset if set, use it as parent if we do not already have one */
+ if (kobj->kset) {
+ if (!parent)
+ parent = kobject_get(&kobj->kset->kobj);
+ kobj_kset_join(kobj);
+ kobj->parent = parent;
+ }
+
+ pr_debug("kobject: '%s' (%p): %s: parent: '%s', set: '%s'\n",
+ kobject_name(kobj), kobj, __func__,
+ parent ? kobject_name(parent) : "<NULL>",
+ kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>");
+
+ error = create_dir(kobj);
+ if (error) {
+ kobj_kset_leave(kobj);
+ kobject_put(parent);
+ kobj->parent = NULL;
+
+ /* be noisy on error issues */
+ if (error == -EEXIST)
+ printk(KERN_ERR "%s failed for %s with "
+ "-EEXIST, don't try to register things with "
+ "the same name in the same directory.\n",
+ __func__, kobject_name(kobj));
+ else
+ printk(KERN_ERR "%s failed for %s (%d)\n",
+ __func__, kobject_name(kobj), error);
+ dump_stack();
+ } else
+ kobj->state_in_sysfs = 1;
+
+ return error;
+}
+
+/**
+ * kobject_set_name_vargs - Set the name of a kobject
+ * @kobj: struct kobject to set the name of
+ * @fmt: format string used to build the name
+ * @vargs: vargs to format the string.
+ */
+int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
+ va_list vargs)
+{
+ const char *old_name = kobj->name;
+ char *s;
+
+ if (kobj->name && !fmt)
+ return 0;
+
+ kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs);
+ if (!kobj->name)
+ return -ENOMEM;
+
+ /* ewww... some of these buggers have '/' in the name ... */
+ while ((s = strchr(kobj->name, '/')))
+ s[0] = '!';
+
+ kfree(old_name);
+ return 0;
+}
+
+/**
+ * kobject_set_name - Set the name of a kobject
+ * @kobj: struct kobject to set the name of
+ * @fmt: format string used to build the name
+ *
+ * This sets the name of the kobject. If you have already added the
+ * kobject to the system, you must call kobject_rename() in order to
+ * change the name of the kobject.
+ */
+int kobject_set_name(struct kobject *kobj, const char *fmt, ...)
+{
+ va_list vargs;
+ int retval;
+
+ va_start(vargs, fmt);
+ retval = kobject_set_name_vargs(kobj, fmt, vargs);
+ va_end(vargs);
+
+ return retval;
+}
+EXPORT_SYMBOL(kobject_set_name);
+
+/**
+ * kobject_init - initialize a kobject structure
+ * @kobj: pointer to the kobject to initialize
+ * @ktype: pointer to the ktype for this kobject.
+ *
+ * This function will properly initialize a kobject such that it can then
+ * be passed to the kobject_add() call.
+ *
+ * After this function is called, the kobject MUST be cleaned up by a call
+ * to kobject_put(), not by a call to kfree directly to ensure that all of
+ * the memory is cleaned up properly.
+ */
+void kobject_init(struct kobject *kobj, struct kobj_type *ktype)
+{
+ char *err_str;
+
+ if (!kobj) {
+ err_str = "invalid kobject pointer!";
+ goto error;
+ }
+ if (!ktype) {
+ err_str = "must have a ktype to be initialized properly!";
+ goto error;
+ }
+ if (kobj->state_initialized) {
+ /* do not error out as sometimes we can recover */
+ printk(KERN_ERR "kobject (%p): tried to init an initialized "
+ "object, something is seriously wrong.\n", kobj);
+ dump_stack();
+ }
+
+ kobject_init_internal(kobj);
+ kobj->ktype = ktype;
+ return;
+
+error:
+ printk(KERN_ERR "kobject (%p): %s\n", kobj, err_str);
+ dump_stack();
+}
+EXPORT_SYMBOL(kobject_init);
+
+static int kobject_add_varg(struct kobject *kobj, struct kobject *parent,
+ const char *fmt, va_list vargs)
+{
+ int retval;
+
+ retval = kobject_set_name_vargs(kobj, fmt, vargs);
+ if (retval) {
+ printk(KERN_ERR "kobject: can not set name properly!\n");
+ return retval;
+ }
+ kobj->parent = parent;
+ return kobject_add_internal(kobj);
+}
+
+/**
+ * kobject_add - the main kobject add function
+ * @kobj: the kobject to add
+ * @parent: pointer to the parent of the kobject.
+ * @fmt: format to name the kobject with.
+ *
+ * The kobject name is set and added to the kobject hierarchy in this
+ * function.
+ *
+ * If @parent is set, then the parent of the @kobj will be set to it.
+ * If @parent is NULL, then the parent of the @kobj will be set to the
+ * kobject associated with the kset assigned to this kobject. If no kset
+ * is assigned to the kobject, then the kobject will be located in the
+ * root of the sysfs tree.
+ *
+ * If this function returns an error, kobject_put() must be called to
+ * properly clean up the memory associated with the object.
+ * Under no circumstances should the kobject that is passed to this function
+ * be directly freed with a call to kfree(), as that can leak memory.
+ *
+ * Note, no "add" uevent will be created with this call, the caller should set
+ * up all of the necessary sysfs files for the object and then call
+ * kobject_uevent() with the UEVENT_ADD parameter to ensure that
+ * userspace is properly notified of this kobject's creation.
+ */
+int kobject_add(struct kobject *kobj, struct kobject *parent,
+ const char *fmt, ...)
+{
+ va_list args;
+ int retval;
+
+ if (!kobj)
+ return -EINVAL;
+
+ if (!kobj->state_initialized) {
+ printk(KERN_ERR "kobject '%s' (%p): tried to add an "
+ "uninitialized object, something is seriously wrong.\n",
+ kobject_name(kobj), kobj);
+ dump_stack();
+ return -EINVAL;
+ }
+ va_start(args, fmt);
+ retval = kobject_add_varg(kobj, parent, fmt, args);
+ va_end(args);
+
+ return retval;
+}
+EXPORT_SYMBOL(kobject_add);
+
+/**
+ * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy
+ * @kobj: pointer to the kobject to initialize
+ * @ktype: pointer to the ktype for this kobject.
+ * @parent: pointer to the parent of this kobject.
+ * @fmt: the name of the kobject.
+ *
+ * This function combines the call to kobject_init() and
+ * kobject_add(). The same error handling after a call to kobject_add()
+ * applies, and the kobject lifetime rules are the same here.
+ */
+int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
+ struct kobject *parent, const char *fmt, ...)
+{
+ va_list args;
+ int retval;
+
+ kobject_init(kobj, ktype);
+
+ va_start(args, fmt);
+ retval = kobject_add_varg(kobj, parent, fmt, args);
+ va_end(args);
+
+ return retval;
+}
+EXPORT_SYMBOL_GPL(kobject_init_and_add);
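+
+/*
+ * A minimal sketch of the usual pattern; my_obj, my_ktype and id are
+ * illustrative names, and my_ktype must provide a release() function:
+ *
+ *   retval = kobject_init_and_add(&my_obj->kobj, &my_ktype,
+ *                                 parent_kobj, "my_obj%d", id);
+ *   if (retval)
+ *           kobject_put(&my_obj->kobj);   (never kfree() on failure)
+ */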
+
+/**
+ * kobject_rename - change the name of an object
+ * @kobj: object in question.
+ * @new_name: object's new name
+ *
+ * It is the responsibility of the caller to provide mutual
+ * exclusion between two different calls of kobject_rename
+ * on the same kobject and to ensure that new_name is valid and
+ * won't conflict with other kobjects.
+ */
+int kobject_rename(struct kobject *kobj, const char *new_name)
+{
+ int error = 0;
+ const char *devpath = NULL;
+ const char *dup_name = NULL, *name;
+ char *devpath_string = NULL;
+ char *envp[2];
+
+ kobj = kobject_get(kobj);
+ if (!kobj)
+ return -EINVAL;
+ if (!kobj->parent)
+ return -EINVAL;
+
+ devpath = kobject_get_path(kobj, GFP_KERNEL);
+ if (!devpath) {
+ error = -ENOMEM;
+ goto out;
+ }
+ devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL);
+ if (!devpath_string) {
+ error = -ENOMEM;
+ goto out;
+ }
+ sprintf(devpath_string, "DEVPATH_OLD=%s", devpath);
+ envp[0] = devpath_string;
+ envp[1] = NULL;
+
+ name = dup_name = kstrdup(new_name, GFP_KERNEL);
+ if (!name) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ error = sysfs_rename_dir(kobj, new_name);
+ if (error)
+ goto out;
+
+ /* Install the new kobject name */
+ dup_name = kobj->name;
+ kobj->name = name;
+
+ /* This function is mostly/only used for network interfaces.
+ * Some hotplug packages track interfaces by their name and
+ * therefore want to know when the name is changed by the user. */
+ kobject_uevent_env(kobj, KOBJ_MOVE, envp);
+
+out:
+ kfree(dup_name);
+ kfree(devpath_string);
+ kfree(devpath);
+ kobject_put(kobj);
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(kobject_rename);
+
+/**
+ * kobject_move - move object to another parent
+ * @kobj: object in question.
+ * @new_parent: object's new parent (can be NULL)
+ */
+int kobject_move(struct kobject *kobj, struct kobject *new_parent)
+{
+ int error;
+ struct kobject *old_parent;
+ const char *devpath = NULL;
+ char *devpath_string = NULL;
+ char *envp[2];
+
+ kobj = kobject_get(kobj);
+ if (!kobj)
+ return -EINVAL;
+ new_parent = kobject_get(new_parent);
+ if (!new_parent) {
+ if (kobj->kset)
+ new_parent = kobject_get(&kobj->kset->kobj);
+ }
+ /* old object path */
+ devpath = kobject_get_path(kobj, GFP_KERNEL);
+ if (!devpath) {
+ error = -ENOMEM;
+ goto out;
+ }
+ devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL);
+ if (!devpath_string) {
+ error = -ENOMEM;
+ goto out;
+ }
+ sprintf(devpath_string, "DEVPATH_OLD=%s", devpath);
+ envp[0] = devpath_string;
+ envp[1] = NULL;
+ error = sysfs_move_dir(kobj, new_parent);
+ if (error)
+ goto out;
+ old_parent = kobj->parent;
+ kobj->parent = new_parent;
+ new_parent = NULL;
+ kobject_put(old_parent);
+ kobject_uevent_env(kobj, KOBJ_MOVE, envp);
+out:
+ kobject_put(new_parent);
+ kobject_put(kobj);
+ kfree(devpath_string);
+ kfree(devpath);
+ return error;
+}
+
+/**
+ * kobject_del - unlink kobject from hierarchy.
+ * @kobj: object.
+ */
+void kobject_del(struct kobject *kobj)
+{
+ if (!kobj)
+ return;
+
+ sysfs_remove_dir(kobj);
+ kobj->state_in_sysfs = 0;
+ kobj_kset_leave(kobj);
+ kobject_put(kobj->parent);
+ kobj->parent = NULL;
+}
+
+/**
+ * kobject_get - increment refcount for object.
+ * @kobj: object.
+ */
+struct kobject *kobject_get(struct kobject *kobj)
+{
+ if (kobj)
+ kref_get(&kobj->kref);
+ return kobj;
+}
+
+/*
+ * kobject_cleanup - free kobject resources.
+ * @kobj: object to cleanup
+ */
+static void kobject_cleanup(struct kobject *kobj)
+{
+ struct kobj_type *t = get_ktype(kobj);
+ const char *name = kobj->name;
+
+ pr_debug("kobject: '%s' (%p): %s\n",
+ kobject_name(kobj), kobj, __func__);
+
+ if (t && !t->release)
+ pr_debug("kobject: '%s' (%p): does not have a release() "
+ "function, it is broken and must be fixed.\n",
+ kobject_name(kobj), kobj);
+
+ /* send "remove" if the caller did not do it but sent "add" */
+ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) {
+ pr_debug("kobject: '%s' (%p): auto cleanup 'remove' event\n",
+ kobject_name(kobj), kobj);
+ kobject_uevent(kobj, KOBJ_REMOVE);
+ }
+
+ /* remove from sysfs if the caller did not do it */
+ if (kobj->state_in_sysfs) {
+ pr_debug("kobject: '%s' (%p): auto cleanup kobject_del\n",
+ kobject_name(kobj), kobj);
+ kobject_del(kobj);
+ }
+
+ if (t && t->release) {
+ pr_debug("kobject: '%s' (%p): calling ktype release\n",
+ kobject_name(kobj), kobj);
+ t->release(kobj);
+ }
+
+ /* free name if we allocated it */
+ if (name) {
+ pr_debug("kobject: '%s': free name\n", name);
+ kfree(name);
+ }
+}
+
+static void kobject_release(struct kref *kref)
+{
+ kobject_cleanup(container_of(kref, struct kobject, kref));
+}
+
+/**
+ * kobject_put - decrement refcount for object.
+ * @kobj: object.
+ *
+ * Decrement the refcount, and if 0, call kobject_cleanup().
+ */
+void kobject_put(struct kobject *kobj)
+{
+ if (kobj) {
+ if (!kobj->state_initialized)
+ WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
+ "initialized, yet kobject_put() is being "
+ "called.\n", kobject_name(kobj), kobj);
+ kref_put(&kobj->kref, kobject_release);
+ }
+}
+
+static void dynamic_kobj_release(struct kobject *kobj)
+{
+ pr_debug("kobject: (%p): %s\n", kobj, __func__);
+ kfree(kobj);
+}
+
+static struct kobj_type dynamic_kobj_ktype = {
+ .release = dynamic_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+};
+
+/**
+ * kobject_create - create a struct kobject dynamically
+ *
+ * This function creates a kobject structure dynamically and sets it up
+ * to be a "dynamic" kobject with a default release function set up.
+ *
+ * If the kobject was not able to be created, NULL will be returned.
+ * The kobject structure returned from here must be cleaned up with a
+ * call to kobject_put() and not kfree(), as kobject_init() has
+ * already been called on this structure.
+ */
+struct kobject *kobject_create(void)
+{
+ struct kobject *kobj;
+
+ kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+ if (!kobj)
+ return NULL;
+
+ kobject_init(kobj, &dynamic_kobj_ktype);
+ return kobj;
+}
+
+/**
+ * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs
+ *
+ * @name: the name for the kset
+ * @parent: the parent kobject of this kobject, if any.
+ *
+ * This function creates a kobject structure dynamically and registers it
+ * with sysfs. When you are finished with this structure, call
+ * kobject_put() and the structure will be dynamically freed when
+ * it is no longer being used.
+ *
+ * If the kobject was not able to be created, NULL will be returned.
+ */
+struct kobject *kobject_create_and_add(const char *name, struct kobject *parent)
+{
+ struct kobject *kobj;
+ int retval;
+
+ kobj = kobject_create();
+ if (!kobj)
+ return NULL;
+
+ retval = kobject_add(kobj, parent, "%s", name);
+ if (retval) {
+ printk(KERN_WARNING "%s: kobject_add error: %d\n",
+ __func__, retval);
+ kobject_put(kobj);
+ kobj = NULL;
+ }
+ return kobj;
+}
+EXPORT_SYMBOL_GPL(kobject_create_and_add);
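+
+/*
+ * Usage sketch: create a directory in sysfs and later drop it. The
+ * "example" name is illustrative; parenting on kernel_kobj places it
+ * under /sys/kernel:
+ *
+ *   struct kobject *kobj;
+ *
+ *   kobj = kobject_create_and_add("example", kernel_kobj);
+ *   if (!kobj)
+ *           return -ENOMEM;
+ *   kobject_put(kobj);
+ */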
+
+/**
+ * kset_init - initialize a kset for use
+ * @k: kset
+ */
+void kset_init(struct kset *k)
+{
+ kobject_init_internal(&k->kobj);
+ INIT_LIST_HEAD(&k->list);
+ spin_lock_init(&k->list_lock);
+}
+
+/* default kobject attribute operations */
+static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct kobj_attribute *kattr;
+ ssize_t ret = -EIO;
+
+ kattr = container_of(attr, struct kobj_attribute, attr);
+ if (kattr->show)
+ ret = kattr->show(kobj, kattr, buf);
+ return ret;
+}
+
+static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kobj_attribute *kattr;
+ ssize_t ret = -EIO;
+
+ kattr = container_of(attr, struct kobj_attribute, attr);
+ if (kattr->store)
+ ret = kattr->store(kobj, kattr, buf, count);
+ return ret;
+}
+
+const struct sysfs_ops kobj_sysfs_ops = {
+ .show = kobj_attr_show,
+ .store = kobj_attr_store,
+};
+
+/**
+ * kset_register - initialize and add a kset.
+ * @k: kset.
+ */
+int kset_register(struct kset *k)
+{
+ int err;
+
+ if (!k)
+ return -EINVAL;
+
+ kset_init(k);
+ err = kobject_add_internal(&k->kobj);
+ if (err)
+ return err;
+ kobject_uevent(&k->kobj, KOBJ_ADD);
+ return 0;
+}
+
+/**
+ * kset_unregister - remove a kset.
+ * @k: kset.
+ */
+void kset_unregister(struct kset *k)
+{
+ if (!k)
+ return;
+ kobject_put(&k->kobj);
+}
+
+/**
+ * kset_find_obj - search for object in kset.
+ * @kset: kset we're looking in.
+ * @name: object's name.
+ *
+ * Lock kset via @kset->list_lock, and iterate over @kset->list,
+ * looking for a matching kobject. If a matching object is found,
+ * take a reference and return the object.
+ */
+struct kobject *kset_find_obj(struct kset *kset, const char *name)
+{
+ struct kobject *k;
+ struct kobject *ret = NULL;
+
+ spin_lock(&kset->list_lock);
+ list_for_each_entry(k, &kset->list, entry) {
+ if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
+ ret = kobject_get(k);
+ break;
+ }
+ }
+ spin_unlock(&kset->list_lock);
+ return ret;
+}
+
+static void kset_release(struct kobject *kobj)
+{
+ struct kset *kset = container_of(kobj, struct kset, kobj);
+ pr_debug("kobject: '%s' (%p): %s\n",
+ kobject_name(kobj), kobj, __func__);
+ kfree(kset);
+}
+
+static struct kobj_type kset_ktype = {
+ .sysfs_ops = &kobj_sysfs_ops,
+ .release = kset_release,
+};
+
+/**
+ * kset_create - create a struct kset dynamically
+ *
+ * @name: the name for the kset
+ * @uevent_ops: a struct kset_uevent_ops for the kset
+ * @parent_kobj: the parent kobject of this kset, if any.
+ *
+ * This function creates a kset structure dynamically. This structure can
+ * then be registered with the system and show up in sysfs with a call to
+ * kset_register(). When you are finished with this structure, if
+ * kset_register() has been called, call kset_unregister() and the
+ * structure will be dynamically freed when it is no longer being used.
+ *
+ * If the kset was not able to be created, NULL will be returned.
+ */
+static struct kset *kset_create(const char *name,
+ const struct kset_uevent_ops *uevent_ops,
+ struct kobject *parent_kobj)
+{
+ struct kset *kset;
+ int retval;
+
+ kset = kzalloc(sizeof(*kset), GFP_KERNEL);
+ if (!kset)
+ return NULL;
+ retval = kobject_set_name(&kset->kobj, name);
+ if (retval) {
+ kfree(kset);
+ return NULL;
+ }
+ kset->uevent_ops = uevent_ops;
+ kset->kobj.parent = parent_kobj;
+
+ /*
+ * The kobject of this kset will have a type of kset_ktype and belong to
+ * no kset itself. That way we can properly free it when it is
+ * finished being used.
+ */
+ kset->kobj.ktype = &kset_ktype;
+ kset->kobj.kset = NULL;
+
+ return kset;
+}
+
+/**
+ * kset_create_and_add - create a struct kset dynamically and add it to sysfs
+ *
+ * @name: the name for the kset
+ * @uevent_ops: a struct kset_uevent_ops for the kset
+ * @parent_kobj: the parent kobject of this kset, if any.
+ *
+ * This function creates a kset structure dynamically and registers it
+ * with sysfs. When you are finished with this structure, call
+ * kset_unregister() and the structure will be dynamically freed when it
+ * is no longer being used.
+ *
+ * If the kset was not able to be created, NULL will be returned.
+ */
+struct kset *kset_create_and_add(const char *name,
+ const struct kset_uevent_ops *uevent_ops,
+ struct kobject *parent_kobj)
+{
+ struct kset *kset;
+ int error;
+
+ kset = kset_create(name, uevent_ops, parent_kobj);
+ if (!kset)
+ return NULL;
+ error = kset_register(kset);
+ if (error) {
+ kfree(kset);
+ return NULL;
+ }
+ return kset;
+}
+EXPORT_SYMBOL_GPL(kset_create_and_add);
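+
+/*
+ * Usage sketch with an illustrative name and no uevent ops; kobjects
+ * that set kobj->kset = example_kset before kobject_add() will show
+ * up under it in sysfs:
+ *
+ *   struct kset *example_kset;
+ *
+ *   example_kset = kset_create_and_add("example", NULL, kernel_kobj);
+ *   if (!example_kset)
+ *           return -ENOMEM;
+ */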
+
+
+static DEFINE_SPINLOCK(kobj_ns_type_lock);
+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
+
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
+{
+ enum kobj_ns_type type = ops->type;
+ int error;
+
+ spin_lock(&kobj_ns_type_lock);
+
+ error = -EINVAL;
+ if (type >= KOBJ_NS_TYPES)
+ goto out;
+
+ error = -EINVAL;
+ if (type <= KOBJ_NS_TYPE_NONE)
+ goto out;
+
+ error = -EBUSY;
+ if (kobj_ns_ops_tbl[type])
+ goto out;
+
+ error = 0;
+ kobj_ns_ops_tbl[type] = ops;
+
+out:
+ spin_unlock(&kobj_ns_type_lock);
+ return error;
+}
+
+int kobj_ns_type_registered(enum kobj_ns_type type)
+{
+ int registered = 0;
+
+ spin_lock(&kobj_ns_type_lock);
+ if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
+ registered = kobj_ns_ops_tbl[type] != NULL;
+ spin_unlock(&kobj_ns_type_lock);
+
+ return registered;
+}
+
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
+{
+ const struct kobj_ns_type_operations *ops = NULL;
+
+ if (parent && parent->ktype->child_ns_type)
+ ops = parent->ktype->child_ns_type(parent);
+
+ return ops;
+}
+
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
+{
+ return kobj_child_ns_ops(kobj->parent);
+}
+
+
+const void *kobj_ns_current(enum kobj_ns_type type)
+{
+ const void *ns = NULL;
+
+ spin_lock(&kobj_ns_type_lock);
+ if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+ kobj_ns_ops_tbl[type])
+ ns = kobj_ns_ops_tbl[type]->current_ns();
+ spin_unlock(&kobj_ns_type_lock);
+
+ return ns;
+}
+
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
+{
+ const void *ns = NULL;
+
+ spin_lock(&kobj_ns_type_lock);
+ if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+ kobj_ns_ops_tbl[type])
+ ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
+ spin_unlock(&kobj_ns_type_lock);
+
+ return ns;
+}
+
+const void *kobj_ns_initial(enum kobj_ns_type type)
+{
+ const void *ns = NULL;
+
+ spin_lock(&kobj_ns_type_lock);
+ if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+ kobj_ns_ops_tbl[type])
+ ns = kobj_ns_ops_tbl[type]->initial_ns();
+ spin_unlock(&kobj_ns_type_lock);
+
+ return ns;
+}
+
+/*
+ * kobj_ns_exit - invalidate a namespace tag
+ *
+ * @type: the namespace type (e.g. KOBJ_NS_TYPE_NET)
+ * @ns: the actual namespace being invalidated
+ *
+ * This is called when a tag is no longer valid. For instance,
+ * when a network namespace exits, it uses this helper to
+ * make sure no sb's sysfs_info points to the now-invalidated
+ * netns.
+ */
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
+{
+ sysfs_exit_ns(type, ns);
+}
+
+
+EXPORT_SYMBOL(kobject_get);
+EXPORT_SYMBOL(kobject_put);
+EXPORT_SYMBOL(kobject_del);
+
+EXPORT_SYMBOL(kset_register);
+EXPORT_SYMBOL(kset_unregister);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
new file mode 100644
index 00000000..70af0a7f
--- /dev/null
+++ b/lib/kobject_uevent.c
@@ -0,0 +1,425 @@
+/*
+ * kernel userspace event delivery
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2004 Novell, Inc. All rights reserved.
+ * Copyright (C) 2004 IBM, Inc. All rights reserved.
+ *
+ * Licensed under the GNU GPL v2.
+ *
+ * Authors:
+ * Robert Love <rml@novell.com>
+ * Kay Sievers <kay.sievers@vrfy.org>
+ * Arjan van de Ven <arjanv@redhat.com>
+ * Greg Kroah-Hartman <greg@kroah.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/user_namespace.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <net/sock.h>
+#include <net/net_namespace.h>
+
+
+u64 uevent_seqnum;
+char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
+static DEFINE_SPINLOCK(sequence_lock);
+#ifdef CONFIG_NET
+struct uevent_sock {
+ struct list_head list;
+ struct sock *sk;
+};
+static LIST_HEAD(uevent_sock_list);
+static DEFINE_MUTEX(uevent_sock_mutex);
+#endif
+
+/* the strings here must match the enum in include/linux/kobject.h */
+static const char *kobject_actions[] = {
+ [KOBJ_ADD] = "add",
+ [KOBJ_REMOVE] = "remove",
+ [KOBJ_CHANGE] = "change",
+ [KOBJ_MOVE] = "move",
+ [KOBJ_ONLINE] = "online",
+ [KOBJ_OFFLINE] = "offline",
+};
+
+/**
+ * kobject_action_type - translate action string to numeric type
+ *
+ * @buf: buffer containing the action string, newline is ignored
+ * @count: length of buffer
+ * @type: pointer to the location to store the action type
+ *
+ * Returns 0 if the action string was recognized.
+ */
+int kobject_action_type(const char *buf, size_t count,
+ enum kobject_action *type)
+{
+ enum kobject_action action;
+ int ret = -EINVAL;
+
+ if (count && (buf[count-1] == '\n' || buf[count-1] == '\0'))
+ count--;
+
+ if (!count)
+ goto out;
+
+ for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) {
+ if (strncmp(kobject_actions[action], buf, count) != 0)
+ continue;
+ if (kobject_actions[action][count] != '\0')
+ continue;
+ *type = action;
+ ret = 0;
+ break;
+ }
+out:
+ return ret;
+}
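+
+/*
+ * Sketch of the typical caller: a sysfs "uevent" store method that
+ * maps a string written from userspace (e.g. "add\n") onto the enum
+ * and resends the event; buf and count are the store() arguments:
+ *
+ *   enum kobject_action action;
+ *
+ *   if (kobject_action_type(buf, count, &action) == 0)
+ *           kobject_uevent(kobj, action);
+ */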
+
+#ifdef CONFIG_NET
+static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+ struct kobject *kobj = data;
+ const struct kobj_ns_type_operations *ops;
+
+ ops = kobj_ns_ops(kobj);
+ if (ops) {
+ const void *sock_ns, *ns;
+ ns = kobj->ktype->namespace(kobj);
+ sock_ns = ops->netlink_ns(dsk);
+ return sock_ns != ns;
+ }
+
+ return 0;
+}
+#endif
+
+static int kobj_usermode_filter(struct kobject *kobj)
+{
+ const struct kobj_ns_type_operations *ops;
+
+ ops = kobj_ns_ops(kobj);
+ if (ops) {
+ const void *init_ns, *ns;
+ ns = kobj->ktype->namespace(kobj);
+ init_ns = ops->initial_ns();
+ return ns != init_ns;
+ }
+
+ return 0;
+}
+
+/**
+ * kobject_uevent_env - send an uevent with environmental data
+ *
+ * @kobj: struct kobject that the action is happening to
+ * @action: action that is happening
+ * @envp_ext: pointer to environmental data
+ *
+ * Returns 0 if kobject_uevent_env() completed successfully, or the
+ * corresponding error when it fails.
+ */
+int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
+ char *envp_ext[])
+{
+ struct kobj_uevent_env *env;
+ const char *action_string = kobject_actions[action];
+ const char *devpath = NULL;
+ const char *subsystem;
+ struct kobject *top_kobj;
+ struct kset *kset;
+ const struct kset_uevent_ops *uevent_ops;
+ u64 seq;
+ int i = 0;
+ int retval = 0;
+#ifdef CONFIG_NET
+ struct uevent_sock *ue_sk;
+#endif
+
+ pr_debug("kobject: '%s' (%p): %s\n",
+ kobject_name(kobj), kobj, __func__);
+
+ /* search the kset we belong to */
+ top_kobj = kobj;
+ while (!top_kobj->kset && top_kobj->parent)
+ top_kobj = top_kobj->parent;
+
+ if (!top_kobj->kset) {
+ pr_debug("kobject: '%s' (%p): %s: attempted to send uevent "
+ "without kset!\n", kobject_name(kobj), kobj,
+ __func__);
+ return -EINVAL;
+ }
+
+ kset = top_kobj->kset;
+ uevent_ops = kset->uevent_ops;
+
+ /* skip the event, if uevent_suppress is set */
+ if (kobj->uevent_suppress) {
+ pr_debug("kobject: '%s' (%p): %s: uevent_suppress "
+ "caused the event to drop!\n",
+ kobject_name(kobj), kobj, __func__);
+ return 0;
+ }
+ /* skip the event, if the filter returns zero. */
+ if (uevent_ops && uevent_ops->filter)
+ if (!uevent_ops->filter(kset, kobj)) {
+ pr_debug("kobject: '%s' (%p): %s: filter function "
+ "caused the event to drop!\n",
+ kobject_name(kobj), kobj, __func__);
+ return 0;
+ }
+
+ /* originating subsystem */
+ if (uevent_ops && uevent_ops->name)
+ subsystem = uevent_ops->name(kset, kobj);
+ else
+ subsystem = kobject_name(&kset->kobj);
+ if (!subsystem) {
+ pr_debug("kobject: '%s' (%p): %s: unset subsystem caused the "
+ "event to drop!\n", kobject_name(kobj), kobj,
+ __func__);
+ return 0;
+ }
+
+ /* environment buffer */
+ env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
+ if (!env)
+ return -ENOMEM;
+
+ /* complete object path */
+ devpath = kobject_get_path(kobj, GFP_KERNEL);
+ if (!devpath) {
+ retval = -ENOENT;
+ goto exit;
+ }
+
+ /* default keys */
+ retval = add_uevent_var(env, "ACTION=%s", action_string);
+ if (retval)
+ goto exit;
+ retval = add_uevent_var(env, "DEVPATH=%s", devpath);
+ if (retval)
+ goto exit;
+ retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem);
+ if (retval)
+ goto exit;
+
+ /* keys passed in from the caller */
+ if (envp_ext) {
+ for (i = 0; envp_ext[i]; i++) {
+ retval = add_uevent_var(env, "%s", envp_ext[i]);
+ if (retval)
+ goto exit;
+ }
+ }
+
+ /* let the kset specific function add its stuff */
+ if (uevent_ops && uevent_ops->uevent) {
+ retval = uevent_ops->uevent(kset, kobj, env);
+ if (retval) {
+ pr_debug("kobject: '%s' (%p): %s: uevent() returned "
+ "%d\n", kobject_name(kobj), kobj,
+ __func__, retval);
+ goto exit;
+ }
+ }
+
+ /*
+ * Mark "add" and "remove" events in the object to ensure proper
+ * events to userspace during automatic cleanup. If the object did
+ * send an "add" event, "remove" will automatically be generated by
+ * the core, if not already done by the caller.
+ */
+ if (action == KOBJ_ADD)
+ kobj->state_add_uevent_sent = 1;
+ else if (action == KOBJ_REMOVE)
+ kobj->state_remove_uevent_sent = 1;
+
+ /* we will send an event, so request a new sequence number */
+ spin_lock(&sequence_lock);
+ seq = ++uevent_seqnum;
+ spin_unlock(&sequence_lock);
+ retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)seq);
+ if (retval)
+ goto exit;
+
+#if defined(CONFIG_NET)
+ /* send netlink message */
+ mutex_lock(&uevent_sock_mutex);
+ list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+ struct sock *uevent_sock = ue_sk->sk;
+ struct sk_buff *skb;
+ size_t len;
+
+ /* allocate message with the maximum possible size */
+ len = strlen(action_string) + strlen(devpath) + 2;
+ skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+ if (skb) {
+ char *scratch;
+
+ /* add header */
+ scratch = skb_put(skb, len);
+ sprintf(scratch, "%s@%s", action_string, devpath);
+
+ /* copy keys to our contiguous event payload buffer */
+ for (i = 0; i < env->envp_idx; i++) {
+ len = strlen(env->envp[i]) + 1;
+ scratch = skb_put(skb, len);
+ strcpy(scratch, env->envp[i]);
+ }
+
+ NETLINK_CB(skb).dst_group = 1;
+ retval = netlink_broadcast_filtered(uevent_sock, skb,
+ 0, 1, GFP_KERNEL,
+ kobj_bcast_filter,
+ kobj);
+ /* ENOBUFS should be handled in userspace */
+ if (retval == -ENOBUFS)
+ retval = 0;
+ } else
+ retval = -ENOMEM;
+ }
+ mutex_unlock(&uevent_sock_mutex);
+#endif
+
+ /* call uevent_helper, usually only enabled during early boot */
+ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
+ char *argv[3];
+
+ argv[0] = uevent_helper;
+ argv[1] = (char *)subsystem;
+ argv[2] = NULL;
+ retval = add_uevent_var(env, "HOME=/");
+ if (retval)
+ goto exit;
+ retval = add_uevent_var(env,
+ "PATH=/sbin:/bin:/usr/sbin:/usr/bin");
+ if (retval)
+ goto exit;
+
+ retval = call_usermodehelper(argv[0], argv,
+ env->envp, UMH_WAIT_EXEC);
+ }
+
+exit:
+ kfree(devpath);
+ kfree(env);
+ return retval;
+}
+EXPORT_SYMBOL_GPL(kobject_uevent_env);
+
+/**
+ * kobject_uevent - notify userspace by sending an uevent
+ *
+ * @kobj: struct kobject that the action is happening to
+ * @action: action that is happening
+ *
+ * Returns 0 if kobject_uevent() completed successfully, or the
+ * corresponding error when it fails.
+ */
+int kobject_uevent(struct kobject *kobj, enum kobject_action action)
+{
+ return kobject_uevent_env(kobj, action, NULL);
+}
+EXPORT_SYMBOL_GPL(kobject_uevent);
+
+/**
+ * add_uevent_var - add a key/value string to the environment buffer
+ * @env: environment buffer structure
+ * @format: printf format for the key=value pair
+ *
+ * Returns 0 if environment variable was added successfully or -ENOMEM
+ * if no space was available.
+ */
+int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
+{
+ va_list args;
+ int len;
+
+ if (env->envp_idx >= ARRAY_SIZE(env->envp)) {
+ WARN(1, KERN_ERR "add_uevent_var: too many keys\n");
+ return -ENOMEM;
+ }
+
+ va_start(args, format);
+ len = vsnprintf(&env->buf[env->buflen],
+ sizeof(env->buf) - env->buflen,
+ format, args);
+ va_end(args);
+
+ if (len >= (sizeof(env->buf) - env->buflen)) {
+ WARN(1, KERN_ERR "add_uevent_var: buffer size too small\n");
+ return -ENOMEM;
+ }
+
+ env->envp[env->envp_idx++] = &env->buf[env->buflen];
+ env->buflen += len + 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(add_uevent_var);
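+
+/*
+ * A minimal sketch of a kset uevent() callback adding its own key;
+ * the MYDRIVER_MODE name and the mode value are illustrative:
+ *
+ *   static int my_uevent(struct kset *kset, struct kobject *kobj,
+ *                        struct kobj_uevent_env *env)
+ *   {
+ *           return add_uevent_var(env, "MYDRIVER_MODE=%d", 1);
+ *   }
+ */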
+
+#if defined(CONFIG_NET)
+static int uevent_net_init(struct net *net)
+{
+ struct uevent_sock *ue_sk;
+
+ ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
+ if (!ue_sk)
+ return -ENOMEM;
+
+ ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT,
+ 1, NULL, NULL, THIS_MODULE);
+ if (!ue_sk->sk) {
+ printk(KERN_ERR
+ "kobject_uevent: unable to create netlink socket!\n");
+ kfree(ue_sk);
+ return -ENODEV;
+ }
+ mutex_lock(&uevent_sock_mutex);
+ list_add_tail(&ue_sk->list, &uevent_sock_list);
+ mutex_unlock(&uevent_sock_mutex);
+ return 0;
+}
+
+static void uevent_net_exit(struct net *net)
+{
+ struct uevent_sock *ue_sk;
+
+ mutex_lock(&uevent_sock_mutex);
+ list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+ if (sock_net(ue_sk->sk) == net)
+ goto found;
+ }
+ mutex_unlock(&uevent_sock_mutex);
+ return;
+
+found:
+ list_del(&ue_sk->list);
+ mutex_unlock(&uevent_sock_mutex);
+
+ netlink_kernel_release(ue_sk->sk);
+ kfree(ue_sk);
+}
+
+static struct pernet_operations uevent_net_ops = {
+ .init = uevent_net_init,
+ .exit = uevent_net_exit,
+};
+
+static int __init kobject_uevent_init(void)
+{
+ netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV);
+ return register_pernet_subsys(&uevent_net_ops);
+}
+
+
+postcore_initcall(kobject_uevent_init);
+#endif
diff --git a/lib/kref.c b/lib/kref.c
new file mode 100644
index 00000000..d3d227a0
--- /dev/null
+++ b/lib/kref.c
@@ -0,0 +1,67 @@
+/*
+ * kref.c - library routines for handling generic reference counted objects
+ *
+ * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
+ * Copyright (C) 2004 IBM Corp.
+ *
+ * based on lib/kobject.c which was:
+ * Copyright (C) 2002-2003 Patrick Mochel <mochel@osdl.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+/**
+ * kref_init - initialize object.
+ * @kref: object in question.
+ */
+void kref_init(struct kref *kref)
+{
+ atomic_set(&kref->refcount, 1);
+ smp_mb();
+}
+
+/**
+ * kref_get - increment refcount for object.
+ * @kref: object.
+ */
+void kref_get(struct kref *kref)
+{
+ WARN_ON(!atomic_read(&kref->refcount));
+ atomic_inc(&kref->refcount);
+ smp_mb__after_atomic_inc();
+}
+
+/**
+ * kref_put - decrement refcount for object.
+ * @kref: object.
+ * @release: pointer to the function that will clean up the object when the
+ * last reference to the object is released.
+ * This pointer is required, and it is not acceptable to pass kfree
+ * in as this function.
+ *
+ * Decrement the refcount, and if 0, call release().
+ * Return 1 if the object was removed, otherwise return 0. Beware: even if
+ * this function returns 0, you cannot count on the kref still being in
+ * memory; only use the return value to see whether the kref is now gone.
+ */
+int kref_put(struct kref *kref, void (*release)(struct kref *kref))
+{
+ WARN_ON(release == NULL);
+ WARN_ON(release == (void (*)(struct kref *))kfree);
+
+ if (atomic_dec_and_test(&kref->refcount)) {
+ release(kref);
+ return 1;
+ }
+ return 0;
+}
+
+EXPORT_SYMBOL(kref_init);
+EXPORT_SYMBOL(kref_get);
+EXPORT_SYMBOL(kref_put);
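+
+/*
+ * A minimal lifetime sketch for an object embedding a kref; struct
+ * my_obj and my_release() are illustrative names:
+ *
+ *   struct my_obj {
+ *           struct kref ref;
+ *   };
+ *
+ *   static void my_release(struct kref *kref)
+ *   {
+ *           kfree(container_of(kref, struct my_obj, ref));
+ *   }
+ *
+ *   kref_init(&obj->ref);                 (refcount is now 1)
+ *   kref_get(&obj->ref);                  (take an extra reference)
+ *   kref_put(&obj->ref, my_release);      (drop it again)
+ *   kref_put(&obj->ref, my_release);      (last put frees the object)
+ */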
diff --git a/lib/lcm.c b/lib/lcm.c
new file mode 100644
index 00000000..157cd88a
--- /dev/null
+++ b/lib/lcm.c
@@ -0,0 +1,15 @@
+#include <linux/kernel.h>
+#include <linux/gcd.h>
+#include <linux/module.h>
+
+/* Lowest common multiple */
+unsigned long lcm(unsigned long a, unsigned long b)
+{
+ if (a && b)
+ return (a * b) / gcd(a, b);
+ else if (b)
+ return b;
+
+ return a;
+}
+EXPORT_SYMBOL_GPL(lcm);
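+
+/*
+ * Worked example: lcm(4, 6) = (4 * 6) / gcd(4, 6) = 24 / 2 = 12.
+ * Note that the intermediate product a * b can overflow unsigned long
+ * for large inputs; callers are expected to stay within that range.
+ */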
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
new file mode 100644
index 00000000..244f5480
--- /dev/null
+++ b/lib/libcrc32c.c
@@ -0,0 +1,81 @@
+/*
+ * CRC32C
+ * @Article{castagnoli-crc,
+ * author = {Guy Castagnoli and Stefan Braeuer and Martin Herrman},
+ * title = {{Optimization of Cyclic Redundancy-Check Codes with 24
+ * and 32 Parity Bits}},
+ * journal = {IEEE Transactions on Communications},
+ * year = {1993},
+ * volume = {41},
+ * number = {6},
+ * pages = {},
+ * month = {June},
+ *}
+ * Used by the iSCSI driver, possibly others, and derived from
+ * the iscsi-crc.c module of the linux-iscsi driver at
+ * http://linux-iscsi.sourceforge.net.
+ *
+ * Following the example of lib/crc32, this function is intended to be
+ * flexible and useful for all users. Modules that currently have their
+ * own crc32c, but hopefully may be able to use this one are:
+ * net/sctp (please add all your doco to here if you change to
+ * use this one!)
+ * <endoflist>
+ *
+ * Copyright (c) 2004 Cisco Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/hash.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static struct crypto_shash *tfm;
+
+u32 crc32c(u32 crc, const void *address, unsigned int length)
+{
+ struct {
+ struct shash_desc shash;
+ char ctx[crypto_shash_descsize(tfm)];
+ } desc;
+ int err;
+
+ desc.shash.tfm = tfm;
+ desc.shash.flags = 0;
+ *(u32 *)desc.ctx = crc;
+
+ err = crypto_shash_update(&desc.shash, address, length);
+ BUG_ON(err);
+
+ return *(u32 *)desc.ctx;
+}
+
+EXPORT_SYMBOL(crc32c);
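+
+/*
+ * Typical usage sketch: seed with ~0 and feed the data in one or more
+ * chunks; whether the final value is inverted is up to the protocol
+ * (buf/len and more/more_len are illustrative):
+ *
+ *   u32 crc;
+ *
+ *   crc = crc32c(~(u32)0, buf, len);
+ *   crc = crc32c(crc, more, more_len);    (chained update)
+ */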
+
+static int __init libcrc32c_mod_init(void)
+{
+ tfm = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
+
+ return 0;
+}
+
+static void __exit libcrc32c_mod_fini(void)
+{
+ crypto_free_shash(tfm);
+}
+
+module_init(libcrc32c_mod_init);
+module_exit(libcrc32c_mod_fini);
+
+MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>");
+MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations");
+MODULE_LICENSE("GPL");
diff --git a/lib/list_debug.c b/lib/list_debug.c
new file mode 100644
index 00000000..344c710d
--- /dev/null
+++ b/lib/list_debug.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2006, Red Hat, Inc., Dave Jones
+ * Released under the General Public License (GPL).
+ *
+ * This file contains the linked list implementations for
+ * DEBUG_LIST.
+ */
+
+#include <linux/module.h>
+#include <linux/list.h>
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+
+void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ WARN(next->prev != prev,
+ "list_add corruption. next->prev should be "
+ "prev (%p), but was %p. (next=%p).\n",
+ prev, next->prev, next);
+ WARN(prev->next != next,
+ "list_add corruption. prev->next should be "
+ "next (%p), but was %p. (prev=%p).\n",
+ next, prev->next, prev);
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+EXPORT_SYMBOL(__list_add);
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+void list_del(struct list_head *entry)
+{
+ WARN(entry->next == LIST_POISON1,
+ "list_del corruption, next is LIST_POISON1 (%p)\n",
+ LIST_POISON1);
+ WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2,
+ "list_del corruption, prev is LIST_POISON2 (%p)\n",
+ LIST_POISON2);
+ WARN(entry->prev->next != entry,
+ "list_del corruption. prev->next should be %p, "
+ "but was %p\n", entry, entry->prev->next);
+ WARN(entry->next->prev != entry,
+ "list_del corruption. next->prev should be %p, "
+ "but was %p\n", entry, entry->next->prev);
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+EXPORT_SYMBOL(list_del);
diff --git a/lib/list_sort.c b/lib/list_sort.c
new file mode 100644
index 00000000..a7616fa3
--- /dev/null
+++ b/lib/list_sort.c
@@ -0,0 +1,217 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list_sort.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+static struct list_head *merge(void *priv,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b),
+ struct list_head *a, struct list_head *b)
+{
+ struct list_head head, *tail = &head;
+
+ while (a && b) {
+ /* if equal, take 'a' -- important for sort stability */
+ if ((*cmp)(priv, a, b) <= 0) {
+ tail->next = a;
+ a = a->next;
+ } else {
+ tail->next = b;
+ b = b->next;
+ }
+ tail = tail->next;
+ }
+ tail->next = a?:b;
+ return head.next;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure. This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+static void merge_and_restore_back_links(void *priv,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b),
+ struct list_head *head,
+ struct list_head *a, struct list_head *b)
+{
+ struct list_head *tail = head;
+
+ while (a && b) {
+ /* if equal, take 'a' -- important for sort stability */
+ if ((*cmp)(priv, a, b) <= 0) {
+ tail->next = a;
+ a->prev = tail;
+ a = a->next;
+ } else {
+ tail->next = b;
+ b->prev = tail;
+ b = b->next;
+ }
+ tail = tail->next;
+ }
+ tail->next = a ? : b;
+
+ do {
+ /*
+ * In worst cases this loop may run many iterations.
+ * Continue callbacks to the client even though no
+ * element comparison is needed, so the client's cmp()
+ * routine can invoke cond_resched() periodically.
+ */
+ (*cmp)(priv, tail->next, tail->next);
+
+ tail->next->prev = tail;
+ tail = tail->next;
+ } while (tail->next);
+
+ tail->next = head;
+ head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the element comparison function
+ *
+ * This function implements "merge sort", which has O(n log n)
+ * complexity.
+ *
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
+ */
+void list_sort(void *priv, struct list_head *head,
+ int (*cmp)(void *priv, struct list_head *a,
+ struct list_head *b))
+{
+ struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
+ -- last slot is a sentinel */
+ int lev; /* index into part[] */
+ int max_lev = 0;
+ struct list_head *list;
+
+ if (list_empty(head))
+ return;
+
+ memset(part, 0, sizeof(part));
+
+ head->prev->next = NULL;
+ list = head->next;
+
+ while (list) {
+ struct list_head *cur = list;
+ list = list->next;
+ cur->next = NULL;
+
+ for (lev = 0; part[lev]; lev++) {
+ cur = merge(priv, cmp, part[lev], cur);
+ part[lev] = NULL;
+ }
+ if (lev > max_lev) {
+ if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
+ printk_once(KERN_DEBUG "list passed to"
+ " list_sort() too long for"
+ " efficiency\n");
+ lev--;
+ }
+ max_lev = lev;
+ }
+ part[lev] = cur;
+ }
+
+ for (lev = 0; lev < max_lev; lev++)
+ if (part[lev])
+ list = merge(priv, cmp, part[lev], list);
+
+ merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
+EXPORT_SYMBOL(list_sort);
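+
+/*
+ * A minimal usage sketch; struct item with an embedded list_head and
+ * an integer key is illustrative. The cmp() callback receives the
+ * list_head pointers, so it uses container_of() to reach the
+ * embedding objects:
+ *
+ *   static int item_cmp(void *priv, struct list_head *a,
+ *                       struct list_head *b)
+ *   {
+ *           return container_of(a, struct item, list)->key -
+ *                  container_of(b, struct item, list)->key;
+ *   }
+ *
+ *   list_sort(NULL, &item_list, item_cmp);
+ */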
+
+#ifdef DEBUG_LIST_SORT
+struct debug_el {
+ struct list_head l_h;
+ int value;
+ unsigned serial;
+};
+
+static int cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ return container_of(a, struct debug_el, l_h)->value
+ - container_of(b, struct debug_el, l_h)->value;
+}
+
+/*
+ * The pattern of set bits in the list length determines which cases
+ * are hit in list_sort().
+ */
+#define LIST_SORT_TEST_LENGTH (512+128+2) /* not including head */
+
+static int __init list_sort_test(void)
+{
+ int i, r = 1, count;
+ struct list_head *head = kmalloc(sizeof(*head), GFP_KERNEL);
+ struct list_head *cur;
+
+ printk(KERN_WARNING "testing list_sort()\n");
+
+ cur = head;
+ for (i = 0; i < LIST_SORT_TEST_LENGTH; i++) {
+ struct debug_el *el = kmalloc(sizeof(*el), GFP_KERNEL);
+ BUG_ON(!el);
+ /* force some equivalencies */
+ el->value = (r = (r * 725861) % 6599) % (LIST_SORT_TEST_LENGTH/3);
+ el->serial = i;
+
+ el->l_h.prev = cur;
+ cur->next = &el->l_h;
+ cur = cur->next;
+ }
+ head->prev = cur;
+
+ list_sort(NULL, head, cmp);
+
+ count = 1;
+ for (cur = head->next; cur->next != head; cur = cur->next) {
+ struct debug_el *el = container_of(cur, struct debug_el, l_h);
+ int cmp_result = cmp(NULL, cur, cur->next);
+ if (cur->next->prev != cur) {
+ printk(KERN_EMERG "list_sort() returned "
+ "a corrupted list!\n");
+ return 1;
+ } else if (cmp_result > 0) {
+ printk(KERN_EMERG "list_sort() failed to sort!\n");
+ return 1;
+ } else if (cmp_result == 0 &&
+ el->serial >= container_of(cur->next,
+ struct debug_el, l_h)->serial) {
+ printk(KERN_EMERG "list_sort() failed to preserve order"
+ " of equivalent elements!\n");
+ return 1;
+ }
+ kfree(cur->prev);
+ count++;
+ }
+ kfree(cur);
+ if (count != LIST_SORT_TEST_LENGTH) {
+ printk(KERN_EMERG "list_sort() returned list of"
+ "different length!\n");
+ return 1;
+ }
+ return 0;
+}
+module_init(list_sort_test);
+#endif
diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h
new file mode 100644
index 00000000..10d4a150
--- /dev/null
+++ b/lib/locking-selftest-hardirq.h
@@ -0,0 +1,9 @@
+#undef IRQ_DISABLE
+#undef IRQ_ENABLE
+#undef IRQ_ENTER
+#undef IRQ_EXIT
+
+#define IRQ_ENABLE HARDIRQ_ENABLE
+#define IRQ_DISABLE HARDIRQ_DISABLE
+#define IRQ_ENTER HARDIRQ_ENTER
+#define IRQ_EXIT HARDIRQ_EXIT
diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h
new file mode 100644
index 00000000..68601b6f
--- /dev/null
+++ b/lib/locking-selftest-mutex.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK ML
+
+#undef UNLOCK
+#define UNLOCK MU
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT MI
diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h
new file mode 100644
index 00000000..9f517ebc
--- /dev/null
+++ b/lib/locking-selftest-rlock-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-rlock.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h
new file mode 100644
index 00000000..981455db
--- /dev/null
+++ b/lib/locking-selftest-rlock-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-rlock.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h
new file mode 100644
index 00000000..6789044f
--- /dev/null
+++ b/lib/locking-selftest-rlock.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK RL
+
+#undef UNLOCK
+#define UNLOCK RU
+
+#undef RLOCK
+#define RLOCK RL
+
+#undef WLOCK
+#define WLOCK WL
+
+#undef INIT
+#define INIT RWI
diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h
new file mode 100644
index 00000000..62da8866
--- /dev/null
+++ b/lib/locking-selftest-rsem.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK RSL
+
+#undef UNLOCK
+#define UNLOCK RSU
+
+#undef RLOCK
+#define RLOCK RSL
+
+#undef WLOCK
+#define WLOCK WSL
+
+#undef INIT
+#define INIT RWSI
diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h
new file mode 100644
index 00000000..a83de2a0
--- /dev/null
+++ b/lib/locking-selftest-softirq.h
@@ -0,0 +1,9 @@
+#undef IRQ_DISABLE
+#undef IRQ_ENABLE
+#undef IRQ_ENTER
+#undef IRQ_EXIT
+
+#define IRQ_DISABLE SOFTIRQ_DISABLE
+#define IRQ_ENABLE SOFTIRQ_ENABLE
+#define IRQ_ENTER SOFTIRQ_ENTER
+#define IRQ_EXIT SOFTIRQ_EXIT
diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h
new file mode 100644
index 00000000..693198dc
--- /dev/null
+++ b/lib/locking-selftest-spin-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-spin.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h
new file mode 100644
index 00000000..c472e2a8
--- /dev/null
+++ b/lib/locking-selftest-spin-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-spin.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h
new file mode 100644
index 00000000..ccd1b4b0
--- /dev/null
+++ b/lib/locking-selftest-spin.h
@@ -0,0 +1,11 @@
+#undef LOCK
+#define LOCK L
+
+#undef UNLOCK
+#define UNLOCK U
+
+#undef RLOCK
+#undef WLOCK
+
+#undef INIT
+#define INIT SI
diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h
new file mode 100644
index 00000000..2dd2e512
--- /dev/null
+++ b/lib/locking-selftest-wlock-hardirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-wlock.h"
+#include "locking-selftest-hardirq.h"
diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h
new file mode 100644
index 00000000..cb80d1cb
--- /dev/null
+++ b/lib/locking-selftest-wlock-softirq.h
@@ -0,0 +1,2 @@
+#include "locking-selftest-wlock.h"
+#include "locking-selftest-softirq.h"
diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h
new file mode 100644
index 00000000..0815322d
--- /dev/null
+++ b/lib/locking-selftest-wlock.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK WL
+
+#undef UNLOCK
+#define UNLOCK WU
+
+#undef RLOCK
+#define RLOCK RL
+
+#undef WLOCK
+#define WLOCK WL
+
+#undef INIT
+#define INIT RWI
diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h
new file mode 100644
index 00000000..b88c5f2d
--- /dev/null
+++ b/lib/locking-selftest-wsem.h
@@ -0,0 +1,14 @@
+#undef LOCK
+#define LOCK WSL
+
+#undef UNLOCK
+#define UNLOCK WSU
+
+#undef RLOCK
+#define RLOCK RSL
+
+#undef WLOCK
+#define WLOCK WSL
+
+#undef INIT
+#define INIT RWSI
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
new file mode 100644
index 00000000..619313ed
--- /dev/null
+++ b/lib/locking-selftest.c
@@ -0,0 +1,1218 @@
+/*
+ * lib/locking-selftest.c
+ *
+ * Testsuite for various locking APIs: spinlocks, rwlocks,
+ * mutexes and rw-semaphores.
+ *
+ * It checks both false positives and false negatives.
+ *
+ * Started by Ingo Molnar:
+ *
+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/lockdep.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
+#include <linux/irqflags.h>
+
+/*
+ * Change this to 1 if you want to see the failure printouts:
+ */
+static unsigned int debug_locks_verbose;
+
+static int __init setup_debug_locks_verbose(char *str)
+{
+ get_option(&str, &debug_locks_verbose);
+
+ return 1;
+}
+
+__setup("debug_locks_verbose=", setup_debug_locks_verbose);
+
+#define FAILURE 0
+#define SUCCESS 1
+
+#define LOCKTYPE_SPIN 0x1
+#define LOCKTYPE_RWLOCK 0x2
+#define LOCKTYPE_MUTEX 0x4
+#define LOCKTYPE_RWSEM 0x8
+
+/*
+ * Normal standalone locks, for the circular and irq-context
+ * dependency tests:
+ */
+static DEFINE_SPINLOCK(lock_A);
+static DEFINE_SPINLOCK(lock_B);
+static DEFINE_SPINLOCK(lock_C);
+static DEFINE_SPINLOCK(lock_D);
+
+static DEFINE_RWLOCK(rwlock_A);
+static DEFINE_RWLOCK(rwlock_B);
+static DEFINE_RWLOCK(rwlock_C);
+static DEFINE_RWLOCK(rwlock_D);
+
+static DEFINE_MUTEX(mutex_A);
+static DEFINE_MUTEX(mutex_B);
+static DEFINE_MUTEX(mutex_C);
+static DEFINE_MUTEX(mutex_D);
+
+static DECLARE_RWSEM(rwsem_A);
+static DECLARE_RWSEM(rwsem_B);
+static DECLARE_RWSEM(rwsem_C);
+static DECLARE_RWSEM(rwsem_D);
+
+/*
+ * Locks that we initialize dynamically as well so that
+ * e.g. X1 and X2 become two instances of the same class,
+ * but X* and Y* are different classes. We do this so that
+ * we do not trigger a real lockup:
+ */
+static DEFINE_SPINLOCK(lock_X1);
+static DEFINE_SPINLOCK(lock_X2);
+static DEFINE_SPINLOCK(lock_Y1);
+static DEFINE_SPINLOCK(lock_Y2);
+static DEFINE_SPINLOCK(lock_Z1);
+static DEFINE_SPINLOCK(lock_Z2);
+
+static DEFINE_RWLOCK(rwlock_X1);
+static DEFINE_RWLOCK(rwlock_X2);
+static DEFINE_RWLOCK(rwlock_Y1);
+static DEFINE_RWLOCK(rwlock_Y2);
+static DEFINE_RWLOCK(rwlock_Z1);
+static DEFINE_RWLOCK(rwlock_Z2);
+
+static DEFINE_MUTEX(mutex_X1);
+static DEFINE_MUTEX(mutex_X2);
+static DEFINE_MUTEX(mutex_Y1);
+static DEFINE_MUTEX(mutex_Y2);
+static DEFINE_MUTEX(mutex_Z1);
+static DEFINE_MUTEX(mutex_Z2);
+
+static DECLARE_RWSEM(rwsem_X1);
+static DECLARE_RWSEM(rwsem_X2);
+static DECLARE_RWSEM(rwsem_Y1);
+static DECLARE_RWSEM(rwsem_Y2);
+static DECLARE_RWSEM(rwsem_Z1);
+static DECLARE_RWSEM(rwsem_Z2);
+
+/*
+ * non-inlined runtime initializers, to let separate locks share
+ * the same lock-class:
+ */
+#define INIT_CLASS_FUNC(class) \
+static noinline void \
+init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \
+ struct rw_semaphore *rwsem) \
+{ \
+ spin_lock_init(lock); \
+ rwlock_init(rwlock); \
+ mutex_init(mutex); \
+ init_rwsem(rwsem); \
+}
+
+INIT_CLASS_FUNC(X)
+INIT_CLASS_FUNC(Y)
+INIT_CLASS_FUNC(Z)
+
+static void init_shared_classes(void)
+{
+ init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1);
+ init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2);
+
+ init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1);
+ init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2);
+
+ init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1);
+ init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2);
+}
+
+/*
+ * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests.
+ * The following functions use a lock from a simulated hardirq/softirq
+ * context, causing the locks to be marked as hardirq-safe/softirq-safe:
+ */
+
+#define HARDIRQ_DISABLE local_irq_disable
+#define HARDIRQ_ENABLE local_irq_enable
+
+#define HARDIRQ_ENTER() \
+ local_irq_disable(); \
+ irq_enter(); \
+ WARN_ON(!in_irq());
+
+#define HARDIRQ_EXIT() \
+ __irq_exit(); \
+ local_irq_enable();
+
+#define SOFTIRQ_DISABLE local_bh_disable
+#define SOFTIRQ_ENABLE local_bh_enable
+
+#define SOFTIRQ_ENTER() \
+ local_bh_disable(); \
+ local_irq_disable(); \
+ lockdep_softirq_enter(); \
+ WARN_ON(!in_softirq());
+
+#define SOFTIRQ_EXIT() \
+ lockdep_softirq_exit(); \
+ local_irq_enable(); \
+ local_bh_enable();
+
+/*
+ * Shortcuts for lock/unlock API variants, to keep
+ * the testcases compact:
+ */
+#define L(x) spin_lock(&lock_##x)
+#define U(x) spin_unlock(&lock_##x)
+#define LU(x) L(x); U(x)
+#define SI(x) spin_lock_init(&lock_##x)
+
+#define WL(x) write_lock(&rwlock_##x)
+#define WU(x) write_unlock(&rwlock_##x)
+#define WLU(x) WL(x); WU(x)
+
+#define RL(x) read_lock(&rwlock_##x)
+#define RU(x) read_unlock(&rwlock_##x)
+#define RLU(x) RL(x); RU(x)
+#define RWI(x) rwlock_init(&rwlock_##x)
+
+#define ML(x) mutex_lock(&mutex_##x)
+#define MU(x) mutex_unlock(&mutex_##x)
+#define MI(x) mutex_init(&mutex_##x)
+
+#define WSL(x) down_write(&rwsem_##x)
+#define WSU(x) up_write(&rwsem_##x)
+
+#define RSL(x) down_read(&rwsem_##x)
+#define RSU(x) up_read(&rwsem_##x)
+#define RWSI(x) init_rwsem(&rwsem_##x)
+
+#define LOCK_UNLOCK_2(x,y) LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x)
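+
+/*
+ * Illustrative expansion: with "locking-selftest-spin.h" included,
+ * LOCK_UNLOCK_2(A, B) becomes
+ *
+ *	spin_lock(&lock_A); spin_lock(&lock_B);
+ *	spin_unlock(&lock_B); spin_unlock(&lock_A);
+ *
+ * i.e. it records an A -> B dependency in lockdep.
+ */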
+
+/*
+ * Generate different permutations of the same testcase, using
+ * the same basic lock-dependency/state events:
+ */
+
+#define GENERATE_TESTCASE(name) \
+ \
+static void name(void) { E(); }
+
+#define GENERATE_PERMUTATIONS_2_EVENTS(name) \
+ \
+static void name##_12(void) { E1(); E2(); } \
+static void name##_21(void) { E2(); E1(); }
+
+#define GENERATE_PERMUTATIONS_3_EVENTS(name) \
+ \
+static void name##_123(void) { E1(); E2(); E3(); } \
+static void name##_132(void) { E1(); E3(); E2(); } \
+static void name##_213(void) { E2(); E1(); E3(); } \
+static void name##_231(void) { E2(); E3(); E1(); } \
+static void name##_312(void) { E3(); E1(); E2(); } \
+static void name##_321(void) { E3(); E2(); E1(); }
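+
+/*
+ * The permutations matter: lockdep's verdict must not depend on the
+ * order in which it first observes the individual dependencies and
+ * irq states, so each scenario is exercised in every event order.
+ */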
+
+/*
+ * AA deadlock:
+ */
+
+#define E() \
+ \
+ LOCK(X1); \
+ LOCK(X2); /* this one should fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(AA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(AA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(AA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(AA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(AA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(AA_rsem)
+
+#undef E
+
+/*
+ * Special-case for read-locking, they are
+ * allowed to recurse on the same lock class:
+ */
+static void rlock_AA1(void)
+{
+ RL(X1);
+ RL(X1); // this one should NOT fail
+}
+
+static void rlock_AA1B(void)
+{
+ RL(X1);
+ RL(X2); // this one should NOT fail
+}
+
+static void rsem_AA1(void)
+{
+ RSL(X1);
+ RSL(X1); // this one should fail
+}
+
+static void rsem_AA1B(void)
+{
+ RSL(X1);
+ RSL(X2); // this one should fail
+}
+
+/*
+ * The mixing of read and write locks is not allowed:
+ */
+static void rlock_AA2(void)
+{
+ RL(X1);
+ WL(X2); // this one should fail
+}
+
+static void rsem_AA2(void)
+{
+ RSL(X1);
+ WSL(X2); // this one should fail
+}
+
+static void rlock_AA3(void)
+{
+ WL(X1);
+ RL(X2); // this one should fail
+}
+
+static void rsem_AA3(void)
+{
+ WSL(X1);
+ RSL(X2); // this one should fail
+}
+
+/*
+ * ABBA deadlock:
+ */
+
+#define E() \
+ \
+ LOCK_UNLOCK_2(A, B); \
+ LOCK_UNLOCK_2(B, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBA_rsem)
+
+#undef E
+
+/*
+ * AB BC CA deadlock:
+ */
+
+#define E() \
+ \
+ LOCK_UNLOCK_2(A, B); \
+ LOCK_UNLOCK_2(B, C); \
+ LOCK_UNLOCK_2(C, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBCCA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBCCA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBCCA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBCCA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBCCA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBCCA_rsem)
+
+#undef E
+
+/*
+ * AB CA BC deadlock:
+ */
+
+#define E() \
+ \
+ LOCK_UNLOCK_2(A, B); \
+ LOCK_UNLOCK_2(C, A); \
+ LOCK_UNLOCK_2(B, C); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCABC_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCABC_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCABC_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCABC_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCABC_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCABC_rsem)
+
+#undef E
+
+/*
+ * AB BC CD DA deadlock:
+ */
+
+#define E() \
+ \
+ LOCK_UNLOCK_2(A, B); \
+ LOCK_UNLOCK_2(B, C); \
+ LOCK_UNLOCK_2(C, D); \
+ LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABBCCDDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABBCCDDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABBCCDDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABBCCDDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABBCCDDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABBCCDDA_rsem)
+
+#undef E
+
+/*
+ * AB CD BD DA deadlock:
+ */
+#define E() \
+ \
+ LOCK_UNLOCK_2(A, B); \
+ LOCK_UNLOCK_2(C, D); \
+ LOCK_UNLOCK_2(B, D); \
+ LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCDBDDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCDBDDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCDBDDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCDBDDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCDBDDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCDBDDA_rsem)
+
+#undef E
+
+/*
+ * AB CD BC DA deadlock:
+ */
+#define E() \
+ \
+ LOCK_UNLOCK_2(A, B); \
+ LOCK_UNLOCK_2(C, D); \
+ LOCK_UNLOCK_2(B, C); \
+ LOCK_UNLOCK_2(D, A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(ABCDBCDA_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(ABCDBCDA_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(ABCDBCDA_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(ABCDBCDA_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(ABCDBCDA_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(ABCDBCDA_rsem)
+
+#undef E
+
+/*
+ * Double unlock:
+ */
+#define E() \
+ \
+ LOCK(A); \
+ UNLOCK(A); \
+ UNLOCK(A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(double_unlock_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(double_unlock_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(double_unlock_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(double_unlock_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(double_unlock_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(double_unlock_rsem)
+
+#undef E
+
+/*
+ * Bad unlock ordering:
+ */
+#define E() \
+ \
+ LOCK(A); \
+ LOCK(B); \
+ UNLOCK(A); /* fail */ \
+ UNLOCK(B);
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(bad_unlock_order_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(bad_unlock_order_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(bad_unlock_order_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(bad_unlock_order_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(bad_unlock_order_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(bad_unlock_order_rsem)
+
+#undef E
+
+/*
+ * initializing a held lock:
+ */
+#define E() \
+ \
+ LOCK(A); \
+ INIT(A); /* fail */
+
+/*
+ * 6 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_TESTCASE(init_held_spin)
+#include "locking-selftest-wlock.h"
+GENERATE_TESTCASE(init_held_wlock)
+#include "locking-selftest-rlock.h"
+GENERATE_TESTCASE(init_held_rlock)
+#include "locking-selftest-mutex.h"
+GENERATE_TESTCASE(init_held_mutex)
+#include "locking-selftest-wsem.h"
+GENERATE_TESTCASE(init_held_wsem)
+#include "locking-selftest-rsem.h"
+GENERATE_TESTCASE(init_held_rsem)
+
+#undef E
+
+/*
+ * locking an irq-safe lock with irqs enabled:
+ */
+#define E1() \
+ \
+ IRQ_ENTER(); \
+ LOCK(A); \
+ UNLOCK(A); \
+ IRQ_EXIT();
+
+#define E2() \
+ \
+ LOCK(A); \
+ UNLOCK(A);
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Enabling hardirqs with a softirq-safe lock held:
+ */
+#define E1() \
+ \
+ SOFTIRQ_ENTER(); \
+ LOCK(A); \
+ UNLOCK(A); \
+ SOFTIRQ_EXIT();
+
+#define E2() \
+ \
+ HARDIRQ_DISABLE(); \
+ LOCK(A); \
+ HARDIRQ_ENABLE(); \
+ UNLOCK(A);
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-spin.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock)
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Enabling irqs with an irq-safe lock held:
+ */
+#define E1() \
+ \
+ IRQ_ENTER(); \
+ LOCK(A); \
+ UNLOCK(A); \
+ IRQ_EXIT();
+
+#define E2() \
+ \
+ IRQ_DISABLE(); \
+ LOCK(A); \
+ IRQ_ENABLE(); \
+ UNLOCK(A);
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock)
+
+#undef E1
+#undef E2
+
+/*
+ * Acquiring an irq-unsafe lock while holding an irq-safe lock:
+ */
+#define E1() \
+ \
+ LOCK(A); \
+ LOCK(B); \
+ UNLOCK(B); \
+ UNLOCK(A);
+
+#define E2() \
+ \
+ LOCK(B); \
+ UNLOCK(B);
+
+#define E3() \
+ \
+ IRQ_ENTER(); \
+ LOCK(A); \
+ UNLOCK(A); \
+ IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * If a lock turns softirq-safe, but earlier it took
+ * a softirq-unsafe lock:
+ */
+
+#define E1() \
+ IRQ_DISABLE(); \
+ LOCK(A); \
+ LOCK(B); \
+ UNLOCK(B); \
+ UNLOCK(A); \
+ IRQ_ENABLE();
+
+#define E2() \
+ LOCK(B); \
+ UNLOCK(B);
+
+#define E3() \
+ IRQ_ENTER(); \
+ LOCK(A); \
+ UNLOCK(A); \
+ IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock irq inversion.
+ *
+ * Deadlock scenario:
+ *
+ * CPU#1 is at #1, i.e. it has write-locked A, but has not
+ * taken B yet.
+ *
+ * CPU#2 is at #2, i.e. it has locked B.
+ *
+ * Hardirq hits CPU#2 at point #2 and is trying to read-lock A.
+ *
+ * The deadlock occurs because CPU#1 will spin on B, and CPU#2
+ * will spin on A.
+ */
+
+#define E1() \
+ \
+ IRQ_DISABLE(); \
+ WL(A); \
+ LOCK(B); \
+ UNLOCK(B); \
+ WU(A); \
+ IRQ_ENABLE();
+
+#define E2() \
+ \
+ LOCK(B); \
+ UNLOCK(B);
+
+#define E3() \
+ \
+ IRQ_ENTER(); \
+ RL(A); \
+ RU(A); \
+ IRQ_EXIT();
+
+/*
+ * Generate 36 testcases:
+ */
+#include "locking-selftest-spin-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin)
+
+#include "locking-selftest-rlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock)
+
+#include "locking-selftest-wlock-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock)
+
+#include "locking-selftest-spin-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin)
+
+#include "locking-selftest-rlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock)
+
+#include "locking-selftest-wlock-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock recursion that is actually safe.
+ */
+
+#define E1() \
+ \
+ IRQ_DISABLE(); \
+ WL(A); \
+ WU(A); \
+ IRQ_ENABLE();
+
+#define E2() \
+ \
+ RL(A); \
+ RU(A);
+
+#define E3() \
+ \
+ IRQ_ENTER(); \
+ RL(A); \
+ L(B); \
+ U(B); \
+ RU(A); \
+ IRQ_EXIT();
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard)
+
+#include "locking-selftest-softirq.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * read-lock / write-lock recursion that is unsafe.
+ */
+
+#define E1() \
+ \
+ IRQ_DISABLE(); \
+ L(B); \
+ WL(A); \
+ WU(A); \
+ U(B); \
+ IRQ_ENABLE();
+
+#define E2() \
+ \
+ RL(A); \
+ RU(A);
+
+#define E3() \
+ \
+ IRQ_ENTER(); \
+ L(B); \
+ U(B); \
+ IRQ_EXIT();
+
+/*
+ * Generate 12 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard)
+
+#include "locking-selftest-softirq.h"
+// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map)
+# define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map)
+# define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map)
+# define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map)
+#else
+# define I_SPINLOCK(x)
+# define I_RWLOCK(x)
+# define I_MUTEX(x)
+# define I_RWSEM(x)
+#endif
+
+#define I1(x) \
+ do { \
+ I_SPINLOCK(x); \
+ I_RWLOCK(x); \
+ I_MUTEX(x); \
+ I_RWSEM(x); \
+ } while (0)
+
+#define I2(x) \
+ do { \
+ spin_lock_init(&lock_##x); \
+ rwlock_init(&rwlock_##x); \
+ mutex_init(&mutex_##x); \
+ init_rwsem(&rwsem_##x); \
+ } while (0)
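+
+/*
+ * I1() tears down the lockdep state of all four lock variants of a
+ * given name, I2() re-initializes the lock objects themselves;
+ * together they give each testcase a clean slate.
+ */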
+
+static void reset_locks(void)
+{
+ local_irq_disable();
+ I1(A); I1(B); I1(C); I1(D);
+ I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2);
+ lockdep_reset();
+ I2(A); I2(B); I2(C); I2(D);
+ init_shared_classes();
+ local_irq_enable();
+}
+
+#undef I1
+#undef I2
+
+static int testcase_total;
+static int testcase_successes;
+static int expected_testcase_failures;
+static int unexpected_testcase_failures;
+
+static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
+{
+ unsigned long saved_preempt_count = preempt_count();
+ int expected_failure = 0;
+
+ WARN_ON(irqs_disabled());
+
+ testcase_fn();
+ /*
+ * Filter out expected failures:
+ */
+#ifndef CONFIG_PROVE_LOCKING
+ if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected)
+ expected_failure = 1;
+ if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected)
+ expected_failure = 1;
+ if ((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected)
+ expected_failure = 1;
+ if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected)
+ expected_failure = 1;
+#endif
+ if (debug_locks != expected) {
+ if (expected_failure) {
+ expected_testcase_failures++;
+ printk("failed|");
+ } else {
+ unexpected_testcase_failures++;
+
+ printk("FAILED|");
+ dump_stack();
+ }
+ } else {
+ testcase_successes++;
+ printk(" ok |");
+ }
+ testcase_total++;
+
+ if (debug_locks_verbose)
+ printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+ lockclass_mask, debug_locks, expected);
+ /*
+ * Some tests (e.g. double-unlock) might corrupt the preemption
+ * count, so restore it:
+ */
+ preempt_count() = saved_preempt_count;
+#ifdef CONFIG_TRACE_IRQFLAGS
+ if (softirq_count())
+ current->softirqs_enabled = 0;
+ else
+ current->softirqs_enabled = 1;
+#endif
+
+ reset_locks();
+}
+
+static inline void print_testname(const char *testname)
+{
+ printk("%33s:", testname);
+}
+
+#define DO_TESTCASE_1(desc, name, nr) \
+ print_testname(desc"/"#nr); \
+ dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
+ printk("\n");
+
+#define DO_TESTCASE_1B(desc, name, nr) \
+ print_testname(desc"/"#nr); \
+ dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \
+ printk("\n");
+
+#define DO_TESTCASE_3(desc, name, nr) \
+ print_testname(desc"/"#nr); \
+ dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \
+ dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
+ printk("\n");
+
+#define DO_TESTCASE_3RW(desc, name, nr) \
+ print_testname(desc"/"#nr); \
+ dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
+ dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \
+ printk("\n");
+
+#define DO_TESTCASE_6(desc, name) \
+ print_testname(desc); \
+ dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
+ dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK); \
+ dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
+ dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
+ dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
+ printk("\n");
+
+#define DO_TESTCASE_6_SUCCESS(desc, name) \
+ print_testname(desc); \
+ dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN); \
+ dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
+ dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \
+ dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \
+ dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \
+ printk("\n");
+
+/*
+ * 'read' variant: rlocks must not trigger.
+ */
+#define DO_TESTCASE_6R(desc, name) \
+ print_testname(desc); \
+ dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \
+ dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \
+ dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \
+ dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \
+ dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \
+ dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \
+ printk("\n");
+
+#define DO_TESTCASE_2I(desc, name, nr) \
+ DO_TESTCASE_1("hard-"desc, name##_hard, nr); \
+ DO_TESTCASE_1("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_2IB(desc, name, nr) \
+ DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \
+ DO_TESTCASE_1B("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_6I(desc, name, nr) \
+ DO_TESTCASE_3("hard-"desc, name##_hard, nr); \
+ DO_TESTCASE_3("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_6IRW(desc, name, nr) \
+ DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \
+ DO_TESTCASE_3RW("soft-"desc, name##_soft, nr);
+
+#define DO_TESTCASE_2x3(desc, name) \
+ DO_TESTCASE_3(desc, name, 12); \
+ DO_TESTCASE_3(desc, name, 21);
+
+#define DO_TESTCASE_2x6(desc, name) \
+ DO_TESTCASE_6I(desc, name, 12); \
+ DO_TESTCASE_6I(desc, name, 21);
+
+#define DO_TESTCASE_6x2(desc, name) \
+ DO_TESTCASE_2I(desc, name, 123); \
+ DO_TESTCASE_2I(desc, name, 132); \
+ DO_TESTCASE_2I(desc, name, 213); \
+ DO_TESTCASE_2I(desc, name, 231); \
+ DO_TESTCASE_2I(desc, name, 312); \
+ DO_TESTCASE_2I(desc, name, 321);
+
+#define DO_TESTCASE_6x2B(desc, name) \
+ DO_TESTCASE_2IB(desc, name, 123); \
+ DO_TESTCASE_2IB(desc, name, 132); \
+ DO_TESTCASE_2IB(desc, name, 213); \
+ DO_TESTCASE_2IB(desc, name, 231); \
+ DO_TESTCASE_2IB(desc, name, 312); \
+ DO_TESTCASE_2IB(desc, name, 321);
+
+#define DO_TESTCASE_6x6(desc, name) \
+ DO_TESTCASE_6I(desc, name, 123); \
+ DO_TESTCASE_6I(desc, name, 132); \
+ DO_TESTCASE_6I(desc, name, 213); \
+ DO_TESTCASE_6I(desc, name, 231); \
+ DO_TESTCASE_6I(desc, name, 312); \
+ DO_TESTCASE_6I(desc, name, 321);
+
+#define DO_TESTCASE_6x6RW(desc, name) \
+ DO_TESTCASE_6IRW(desc, name, 123); \
+ DO_TESTCASE_6IRW(desc, name, 132); \
+ DO_TESTCASE_6IRW(desc, name, 213); \
+ DO_TESTCASE_6IRW(desc, name, 231); \
+ DO_TESTCASE_6IRW(desc, name, 312); \
+ DO_TESTCASE_6IRW(desc, name, 321);
+
+
+void locking_selftest(void)
+{
+ /*
+ * Got a locking failure before the selftest ran?
+ */
+ if (!debug_locks) {
+ printk("----------------------------------\n");
+ printk("| Locking API testsuite disabled |\n");
+ printk("----------------------------------\n");
+ return;
+ }
+
+ /*
+ * Run the testsuite:
+ */
+ printk("------------------------\n");
+ printk("| Locking API testsuite:\n");
+ printk("----------------------------------------------------------------------------\n");
+ printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n");
+ printk(" --------------------------------------------------------------------------\n");
+
+ init_shared_classes();
+ debug_locks_silent = !debug_locks_verbose;
+
+ DO_TESTCASE_6R("A-A deadlock", AA);
+ DO_TESTCASE_6R("A-B-B-A deadlock", ABBA);
+ DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA);
+ DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC);
+ DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA);
+ DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA);
+ DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA);
+ DO_TESTCASE_6("double unlock", double_unlock);
+ DO_TESTCASE_6("initialize held", init_held);
+ DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order);
+
+ printk(" --------------------------------------------------------------------------\n");
+ print_testname("recursive read-lock");
+ printk(" |");
+ dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK);
+ printk(" |");
+ dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM);
+ printk("\n");
+
+ print_testname("recursive read-lock #2");
+ printk(" |");
+ dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK);
+ printk(" |");
+ dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM);
+ printk("\n");
+
+ print_testname("mixed read-write-lock");
+ printk(" |");
+ dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK);
+ printk(" |");
+ dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM);
+ printk("\n");
+
+ print_testname("mixed write-read-lock");
+ printk(" |");
+ dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK);
+ printk(" |");
+ dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM);
+ printk("\n");
+
+ printk(" --------------------------------------------------------------------------\n");
+
+ /*
+ * irq-context testcases:
+ */
+ DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1);
+ DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A);
+ DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B);
+ DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3);
+ DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
+ DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
+
+ DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
+// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
+
+ if (unexpected_testcase_failures) {
+ printk("-----------------------------------------------------------------\n");
+ debug_locks = 0;
+ printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n",
+ unexpected_testcase_failures, testcase_total);
+ printk("-----------------------------------------------------------------\n");
+ } else if (expected_testcase_failures && testcase_successes) {
+ printk("--------------------------------------------------------\n");
+ printk("%3d out of %3d testcases failed, as expected. |\n",
+ expected_testcase_failures, testcase_total);
+ printk("----------------------------------------------------\n");
+ debug_locks = 1;
+ } else if (expected_testcase_failures && !testcase_successes) {
+ printk("--------------------------------------------------------\n");
+ printk("All %3d testcases failed, as expected. |\n",
+ expected_testcase_failures);
+ printk("----------------------------------------\n");
+ debug_locks = 1;
+ } else {
+ printk("-------------------------------------------------------\n");
+ printk("Good, all %3d testcases passed! |\n",
+ testcase_successes);
+ printk("---------------------------------\n");
+ debug_locks = 1;
+ }
+ debug_locks_silent = 0;
+}
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
new file mode 100644
index 00000000..270de9d3
--- /dev/null
+++ b/lib/lru_cache.c
@@ -0,0 +1,560 @@
+/*
+ lru_cache.c
+
+ This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+ Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
+ Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+ Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+ drbd is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ drbd is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with drbd; see the file COPYING. If not, write to
+ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+#include <linux/string.h> /* for memset */
+#include <linux/seq_file.h> /* for seq_printf */
+#include <linux/lru_cache.h>
+
+MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
+ "Lars Ellenberg <lars@linbit.com>");
+MODULE_DESCRIPTION("lru_cache - Track sets of hot objects");
+MODULE_LICENSE("GPL");
+
+/* this is a developer's aid only.
+ * it catches concurrent access (lack of locking on the user's part) */
+#define PARANOIA_ENTRY() do { \
+ BUG_ON(!lc); \
+ BUG_ON(!lc->nr_elements); \
+ BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \
+} while (0)
+
+#define RETURN(x...) do { \
+ clear_bit(__LC_PARANOIA, &lc->flags); \
+ smp_mb__after_clear_bit(); return x; } while (0)
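+
+/*
+ * Every function entered through PARANOIA_ENTRY() must leave through
+ * RETURN(), which clears the paranoia bit again; otherwise the next
+ * PARANOIA_ENTRY() would trip the concurrent-access BUG_ON().
+ */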
+
+/* BUG() if e is not one of the elements tracked by lc */
+#define PARANOIA_LC_ELEMENT(lc, e) do { \
+ struct lru_cache *lc_ = (lc); \
+ struct lc_element *e_ = (e); \
+ unsigned i = e_->lc_index; \
+ BUG_ON(i >= lc_->nr_elements); \
+ BUG_ON(lc_->lc_element[i] != e_); } while (0)
+
+/**
+ * lc_create - prepares to track objects in an active set
+ * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @cache: kmem_cache from which the tracked objects are allocated
+ * @e_count: number of elements allowed to be active simultaneously
+ * @e_size: size of the tracked objects
+ * @e_off: offset to the &struct lc_element member in a tracked object
+ *
+ * Returns a pointer to a newly initialized struct lru_cache on success,
+ * or NULL on (allocation) failure.
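+ *
+ * Illustrative setup (struct foo and foo_cache are hypothetical):
+ *
+ *	struct foo {
+ *		...
+ *		struct lc_element lce;
+ *	};
+ *
+ *	lc = lc_create("foo", foo_cache, 64, sizeof(struct foo),
+ *		       offsetof(struct foo, lce));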
+ */
+struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+ unsigned e_count, size_t e_size, size_t e_off)
+{
+ struct hlist_head *slot = NULL;
+ struct lc_element **element = NULL;
+ struct lru_cache *lc;
+ struct lc_element *e;
+ unsigned cache_obj_size = kmem_cache_size(cache);
+ unsigned i;
+
+ WARN_ON(cache_obj_size < e_size);
+ if (cache_obj_size < e_size)
+ return NULL;
+
+ /* e_count too big; would probably fail the allocation below anyway.
+ * for typical use cases, e_count should be a few thousand at most. */
+ if (e_count > LC_MAX_ACTIVE)
+ return NULL;
+
+ slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL);
+ if (!slot)
+ goto out_fail;
+ element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL);
+ if (!element)
+ goto out_fail;
+
+ lc = kzalloc(sizeof(*lc), GFP_KERNEL);
+ if (!lc)
+ goto out_fail;
+
+ INIT_LIST_HEAD(&lc->in_use);
+ INIT_LIST_HEAD(&lc->lru);
+ INIT_LIST_HEAD(&lc->free);
+
+ lc->name = name;
+ lc->element_size = e_size;
+ lc->element_off = e_off;
+ lc->nr_elements = e_count;
+ lc->new_number = LC_FREE;
+ lc->lc_cache = cache;
+ lc->lc_element = element;
+ lc->lc_slot = slot;
+
+ /* preallocate all objects */
+ for (i = 0; i < e_count; i++) {
+ void *p = kmem_cache_alloc(cache, GFP_KERNEL);
+ if (!p)
+ break;
+ memset(p, 0, lc->element_size);
+ e = p + e_off;
+ e->lc_index = i;
+ e->lc_number = LC_FREE;
+ list_add(&e->list, &lc->free);
+ element[i] = e;
+ }
+ if (i == e_count)
+ return lc;
+
+ /* else: could not allocate all elements, give up */
+ for (i--; i; i--) {
+ void *p = element[i];
+ kmem_cache_free(cache, p - e_off);
+ }
+ kfree(lc);
+out_fail:
+ kfree(element);
+ kfree(slot);
+ return NULL;
+}
+
+void lc_free_by_index(struct lru_cache *lc, unsigned i)
+{
+ void *p = lc->lc_element[i];
+ WARN_ON(!p);
+ if (p) {
+ p -= lc->element_off;
+ kmem_cache_free(lc->lc_cache, p);
+ }
+}
+
+/**
+ * lc_destroy - frees memory allocated by lc_create()
+ * @lc: the lru cache to destroy
+ */
+void lc_destroy(struct lru_cache *lc)
+{
+ unsigned i;
+ if (!lc)
+ return;
+ for (i = 0; i < lc->nr_elements; i++)
+ lc_free_by_index(lc, i);
+ kfree(lc->lc_element);
+ kfree(lc->lc_slot);
+ kfree(lc);
+}
+
+/**
+ * lc_reset - does a full reset for @lc and the hash table slots.
+ * @lc: the lru cache to operate on
+ *
+ * It is roughly the equivalent of re-allocating a fresh lru_cache object,
+ * basically a shortcut for lc_destroy(lc); lc = lc_create(...);
+ */
+void lc_reset(struct lru_cache *lc)
+{
+ unsigned i;
+
+ INIT_LIST_HEAD(&lc->in_use);
+ INIT_LIST_HEAD(&lc->lru);
+ INIT_LIST_HEAD(&lc->free);
+ lc->used = 0;
+ lc->hits = 0;
+ lc->misses = 0;
+ lc->starving = 0;
+ lc->dirty = 0;
+ lc->changed = 0;
+ lc->flags = 0;
+ lc->changing_element = NULL;
+ lc->new_number = LC_FREE;
+ memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
+
+ for (i = 0; i < lc->nr_elements; i++) {
+ struct lc_element *e = lc->lc_element[i];
+ void *p = e;
+ p -= lc->element_off;
+ memset(p, 0, lc->element_size);
+ /* re-init it */
+ e->lc_index = i;
+ e->lc_number = LC_FREE;
+ list_add(&e->list, &lc->free);
+ }
+}
+
+/**
+ * lc_seq_printf_stats - print stats about @lc into @seq
+ * @seq: the seq_file to print into
+ * @lc: the lru cache to print statistics of
+ */
+size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
+{
+ /* NOTE:
+ * total calls to lc_get are
+ * (starving + hits + misses)
+ * misses include the "dirty" count (an update from another thread
+ * was in progress) and "changed", when the miss in fact led to a
+ * successful update of the cache.
+ */
+ return seq_printf(seq, "\t%s: used:%u/%u "
+ "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+ lc->name, lc->used, lc->nr_elements,
+ lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+}
+
+static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
+{
+ return lc->lc_slot + (enr % lc->nr_elements);
+}
+
+
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+{
+ struct hlist_node *n;
+ struct lc_element *e;
+
+ BUG_ON(!lc);
+ BUG_ON(!lc->nr_elements);
+ hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
+ if (e->lc_number == enr)
+ return e;
+ }
+ return NULL;
+}
+
+/* returned element will be "recycled" immediately */
+static struct lc_element *lc_evict(struct lru_cache *lc)
+{
+ struct list_head *n;
+ struct lc_element *e;
+
+ if (list_empty(&lc->lru))
+ return NULL;
+
+ n = lc->lru.prev;
+ e = list_entry(n, struct lc_element, list);
+
+ PARANOIA_LC_ELEMENT(lc, e);
+
+ list_del(&e->list);
+ hlist_del(&e->colision);
+ return e;
+}
+
+/**
+ * lc_del - removes an element from the cache
+ * @lc: The lru_cache object
+ * @e: The element to remove
+ *
+ * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list,
+ * sets @e->lc_number to %LC_FREE.
+ */
+void lc_del(struct lru_cache *lc, struct lc_element *e)
+{
+ PARANOIA_ENTRY();
+ PARANOIA_LC_ELEMENT(lc, e);
+ BUG_ON(e->refcnt);
+
+ e->lc_number = LC_FREE;
+ hlist_del_init(&e->colision);
+ list_move(&e->list, &lc->free);
+ RETURN();
+}
+
+static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+{
+ struct list_head *n;
+
+ if (list_empty(&lc->free))
+ return lc_evict(lc);
+
+ n = lc->free.next;
+ list_del(n);
+ return list_entry(n, struct lc_element, list);
+}
+
+static int lc_unused_element_available(struct lru_cache *lc)
+{
+ if (!list_empty(&lc->free))
+ return 1; /* something on the free list */
+ if (!list_empty(&lc->lru))
+ return 1; /* something to evict */
+
+ return 0;
+}
+
+
+/**
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * In case the requested number is not present, it needs to be added to the
+ * cache. Therefore it is possible that another element is evicted from
+ * the cache. In either case, the user is notified, so they can e.g. keep
+ * a persistent log of the cache changes, and thereby of the objects in use.
+ *
+ * Return values:
+ * NULL
+ * The cache was marked %LC_STARVING,
+ * or the requested label was not in the active set
+ * and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ * Or no unused or free element could be recycled (@lc will be marked as
+ * %LC_STARVING, blocking further lc_get() operations).
+ *
+ * pointer to the element with the REQUESTED element number.
+ * In this case, it can be used right away
+ *
+ * pointer to an UNUSED element with some different element number,
+ * where that different number may also be %LC_FREE.
+ *
+ * In this case, the cache is marked %LC_DIRTY (blocking further changes),
+ * and the returned element pointer is removed from the lru list and
+ * hash collision chains. The user should now do whatever housekeeping
+ * is necessary, then call lc_changed(lc, element_pointer) to finish
+ * the change.
+ *
+ * NOTE: The user needs to check the lc_number on EACH use, so they
+ * recognize any cache set change.
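+ *
+ * Illustrative call sequence (the caller is assumed to serialize all
+ * access to @lc):
+ *
+ *	e = lc_get(lc, enr);
+ *	if (e && e->lc_number != enr) {
+ *		record the label change, then
+ *		lc_changed(lc, e);
+ *	}
+ *	... use the object, lc_put(lc, e) when done ...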
+ */
+struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
+{
+ struct lc_element *e;
+
+ PARANOIA_ENTRY();
+ if (lc->flags & LC_STARVING) {
+ ++lc->starving;
+ RETURN(NULL);
+ }
+
+ e = lc_find(lc, enr);
+ if (e) {
+ ++lc->hits;
+ if (e->refcnt++ == 0)
+ lc->used++;
+ list_move(&e->list, &lc->in_use); /* Not evictable... */
+ RETURN(e);
+ }
+
+ ++lc->misses;
+
+ /* In case there is nothing available and we can not kick out
+ * the LRU element, we have to wait ...
+ */
+ if (!lc_unused_element_available(lc)) {
+ __set_bit(__LC_STARVING, &lc->flags);
+ RETURN(NULL);
+ }
+
+ /* it was not present in the active set.
+ * we are going to recycle an unused (or even "free") element.
+ * user may need to commit a transaction to record that change.
+ * we serialize on flags & TF_DIRTY */
+ if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
+ ++lc->dirty;
+ RETURN(NULL);
+ }
+
+ e = lc_get_unused_element(lc);
+ BUG_ON(!e);
+
+ clear_bit(__LC_STARVING, &lc->flags);
+ BUG_ON(++e->refcnt != 1);
+ lc->used++;
+
+ lc->changing_element = e;
+ lc->new_number = enr;
+
+ RETURN(e);
+}
+
+/* similar to lc_get,
+ * but only gets a new reference on an existing element.
+ * you either get the requested element, or NULL.
+ * may be consolidated with lc_get() into one function later.
+ */
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+{
+ struct lc_element *e;
+
+ PARANOIA_ENTRY();
+ if (lc->flags & LC_STARVING) {
+ ++lc->starving;
+ RETURN(NULL);
+ }
+
+ e = lc_find(lc, enr);
+ if (e) {
+ ++lc->hits;
+ if (e->refcnt++ == 0)
+ lc->used++;
+ list_move(&e->list, &lc->in_use); /* Not evictable... */
+ }
+ RETURN(e);
+}
+
+/**
+ * lc_changed - tell @lc that the change has been recorded
+ * @lc: the lru cache to operate on
+ * @e: the element pending label change
+ */
+void lc_changed(struct lru_cache *lc, struct lc_element *e)
+{
+ PARANOIA_ENTRY();
+ BUG_ON(e != lc->changing_element);
+ PARANOIA_LC_ELEMENT(lc, e);
+ ++lc->changed;
+ e->lc_number = lc->new_number;
+ list_add(&e->list, &lc->in_use);
+ hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
+ lc->changing_element = NULL;
+ lc->new_number = LC_FREE;
+ clear_bit(__LC_DIRTY, &lc->flags);
+ smp_mb__after_clear_bit();
+ RETURN();
+}
+
+
+/**
+ * lc_put - give up refcnt of @e
+ * @lc: the lru cache to operate on
+ * @e: the element to put
+ *
+ * If refcnt reaches zero, the element is moved to the lru list,
+ * and a %LC_STARVING (if set) is cleared.
+ * Returns the new (post-decrement) refcnt.
+ */
+unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
+{
+ PARANOIA_ENTRY();
+ PARANOIA_LC_ELEMENT(lc, e);
+ BUG_ON(e->refcnt == 0);
+ BUG_ON(e == lc->changing_element);
+ if (--e->refcnt == 0) {
+ /* move it to the front of LRU. */
+ list_move(&e->list, &lc->lru);
+ lc->used--;
+ clear_bit(__LC_STARVING, &lc->flags);
+ smp_mb__after_clear_bit();
+ }
+ RETURN(e->refcnt);
+}
+
+/**
+ * lc_element_by_index
+ * @lc: the lru cache to operate on
+ * @i: the index of the element to return
+ */
+struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i)
+{
+ BUG_ON(i >= lc->nr_elements);
+ BUG_ON(lc->lc_element[i] == NULL);
+ BUG_ON(lc->lc_element[i]->lc_index != i);
+ return lc->lc_element[i];
+}
+
+/**
+ * lc_index_of
+ * @lc: the lru cache to operate on
+ * @e: the element to query for its index position in lc->lc_element
+ */
+unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
+{
+ PARANOIA_LC_ELEMENT(lc, e);
+ return e->lc_index;
+}
+
+/**
+ * lc_set - associate index with label
+ * @lc: the lru cache to operate on
+ * @enr: the label to set
+ * @index: the element index to associate label with.
+ *
+ * Used to initialize the active set to some previously recorded state.
+ */
+void lc_set(struct lru_cache *lc, unsigned int enr, int index)
+{
+ struct lc_element *e;
+
+ if (index < 0 || index >= lc->nr_elements)
+ return;
+
+ e = lc_element_by_index(lc, index);
+ e->lc_number = enr;
+
+ hlist_del_init(&e->colision);
+ hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+ list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+}
+
+/**
+ * lc_seq_dump_details - Dump a complete LRU cache to seq in textual form.
+ * @lc: the lru cache to operate on
+ * @seq: the &struct seq_file pointer to seq_printf into
+ * @utext: user supplied "heading" or other info
+ * @detail: function pointer the user provides to dump further details
+ * of the object the lc_element is embedded in.
+ */
+void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
+ void (*detail) (struct seq_file *, struct lc_element *))
+{
+ unsigned int nr_elements = lc->nr_elements;
+ struct lc_element *e;
+ int i;
+
+ seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext);
+ for (i = 0; i < nr_elements; i++) {
+ e = lc_element_by_index(lc, i);
+ if (e->lc_number == LC_FREE) {
+ seq_printf(seq, "\t%2d: FREE\n", i);
+ } else {
+ seq_printf(seq, "\t%2d: %4u %4u ", i,
+ e->lc_number, e->refcnt);
+ detail(seq, e);
+ }
+ }
+}
+
+EXPORT_SYMBOL(lc_create);
+EXPORT_SYMBOL(lc_reset);
+EXPORT_SYMBOL(lc_destroy);
+EXPORT_SYMBOL(lc_set);
+EXPORT_SYMBOL(lc_del);
+EXPORT_SYMBOL(lc_try_get);
+EXPORT_SYMBOL(lc_find);
+EXPORT_SYMBOL(lc_get);
+EXPORT_SYMBOL(lc_put);
+EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_element_by_index);
+EXPORT_SYMBOL(lc_index_of);
+EXPORT_SYMBOL(lc_seq_printf_stats);
+EXPORT_SYMBOL(lc_seq_dump_details);
diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile
new file mode 100644
index 00000000..e764116e
--- /dev/null
+++ b/lib/lzo/Makefile
@@ -0,0 +1,5 @@
+lzo_compress-objs := lzo1x_compress.o
+lzo_decompress-objs := lzo1x_decompress.o
+
+obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o
+obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o
diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
new file mode 100644
index 00000000..a6040990
--- /dev/null
+++ b/lib/lzo/lzo1x_compress.c
@@ -0,0 +1,226 @@
+/*
+ * LZO1X Compressor from MiniLZO
+ *
+ * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *
+ * The full LZO package can be found at:
+ * http://www.oberhumer.com/opensource/lzo/
+ *
+ * Changed for kernel use by:
+ * Nitin Gupta <nitingupta910@gmail.com>
+ * Richard Purdie <rpurdie@openedhand.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/lzo.h>
+#include <asm/unaligned.h>
+#include "lzodefs.h"
+
+static noinline size_t
+_lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
+ unsigned char *out, size_t *out_len, void *wrkmem)
+{
+ const unsigned char * const in_end = in + in_len;
+ const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5;
+ const unsigned char ** const dict = wrkmem;
+ const unsigned char *ip = in, *ii = ip;
+ const unsigned char *end, *m, *m_pos;
+ size_t m_off, m_len, dindex;
+ unsigned char *op = out;
+
+ ip += 4;
+
+ for (;;) {
+ dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK;
+ m_pos = dict[dindex];
+
+ if (m_pos < in)
+ goto literal;
+
+ if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
+ goto literal;
+
+ m_off = ip - m_pos;
+ if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
+ goto try_match;
+
+ dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f);
+ m_pos = dict[dindex];
+
+ if (m_pos < in)
+ goto literal;
+
+ if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET))
+ goto literal;
+
+ m_off = ip - m_pos;
+ if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3])
+ goto try_match;
+
+ goto literal;
+
+try_match:
+ if (get_unaligned((const unsigned short *)m_pos)
+ == get_unaligned((const unsigned short *)ip)) {
+ if (likely(m_pos[2] == ip[2]))
+ goto match;
+ }
+
+literal:
+ dict[dindex] = ip;
+ ++ip;
+ if (unlikely(ip >= ip_end))
+ break;
+ continue;
+
+match:
+ dict[dindex] = ip;
+ if (ip != ii) {
+ size_t t = ip - ii;
+
+ if (t <= 3) {
+ op[-2] |= t;
+ } else if (t <= 18) {
+ *op++ = (t - 3);
+ } else {
+ size_t tt = t - 18;
+
+ *op++ = 0;
+ while (tt > 255) {
+ tt -= 255;
+ *op++ = 0;
+ }
+ *op++ = tt;
+ }
+ do {
+ *op++ = *ii++;
+ } while (--t > 0);
+ }
+
+ ip += 3;
+ if (m_pos[3] != *ip++ || m_pos[4] != *ip++
+ || m_pos[5] != *ip++ || m_pos[6] != *ip++
+ || m_pos[7] != *ip++ || m_pos[8] != *ip++) {
+ --ip;
+ m_len = ip - ii;
+
+ if (m_off <= M2_MAX_OFFSET) {
+ m_off -= 1;
+ *op++ = (((m_len - 1) << 5)
+ | ((m_off & 7) << 2));
+ *op++ = (m_off >> 3);
+ } else if (m_off <= M3_MAX_OFFSET) {
+ m_off -= 1;
+ *op++ = (M3_MARKER | (m_len - 2));
+ goto m3_m4_offset;
+ } else {
+ m_off -= 0x4000;
+
+ *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11)
+ | (m_len - 2));
+ goto m3_m4_offset;
+ }
+ } else {
+ end = in_end;
+ m = m_pos + M2_MAX_LEN + 1;
+
+ while (ip < end && *m == *ip) {
+ m++;
+ ip++;
+ }
+ m_len = ip - ii;
+
+ if (m_off <= M3_MAX_OFFSET) {
+ m_off -= 1;
+ if (m_len <= 33) {
+ *op++ = (M3_MARKER | (m_len - 2));
+ } else {
+ m_len -= 33;
+ *op++ = M3_MARKER | 0;
+ goto m3_m4_len;
+ }
+ } else {
+ m_off -= 0x4000;
+ if (m_len <= M4_MAX_LEN) {
+ *op++ = (M4_MARKER
+ | ((m_off & 0x4000) >> 11)
+ | (m_len - 2));
+ } else {
+ m_len -= M4_MAX_LEN;
+ *op++ = (M4_MARKER
+ | ((m_off & 0x4000) >> 11));
+m3_m4_len:
+ while (m_len > 255) {
+ m_len -= 255;
+ *op++ = 0;
+ }
+
+ *op++ = (m_len);
+ }
+ }
+m3_m4_offset:
+ *op++ = ((m_off & 63) << 2);
+ *op++ = (m_off >> 6);
+ }
+
+ ii = ip;
+ if (unlikely(ip >= ip_end))
+ break;
+ }
+
+ *out_len = op - out;
+ return in_end - ii;
+}
+
+int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out,
+ size_t *out_len, void *wrkmem)
+{
+ const unsigned char *ii;
+ unsigned char *op = out;
+ size_t t;
+
+ if (unlikely(in_len <= M2_MAX_LEN + 5)) {
+ t = in_len;
+ } else {
+ t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem);
+ op += *out_len;
+ }
+
+ if (t > 0) {
+ ii = in + in_len - t;
+
+ if (op == out && t <= 238) {
+ *op++ = (17 + t);
+ } else if (t <= 3) {
+ op[-2] |= t;
+ } else if (t <= 18) {
+ *op++ = (t - 3);
+ } else {
+ size_t tt = t - 18;
+
+ *op++ = 0;
+ while (tt > 255) {
+ tt -= 255;
+ *op++ = 0;
+ }
+
+ *op++ = tt;
+ }
+ do {
+ *op++ = *ii++;
+ } while (--t > 0);
+ }
+
+ *op++ = M4_MARKER | 1;
+ *op++ = 0;
+ *op++ = 0;
+
+ *out_len = op - out;
+ return LZO_E_OK;
+}
+EXPORT_SYMBOL_GPL(lzo1x_1_compress);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZO1X-1 Compressor");
+
diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c
new file mode 100644
index 00000000..f2fd0985
--- /dev/null
+++ b/lib/lzo/lzo1x_decompress.c
@@ -0,0 +1,255 @@
+/*
+ * LZO1X Decompressor from MiniLZO
+ *
+ * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *
+ * The full LZO package can be found at:
+ * http://www.oberhumer.com/opensource/lzo/
+ *
+ * Changed for kernel use by:
+ * Nitin Gupta <nitingupta910@gmail.com>
+ * Richard Purdie <rpurdie@openedhand.com>
+ */
+
+#ifndef STATIC
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+
+#include <asm/unaligned.h>
+#include <linux/lzo.h>
+#include "lzodefs.h"
+
+#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x))
+#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x))
+#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op)
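+
+/*
+ * The HAVE_* macros test for insufficient space: they evaluate true
+ * when fewer than x bytes remain in the input/output buffer, or when
+ * a look-behind position falls outside the output produced so far.
+ */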
+
+#define COPY4(dst, src) \
+ put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
+
+int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
+ unsigned char *out, size_t *out_len)
+{
+ const unsigned char * const ip_end = in + in_len;
+ unsigned char * const op_end = out + *out_len;
+ const unsigned char *ip = in, *m_pos;
+ unsigned char *op = out;
+ size_t t;
+
+ *out_len = 0;
+
+ if (*ip > 17) {
+ t = *ip++ - 17;
+ if (t < 4)
+ goto match_next;
+ if (HAVE_OP(t, op_end, op))
+ goto output_overrun;
+ if (HAVE_IP(t + 1, ip_end, ip))
+ goto input_overrun;
+ do {
+ *op++ = *ip++;
+ } while (--t > 0);
+ goto first_literal_run;
+ }
+
+	while (ip < ip_end) {
+ t = *ip++;
+ if (t >= 16)
+ goto match;
+ if (t == 0) {
+ if (HAVE_IP(1, ip_end, ip))
+ goto input_overrun;
+ while (*ip == 0) {
+ t += 255;
+ ip++;
+ if (HAVE_IP(1, ip_end, ip))
+ goto input_overrun;
+ }
+ t += 15 + *ip++;
+ }
+ if (HAVE_OP(t + 3, op_end, op))
+ goto output_overrun;
+ if (HAVE_IP(t + 4, ip_end, ip))
+ goto input_overrun;
+
+ COPY4(op, ip);
+ op += 4;
+ ip += 4;
+ if (--t > 0) {
+ if (t >= 4) {
+ do {
+ COPY4(op, ip);
+ op += 4;
+ ip += 4;
+ t -= 4;
+ } while (t >= 4);
+ if (t > 0) {
+ do {
+ *op++ = *ip++;
+ } while (--t > 0);
+ }
+ } else {
+ do {
+ *op++ = *ip++;
+ } while (--t > 0);
+ }
+ }
+
+first_literal_run:
+ t = *ip++;
+ if (t >= 16)
+ goto match;
+ m_pos = op - (1 + M2_MAX_OFFSET);
+ m_pos -= t >> 2;
+ m_pos -= *ip++ << 2;
+
+ if (HAVE_LB(m_pos, out, op))
+ goto lookbehind_overrun;
+
+ if (HAVE_OP(3, op_end, op))
+ goto output_overrun;
+ *op++ = *m_pos++;
+ *op++ = *m_pos++;
+ *op++ = *m_pos;
+
+ goto match_done;
+
+ do {
+match:
+ if (t >= 64) {
+ m_pos = op - 1;
+ m_pos -= (t >> 2) & 7;
+ m_pos -= *ip++ << 3;
+ t = (t >> 5) - 1;
+ if (HAVE_LB(m_pos, out, op))
+ goto lookbehind_overrun;
+ if (HAVE_OP(t + 3 - 1, op_end, op))
+ goto output_overrun;
+ goto copy_match;
+ } else if (t >= 32) {
+ t &= 31;
+ if (t == 0) {
+ if (HAVE_IP(1, ip_end, ip))
+ goto input_overrun;
+ while (*ip == 0) {
+ t += 255;
+ ip++;
+ if (HAVE_IP(1, ip_end, ip))
+ goto input_overrun;
+ }
+ t += 31 + *ip++;
+ }
+ m_pos = op - 1;
+ m_pos -= get_unaligned_le16(ip) >> 2;
+ ip += 2;
+ } else if (t >= 16) {
+ m_pos = op;
+ m_pos -= (t & 8) << 11;
+
+ t &= 7;
+ if (t == 0) {
+ if (HAVE_IP(1, ip_end, ip))
+ goto input_overrun;
+ while (*ip == 0) {
+ t += 255;
+ ip++;
+ if (HAVE_IP(1, ip_end, ip))
+ goto input_overrun;
+ }
+ t += 7 + *ip++;
+ }
+ m_pos -= get_unaligned_le16(ip) >> 2;
+ ip += 2;
+ if (m_pos == op)
+ goto eof_found;
+ m_pos -= 0x4000;
+ } else {
+ m_pos = op - 1;
+ m_pos -= t >> 2;
+ m_pos -= *ip++ << 2;
+
+ if (HAVE_LB(m_pos, out, op))
+ goto lookbehind_overrun;
+ if (HAVE_OP(2, op_end, op))
+ goto output_overrun;
+
+ *op++ = *m_pos++;
+ *op++ = *m_pos;
+ goto match_done;
+ }
+
+ if (HAVE_LB(m_pos, out, op))
+ goto lookbehind_overrun;
+ if (HAVE_OP(t + 3 - 1, op_end, op))
+ goto output_overrun;
+
+ if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) {
+ COPY4(op, m_pos);
+ op += 4;
+ m_pos += 4;
+ t -= 4 - (3 - 1);
+ do {
+ COPY4(op, m_pos);
+ op += 4;
+ m_pos += 4;
+ t -= 4;
+ } while (t >= 4);
+ if (t > 0)
+ do {
+ *op++ = *m_pos++;
+ } while (--t > 0);
+ } else {
+copy_match:
+ *op++ = *m_pos++;
+ *op++ = *m_pos++;
+ do {
+ *op++ = *m_pos++;
+ } while (--t > 0);
+ }
+match_done:
+ t = ip[-2] & 3;
+ if (t == 0)
+ break;
+match_next:
+ if (HAVE_OP(t, op_end, op))
+ goto output_overrun;
+ if (HAVE_IP(t + 1, ip_end, ip))
+ goto input_overrun;
+
+ *op++ = *ip++;
+ if (t > 1) {
+ *op++ = *ip++;
+ if (t > 2)
+ *op++ = *ip++;
+ }
+
+ t = *ip++;
+ } while (ip < ip_end);
+ }
+
+ *out_len = op - out;
+ return LZO_E_EOF_NOT_FOUND;
+
+eof_found:
+ *out_len = op - out;
+ return (ip == ip_end ? LZO_E_OK :
+ (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN));
+input_overrun:
+ *out_len = op - out;
+ return LZO_E_INPUT_OVERRUN;
+
+output_overrun:
+ *out_len = op - out;
+ return LZO_E_OUTPUT_OVERRUN;
+
+lookbehind_overrun:
+ *out_len = op - out;
+ return LZO_E_LOOKBEHIND_OVERRUN;
+}
+#ifndef STATIC
+EXPORT_SYMBOL_GPL(lzo1x_decompress_safe);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZO1X Decompressor");
+
+#endif
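+
+/*
+ * Usage sketch (not part of the original file): how a caller would
+ * typically pair lzo1x_1_compress() with lzo1x_decompress_safe().  The
+ * names req_buf/req_len are hypothetical; wrkmem must be
+ * LZO1X_1_MEM_COMPRESS bytes, and the destination is sized with
+ * lzo1x_worst_compress() since incompressible data can expand:
+ *
+ *	void *wrkmem = kmalloc(LZO1X_1_MEM_COMPRESS, GFP_KERNEL);
+ *	unsigned char *dst = kmalloc(lzo1x_worst_compress(req_len), GFP_KERNEL);
+ *	size_t dst_len, out_len = req_len;
+ *
+ *	if (lzo1x_1_compress(req_buf, req_len, dst, &dst_len, wrkmem) == LZO_E_OK &&
+ *	    lzo1x_decompress_safe(dst, dst_len, req_buf, &out_len) == LZO_E_OK)
+ *		BUG_ON(out_len != req_len);	/ round trip must be lossless /
+ */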
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h
new file mode 100644
index 00000000..b6d482c4
--- /dev/null
+++ b/lib/lzo/lzodefs.h
@@ -0,0 +1,43 @@
+/*
+ * lzodefs.h -- architecture, OS and compiler specific defines
+ *
+ * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com>
+ *
+ * The full LZO package can be found at:
+ * http://www.oberhumer.com/opensource/lzo/
+ *
+ * Changed for kernel use by:
+ * Nitin Gupta <nitingupta910@gmail.com>
+ * Richard Purdie <rpurdie@openedhand.com>
+ */
+
+#define LZO_VERSION 0x2020
+#define LZO_VERSION_STRING "2.02"
+#define LZO_VERSION_DATE "Oct 17 2005"
+
+#define M1_MAX_OFFSET 0x0400
+#define M2_MAX_OFFSET 0x0800
+#define M3_MAX_OFFSET 0x4000
+#define M4_MAX_OFFSET 0xbfff
+
+#define M1_MIN_LEN 2
+#define M1_MAX_LEN 2
+#define M2_MIN_LEN 3
+#define M2_MAX_LEN 8
+#define M3_MIN_LEN 3
+#define M3_MAX_LEN 33
+#define M4_MIN_LEN 3
+#define M4_MAX_LEN 9
+
+#define M1_MARKER 0
+#define M2_MARKER 64
+#define M3_MARKER 32
+#define M4_MARKER 16
+
+#define D_BITS 14
+#define D_MASK ((1u << D_BITS) - 1)
+#define D_HIGH ((D_MASK >> 1) + 1)
+
+#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \
+ << (s1)) ^ (p)[0])
+#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0])
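+
+/*
+ * Illustration (not part of the original header): the compressor hashes
+ * the three bytes at p into a dictionary slot by mixing them with DX3
+ * and masking the result down to D_BITS bits.  The shift constants here
+ * are illustrative only, not necessarily the ones the compressor uses:
+ *
+ *	t = DX3(p, 5, 5, 6) & D_MASK;	(0 <= t <= D_MASK, i.e. t < 2^14)
+ */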
diff --git a/lib/nlattr.c b/lib/nlattr.c
new file mode 100644
index 00000000..c4706eb9
--- /dev/null
+++ b/lib/nlattr.c
@@ -0,0 +1,502 @@
+/*
+ * NETLINK Netlink attributes
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <net/netlink.h>
+
+static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
+ [NLA_U8] = sizeof(u8),
+ [NLA_U16] = sizeof(u16),
+ [NLA_U32] = sizeof(u32),
+ [NLA_U64] = sizeof(u64),
+ [NLA_NESTED] = NLA_HDRLEN,
+};
+
+static int validate_nla(struct nlattr *nla, int maxtype,
+ const struct nla_policy *policy)
+{
+ const struct nla_policy *pt;
+ int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla);
+
+ if (type <= 0 || type > maxtype)
+ return 0;
+
+ pt = &policy[type];
+
+ BUG_ON(pt->type > NLA_TYPE_MAX);
+
+ switch (pt->type) {
+ case NLA_FLAG:
+ if (attrlen > 0)
+ return -ERANGE;
+ break;
+
+ case NLA_NUL_STRING:
+ if (pt->len)
+ minlen = min_t(int, attrlen, pt->len + 1);
+ else
+ minlen = attrlen;
+
+ if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
+ return -EINVAL;
+ /* fall through */
+
+ case NLA_STRING:
+ if (attrlen < 1)
+ return -ERANGE;
+
+ if (pt->len) {
+ char *buf = nla_data(nla);
+
+ if (buf[attrlen - 1] == '\0')
+ attrlen--;
+
+ if (attrlen > pt->len)
+ return -ERANGE;
+ }
+ break;
+
+ case NLA_BINARY:
+ if (pt->len && attrlen > pt->len)
+ return -ERANGE;
+ break;
+
+ case NLA_NESTED_COMPAT:
+ if (attrlen < pt->len)
+ return -ERANGE;
+ if (attrlen < NLA_ALIGN(pt->len))
+ break;
+ if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN)
+ return -ERANGE;
+ nla = nla_data(nla) + NLA_ALIGN(pt->len);
+ if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla))
+ return -ERANGE;
+ break;
+ case NLA_NESTED:
+		/* a nested attribute is allowed to be empty; if it is not,
+ * it must have a size of at least NLA_HDRLEN.
+ */
+ if (attrlen == 0)
+ break;
+ default:
+ if (pt->len)
+ minlen = pt->len;
+ else if (pt->type != NLA_UNSPEC)
+ minlen = nla_attr_minlen[pt->type];
+
+ if (attrlen < minlen)
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
+/**
+ * nla_validate - Validate a stream of attributes
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @maxtype: maximum attribute type to be expected
+ * @policy: validation policy
+ *
+ * Validates all attributes in the specified attribute stream against the
+ * specified policy. Attributes with a type exceeding maxtype will be
+ * ignored. See documentation of struct nla_policy for more details.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int nla_validate(struct nlattr *head, int len, int maxtype,
+ const struct nla_policy *policy)
+{
+ struct nlattr *nla;
+ int rem, err;
+
+ nla_for_each_attr(nla, head, len, rem) {
+ err = validate_nla(nla, maxtype, policy);
+ if (err < 0)
+ goto errout;
+ }
+
+ err = 0;
+errout:
+ return err;
+}
+
+/**
+ * nla_policy_len - Determine the max. length of a policy
+ * @policy: policy to use
+ * @n: number of policies
+ *
+ * Determines the max. length of the policy. It is currently used
+ * to allocate Netlink buffers roughly the size of the actual
+ * message.
+ *
+ * Returns the maximum length a message built from this policy can take.
+ */
+int
+nla_policy_len(const struct nla_policy *p, int n)
+{
+ int i, len = 0;
+
+	for (i = 0; i < n; i++, p++) {
+ if (p->len)
+ len += nla_total_size(p->len);
+ else if (nla_attr_minlen[p->type])
+ len += nla_total_size(nla_attr_minlen[p->type]);
+ }
+
+ return len;
+}
+
+/**
+ * nla_parse - Parse a stream of attributes into a tb buffer
+ * @tb: destination array with maxtype+1 elements
+ * @maxtype: maximum attribute type to be expected
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @policy: validation policy
+ *
+ * Parses a stream of attributes and stores a pointer to each attribute in
+ * the tb array accessible via the attribute type. Attributes with a type
+ * exceeding maxtype will be silently ignored for backwards compatibility
+ * reasons. policy may be set to NULL if no validation is required.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+ const struct nla_policy *policy)
+{
+ struct nlattr *nla;
+ int rem, err;
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+ nla_for_each_attr(nla, head, len, rem) {
+ u16 type = nla_type(nla);
+
+ if (type > 0 && type <= maxtype) {
+ if (policy) {
+ err = validate_nla(nla, maxtype, policy);
+ if (err < 0)
+ goto errout;
+ }
+
+ tb[type] = nla;
+ }
+ }
+
+ if (unlikely(rem > 0))
+ printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
+ "attributes.\n", rem);
+
+ err = 0;
+errout:
+ return err;
+}
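+
+/*
+ * Usage sketch (not part of the original file): a hypothetical family
+ * with two attribute types, parsed and validated in one call.  The
+ * ATTR_* names are made up for illustration:
+ *
+ *	enum { ATTR_UNSPEC, ATTR_PORT, ATTR_NAME, __ATTR_MAX };
+ *	#define ATTR_MAX (__ATTR_MAX - 1)
+ *
+ *	static const struct nla_policy my_policy[ATTR_MAX + 1] = {
+ *		[ATTR_PORT] = { .type = NLA_U16 },
+ *		[ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 16 },
+ *	};
+ *
+ *	struct nlattr *tb[ATTR_MAX + 1];
+ *
+ *	if (nla_parse(tb, ATTR_MAX, head, len, my_policy) == 0 && tb[ATTR_PORT])
+ *		port = nla_get_u16(tb[ATTR_PORT]);
+ */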
+
+/**
+ * nla_find - Find a specific attribute in a stream of attributes
+ * @head: head of attribute stream
+ * @len: length of attribute stream
+ * @attrtype: type of attribute to look for
+ *
+ * Returns the first attribute in the stream matching the specified type.
+ */
+struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
+{
+ struct nlattr *nla;
+ int rem;
+
+ nla_for_each_attr(nla, head, len, rem)
+ if (nla_type(nla) == attrtype)
+ return nla;
+
+ return NULL;
+}
+
+/**
+ * nla_strlcpy - Copy string attribute payload into a sized buffer
+ * @dst: where to copy the string to
+ * @nla: attribute to copy the string from
+ * @dstsize: size of destination buffer
+ *
+ * Copies at most dstsize - 1 bytes into the destination buffer.
+ * The result is always a valid NUL-terminated string. Unlike
+ * strlcpy the destination buffer is always padded out.
+ *
+ * Returns the length of the source buffer.
+ */
+size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
+{
+ size_t srclen = nla_len(nla);
+ char *src = nla_data(nla);
+
+ if (srclen > 0 && src[srclen - 1] == '\0')
+ srclen--;
+
+ if (dstsize > 0) {
+ size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen;
+
+ memset(dst, 0, dstsize);
+ memcpy(dst, src, len);
+ }
+
+ return srclen;
+}
+
+/**
+ * nla_memcpy - Copy a netlink attribute into another memory area
+ * @dest: where to copy to
+ * @src: netlink attribute to copy from
+ * @count: size of the destination area
+ *
+ * Note: The number of bytes copied is limited by the length of
+ * the attribute's payload.
+ *
+ * Returns the number of bytes copied.
+ */
+int nla_memcpy(void *dest, const struct nlattr *src, int count)
+{
+ int minlen = min_t(int, count, nla_len(src));
+
+ memcpy(dest, nla_data(src), minlen);
+
+ return minlen;
+}
+
+/**
+ * nla_memcmp - Compare an attribute with sized memory area
+ * @nla: netlink attribute
+ * @data: memory area
+ * @size: size of memory area
+ */
+int nla_memcmp(const struct nlattr *nla, const void *data,
+ size_t size)
+{
+ int d = nla_len(nla) - size;
+
+ if (d == 0)
+ d = memcmp(nla_data(nla), data, size);
+
+ return d;
+}
+
+/**
+ * nla_strcmp - Compare a string attribute against a string
+ * @nla: netlink string attribute
+ * @str: another string
+ */
+int nla_strcmp(const struct nlattr *nla, const char *str)
+{
+ int len = strlen(str) + 1;
+ int d = nla_len(nla) - len;
+
+ if (d == 0)
+ d = memcmp(nla_data(nla), str, len);
+
+ return d;
+}
+
+#ifdef CONFIG_NET
+/**
+ * __nla_reserve - reserve room for attribute on the skb
+ * @skb: socket buffer to reserve room on
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ *
+ * Adds a netlink attribute header to a socket buffer and reserves
+ * room for the payload but does not copy it.
+ *
+ * The caller is responsible for ensuring that the skb provides enough
+ * tailroom for the attribute header and payload.
+ */
+struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
+{
+ struct nlattr *nla;
+
+ nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
+ nla->nla_type = attrtype;
+ nla->nla_len = nla_attr_size(attrlen);
+
+ memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
+
+ return nla;
+}
+EXPORT_SYMBOL(__nla_reserve);
+
+/**
+ * __nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * The caller is responsible for ensuring that the skb provides enough
+ * tailroom for the payload.
+ */
+void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+ void *start;
+
+ start = skb_put(skb, NLA_ALIGN(attrlen));
+ memset(start, 0, NLA_ALIGN(attrlen));
+
+ return start;
+}
+EXPORT_SYMBOL(__nla_reserve_nohdr);
+
+/**
+ * nla_reserve - reserve room for attribute on the skb
+ * @skb: socket buffer to reserve room on
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ *
+ * Adds a netlink attribute header to a socket buffer and reserves
+ * room for the payload but does not copy it.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute header and payload.
+ */
+struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
+{
+ if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
+ return NULL;
+
+ return __nla_reserve(skb, attrtype, attrlen);
+}
+EXPORT_SYMBOL(nla_reserve);
+
+/**
+ * nla_reserve_nohdr - reserve room for attribute without header
+ * @skb: socket buffer to reserve room on
+ * @attrlen: length of attribute payload
+ *
+ * Reserves room for attribute payload without a header.
+ *
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
+{
+ if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+ return NULL;
+
+ return __nla_reserve_nohdr(skb, attrlen);
+}
+EXPORT_SYMBOL(nla_reserve_nohdr);
+
+/**
+ * __nla_put - Add a netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * The caller is responsible for ensuring that the skb provides enough
+ * tailroom for the attribute header and payload.
+ */
+void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
+ const void *data)
+{
+ struct nlattr *nla;
+
+ nla = __nla_reserve(skb, attrtype, attrlen);
+ memcpy(nla_data(nla), data, attrlen);
+}
+EXPORT_SYMBOL(__nla_put);
+
+/**
+ * __nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * The caller is responsible for ensuring that the skb provides enough
+ * tailroom for the attribute payload.
+ */
+void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+ void *start;
+
+ start = __nla_reserve_nohdr(skb, attrlen);
+ memcpy(start, data, attrlen);
+}
+EXPORT_SYMBOL(__nla_put_nohdr);
+
+/**
+ * nla_put - Add a netlink attribute to a socket buffer
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
+ * the attribute header and payload.
+ */
+int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
+{
+ if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
+ return -EMSGSIZE;
+
+ __nla_put(skb, attrtype, attrlen, data);
+ return 0;
+}
+EXPORT_SYMBOL(nla_put);
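+
+/*
+ * Usage sketch (not part of the original file): emitting the same two
+ * hypothetical ATTR_* attributes into an skb.  Both calls fail cleanly
+ * with -EMSGSIZE when tailroom runs out, so a message builder can
+ * simply propagate the error:
+ *
+ *	u16 port = 80;
+ *
+ *	if (nla_put(skb, ATTR_PORT, sizeof(port), &port) ||
+ *	    nla_put(skb, ATTR_NAME, strlen("eth0") + 1, "eth0"))
+ *		return -EMSGSIZE;
+ */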
+
+/**
+ * nla_put_nohdr - Add a netlink attribute without header
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
+{
+ if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+ return -EMSGSIZE;
+
+ __nla_put_nohdr(skb, attrlen, data);
+ return 0;
+}
+EXPORT_SYMBOL(nla_put_nohdr);
+
+/**
+ * nla_append - Add a netlink attribute without header or padding
+ * @skb: socket buffer to add attribute to
+ * @attrlen: length of attribute payload
+ * @data: head of attribute payload
+ *
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
+ * the attribute payload.
+ */
+int nla_append(struct sk_buff *skb, int attrlen, const void *data)
+{
+ if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
+ return -EMSGSIZE;
+
+ memcpy(skb_put(skb, attrlen), data, attrlen);
+ return 0;
+}
+EXPORT_SYMBOL(nla_append);
+#endif
+
+EXPORT_SYMBOL(nla_validate);
+EXPORT_SYMBOL(nla_policy_len);
+EXPORT_SYMBOL(nla_parse);
+EXPORT_SYMBOL(nla_find);
+EXPORT_SYMBOL(nla_strlcpy);
+EXPORT_SYMBOL(nla_memcpy);
+EXPORT_SYMBOL(nla_memcmp);
+EXPORT_SYMBOL(nla_strcmp);
diff --git a/lib/parser.c b/lib/parser.c
new file mode 100644
index 00000000..fb349772
--- /dev/null
+++ b/lib/parser.c
@@ -0,0 +1,231 @@
+/*
+ * lib/parser.c - simple parser for mount, etc. options.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/parser.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+/**
+ * match_one: - Determines if a string matches a simple pattern
+ * @s: the string to examine for presence of the pattern
+ * @p: the string containing the pattern
+ * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match
+ * locations.
+ *
+ * Description: Determines if the pattern @p is present in string @s. Can only
+ * match extremely simple token=arg style patterns. If the pattern is found,
+ * the location(s) of the arguments will be returned in the @args array.
+ */
+static int match_one(char *s, const char *p, substring_t args[])
+{
+ char *meta;
+ int argc = 0;
+
+ if (!p)
+ return 1;
+
+	while (1) {
+ int len = -1;
+ meta = strchr(p, '%');
+ if (!meta)
+ return strcmp(p, s) == 0;
+
+ if (strncmp(p, s, meta-p))
+ return 0;
+
+ s += meta - p;
+ p = meta + 1;
+
+ if (isdigit(*p))
+ len = simple_strtoul(p, (char **) &p, 10);
+ else if (*p == '%') {
+ if (*s++ != '%')
+ return 0;
+ p++;
+ continue;
+ }
+
+ if (argc >= MAX_OPT_ARGS)
+ return 0;
+
+ args[argc].from = s;
+ switch (*p++) {
+ case 's': {
+ size_t str_len = strlen(s);
+
+ if (str_len == 0)
+ return 0;
+ if (len == -1 || len > str_len)
+ len = str_len;
+ args[argc].to = s + len;
+ break;
+ }
+ case 'd':
+ simple_strtol(s, &args[argc].to, 0);
+ goto num;
+ case 'u':
+ simple_strtoul(s, &args[argc].to, 0);
+ goto num;
+ case 'o':
+ simple_strtoul(s, &args[argc].to, 8);
+ goto num;
+ case 'x':
+ simple_strtoul(s, &args[argc].to, 16);
+ num:
+ if (args[argc].to == args[argc].from)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ s = args[argc].to;
+ argc++;
+ }
+}
+
+/**
+ * match_token: - Find a token (and optional args) in a string
+ * @s: the string to examine for token/argument pairs
+ * @table: match_table_t describing the set of allowed option tokens and the
+ * arguments that may be associated with them. Must be terminated with a
+ * &struct match_token whose pattern is set to the NULL pointer.
+ * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match
+ * locations.
+ *
+ * Description: Detects which, if any, of a set of token strings has been
+ * passed to it. Tokens can include up to MAX_OPT_ARGS instances of basic
+ * C-style format identifiers which will be taken into account when matching the
+ * tokens, and whose locations will be returned in the @args array.
+ */
+int match_token(char *s, const match_table_t table, substring_t args[])
+{
+ const struct match_token *p;
+
+ for (p = table; !match_one(s, p->pattern, args) ; p++)
+ ;
+
+ return p->token;
+}
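+
+/*
+ * Usage sketch (not part of the original file): a hypothetical mount
+ * option table.  The final entry's NULL pattern makes match_one()
+ * succeed unconditionally, so Opt_err acts as the catch-all token:
+ *
+ *	enum { Opt_uid, Opt_ro, Opt_err };
+ *
+ *	static const match_table_t tokens = {
+ *		{ Opt_uid, "uid=%u" },
+ *		{ Opt_ro,  "ro" },
+ *		{ Opt_err, NULL }
+ *	};
+ *
+ *	substring_t args[MAX_OPT_ARGS];
+ *	int option;
+ *
+ *	switch (match_token(opt, tokens, args)) {
+ *	case Opt_uid:
+ *		if (match_int(&args[0], &option))
+ *			return -EINVAL;
+ *		break;
+ *	...
+ *	}
+ */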
+
+/**
+ * match_number: scan a number in the given base from a substring_t
+ * @s: substring to be scanned
+ * @result: resulting integer on success
+ * @base: base to use when converting string
+ *
+ * Description: Given a &substring_t and a base, attempts to parse the substring
+ * as a number in that base. On success, sets @result to the integer represented
+ * by the string and returns 0. Returns either -ENOMEM or -EINVAL on failure.
+ */
+static int match_number(substring_t *s, int *result, int base)
+{
+ char *endp;
+ char *buf;
+ int ret;
+
+ buf = kmalloc(s->to - s->from + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, s->from, s->to - s->from);
+ buf[s->to - s->from] = '\0';
+ *result = simple_strtol(buf, &endp, base);
+ ret = 0;
+ if (endp == buf)
+ ret = -EINVAL;
+ kfree(buf);
+ return ret;
+}
+
+/**
+ * match_int: - scan a decimal representation of an integer from a substring_t
+ * @s: substring_t to be scanned
+ * @result: resulting integer on success
+ *
+ * Description: Attempts to parse the &substring_t @s as a decimal integer. On
+ * success, sets @result to the integer represented by the string and returns 0.
+ * Returns either -ENOMEM or -EINVAL on failure.
+ */
+int match_int(substring_t *s, int *result)
+{
+ return match_number(s, result, 0);
+}
+
+/**
+ * match_octal: - scan an octal representation of an integer from a substring_t
+ * @s: substring_t to be scanned
+ * @result: resulting integer on success
+ *
+ * Description: Attempts to parse the &substring_t @s as an octal integer. On
+ * success, sets @result to the integer represented by the string and returns
+ * 0. Returns either -ENOMEM or -EINVAL on failure.
+ */
+int match_octal(substring_t *s, int *result)
+{
+ return match_number(s, result, 8);
+}
+
+/**
+ * match_hex: - scan a hex representation of an integer from a substring_t
+ * @s: substring_t to be scanned
+ * @result: resulting integer on success
+ *
+ * Description: Attempts to parse the &substring_t @s as a hexadecimal integer.
+ * On success, sets @result to the integer represented by the string and
+ * returns 0. Returns either -ENOMEM or -EINVAL on failure.
+ */
+int match_hex(substring_t *s, int *result)
+{
+ return match_number(s, result, 16);
+}
+
+/**
+ * match_strlcpy: - Copy the characters from a substring_t to a sized buffer
+ * @dest: where to copy to
+ * @src: &substring_t to copy
+ * @size: size of destination buffer
+ *
+ * Description: Copy the characters in &substring_t @src to the
+ * c-style string @dest. Copy no more than @size - 1 characters, plus
+ * the terminating NUL. Return length of @src.
+ */
+size_t match_strlcpy(char *dest, const substring_t *src, size_t size)
+{
+ size_t ret = src->to - src->from;
+
+ if (size) {
+ size_t len = ret >= size ? size - 1 : ret;
+ memcpy(dest, src->from, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+/**
+ * match_strdup: - allocate a new string with the contents of a substring_t
+ * @s: &substring_t to copy
+ *
+ * Description: Allocates and returns a string filled with the contents of
+ * the &substring_t @s. The caller is responsible for freeing the returned
+ * string with kfree().
+ */
+char *match_strdup(const substring_t *s)
+{
+ size_t sz = s->to - s->from + 1;
+ char *p = kmalloc(sz, GFP_KERNEL);
+ if (p)
+ match_strlcpy(p, s, sz);
+ return p;
+}
+
+EXPORT_SYMBOL(match_token);
+EXPORT_SYMBOL(match_int);
+EXPORT_SYMBOL(match_octal);
+EXPORT_SYMBOL(match_hex);
+EXPORT_SYMBOL(match_strlcpy);
+EXPORT_SYMBOL(match_strdup);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
new file mode 100644
index 00000000..209448e1
--- /dev/null
+++ b/lib/percpu_counter.c
@@ -0,0 +1,174 @@
+/*
+ * Fast batching percpu counters.
+ */
+
+#include <linux/percpu_counter.h>
+#include <linux/notifier.h>
+#include <linux/mutex.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+static LIST_HEAD(percpu_counters);
+static DEFINE_MUTEX(percpu_counters_lock);
+
+void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
+{
+ int cpu;
+
+ spin_lock(&fbc->lock);
+ for_each_possible_cpu(cpu) {
+ s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+ *pcount = 0;
+ }
+ fbc->count = amount;
+ spin_unlock(&fbc->lock);
+}
+EXPORT_SYMBOL(percpu_counter_set);
+
+void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+{
+ s64 count;
+ s32 *pcount;
+ int cpu = get_cpu();
+
+ pcount = per_cpu_ptr(fbc->counters, cpu);
+ count = *pcount + amount;
+ if (count >= batch || count <= -batch) {
+ spin_lock(&fbc->lock);
+ fbc->count += count;
+ *pcount = 0;
+ spin_unlock(&fbc->lock);
+ } else {
+ *pcount = count;
+ }
+ put_cpu();
+}
+EXPORT_SYMBOL(__percpu_counter_add);
+
+/*
+ * Add up all the per-cpu counts, return the result. This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
+{
+ s64 ret;
+ int cpu;
+
+ spin_lock(&fbc->lock);
+ ret = fbc->count;
+ for_each_online_cpu(cpu) {
+ s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+ ret += *pcount;
+ }
+ spin_unlock(&fbc->lock);
+ return ret;
+}
+EXPORT_SYMBOL(__percpu_counter_sum);
+
+int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
+ struct lock_class_key *key)
+{
+ spin_lock_init(&fbc->lock);
+ lockdep_set_class(&fbc->lock, key);
+ fbc->count = amount;
+ fbc->counters = alloc_percpu(s32);
+ if (!fbc->counters)
+ return -ENOMEM;
+#ifdef CONFIG_HOTPLUG_CPU
+ INIT_LIST_HEAD(&fbc->list);
+ mutex_lock(&percpu_counters_lock);
+ list_add(&fbc->list, &percpu_counters);
+ mutex_unlock(&percpu_counters_lock);
+#endif
+ return 0;
+}
+EXPORT_SYMBOL(__percpu_counter_init);
+
+void percpu_counter_destroy(struct percpu_counter *fbc)
+{
+ if (!fbc->counters)
+ return;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ mutex_lock(&percpu_counters_lock);
+ list_del(&fbc->list);
+ mutex_unlock(&percpu_counters_lock);
+#endif
+ free_percpu(fbc->counters);
+ fbc->counters = NULL;
+}
+EXPORT_SYMBOL(percpu_counter_destroy);
+
+int percpu_counter_batch __read_mostly = 32;
+EXPORT_SYMBOL(percpu_counter_batch);
+
+static void compute_batch_value(void)
+{
+ int nr = num_online_cpus();
+
+ percpu_counter_batch = max(32, nr*2);
+}
+
+static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
+ unsigned long action, void *hcpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ unsigned int cpu;
+ struct percpu_counter *fbc;
+
+ compute_batch_value();
+ if (action != CPU_DEAD)
+ return NOTIFY_OK;
+
+ cpu = (unsigned long)hcpu;
+ mutex_lock(&percpu_counters_lock);
+ list_for_each_entry(fbc, &percpu_counters, list) {
+ s32 *pcount;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fbc->lock, flags);
+ pcount = per_cpu_ptr(fbc->counters, cpu);
+ fbc->count += *pcount;
+ *pcount = 0;
+ spin_unlock_irqrestore(&fbc->lock, flags);
+ }
+ mutex_unlock(&percpu_counters_lock);
+#endif
+ return NOTIFY_OK;
+}
+
+/*
+ * Compare counter against given value.
+ * Return 1 if greater, 0 if equal and -1 if less
+ */
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+ s64 count;
+
+ count = percpu_counter_read(fbc);
+ /* Check to see if rough count will be sufficient for comparison */
+ if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+ if (count > rhs)
+ return 1;
+ else
+ return -1;
+ }
+ /* Need to use precise count */
+ count = percpu_counter_sum(fbc);
+ if (count > rhs)
+ return 1;
+ else if (count < rhs)
+ return -1;
+ else
+ return 0;
+}
+EXPORT_SYMBOL(percpu_counter_compare);
+
+static int __init percpu_counter_startup(void)
+{
+ compute_batch_value();
+ hotcpu_notifier(percpu_counter_hotcpu_callback, 0);
+ return 0;
+}
+module_init(percpu_counter_startup);
diff --git a/lib/plist.c b/lib/plist.c
new file mode 100644
index 00000000..1471988d
--- /dev/null
+++ b/lib/plist.c
@@ -0,0 +1,121 @@
+/*
+ * lib/plist.c
+ *
+ * Descending-priority-sorted double-linked list
+ *
+ * (C) 2002-2003 Intel Corp
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>.
+ *
+ * 2001-2005 (c) MontaVista Software, Inc.
+ * Daniel Walker <dwalker@mvista.com>
+ *
+ * (C) 2005 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * Simplifications of the original code by
+ * Oleg Nesterov <oleg@tv-sign.ru>
+ *
+ * Licensed under the FSF's GNU Public License v2 or later.
+ *
+ * Based on simple lists (include/linux/list.h).
+ *
+ * This file contains the add / del functions which are considered to
+ * be too large to inline. See include/linux/plist.h for further
+ * information.
+ */
+
+#include <linux/plist.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_DEBUG_PI_LIST
+
+static void plist_check_prev_next(struct list_head *t, struct list_head *p,
+ struct list_head *n)
+{
+ WARN(n->prev != p || p->next != n,
+ "top: %p, n: %p, p: %p\n"
+ "prev: %p, n: %p, p: %p\n"
+ "next: %p, n: %p, p: %p\n",
+ t, t->next, t->prev,
+ p, p->next, p->prev,
+ n, n->next, n->prev);
+}
+
+static void plist_check_list(struct list_head *top)
+{
+ struct list_head *prev = top, *next = top->next;
+
+ plist_check_prev_next(top, prev, next);
+ while (next != top) {
+ prev = next;
+ next = prev->next;
+ plist_check_prev_next(top, prev, next);
+ }
+}
+
+static void plist_check_head(struct plist_head *head)
+{
+ WARN_ON(!head->rawlock && !head->spinlock);
+ if (head->rawlock)
+ WARN_ON_SMP(!raw_spin_is_locked(head->rawlock));
+ if (head->spinlock)
+ WARN_ON_SMP(!spin_is_locked(head->spinlock));
+ plist_check_list(&head->prio_list);
+ plist_check_list(&head->node_list);
+}
+
+#else
+# define plist_check_head(h) do { } while (0)
+#endif
+
+/**
+ * plist_add - add @node to @head
+ *
+ * @node: &struct plist_node pointer
+ * @head: &struct plist_head pointer
+ */
+void plist_add(struct plist_node *node, struct plist_head *head)
+{
+ struct plist_node *iter;
+
+ plist_check_head(head);
+ WARN_ON(!plist_node_empty(node));
+
+ list_for_each_entry(iter, &head->prio_list, plist.prio_list) {
+ if (node->prio < iter->prio)
+ goto lt_prio;
+ else if (node->prio == iter->prio) {
+ iter = list_entry(iter->plist.prio_list.next,
+ struct plist_node, plist.prio_list);
+ goto eq_prio;
+ }
+ }
+
+lt_prio:
+ list_add_tail(&node->plist.prio_list, &iter->plist.prio_list);
+eq_prio:
+ list_add_tail(&node->plist.node_list, &iter->plist.node_list);
+
+ plist_check_head(head);
+}
+
+/**
+ * plist_del - Remove a @node from plist.
+ *
+ * @node: &struct plist_node pointer - entry to be removed
+ * @head: &struct plist_head pointer - list head
+ */
+void plist_del(struct plist_node *node, struct plist_head *head)
+{
+ plist_check_head(head);
+
+ if (!list_empty(&node->plist.prio_list)) {
+ struct plist_node *next = plist_first(&node->plist);
+
+ list_move_tail(&next->plist.prio_list, &node->plist.prio_list);
+ list_del_init(&node->plist.prio_list);
+ }
+
+ list_del_init(&node->plist.node_list);
+
+ plist_check_head(head);
+}
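+
+/*
+ * Usage sketch (not part of the original file): nodes sort by ascending
+ * prio value, so the numerically smallest prio -- the highest effective
+ * priority -- ends up at the front and plist_first() returns it.  Head
+ * initialization is elided here because its exact form depends on the
+ * lock used for the debug checks above:
+ *
+ *	struct plist_node a, b;
+ *
+ *	plist_node_init(&a, 10);
+ *	plist_node_init(&b, 5);
+ *	plist_add(&a, &head);
+ *	plist_add(&b, &head);
+ *	(plist_first(&head) == &b, since prio 5 sorts before prio 10)
+ */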
diff --git a/lib/prio_heap.c b/lib/prio_heap.c
new file mode 100644
index 00000000..a7af6f85
--- /dev/null
+++ b/lib/prio_heap.c
@@ -0,0 +1,70 @@
+/*
+ * Simple insertion-only static-sized priority heap containing
+ * pointers, based on CLR, chapter 7
+ */
+
+#include <linux/slab.h>
+#include <linux/prio_heap.h>
+
+int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask,
+ int (*gt)(void *, void *))
+{
+ heap->ptrs = kmalloc(size, gfp_mask);
+ if (!heap->ptrs)
+ return -ENOMEM;
+ heap->size = 0;
+ heap->max = size / sizeof(void *);
+ heap->gt = gt;
+ return 0;
+}
+
+void heap_free(struct ptr_heap *heap)
+{
+ kfree(heap->ptrs);
+}
+
+void *heap_insert(struct ptr_heap *heap, void *p)
+{
+ void *res;
+ void **ptrs = heap->ptrs;
+ int pos;
+
+ if (heap->size < heap->max) {
+ /* Heap insertion */
+ pos = heap->size++;
+ while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) {
+ ptrs[pos] = ptrs[(pos-1)/2];
+ pos = (pos-1)/2;
+ }
+ ptrs[pos] = p;
+ return NULL;
+ }
+
+ /* The heap is full, so something will have to be dropped */
+
+ /* If the new pointer is greater than the current max, drop it */
+ if (heap->gt(p, ptrs[0]))
+ return p;
+
+ /* Replace the current max and heapify */
+ res = ptrs[0];
+ ptrs[0] = p;
+ pos = 0;
+
+ while (1) {
+ int left = 2 * pos + 1;
+ int right = 2 * pos + 2;
+ int largest = pos;
+ if (left < heap->size && heap->gt(ptrs[left], p))
+ largest = left;
+ if (right < heap->size && heap->gt(ptrs[right], ptrs[largest]))
+ largest = right;
+ if (largest == pos)
+ break;
+ /* Push p down the heap one level and bump one up */
+ ptrs[pos] = ptrs[largest];
+ ptrs[largest] = p;
+ pos = largest;
+ }
+ return res;
+}
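+
+/*
+ * Usage sketch (not part of the original file): heap_init() takes the
+ * byte size of the pointer array, so a heap that retains the 8 smallest
+ * items under the supplied gt() ordering is set up as follows:
+ *
+ *	static int gt_int(void *a, void *b)
+ *	{
+ *		return *(int *)a > *(int *)b;
+ *	}
+ *
+ *	struct ptr_heap heap;
+ *
+ *	if (heap_init(&heap, 8 * sizeof(void *), GFP_KERNEL, gt_int))
+ *		return -ENOMEM;
+ *	dropped = heap_insert(&heap, &value);	(NULL while there is room)
+ */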
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
new file mode 100644
index 00000000..ccfd850b
--- /dev/null
+++ b/lib/prio_tree.c
@@ -0,0 +1,484 @@
+/*
+ * lib/prio_tree.c - priority search tree
+ *
+ * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
+ *
+ * This file is released under the GPL v2.
+ *
+ * Based on the radix priority search tree proposed by Edward M. McCreight
+ * in SIAM Journal of Computing, vol. 14, no. 2, pages 257-276, May 1985
+ *
+ * 02Feb2004 Initial version
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/prio_tree.h>
+
+/*
+ * A clever mix of heap and radix trees forms a radix priority search tree (PST)
+ * which is useful for storing intervals, e.g, we can consider a vma as a closed
+ * interval of file pages [offset_begin, offset_end], and store all vmas that
+ * map a file in a PST. Then, using the PST, we can answer a stabbing query,
+ * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
+ * given input interval X (a set of consecutive file pages), in "O(log n + m)"
+ * time where 'log n' is the height of the PST, and 'm' is the number of stored
+ * intervals (vmas) that overlap (map) with the input interval X (the set of
+ * consecutive file pages).
+ *
+ * In our implementation, we store closed intervals of the form [radix_index,
+ * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
+ * is designed for storing intervals with unique radix indices, i.e., each
+ * interval has a different radix_index. However, this limitation can be easily
+ * overcome by using the size, i.e., heap_index - radix_index, as part of the
+ * index, so we index the tree using [(radix_index,size), heap_index].
+ *
+ * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
+ * machine, the maximum height of a PST can be 64. We can use a balanced version
+ * of the priority search tree to optimize the tree height, but the balanced
+ * tree proposed by McCreight is too complex and memory-hungry for our purpose.
+ */
+
+/*
+ * The following macros are used for implementing prio_tree for i_mmap
+ */
+
+#define RADIX_INDEX(vma) ((vma)->vm_pgoff)
+#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
+/* avoid overflow */
+#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
+
+
+static void get_index(const struct prio_tree_root *root,
+ const struct prio_tree_node *node,
+ unsigned long *radix, unsigned long *heap)
+{
+ if (root->raw) {
+ struct vm_area_struct *vma = prio_tree_entry(
+ node, struct vm_area_struct, shared.prio_tree_node);
+
+ *radix = RADIX_INDEX(vma);
+ *heap = HEAP_INDEX(vma);
+	} else {
+ *radix = node->start;
+ *heap = node->last;
+ }
+}
+
+static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
+
+void __init prio_tree_init(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
+ index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
+ index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
+}
+
+/*
+ * Maximum heap_index that can be stored in a PST with index_bits bits
+ */
+static inline unsigned long prio_tree_maxindex(unsigned int bits)
+{
+ return index_bits_to_maxindex[bits - 1];
+}
+
+/*
+ * Extend a priority search tree so that it can store a node with heap_index
+ * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
+ * However, this function is used rarely and the common case performance is
+ * not bad.
+ */
+static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
+ struct prio_tree_node *node, unsigned long max_heap_index)
+{
+ struct prio_tree_node *first = NULL, *prev, *last = NULL;
+
+ if (max_heap_index > prio_tree_maxindex(root->index_bits))
+ root->index_bits++;
+
+ while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
+ root->index_bits++;
+
+ if (prio_tree_empty(root))
+ continue;
+
+ if (first == NULL) {
+ first = root->prio_tree_node;
+ prio_tree_remove(root, root->prio_tree_node);
+ INIT_PRIO_TREE_NODE(first);
+ last = first;
+ } else {
+ prev = last;
+ last = root->prio_tree_node;
+ prio_tree_remove(root, root->prio_tree_node);
+ INIT_PRIO_TREE_NODE(last);
+ prev->left = last;
+ last->parent = prev;
+ }
+ }
+
+ INIT_PRIO_TREE_NODE(node);
+
+ if (first) {
+ node->left = first;
+ first->parent = node;
+ } else
+ last = node;
+
+ if (!prio_tree_empty(root)) {
+ last->left = root->prio_tree_node;
+ last->left->parent = last;
+ }
+
+ root->prio_tree_node = node;
+ return node;
+}
+
+/*
+ * Replace a prio_tree_node with a new node and return the old node
+ */
+struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root,
+ struct prio_tree_node *old, struct prio_tree_node *node)
+{
+ INIT_PRIO_TREE_NODE(node);
+
+ if (prio_tree_root(old)) {
+ BUG_ON(root->prio_tree_node != old);
+ /*
+ * We can reduce root->index_bits here. However, it is complex
+ * and does not help much to improve performance (IMO).
+ */
+ node->parent = node;
+ root->prio_tree_node = node;
+ } else {
+ node->parent = old->parent;
+ if (old->parent->left == old)
+ old->parent->left = node;
+ else
+ old->parent->right = node;
+ }
+
+ if (!prio_tree_left_empty(old)) {
+ node->left = old->left;
+ old->left->parent = node;
+ }
+
+ if (!prio_tree_right_empty(old)) {
+ node->right = old->right;
+ old->right->parent = node;
+ }
+
+ return old;
+}
+
+/*
+ * Insert a prio_tree_node @node into a radix priority search tree @root. The
+ * algorithm typically takes O(log n) time where 'log n' is the number of bits
+ * required to represent the maximum heap_index. In the worst case, the algo
+ * can take O((log n)^2) - check prio_tree_expand.
+ *
+ * If a prior node with same radix_index and heap_index is already found in
+ * the tree, then returns the address of the prior node. Otherwise, inserts
+ * @node into the tree and returns @node.
+ */
+struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
+ struct prio_tree_node *node)
+{
+ struct prio_tree_node *cur, *res = node;
+ unsigned long radix_index, heap_index;
+ unsigned long r_index, h_index, index, mask;
+ int size_flag = 0;
+
+ get_index(root, node, &radix_index, &heap_index);
+
+ if (prio_tree_empty(root) ||
+ heap_index > prio_tree_maxindex(root->index_bits))
+ return prio_tree_expand(root, node, heap_index);
+
+ cur = root->prio_tree_node;
+ mask = 1UL << (root->index_bits - 1);
+
+ while (mask) {
+ get_index(root, cur, &r_index, &h_index);
+
+ if (r_index == radix_index && h_index == heap_index)
+ return cur;
+
+ if (h_index < heap_index ||
+ (h_index == heap_index && r_index > radix_index)) {
+ struct prio_tree_node *tmp = node;
+ node = prio_tree_replace(root, cur, node);
+ cur = tmp;
+ /* swap indices */
+ index = r_index;
+ r_index = radix_index;
+ radix_index = index;
+ index = h_index;
+ h_index = heap_index;
+ heap_index = index;
+ }
+
+ if (size_flag)
+ index = heap_index - radix_index;
+ else
+ index = radix_index;
+
+ if (index & mask) {
+ if (prio_tree_right_empty(cur)) {
+ INIT_PRIO_TREE_NODE(node);
+ cur->right = node;
+ node->parent = cur;
+ return res;
+ } else
+ cur = cur->right;
+ } else {
+ if (prio_tree_left_empty(cur)) {
+ INIT_PRIO_TREE_NODE(node);
+ cur->left = node;
+ node->parent = cur;
+ return res;
+ } else
+ cur = cur->left;
+ }
+
+ mask >>= 1;
+
+ if (!mask) {
+ mask = 1UL << (BITS_PER_LONG - 1);
+ size_flag = 1;
+ }
+ }
+ /* Should not reach here */
+ BUG();
+ return NULL;
+}
+
+/*
+ * Remove a prio_tree_node @node from a radix priority search tree @root. The
+ * algorithm takes O(log n) time where 'log n' is the number of bits required
+ * to represent the maximum heap_index.
+ */
+void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
+{
+ struct prio_tree_node *cur;
+ unsigned long r_index, h_index_right, h_index_left;
+
+ cur = node;
+
+ while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
+ if (!prio_tree_left_empty(cur))
+ get_index(root, cur->left, &r_index, &h_index_left);
+ else {
+ cur = cur->right;
+ continue;
+ }
+
+ if (!prio_tree_right_empty(cur))
+ get_index(root, cur->right, &r_index, &h_index_right);
+ else {
+ cur = cur->left;
+ continue;
+ }
+
+ /* both h_index_left and h_index_right cannot be 0 */
+ if (h_index_left >= h_index_right)
+ cur = cur->left;
+ else
+ cur = cur->right;
+ }
+
+ if (prio_tree_root(cur)) {
+ BUG_ON(root->prio_tree_node != cur);
+ __INIT_PRIO_TREE_ROOT(root, root->raw);
+ return;
+ }
+
+ if (cur->parent->right == cur)
+ cur->parent->right = cur->parent;
+ else
+ cur->parent->left = cur->parent;
+
+ while (cur != node)
+ cur = prio_tree_replace(root, cur->parent, cur);
+}
+
+/*
+ * Following functions help to enumerate all prio_tree_nodes in the tree that
+ * overlap with the input interval X [radix_index, heap_index]. The enumeration
+ * takes O(log n + m) time where 'log n' is the height of the tree (which is
+ * proportional to # of bits required to represent the maximum heap_index) and
+ * 'm' is the number of prio_tree_nodes that overlap the interval X.
+ */
+
+static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter,
+ unsigned long *r_index, unsigned long *h_index)
+{
+ if (prio_tree_left_empty(iter->cur))
+ return NULL;
+
+ get_index(iter->root, iter->cur->left, r_index, h_index);
+
+ if (iter->r_index <= *h_index) {
+ iter->cur = iter->cur->left;
+ iter->mask >>= 1;
+ if (iter->mask) {
+ if (iter->size_level)
+ iter->size_level++;
+ } else {
+ if (iter->size_level) {
+ BUG_ON(!prio_tree_left_empty(iter->cur));
+ BUG_ON(!prio_tree_right_empty(iter->cur));
+ iter->size_level++;
+ iter->mask = ULONG_MAX;
+ } else {
+ iter->size_level = 1;
+ iter->mask = 1UL << (BITS_PER_LONG - 1);
+ }
+ }
+ return iter->cur;
+ }
+
+ return NULL;
+}
+
+static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter,
+ unsigned long *r_index, unsigned long *h_index)
+{
+ unsigned long value;
+
+ if (prio_tree_right_empty(iter->cur))
+ return NULL;
+
+ if (iter->size_level)
+ value = iter->value;
+ else
+ value = iter->value | iter->mask;
+
+ if (iter->h_index < value)
+ return NULL;
+
+ get_index(iter->root, iter->cur->right, r_index, h_index);
+
+ if (iter->r_index <= *h_index) {
+ iter->cur = iter->cur->right;
+ iter->mask >>= 1;
+ iter->value = value;
+ if (iter->mask) {
+ if (iter->size_level)
+ iter->size_level++;
+ } else {
+ if (iter->size_level) {
+ BUG_ON(!prio_tree_left_empty(iter->cur));
+ BUG_ON(!prio_tree_right_empty(iter->cur));
+ iter->size_level++;
+ iter->mask = ULONG_MAX;
+ } else {
+ iter->size_level = 1;
+ iter->mask = 1UL << (BITS_PER_LONG - 1);
+ }
+ }
+ return iter->cur;
+ }
+
+ return NULL;
+}
+
+static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter)
+{
+ iter->cur = iter->cur->parent;
+ if (iter->mask == ULONG_MAX)
+ iter->mask = 1UL;
+ else if (iter->size_level == 1)
+ iter->mask = 1UL;
+ else
+ iter->mask <<= 1;
+ if (iter->size_level)
+ iter->size_level--;
+ if (!iter->size_level && (iter->value & iter->mask))
+ iter->value ^= iter->mask;
+ return iter->cur;
+}
+
+static inline int overlap(struct prio_tree_iter *iter,
+ unsigned long r_index, unsigned long h_index)
+{
+ return iter->h_index >= r_index && iter->r_index <= h_index;
+}
+
+/*
+ * prio_tree_first:
+ *
+ * Get the first prio_tree_node that overlaps with the interval [radix_index,
+ * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
+ * traversal of the tree.
+ */
+static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter)
+{
+ struct prio_tree_root *root;
+ unsigned long r_index, h_index;
+
+ INIT_PRIO_TREE_ITER(iter);
+
+ root = iter->root;
+ if (prio_tree_empty(root))
+ return NULL;
+
+ get_index(root, root->prio_tree_node, &r_index, &h_index);
+
+ if (iter->r_index > h_index)
+ return NULL;
+
+ iter->mask = 1UL << (root->index_bits - 1);
+ iter->cur = root->prio_tree_node;
+
+ while (1) {
+ if (overlap(iter, r_index, h_index))
+ return iter->cur;
+
+ if (prio_tree_left(iter, &r_index, &h_index))
+ continue;
+
+ if (prio_tree_right(iter, &r_index, &h_index))
+ continue;
+
+ break;
+ }
+ return NULL;
+}
+
+/*
+ * prio_tree_next:
+ *
+ * Get the next prio_tree_node that overlaps with the input interval in iter
+ */
+struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter)
+{
+ unsigned long r_index, h_index;
+
+ if (iter->cur == NULL)
+ return prio_tree_first(iter);
+
+repeat:
+ while (prio_tree_left(iter, &r_index, &h_index))
+ if (overlap(iter, r_index, h_index))
+ return iter->cur;
+
+ while (!prio_tree_right(iter, &r_index, &h_index)) {
+ while (!prio_tree_root(iter->cur) &&
+ iter->cur->parent->right == iter->cur)
+ prio_tree_parent(iter);
+
+ if (prio_tree_root(iter->cur))
+ return NULL;
+
+ prio_tree_parent(iter);
+ }
+
+ if (overlap(iter, r_index, h_index))
+ return iter->cur;
+
+ goto repeat;
+}
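+
+/*
+ * Usage sketch (not part of the original file): with root->raw == 0 the
+ * tree stores generic [start, last] intervals directly, and the iterator
+ * visits every stored interval overlapping a query window.  Assumes the
+ * prio_tree_iter_init()/prio_tree_next() interface from
+ * <linux/prio_tree.h>:
+ *
+ *	struct prio_tree_iter iter;
+ *	struct prio_tree_node *n;
+ *
+ *	node->start = 10; node->last = 20;
+ *	prio_tree_insert(&root, node);
+ *
+ *	prio_tree_iter_init(&iter, &root, query_start, query_last);
+ *	while ((n = prio_tree_next(&iter)) != NULL)
+ *		handle_overlap(n);	(hypothetical callback)
+ */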
diff --git a/lib/proportions.c b/lib/proportions.c
new file mode 100644
index 00000000..d50746a7
--- /dev/null
+++ b/lib/proportions.c
@@ -0,0 +1,407 @@
+/*
+ * Floating proportions
+ *
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Description:
+ *
+ * The floating proportion is a time derivative with an exponentially decaying
+ * history:
+ *
+ * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
+ *
+ * Where j is an element from {prop_local}, x_{j} is j's number of events,
+ * and i the time period over which the differential is taken. So d/dt_{-i} is
+ * the differential over the i-th last period.
+ *
+ * The decaying history gives smooth transitions. The time differential carries
+ * the notion of speed.
+ *
+ * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
+ *
+ * \Sum_{i=0} 1/2^(1+i) = 1
+ *
+ * Furthermore, if we measure time (t) in the same events as x, so that:
+ *
+ * t = \Sum_{j} x_{j}
+ *
+ * we get that:
+ *
+ * \Sum_{j} p_{j} = 1
+ *
+ * Writing this in an iterative fashion we get (dropping the 'd's):
+ *
+ * if (++x_{j}, ++t > period)
+ * t /= 2;
+ * for_each (j)
+ * x_{j} /= 2;
+ *
+ * so that:
+ *
+ * p_{j} = x_{j} / t;
+ *
+ * We optimize away the '/= 2' for the global time delta by noting that:
+ *
+ * if (++t > period) t /= 2:
+ *
+ * Can be approximated by:
+ *
+ * period/2 + (++t % period/2)
+ *
+ * [ Furthermore, when we choose period to be 2^n it can be written in terms of
+ * binary operations and wraparound artefacts disappear. ]
+ *
+ * Also note that this yields a natural counter of the elapsed periods:
+ *
+ * c = t / (period/2)
+ *
+ * [ Its monotonically increasing property can be applied to mitigate the wrap-
+ * around issue. ]
+ *
+ * This allows us to do away with the loop over all prop_locals on each period
+ * expiration. By remembering the period count under which it was last accessed
+ * as c_{j}, we can obtain the number of 'missed' cycles from:
+ *
+ * c - c_{j}
+ *
+ * We can then lazily catch up to the global period count every time we are
+ * going to use x_{j}, by doing:
+ *
+ * x_{j} /= 2^(c - c_{j}), c_{j} = c
+ */
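+
+/*
+ * A worked example (not part of the original comment), assuming a shift
+ * such that period/2 = 4: if j last caught up at global period count
+ * c_{j} = 2 with x_{j} = 8 events, and the global count is now c = 5,
+ * then on the next access x_{j} is decayed by 2^(5 - 2) = 8, giving
+ * x_{j} = 8 / 8 = 1, and c_{j} is set to 5.
+ */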
+
+#include <linux/proportions.h>
+#include <linux/rcupdate.h>
+
+int prop_descriptor_init(struct prop_descriptor *pd, int shift)
+{
+ int err;
+
+ if (shift > PROP_MAX_SHIFT)
+ shift = PROP_MAX_SHIFT;
+
+ pd->index = 0;
+ pd->pg[0].shift = shift;
+ mutex_init(&pd->mutex);
+ err = percpu_counter_init(&pd->pg[0].events, 0);
+ if (err)
+ goto out;
+
+ err = percpu_counter_init(&pd->pg[1].events, 0);
+ if (err)
+ percpu_counter_destroy(&pd->pg[0].events);
+
+out:
+ return err;
+}
+
+/*
+ * We have two copies, and flip between them to make it seem like an atomic
+ * update. The update is not really atomic wrt the events counter, but
+ * it is internally consistent with the bit layout depending on shift.
+ *
+ * We copy the events count, move the bits around and flip the index.
+ */
+void prop_change_shift(struct prop_descriptor *pd, int shift)
+{
+ int index;
+ int offset;
+ u64 events;
+ unsigned long flags;
+
+ if (shift > PROP_MAX_SHIFT)
+ shift = PROP_MAX_SHIFT;
+
+ mutex_lock(&pd->mutex);
+
+ index = pd->index ^ 1;
+ offset = pd->pg[pd->index].shift - shift;
+ if (!offset)
+ goto out;
+
+ pd->pg[index].shift = shift;
+
+ local_irq_save(flags);
+ events = percpu_counter_sum(&pd->pg[pd->index].events);
+ if (offset < 0)
+ events <<= -offset;
+ else
+ events >>= offset;
+ percpu_counter_set(&pd->pg[index].events, events);
+
+ /*
+ * ensure the new pg is fully written before the switch
+ */
+ smp_wmb();
+ pd->index = index;
+ local_irq_restore(flags);
+
+ synchronize_rcu();
+
+out:
+ mutex_unlock(&pd->mutex);
+}
+
+/*
+ * wrap the access to the data in an rcu_read_lock() section;
+ * this is used to track the active references.
+ */
+static struct prop_global *prop_get_global(struct prop_descriptor *pd)
+__acquires(RCU)
+{
+ int index;
+
+ rcu_read_lock();
+ index = pd->index;
+ /*
+	 * match the wmb in prop_change_shift()
+ */
+ smp_rmb();
+ return &pd->pg[index];
+}
+
+static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
+__releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static void
+prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
+{
+ int offset = *pl_shift - new_shift;
+
+ if (!offset)
+ return;
+
+ if (offset < 0)
+ *pl_period <<= -offset;
+ else
+ *pl_period >>= offset;
+
+ *pl_shift = new_shift;
+}
+
+/*
+ * PERCPU
+ */
+
+#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
+
+int prop_local_init_percpu(struct prop_local_percpu *pl)
+{
+ spin_lock_init(&pl->lock);
+ pl->shift = 0;
+ pl->period = 0;
+ return percpu_counter_init(&pl->events, 0);
+}
+
+void prop_local_destroy_percpu(struct prop_local_percpu *pl)
+{
+ percpu_counter_destroy(&pl->events);
+}
+
+/*
+ * Catch up with missed period expirations.
+ *
+ * until (c_{j} == c)
+ * x_{j} -= x_{j}/2;
+ * c_{j}++;
+ */
+static
+void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
+{
+ unsigned long period = 1UL << (pg->shift - 1);
+ unsigned long period_mask = ~(period - 1);
+ unsigned long global_period;
+ unsigned long flags;
+
+ global_period = percpu_counter_read(&pg->events);
+ global_period &= period_mask;
+
+ /*
+ * Fast path - check if the local and global period count still match
+ * outside of the lock.
+ */
+ if (pl->period == global_period)
+ return;
+
+ spin_lock_irqsave(&pl->lock, flags);
+ prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+
+ /*
+	 * For each missed period, we halve the local counter;
+	 * basically:
+ * pl->events >> (global_period - pl->period);
+ */
+ period = (global_period - pl->period) >> (pg->shift - 1);
+ if (period < BITS_PER_LONG) {
+ s64 val = percpu_counter_read(&pl->events);
+
+ if (val < (nr_cpu_ids * PROP_BATCH))
+ val = percpu_counter_sum(&pl->events);
+
+ __percpu_counter_add(&pl->events, -val + (val >> period),
+ PROP_BATCH);
+ } else
+ percpu_counter_set(&pl->events, 0);
+
+ pl->period = global_period;
+ spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/*
+ * ++x_{j}, ++t
+ */
+void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
+{
+ struct prop_global *pg = prop_get_global(pd);
+
+ prop_norm_percpu(pg, pl);
+ __percpu_counter_add(&pl->events, 1, PROP_BATCH);
+ percpu_counter_add(&pg->events, 1);
+ prop_put_global(pd, pg);
+}
+
+/*
+ * identical to __prop_inc_percpu, except that it limits this pl's fraction to
+ * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
+ */
+void __prop_inc_percpu_max(struct prop_descriptor *pd,
+ struct prop_local_percpu *pl, long frac)
+{
+ struct prop_global *pg = prop_get_global(pd);
+
+ prop_norm_percpu(pg, pl);
+
+ if (unlikely(frac != PROP_FRAC_BASE)) {
+ unsigned long period_2 = 1UL << (pg->shift - 1);
+ unsigned long counter_mask = period_2 - 1;
+ unsigned long global_count;
+ long numerator, denominator;
+
+ numerator = percpu_counter_read_positive(&pl->events);
+ global_count = percpu_counter_read(&pg->events);
+ denominator = period_2 + (global_count & counter_mask);
+
+ if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
+ goto out_put;
+ }
+
+ percpu_counter_add(&pl->events, 1);
+ percpu_counter_add(&pg->events, 1);
+
+out_put:
+ prop_put_global(pd, pg);
+}
+
+/*
+ * Obtain a fraction of this proportion
+ *
+ * p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_percpu(struct prop_descriptor *pd,
+ struct prop_local_percpu *pl,
+ long *numerator, long *denominator)
+{
+ struct prop_global *pg = prop_get_global(pd);
+ unsigned long period_2 = 1UL << (pg->shift - 1);
+ unsigned long counter_mask = period_2 - 1;
+ unsigned long global_count;
+
+ prop_norm_percpu(pg, pl);
+ *numerator = percpu_counter_read_positive(&pl->events);
+
+ global_count = percpu_counter_read(&pg->events);
+ *denominator = period_2 + (global_count & counter_mask);
+
+ prop_put_global(pd, pg);
+}
+
+/*
+ * SINGLE
+ */
+
+int prop_local_init_single(struct prop_local_single *pl)
+{
+ spin_lock_init(&pl->lock);
+ pl->shift = 0;
+ pl->period = 0;
+ pl->events = 0;
+ return 0;
+}
+
+void prop_local_destroy_single(struct prop_local_single *pl)
+{
+}
+
+/*
+ * Catch up with missed period expirations.
+ */
+static
+void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
+{
+ unsigned long period = 1UL << (pg->shift - 1);
+ unsigned long period_mask = ~(period - 1);
+ unsigned long global_period;
+ unsigned long flags;
+
+ global_period = percpu_counter_read(&pg->events);
+ global_period &= period_mask;
+
+ /*
+ * Fast path - check if the local and global period count still match
+ * outside of the lock.
+ */
+ if (pl->period == global_period)
+ return;
+
+ spin_lock_irqsave(&pl->lock, flags);
+ prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
+ /*
+	 * For each missed period, we halve the local counter.
+ */
+ period = (global_period - pl->period) >> (pg->shift - 1);
+ if (likely(period < BITS_PER_LONG))
+ pl->events >>= period;
+ else
+ pl->events = 0;
+ pl->period = global_period;
+ spin_unlock_irqrestore(&pl->lock, flags);
+}
+
+/*
+ * ++x_{j}, ++t
+ */
+void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
+{
+ struct prop_global *pg = prop_get_global(pd);
+
+ prop_norm_single(pg, pl);
+ pl->events++;
+ percpu_counter_add(&pg->events, 1);
+ prop_put_global(pd, pg);
+}
+
+/*
+ * Obtain a fraction of this proportion
+ *
+ * p_{j} = x_{j} / (period/2 + t % period/2)
+ */
+void prop_fraction_single(struct prop_descriptor *pd,
+ struct prop_local_single *pl,
+ long *numerator, long *denominator)
+{
+ struct prop_global *pg = prop_get_global(pd);
+ unsigned long period_2 = 1UL << (pg->shift - 1);
+ unsigned long counter_mask = period_2 - 1;
+ unsigned long global_count;
+
+ prop_norm_single(pg, pl);
+ *numerator = pl->events;
+
+ global_count = percpu_counter_read(&pg->events);
+ *denominator = period_2 + (global_count & counter_mask);
+
+ prop_put_global(pd, pg);
+}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
new file mode 100644
index 00000000..296eb810
--- /dev/null
+++ b/lib/radix-tree.c
@@ -0,0 +1,1419 @@
+/*
+ * Copyright (C) 2001 Momchil Velikov
+ * Portions Copyright (C) 2001 Christoph Hellwig
+ * Copyright (C) 2005 SGI, Christoph Lameter
+ * Copyright (C) 2006 Nick Piggin
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/radix-tree.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <linux/rcupdate.h>
+
+
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
+#else
+#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
+#endif
+
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
+
+#define RADIX_TREE_TAG_LONGS \
+ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+struct radix_tree_node {
+ unsigned int height; /* Height from the bottom */
+ unsigned int count;
+ struct rcu_head rcu_head;
+ void *slots[RADIX_TREE_MAP_SIZE];
+ unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
+};
+
+struct radix_tree_path {
+ struct radix_tree_node *node;
+ int offset;
+};
+
+#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
+ RADIX_TREE_MAP_SHIFT))
+
+/*
+ * The height_to_maxindex array needs to be one deeper than the maximum
+ * path as height 0 holds only 1 entry.
+ */
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
+
+/*
+ * Radix tree node cache.
+ */
+static struct kmem_cache *radix_tree_node_cachep;
+
+/*
+ * Per-cpu pool of preloaded nodes
+ */
+struct radix_tree_preload {
+ int nr;
+ struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
+};
+static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
+
+static inline void *ptr_to_indirect(void *ptr)
+{
+ return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
+}
+
+static inline void *indirect_to_ptr(void *ptr)
+{
+ return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
+}
+
+static inline gfp_t root_gfp_mask(struct radix_tree_root *root)
+{
+ return root->gfp_mask & __GFP_BITS_MASK;
+}
+
+static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ __set_bit(offset, node->tags[tag]);
+}
+
+static inline void tag_clear(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ __clear_bit(offset, node->tags[tag]);
+}
+
+static inline int tag_get(struct radix_tree_node *node, unsigned int tag,
+ int offset)
+{
+ return test_bit(offset, node->tags[tag]);
+}
+
+static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
+{
+ root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
+}
+
+static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
+{
+ root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
+}
+
+static inline void root_tag_clear_all(struct radix_tree_root *root)
+{
+ root->gfp_mask &= __GFP_BITS_MASK;
+}
+
+static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
+{
+ return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+}
+
+/*
+ * Returns 1 if any slot in the node has this tag set.
+ * Otherwise returns 0.
+ */
+static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+ int idx;
+ for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+ if (node->tags[tag][idx])
+ return 1;
+ }
+ return 0;
+}
+/*
+ * This assumes that the caller has performed appropriate preallocation, and
+ * that the caller has pinned this thread of control to the current CPU.
+ */
+static struct radix_tree_node *
+radix_tree_node_alloc(struct radix_tree_root *root)
+{
+ struct radix_tree_node *ret = NULL;
+ gfp_t gfp_mask = root_gfp_mask(root);
+
+ if (!(gfp_mask & __GFP_WAIT)) {
+ struct radix_tree_preload *rtp;
+
+ /*
+ * Provided the caller has preloaded, we will always
+ * succeed in getting a node here (and never reach
+ * kmem_cache_alloc).
+ */
+ rtp = &__get_cpu_var(radix_tree_preloads);
+ if (rtp->nr) {
+ ret = rtp->nodes[rtp->nr - 1];
+ rtp->nodes[rtp->nr - 1] = NULL;
+ rtp->nr--;
+ }
+ }
+ if (ret == NULL)
+ ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+
+ BUG_ON(radix_tree_is_indirect_ptr(ret));
+ return ret;
+}
+
+static void radix_tree_node_rcu_free(struct rcu_head *head)
+{
+ struct radix_tree_node *node =
+ container_of(head, struct radix_tree_node, rcu_head);
+ int i;
+
+ /*
+ * must only free zeroed nodes into the slab. radix_tree_shrink
+ * can leave us with a non-NULL entry in the first slot, so clear
+ * that here to make sure.
+ */
+ for (i = 0; i < RADIX_TREE_MAX_TAGS; i++)
+ tag_clear(node, i, 0);
+
+ node->slots[0] = NULL;
+ node->count = 0;
+
+ kmem_cache_free(radix_tree_node_cachep, node);
+}
+
+static inline void
+radix_tree_node_free(struct radix_tree_node *node)
+{
+ call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
+}
+
+/*
+ * Load up this CPU's radix_tree_node buffer with sufficient objects to
+ * ensure that the addition of a single element in the tree cannot fail. On
+ * success, return zero, with preemption disabled. On error, return -ENOMEM
+ * with preemption not disabled.
+ *
+ * To make use of this facility, the radix tree must be initialised without
+ * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ */
+int radix_tree_preload(gfp_t gfp_mask)
+{
+ struct radix_tree_preload *rtp;
+ struct radix_tree_node *node;
+ int ret = -ENOMEM;
+
+ preempt_disable();
+ rtp = &__get_cpu_var(radix_tree_preloads);
+ while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
+ preempt_enable();
+ node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+ if (node == NULL)
+ goto out;
+ preempt_disable();
+ rtp = &__get_cpu_var(radix_tree_preloads);
+ if (rtp->nr < ARRAY_SIZE(rtp->nodes))
+ rtp->nodes[rtp->nr++] = node;
+ else
+ kmem_cache_free(radix_tree_node_cachep, node);
+ }
+ ret = 0;
+out:
+ return ret;
+}
+EXPORT_SYMBOL(radix_tree_preload);
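+
+/*
+ * Example (a sketch, not part of this API): the usual insert pattern,
+ * preloading outside the lock so radix_tree_insert() cannot fail on
+ * allocation. 'my_lock' and 'my_tree' are hypothetical caller state;
+ * radix_tree_preload_end() simply re-enables preemption.
+ *
+ *	int err = radix_tree_preload(GFP_KERNEL);
+ *	if (err)
+ *		return err;
+ *	spin_lock(&my_lock);
+ *	err = radix_tree_insert(&my_tree, index, item);
+ *	spin_unlock(&my_lock);
+ *	radix_tree_preload_end();
+ *	return err;
+ */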
+
+/*
+ * Return the maximum key which can be stored into a
+ * radix tree with height HEIGHT.
+ */
+static inline unsigned long radix_tree_maxindex(unsigned int height)
+{
+ return height_to_maxindex[height];
+}
+
+/*
+ * Extend a radix tree so it can store key @index.
+ */
+static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
+{
+ struct radix_tree_node *node;
+ unsigned int height;
+ int tag;
+
+ /* Figure out what the height should be. */
+ height = root->height + 1;
+ while (index > radix_tree_maxindex(height))
+ height++;
+
+ if (root->rnode == NULL) {
+ root->height = height;
+ goto out;
+ }
+
+ do {
+ unsigned int newheight;
+ if (!(node = radix_tree_node_alloc(root)))
+ return -ENOMEM;
+
+ /* Increase the height. */
+ node->slots[0] = indirect_to_ptr(root->rnode);
+
+ /* Propagate the aggregated tag info into the new root */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (root_tag_get(root, tag))
+ tag_set(node, tag, 0);
+ }
+
+ newheight = root->height+1;
+ node->height = newheight;
+ node->count = 1;
+ node = ptr_to_indirect(node);
+ rcu_assign_pointer(root->rnode, node);
+ root->height = newheight;
+ } while (height > root->height);
+out:
+ return 0;
+}
+
+/**
+ * radix_tree_insert - insert into a radix tree
+ * @root: radix tree root
+ * @index: index key
+ * @item: item to insert
+ *
+ * Insert an item into the radix tree at position @index.
+ */
+int radix_tree_insert(struct radix_tree_root *root,
+ unsigned long index, void *item)
+{
+ struct radix_tree_node *node = NULL, *slot;
+ unsigned int height, shift;
+ int offset;
+ int error;
+
+ BUG_ON(radix_tree_is_indirect_ptr(item));
+
+ /* Make sure the tree is high enough. */
+ if (index > radix_tree_maxindex(root->height)) {
+ error = radix_tree_extend(root, index);
+ if (error)
+ return error;
+ }
+
+ slot = indirect_to_ptr(root->rnode);
+
+ height = root->height;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ offset = 0; /* uninitialised var warning */
+ while (height > 0) {
+ if (slot == NULL) {
+ /* Have to add a child node. */
+ if (!(slot = radix_tree_node_alloc(root)))
+ return -ENOMEM;
+ slot->height = height;
+ if (node) {
+ rcu_assign_pointer(node->slots[offset], slot);
+ node->count++;
+ } else
+ rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
+ }
+
+ /* Go a level down */
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ node = slot;
+ slot = node->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ if (slot != NULL)
+ return -EEXIST;
+
+ if (node) {
+ node->count++;
+ rcu_assign_pointer(node->slots[offset], item);
+ BUG_ON(tag_get(node, 0, offset));
+ BUG_ON(tag_get(node, 1, offset));
+ } else {
+ rcu_assign_pointer(root->rnode, item);
+ BUG_ON(root_tag_get(root, 0));
+ BUG_ON(root_tag_get(root, 1));
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(radix_tree_insert);
+
+/*
+ * is_slot == 1 : search for the slot.
+ * is_slot == 0 : search for the node.
+ */
+static void *radix_tree_lookup_element(struct radix_tree_root *root,
+ unsigned long index, int is_slot)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *node, **slot;
+
+ node = rcu_dereference_raw(root->rnode);
+ if (node == NULL)
+ return NULL;
+
+ if (!radix_tree_is_indirect_ptr(node)) {
+ if (index > 0)
+ return NULL;
+ return is_slot ? (void *)&root->rnode : node;
+ }
+ node = indirect_to_ptr(node);
+
+ height = node->height;
+ if (index > radix_tree_maxindex(height))
+ return NULL;
+
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ do {
+ slot = (struct radix_tree_node **)
+ (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
+ node = rcu_dereference_raw(*slot);
+ if (node == NULL)
+ return NULL;
+
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ } while (height > 0);
+
+ return is_slot ? (void *)slot : indirect_to_ptr(node);
+}
+
+/**
+ * radix_tree_lookup_slot - lookup a slot in a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Returns: the slot corresponding to the position @index in the
+ * radix tree @root. This is useful for update-if-exists operations.
+ *
+ * This function can be called under rcu_read_lock iff the slot is not
+ * modified by radix_tree_replace_slot, otherwise it must be called
+ * exclusive from other writers. Any dereference of the slot must be done
+ * using radix_tree_deref_slot.
+ */
+void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index)
+{
+ return (void **)radix_tree_lookup_element(root, index, 1);
+}
+EXPORT_SYMBOL(radix_tree_lookup_slot);
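+
+/*
+ * Example (sketch): update-if-exists under the caller's own exclusion
+ * ('my_tree' and 'new_item' are hypothetical):
+ *
+ *	void **slot = radix_tree_lookup_slot(&my_tree, index);
+ *	if (slot)
+ *		radix_tree_replace_slot(slot, new_item);
+ */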
+
+/**
+ * radix_tree_lookup - perform lookup operation on a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Lookup the item at the position @index in the radix tree @root.
+ *
+ * This function can be called under rcu_read_lock, however the caller
+ * must manage lifetimes of leaf nodes (eg. RCU may also be used to free
+ * them safely). No RCU barriers are required to access or modify the
+ * returned item, however.
+ */
+void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
+{
+ return radix_tree_lookup_element(root, index, 0);
+}
+EXPORT_SYMBOL(radix_tree_lookup);
+
+/**
+ * radix_tree_tag_set - set a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree, from
+ * the root all the way down to the leaf node.
+ *
+ * Returns the address of the tagged item. Setting a tag on a not-present
+ * item is a bug.
+ */
+void *radix_tree_tag_set(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *slot;
+
+ height = root->height;
+ BUG_ON(index > radix_tree_maxindex(height));
+
+ slot = indirect_to_ptr(root->rnode);
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+
+ while (height > 0) {
+ int offset;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!tag_get(slot, tag, offset))
+ tag_set(slot, tag, offset);
+ slot = slot->slots[offset];
+ BUG_ON(slot == NULL);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ /* set the root's tag bit */
+ if (slot && !root_tag_get(root, tag))
+ root_tag_set(root, tag);
+
+ return slot;
+}
+EXPORT_SYMBOL(radix_tree_tag_set);
+
+/**
+ * radix_tree_tag_clear - clear a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index
+ *
+ * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
+ * corresponding to @index in the radix tree. If
+ * this causes the leaf node to have no tags set then clear the tag in the
+ * next-to-leaf node, etc.
+ *
+ * Returns the address of the tagged item on success, else NULL; i.e. it
+ * has the same return value and semantics as radix_tree_lookup().
+ */
+void *radix_tree_tag_clear(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ /*
+ * The radix tree path needs to be one longer than the maximum path
+ * since the "list" is null terminated.
+ */
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+ struct radix_tree_node *slot = NULL;
+ unsigned int height, shift;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL;
+ slot = indirect_to_ptr(root->rnode);
+
+ while (height > 0) {
+ int offset;
+
+ if (slot == NULL)
+ goto out;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp[1].offset = offset;
+ pathp[1].node = slot;
+ slot = slot->slots[offset];
+ pathp++;
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+
+ if (slot == NULL)
+ goto out;
+
+ while (pathp->node) {
+ if (!tag_get(pathp->node, tag, pathp->offset))
+ goto out;
+ tag_clear(pathp->node, tag, pathp->offset);
+ if (any_tag_set(pathp->node, tag))
+ goto out;
+ pathp--;
+ }
+
+ /* clear the root's tag bit */
+ if (root_tag_get(root, tag))
+ root_tag_clear(root, tag);
+
+out:
+ return slot;
+}
+EXPORT_SYMBOL(radix_tree_tag_clear);
+
+/**
+ * radix_tree_tag_get - get a tag on a radix tree node
+ * @root: radix tree root
+ * @index: index key
+ * @tag: tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Return values:
+ *
+ * 0: tag not present or not set
+ * 1: tag set
+ *
+ * Note that the return value of this function may not be relied on, even if
+ * the RCU lock is held, unless tag modification and node deletion are excluded
+ * from concurrency.
+ */
+int radix_tree_tag_get(struct radix_tree_root *root,
+ unsigned long index, unsigned int tag)
+{
+ unsigned int height, shift;
+ struct radix_tree_node *node;
+ int saw_unset_tag = 0;
+
+ /* check the root's tag bit */
+ if (!root_tag_get(root, tag))
+ return 0;
+
+ node = rcu_dereference_raw(root->rnode);
+ if (node == NULL)
+ return 0;
+
+ if (!radix_tree_is_indirect_ptr(node))
+ return (index == 0);
+ node = indirect_to_ptr(node);
+
+ height = node->height;
+ if (index > radix_tree_maxindex(height))
+ return 0;
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+
+ for ( ; ; ) {
+ int offset;
+
+ if (node == NULL)
+ return 0;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+ /*
+ * This is just a debug check. Later, we can bail as soon as
+ * we see an unset tag.
+ */
+ if (!tag_get(node, tag, offset))
+ saw_unset_tag = 1;
+ if (height == 1)
+ return !!tag_get(node, tag, offset);
+ node = rcu_dereference_raw(node->slots[offset]);
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ }
+}
+EXPORT_SYMBOL(radix_tree_tag_get);
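+
+/*
+ * Tag usage sketch (illustrative; 'my_tree' is hypothetical, the item
+ * at @index must be present, and the caller provides exclusion against
+ * concurrent tag changes and deletion):
+ *
+ *	radix_tree_tag_set(&my_tree, index, 0);
+ *	if (radix_tree_tag_get(&my_tree, index, 0))
+ *		...			(tag 0 observed set)
+ *	radix_tree_tag_clear(&my_tree, index, 0);
+ */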
+
+/**
+ * radix_tree_range_tag_if_tagged - for each item in given range set given
+ * tag if item has another tag set
+ * @root: radix tree root
+ * @first_indexp: pointer to a starting index of a range to scan
+ * @last_index: last index of a range to scan
+ * @nr_to_tag: maximum number items to tag
+ * @iftag: tag index to test
+ * @settag: tag index to set if tested tag is set
+ *
+ * This function scans range of radix tree from first_index to last_index
+ * (inclusive). For each item in the range if iftag is set, the function sets
+ * also settag. The function stops either after tagging nr_to_tag items or
+ * after reaching last_index.
+ *
+ * The tags must be set from the leaf level only and propagated back up the
+ * path to the root. We must do this so that we resolve the full path before
+ * setting any tags on intermediate nodes. If we set tags as we descend, then
+ * we can get to the leaf node and find that the index that has the iftag
+ * set is outside the range we are scanning. This results in dangling tags and
+ * can lead to problems with later tag operations (e.g. livelocks on lookups).
+ *
+ * The function returns number of leaves where the tag was set and sets
+ * *first_indexp to the first unscanned index.
+ * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
+ * be prepared to handle that.
+ */
+unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
+ unsigned long *first_indexp, unsigned long last_index,
+ unsigned long nr_to_tag,
+ unsigned int iftag, unsigned int settag)
+{
+ unsigned int height = root->height;
+ struct radix_tree_path path[height];
+ struct radix_tree_path *pathp = path;
+ struct radix_tree_node *slot;
+ unsigned int shift;
+ unsigned long tagged = 0;
+ unsigned long index = *first_indexp;
+
+ last_index = min(last_index, radix_tree_maxindex(height));
+ if (index > last_index)
+ return 0;
+ if (!nr_to_tag)
+ return 0;
+ if (!root_tag_get(root, iftag)) {
+ *first_indexp = last_index + 1;
+ return 0;
+ }
+ if (height == 0) {
+ *first_indexp = last_index + 1;
+ root_tag_set(root, settag);
+ return 1;
+ }
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ slot = indirect_to_ptr(root->rnode);
+
+ /*
+ * we fill the path from (root->height - 2) to 0, leaving the index at
+ * (root->height - 1) as a terminator. Zero the node in the terminator
+ * so that we can use this to end walk loops back up the path.
+ */
+ path[height - 1].node = NULL;
+
+ for (;;) {
+ int offset;
+
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!slot->slots[offset])
+ goto next;
+ if (!tag_get(slot, iftag, offset))
+ goto next;
+ if (height > 1) {
+ /* Go down one level */
+ height--;
+ shift -= RADIX_TREE_MAP_SHIFT;
+ path[height - 1].node = slot;
+ path[height - 1].offset = offset;
+ slot = slot->slots[offset];
+ continue;
+ }
+
+ /* tag the leaf */
+ tagged++;
+ tag_set(slot, settag, offset);
+
+ /* walk back up the path tagging interior nodes */
+ pathp = &path[0];
+ while (pathp->node) {
+ /* stop if we find a node with the tag already set */
+ if (tag_get(pathp->node, settag, pathp->offset))
+ break;
+ tag_set(pathp->node, settag, pathp->offset);
+ pathp++;
+ }
+
+next:
+ /* Go to next item at level determined by 'shift' */
+ index = ((index >> shift) + 1) << shift;
+ /* Overflow can happen when last_index is ~0UL... */
+ if (index > last_index || !index)
+ break;
+ if (tagged >= nr_to_tag)
+ break;
+ while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) {
+ /*
+ * We've fully scanned this node. Go up. Because
+ * last_index is guaranteed to be in the tree, what
+ * we do below cannot wander astray.
+ */
+ slot = path[height - 1].node;
+ height++;
+ shift += RADIX_TREE_MAP_SHIFT;
+ }
+ }
+ /*
+ * The iftag must have been set somewhere, because otherwise
+ * we would have returned immediately at the beginning of the function.
+ */
+ root_tag_set(root, settag);
+ *first_indexp = index;
+
+ return tagged;
+}
+EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
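+
+/*
+ * Example (sketch): tagging a whole range in batches so that the
+ * caller can drop locks and reschedule between batches. BATCH, TAG_A,
+ * TAG_B and 'my_tree' are hypothetical.
+ *
+ *	unsigned long first = 0, last = ULONG_MAX;
+ *
+ *	while (first <= last &&
+ *	       radix_tree_range_tag_if_tagged(&my_tree, &first, last,
+ *					      BATCH, TAG_A, TAG_B)) {
+ *		if (first == 0)
+ *			break;		(wrapped, as warned above)
+ *		cond_resched();
+ *	}
+ */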
+
+
+/**
+ * radix_tree_next_hole - find the next hole (not-present entry)
+ * @root: tree root
+ * @index: index key
+ * @max_scan: maximum range to search
+ *
+ * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest
+ * indexed hole.
+ *
+ * Returns: the index of the hole if found, otherwise returns an index
+ * outside of the set specified (in which case 'return - index >= max_scan'
+ * will be true). In rare cases of index wrap-around, 0 will be returned.
+ *
+ * radix_tree_next_hole may be called under rcu_read_lock. However, like
+ * radix_tree_gang_lookup, this will not atomically search a snapshot of
+ * the tree at a single point in time. For example, if a hole is created
+ * at index 5, then subsequently a hole is created at index 10,
+ * radix_tree_next_hole covering both indexes may return 10 if called
+ * under rcu_read_lock.
+ */
+unsigned long radix_tree_next_hole(struct radix_tree_root *root,
+ unsigned long index, unsigned long max_scan)
+{
+ unsigned long i;
+
+ for (i = 0; i < max_scan; i++) {
+ if (!radix_tree_lookup(root, index))
+ break;
+ index++;
+ if (index == 0)
+ break;
+ }
+
+ return index;
+}
+EXPORT_SYMBOL(radix_tree_next_hole);
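+
+/*
+ * For example (sketch): with items present at indices 5, 6 and 7 and
+ * index 8 empty, radix_tree_next_hole(root, 5, 10) returns 8.
+ */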
+
+/**
+ * radix_tree_prev_hole - find the prev hole (not-present entry)
+ * @root: tree root
+ * @index: index key
+ * @max_scan: maximum range to search
+ *
+ * Search backwards in the range [max(index-max_scan+1, 0), index]
+ * for the first hole.
+ *
+ * Returns: the index of the hole if found, otherwise returns an index
+ * outside of the set specified (in which case 'index - return >= max_scan'
+ * will be true). In rare cases of wrap-around, ULONG_MAX will be returned.
+ *
+ * radix_tree_prev_hole may be called under rcu_read_lock. However, like
+ * radix_tree_gang_lookup, this will not atomically search a snapshot of
+ * the tree at a single point in time. For example, if a hole is created
+ * at index 10, then subsequently a hole is created at index 5,
+ * radix_tree_prev_hole covering both indexes may return 5 if called under
+ * rcu_read_lock.
+ */
+unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
+ unsigned long index, unsigned long max_scan)
+{
+ unsigned long i;
+
+ for (i = 0; i < max_scan; i++) {
+ if (!radix_tree_lookup(root, index))
+ break;
+ index--;
+ if (index == ULONG_MAX)
+ break;
+ }
+
+ return index;
+}
+EXPORT_SYMBOL(radix_tree_prev_hole);
+
+static unsigned int
+__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift, height;
+ unsigned long i;
+
+ height = slot->height;
+ if (height == 0)
+ goto out;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ for ( ; height > 1; height--) {
+ i = (index >> shift) & RADIX_TREE_MAP_MASK;
+ for (;;) {
+ if (slot->slots[i] != NULL)
+ break;
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ i++;
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+ }
+
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = rcu_dereference_raw(slot->slots[i]);
+ if (slot == NULL)
+ goto out;
+ }
+
+ /* Bottom level: grab some items */
+ for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
+ index++;
+ if (slot->slots[i]) {
+ results[nr_found++] = &(slot->slots[i]);
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+out:
+ *next_index = index;
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup - perform multiple lookup on a radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items. Places
+ * them at *@results and returns the number of items which were placed at
+ * *@results.
+ *
+ * The implementation is naive.
+ *
+ * Like radix_tree_lookup, radix_tree_gang_lookup may be called under
+ * rcu_read_lock. In this case, rather than the returned results being
+ * an atomic snapshot of the tree at a single point in time, the semantics
+ * of an RCU protected gang lookup are as though multiple radix_tree_lookups
+ * have been issued in individual locks, and results stored in 'results'.
+ */
+unsigned int
+radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items)
+{
+ unsigned long max_index;
+ struct radix_tree_node *node;
+ unsigned long cur_index = first_index;
+ unsigned int ret;
+
+ node = rcu_dereference_raw(root->rnode);
+ if (!node)
+ return 0;
+
+ if (!radix_tree_is_indirect_ptr(node)) {
+ if (first_index > 0)
+ return 0;
+ results[0] = node;
+ return 1;
+ }
+ node = indirect_to_ptr(node);
+
+ max_index = radix_tree_maxindex(node->height);
+
+ ret = 0;
+ while (ret < max_items) {
+ unsigned int nr_found, slots_found, i;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ slots_found = __lookup(node, (void ***)results + ret, cur_index,
+ max_items - ret, &next_index);
+ nr_found = 0;
+ for (i = 0; i < slots_found; i++) {
+ struct radix_tree_node *slot;
+ slot = *(((void ***)results)[ret + i]);
+ if (!slot)
+ continue;
+ results[ret + nr_found] =
+ indirect_to_ptr(rcu_dereference_raw(slot));
+ nr_found++;
+ }
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup);
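+
+/*
+ * Example (sketch): visiting every item in batches. Only the items
+ * themselves are returned, so the caller must be able to recover an
+ * item's index from the item, as pagecache does with page->index;
+ * 'my_tree', process() and item_index() are hypothetical.
+ *
+ *	void *items[16];
+ *	unsigned long index = 0;
+ *	unsigned int i, nr;
+ *
+ *	do {
+ *		nr = radix_tree_gang_lookup(&my_tree, items, index, 16);
+ *		for (i = 0; i < nr; i++)
+ *			process(items[i]);
+ *		if (nr)
+ *			index = item_index(items[nr - 1]) + 1;
+ *	} while (nr == 16);
+ */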
+
+/**
+ * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ *
+ * Performs an index-ascending scan of the tree for present items. Places
+ * their slots at *@results and returns the number of items which were
+ * placed at *@results.
+ *
+ * The implementation is naive.
+ *
+ * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
+ * be dereferenced with radix_tree_deref_slot, and if using only RCU
+ * protection, radix_tree_deref_slot may fail requiring a retry.
+ */
+unsigned int
+radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
+ unsigned long first_index, unsigned int max_items)
+{
+ unsigned long max_index;
+ struct radix_tree_node *node;
+ unsigned long cur_index = first_index;
+ unsigned int ret;
+
+ node = rcu_dereference_raw(root->rnode);
+ if (!node)
+ return 0;
+
+ if (!radix_tree_is_indirect_ptr(node)) {
+ if (first_index > 0)
+ return 0;
+ results[0] = (void **)&root->rnode;
+ return 1;
+ }
+ node = indirect_to_ptr(node);
+
+ max_index = radix_tree_maxindex(node->height);
+
+ ret = 0;
+ while (ret < max_items) {
+ unsigned int slots_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ slots_found = __lookup(node, results + ret, cur_index,
+ max_items - ret, &next_index);
+ ret += slots_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
+
+/*
+ * FIXME: the two tag_get()s here should use find_next_bit() instead of
+ * open-coding the search.
+ */
+static unsigned int
+__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index,
+ unsigned int max_items, unsigned long *next_index, unsigned int tag)
+{
+ unsigned int nr_found = 0;
+ unsigned int shift, height;
+
+ height = slot->height;
+ if (height == 0)
+ goto out;
+ shift = (height-1) * RADIX_TREE_MAP_SHIFT;
+
+ while (height > 0) {
+ unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK;
+
+ for (;;) {
+ if (tag_get(slot, tag, i))
+ break;
+ index &= ~((1UL << shift) - 1);
+ index += 1UL << shift;
+ if (index == 0)
+ goto out; /* 32-bit wraparound */
+ i++;
+ if (i == RADIX_TREE_MAP_SIZE)
+ goto out;
+ }
+ height--;
+ if (height == 0) { /* Bottom level: grab some items */
+ unsigned long j = index & RADIX_TREE_MAP_MASK;
+
+ for ( ; j < RADIX_TREE_MAP_SIZE; j++) {
+ index++;
+ if (!tag_get(slot, tag, j))
+ continue;
+ /*
+ * Even though the tag was found set, we need to
+ * recheck that we have a non-NULL node, because
+ * if this lookup is lockless, it may have been
+ * subsequently deleted.
+ *
+ * Similar care must be taken in any place that
+ * looks up ->slots[x] without a lock (i.e. it can't
+ * rely on its value remaining the same).
+ */
+ if (slot->slots[j]) {
+ results[nr_found++] = &(slot->slots[j]);
+ if (nr_found == max_items)
+ goto out;
+ }
+ }
+ }
+ shift -= RADIX_TREE_MAP_SHIFT;
+ slot = rcu_dereference_raw(slot->slots[i]);
+ if (slot == NULL)
+ break;
+ }
+out:
+ *next_index = index;
+ return nr_found;
+}
+
+/**
+ * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
+ * based on a tag
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Performs an index-ascending scan of the tree for present items which
+ * have the tag indexed by @tag set. Places the items at *@results and
+ * returns the number of items which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag)
+{
+ struct radix_tree_node *node;
+ unsigned long max_index;
+ unsigned long cur_index = first_index;
+ unsigned int ret;
+
+ /* check the root's tag bit */
+ if (!root_tag_get(root, tag))
+ return 0;
+
+ node = rcu_dereference_raw(root->rnode);
+ if (!node)
+ return 0;
+
+ if (!radix_tree_is_indirect_ptr(node)) {
+ if (first_index > 0)
+ return 0;
+ results[0] = node;
+ return 1;
+ }
+ node = indirect_to_ptr(node);
+
+ max_index = radix_tree_maxindex(node->height);
+
+ ret = 0;
+ while (ret < max_items) {
+ unsigned int nr_found, slots_found, i;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ slots_found = __lookup_tag(node, (void ***)results + ret,
+ cur_index, max_items - ret, &next_index, tag);
+ nr_found = 0;
+ for (i = 0; i < slots_found; i++) {
+ struct radix_tree_node *slot;
+ slot = *(((void ***)results)[ret + i]);
+ if (!slot)
+ continue;
+ results[ret + nr_found] =
+ indirect_to_ptr(rcu_dereference_raw(slot));
+ nr_found++;
+ }
+ ret += nr_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
+
+/**
+ * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a
+ * radix tree based on a tag
+ * @root: radix tree root
+ * @results: where the results of the lookup are placed
+ * @first_index: start the lookup from this key
+ * @max_items: place up to this many items at *results
+ * @tag: the tag index (< RADIX_TREE_MAX_TAGS)
+ *
+ * Performs an index-ascending scan of the tree for present items which
+ * have the tag indexed by @tag set. Places the slots at *@results and
+ * returns the number of slots which were placed at *@results.
+ */
+unsigned int
+radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results,
+ unsigned long first_index, unsigned int max_items,
+ unsigned int tag)
+{
+ struct radix_tree_node *node;
+ unsigned long max_index;
+ unsigned long cur_index = first_index;
+ unsigned int ret;
+
+ /* check the root's tag bit */
+ if (!root_tag_get(root, tag))
+ return 0;
+
+ node = rcu_dereference_raw(root->rnode);
+ if (!node)
+ return 0;
+
+ if (!radix_tree_is_indirect_ptr(node)) {
+ if (first_index > 0)
+ return 0;
+ results[0] = (void **)&root->rnode;
+ return 1;
+ }
+ node = indirect_to_ptr(node);
+
+ max_index = radix_tree_maxindex(node->height);
+
+ ret = 0;
+ while (ret < max_items) {
+ unsigned int slots_found;
+ unsigned long next_index; /* Index of next search */
+
+ if (cur_index > max_index)
+ break;
+ slots_found = __lookup_tag(node, results + ret,
+ cur_index, max_items - ret, &next_index, tag);
+ ret += slots_found;
+ if (next_index == 0)
+ break;
+ cur_index = next_index;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
+
+
+/**
+ * radix_tree_shrink - shrink height of a radix tree to minimal
+ * @root radix tree root
+ */
+static inline void radix_tree_shrink(struct radix_tree_root *root)
+{
+ /* try to shrink tree height */
+ while (root->height > 0) {
+ struct radix_tree_node *to_free = root->rnode;
+ void *newptr;
+
+ BUG_ON(!radix_tree_is_indirect_ptr(to_free));
+ to_free = indirect_to_ptr(to_free);
+
+ /*
+ * If the candidate node has more than one child, or its only
+ * child is not in the leftmost slot, we cannot shrink.
+ */
+ if (to_free->count != 1)
+ break;
+ if (!to_free->slots[0])
+ break;
+
+ /*
+ * We don't need rcu_assign_pointer(), since we are simply
+ * moving the node from one part of the tree to another: if it
+ * was safe to dereference the old pointer to it
+ * (to_free->slots[0]), it will be safe to dereference the new
+ * one (root->rnode) as far as dependent read barriers go.
+ */
+ newptr = to_free->slots[0];
+ if (root->height > 1)
+ newptr = ptr_to_indirect(newptr);
+ root->rnode = newptr;
+ root->height--;
+
+ /*
+ * We have a dilemma here. The node's slot[0] must not be
+ * NULLed in case there are concurrent lookups expecting to
+ * find the item. However if this was a bottom-level node,
+ * then it may be subject to the slot pointer being visible
+ * to callers dereferencing it. If item corresponding to
+ * slot[0] is subsequently deleted, these callers would expect
+ * their slot to become empty sooner or later.
+ *
+ * For example, lockless pagecache will look up a slot, deref
+ * the page pointer, and if the page is 0 refcount it means it
+ * was concurrently deleted from pagecache so try the deref
+ * again. Fortunately there is already a requirement for logic
+ * to retry the entire slot lookup -- the indirect pointer
+ * problem (replacing direct root node with an indirect pointer
+ * also results in a stale slot). So tag the slot as indirect
+ * to force callers to retry.
+ */
+ if (root->height == 0)
+ *((unsigned long *)&to_free->slots[0]) |=
+ RADIX_TREE_INDIRECT_PTR;
+
+ radix_tree_node_free(to_free);
+ }
+}
+
+/**
+ * radix_tree_delete - delete an item from a radix tree
+ * @root: radix tree root
+ * @index: index key
+ *
+ * Remove the item at @index from the radix tree rooted at @root.
+ *
+ * Returns the address of the deleted item, or NULL if it was not present.
+ */
+void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
+{
+ /*
+ * The radix tree path needs to be one longer than the maximum path
+ * since the "list" is null terminated.
+ */
+ struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
+ struct radix_tree_node *slot = NULL;
+ struct radix_tree_node *to_free;
+ unsigned int height, shift;
+ int tag;
+ int offset;
+
+ height = root->height;
+ if (index > radix_tree_maxindex(height))
+ goto out;
+
+ slot = root->rnode;
+ if (height == 0) {
+ root_tag_clear_all(root);
+ root->rnode = NULL;
+ goto out;
+ }
+ slot = indirect_to_ptr(slot);
+
+ shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
+ pathp->node = NULL;
+
+ do {
+ if (slot == NULL)
+ goto out;
+
+ pathp++;
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ pathp->offset = offset;
+ pathp->node = slot;
+ slot = slot->slots[offset];
+ shift -= RADIX_TREE_MAP_SHIFT;
+ height--;
+ } while (height > 0);
+
+ if (slot == NULL)
+ goto out;
+
+ /*
+ * Clear all tags associated with the just-deleted item
+ */
+ for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+ if (tag_get(pathp->node, tag, pathp->offset))
+ radix_tree_tag_clear(root, index, tag);
+ }
+
+ to_free = NULL;
+ /* Now free the nodes we do not need anymore */
+ while (pathp->node) {
+ pathp->node->slots[pathp->offset] = NULL;
+ pathp->node->count--;
+ /*
+ * Queue the node for deferred freeing after the
+ * last reference to it disappears (set NULL, above).
+ */
+ if (to_free)
+ radix_tree_node_free(to_free);
+
+ if (pathp->node->count) {
+ if (pathp->node == indirect_to_ptr(root->rnode))
+ radix_tree_shrink(root);
+ goto out;
+ }
+
+ /* Node with zero slots in use so free it */
+ to_free = pathp->node;
+ pathp--;
+
+ }
+ root_tag_clear_all(root);
+ root->height = 0;
+ root->rnode = NULL;
+ if (to_free)
+ radix_tree_node_free(to_free);
+
+out:
+ return slot;
+}
+EXPORT_SYMBOL(radix_tree_delete);
+
+/**
+ * radix_tree_tagged - test whether any items in the tree are tagged
+ * @root: radix tree root
+ * @tag: tag to test
+ */
+int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag)
+{
+ return root_tag_get(root, tag);
+}
+EXPORT_SYMBOL(radix_tree_tagged);
+
+static void
+radix_tree_node_ctor(void *node)
+{
+ memset(node, 0, sizeof(struct radix_tree_node));
+}
+
+static __init unsigned long __maxindex(unsigned int height)
+{
+ unsigned int width = height * RADIX_TREE_MAP_SHIFT;
+ int shift = RADIX_TREE_INDEX_BITS - width;
+
+ if (shift < 0)
+ return ~0UL;
+ if (shift >= BITS_PER_LONG)
+ return 0UL;
+ return ~0UL >> shift;
+}
+
+static __init void radix_tree_init_maxindex(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+ height_to_maxindex[i] = __maxindex(i);
+}
+
+static int radix_tree_callback(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ int cpu = (long)hcpu;
+ struct radix_tree_preload *rtp;
+
+ /* Free per-cpu pool of preloaded nodes */
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+ rtp = &per_cpu(radix_tree_preloads, cpu);
+ while (rtp->nr) {
+ kmem_cache_free(radix_tree_node_cachep,
+ rtp->nodes[rtp->nr-1]);
+ rtp->nodes[rtp->nr-1] = NULL;
+ rtp->nr--;
+ }
+ }
+ return NOTIFY_OK;
+}
+
+void __init radix_tree_init(void)
+{
+ radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
+ sizeof(struct radix_tree_node), 0,
+ SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
+ radix_tree_node_ctor);
+ radix_tree_init_maxindex();
+ hotcpu_notifier(radix_tree_callback, 0);
+}
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
new file mode 100644
index 00000000..8a381027
--- /dev/null
+++ b/lib/raid6/Makefile
@@ -0,0 +1,75 @@
+obj-$(CONFIG_RAID6_PQ) += raid6_pq.o
+
+raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
+ int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \
+ altivec8.o mmx.o sse1.o sse2.o
+hostprogs-y += mktables
+
+quiet_cmd_unroll = UNROLL $@
+ cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \
+ < $< > $@ || ( rm -f $@ && exit 1 )
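+
+# A sketch of what the unroll step does (unroll.awk itself is
+# authoritative): lines containing $$ are replicated once per unrolling
+# step with $$ replaced by 0..N-1, $# is replaced by N, so e.g. int.uc
+# with UNROLL := 2 becomes the two-way unrolled int2.c.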
+
+ifeq ($(CONFIG_ALTIVEC),y)
+altivec_flags := -maltivec -mabi=altivec
+endif
+
+targets += int1.c
+$(obj)/int1.c: UNROLL := 1
+$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+targets += int2.c
+$(obj)/int2.c: UNROLL := 2
+$(obj)/int2.c: $(src)/int.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+targets += int4.c
+$(obj)/int4.c: UNROLL := 4
+$(obj)/int4.c: $(src)/int.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+targets += int8.c
+$(obj)/int8.c: UNROLL := 8
+$(obj)/int8.c: $(src)/int.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+targets += int16.c
+$(obj)/int16.c: UNROLL := 16
+$(obj)/int16.c: $(src)/int.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+targets += int32.c
+$(obj)/int32.c: UNROLL := 32
+$(obj)/int32.c: $(src)/int.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_altivec1.o += $(altivec_flags)
+targets += altivec1.c
+$(obj)/altivec1.c: UNROLL := 1
+$(obj)/altivec1.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_altivec2.o += $(altivec_flags)
+targets += altivec2.c
+$(obj)/altivec2.c: UNROLL := 2
+$(obj)/altivec2.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_altivec4.o += $(altivec_flags)
+targets += altivec4.c
+$(obj)/altivec4.c: UNROLL := 4
+$(obj)/altivec4.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_altivec8.o += $(altivec_flags)
+targets += altivec8.c
+$(obj)/altivec8.c: UNROLL := 8
+$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+quiet_cmd_mktable = TABLE $@
+ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
+
+targets += tables.c
+$(obj)/tables.c: $(obj)/mktables FORCE
+ $(call if_changed,mktable)
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
new file mode 100644
index 00000000..b595f560
--- /dev/null
+++ b/lib/raid6/algos.c
@@ -0,0 +1,154 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/algos.c
+ *
+ * Algorithm list and algorithm selection for RAID-6
+ */
+
+#include <linux/raid/pq.h>
+#ifndef __KERNEL__
+#include <sys/mman.h>
+#include <stdio.h>
+#else
+#include <linux/gfp.h>
+#if !RAID6_USE_EMPTY_ZERO_PAGE
+/* In .bss so it's zeroed */
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+EXPORT_SYMBOL(raid6_empty_zero_page);
+#endif
+#endif
+
+struct raid6_calls raid6_call;
+EXPORT_SYMBOL_GPL(raid6_call);
+
+const struct raid6_calls * const raid6_algos[] = {
+ &raid6_intx1,
+ &raid6_intx2,
+ &raid6_intx4,
+ &raid6_intx8,
+#if defined(__ia64__)
+ &raid6_intx16,
+ &raid6_intx32,
+#endif
+#if defined(__i386__) && !defined(__arch_um__)
+ &raid6_mmxx1,
+ &raid6_mmxx2,
+ &raid6_sse1x1,
+ &raid6_sse1x2,
+ &raid6_sse2x1,
+ &raid6_sse2x2,
+#endif
+#if defined(__x86_64__) && !defined(__arch_um__)
+ &raid6_sse2x1,
+ &raid6_sse2x2,
+ &raid6_sse2x4,
+#endif
+#ifdef CONFIG_ALTIVEC
+ &raid6_altivec1,
+ &raid6_altivec2,
+ &raid6_altivec4,
+ &raid6_altivec8,
+#endif
+ NULL
+};
+
+#ifdef __KERNEL__
+#define RAID6_TIME_JIFFIES_LG2 4
+#else
+/* Need more time to be stable in userspace */
+#define RAID6_TIME_JIFFIES_LG2 9
+#define time_before(x, y) ((x) < (y))
+#endif
+
+/* Try to pick the best algorithm */
+/* This code uses the gfmul table as convenient data set to abuse */
+
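+/*
+ * A note on the numbers printed below (derived from this loop): each
+ * gen_syndrome() call covers 64 KiB (2^16 bytes) of data and a trial
+ * lasts 2^RAID6_TIME_JIFFIES_LG2 jiffies, so
+ *
+ *	MB/s = perf * 2^16 * HZ / 2^RAID6_TIME_JIFFIES_LG2 / 2^20
+ *	     = (perf * HZ) >> (20 - 16 + RAID6_TIME_JIFFIES_LG2)
+ */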
+int __init raid6_select_algo(void)
+{
+ const struct raid6_calls * const * algo;
+ const struct raid6_calls * best;
+ char *syndromes;
+ void *dptrs[(65536/PAGE_SIZE)+2];
+ int i, disks;
+ unsigned long perf, bestperf;
+ int bestprefer;
+ unsigned long j0, j1;
+
+ disks = (65536/PAGE_SIZE)+2;
+ for ( i = 0 ; i < disks-2 ; i++ ) {
+ dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i;
+ }
+
+ /* Normal code - use a 2-page allocation to avoid D$ conflict */
+ syndromes = (void *) __get_free_pages(GFP_KERNEL, 1);
+
+ if ( !syndromes ) {
+ printk("raid6: Yikes! No memory available.\n");
+ return -ENOMEM;
+ }
+
+ dptrs[disks-2] = syndromes;
+ dptrs[disks-1] = syndromes + PAGE_SIZE;
+
+ bestperf = 0; bestprefer = 0; best = NULL;
+
+ for ( algo = raid6_algos ; *algo ; algo++ ) {
+ if ( !(*algo)->valid || (*algo)->valid() ) {
+ perf = 0;
+
+ preempt_disable();
+ j0 = jiffies;
+ while ( (j1 = jiffies) == j0 )
+ cpu_relax();
+ while (time_before(jiffies,
+ j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+ (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs);
+ perf++;
+ }
+ preempt_enable();
+
+ if ( (*algo)->prefer > bestprefer ||
+ ((*algo)->prefer == bestprefer &&
+ perf > bestperf) ) {
+ best = *algo;
+ bestprefer = best->prefer;
+ bestperf = perf;
+ }
+ printk("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+ (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ }
+ }
+
+ if (best) {
+ printk("raid6: using algorithm %s (%ld MB/s)\n",
+ best->name,
+ (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+ raid6_call = *best;
+ } else
+ printk("raid6: Yikes! No algorithm found!\n");
+
+ free_pages((unsigned long)syndromes, 1);
+
+ return best ? 0 : -EINVAL;
+}
+
+static void raid6_exit(void)
+{
+ do { } while (0);
+}
+
+subsys_initcall(raid6_select_algo);
+module_exit(raid6_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("RAID6 Q-syndrome calculations");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc
new file mode 100644
index 00000000..2654d5c8
--- /dev/null
+++ b/lib/raid6/altivec.uc
@@ -0,0 +1,130 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6altivec$#.c
+ *
+ * $#-way unrolled Altivec (VMX) math RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ *
+ * <benh> hpa: in process,
+ * you can just "steal" the vec unit with enable_kernel_altivec() (but
+ * bracket this with preempt_disable/enable or in a lock)
+ */
+
+#include <linux/raid/pq.h>
+
+#ifdef CONFIG_ALTIVEC
+
+#include <altivec.h>
+#ifdef __KERNEL__
+# include <asm/system.h>
+# include <asm/cputable.h>
+#endif
+
+/*
+ * This is the C data type to use. We use a vector of
+ * signed char so vec_cmpgt() will generate the right
+ * instruction.
+ */
+
+typedef vector signed char unative_t;
+
+#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
+#define NSIZE sizeof(unative_t)
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+ return vec_add(v,v);
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+ unative_t zv = NBYTES(0);
+
+ /* vec_cmpgt returns a vector bool char; thus the need for the cast */
+ return (unative_t)vec_cmpgt(zv, v);
+}
+
+
+/* This is noinline to make damned sure that gcc doesn't move any of the
+ Altivec code around the enable/disable code */
+static void noinline
+raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+ unative_t x1d = NBYTES(0x1d);
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+ wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ wp$$ = vec_xor(wp$$, wd$$);
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+ w2$$ = vec_and(w2$$, x1d);
+ w1$$ = vec_xor(w1$$, w2$$);
+ wq$$ = vec_xor(w1$$, wd$$);
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+
+ raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs);
+
+ preempt_enable();
+}
+
+int raid6_have_altivec(void);
+#if $# == 1
+int raid6_have_altivec(void)
+{
+ /* This assumes either all CPUs have Altivec or none does */
+# ifdef __KERNEL__
+ return cpu_has_feature(CPU_FTR_ALTIVEC);
+# else
+ return 1;
+# endif
+}
+#endif
+
+const struct raid6_calls raid6_altivec$# = {
+ raid6_altivec$#_gen_syndrome,
+ raid6_have_altivec,
+ "altivecx$#",
+ 0
+};
+
+#endif /* CONFIG_ALTIVEC */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc
new file mode 100644
index 00000000..d1e276a1
--- /dev/null
+++ b/lib/raid6/int.uc
@@ -0,0 +1,117 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6int$#.c
+ *
+ * $#-way unrolled portable integer math RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ */
+
+#include <linux/raid/pq.h>
+
+/*
+ * This is the C data type to use
+ */
+
+/* Change this from BITS_PER_LONG if there is something better... */
+#if BITS_PER_LONG == 64
+# define NBYTES(x) ((x) * 0x0101010101010101UL)
+# define NSIZE 8
+# define NSHIFT 3
+# define NSTRING "64"
+typedef u64 unative_t;
+#else
+# define NBYTES(x) ((x) * 0x01010101U)
+# define NSIZE 4
+# define NSHIFT 2
+# define NSTRING "32"
+typedef u32 unative_t;
+#endif
+
+
+
+/*
+ * IA-64 wants insane amounts of unrolling. On other architectures that
+ * is just a waste of space.
+ */
+#if ($# <= 8) || defined(__ia64__)
+
+
+/*
+ * These sub-operations are separate inlines since they can sometimes be
+ * specially optimized using architecture-specific hacks.
+ */
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
+{
+ unative_t vv;
+
+ vv = (v << 1) & NBYTES(0xfe);
+ return vv;
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline __attribute_const__ unative_t MASK(unative_t v)
+{
+ unative_t vv;
+
+ vv = v & NBYTES(0x80);
+ vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */
+ return vv;
+}
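+
+/*
+ * Together, MASK() and SHLBYTE() give a byte-wise GF(2^8) multiply by
+ * x (i.e. by 2), reduced modulo x^8 + x^4 + x^3 + x^2 + 1:
+ *
+ *	w2 = MASK(wq) & NBYTES(0x1d);	(per-byte reduction term)
+ *	w1 = SHLBYTE(wq);		(shift within each byte)
+ *	2 * wq == w1 ^ w2
+ *
+ * which is exactly the step the syndrome loop below performs.
+ */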
+
+
+static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+ wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+ wp$$ ^= wd$$;
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+ w2$$ &= NBYTES(0x1d);
+ w1$$ ^= w2$$;
+ wq$$ = w1$$ ^ wd$$;
+ }
+ *(unative_t *)&p[d+NSIZE*$$] = wp$$;
+ *(unative_t *)&q[d+NSIZE*$$] = wq$$;
+ }
+}
+
+const struct raid6_calls raid6_intx$# = {
+ raid6_int$#_gen_syndrome,
+ NULL, /* always valid */
+ "int" NSTRING "x$#",
+ 0
+};
+
+#endif
diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c
new file mode 100644
index 00000000..3b150084
--- /dev/null
+++ b/lib/raid6/mktables.c
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * mktables.c
+ *
+ * Make RAID-6 tables. This is a host user space program to be run at
+ * compile time.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <time.h>
+
+static uint8_t gfmul(uint8_t a, uint8_t b)
+{
+ uint8_t v = 0;
+
+ while (b) {
+ if (b & 1)
+ v ^= a;
+ a = (a << 1) ^ (a & 0x80 ? 0x1d : 0);
+ b >>= 1;
+ }
+
+ return v;
+}
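+
+/*
+ * Worked example: gfmul(0x02, 0x80) == 0x1d. Doubling 0x80 overflows
+ * the field and is reduced by the primitive polynomial
+ * x^8 + x^4 + x^3 + x^2 + 1 (0x11d), whose low byte is 0x1d.
+ */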
+
+static uint8_t gfpow(uint8_t a, int b)
+{
+ uint8_t v = 1;
+
+ b %= 255;
+ if (b < 0)
+ b += 255;
+
+ while (b) {
+ if (b & 1)
+ v = gfmul(v, a);
+ a = gfmul(a, a);
+ b >>= 1;
+ }
+
+ return v;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, j, k;
+ uint8_t v;
+ uint8_t exptbl[256], invtbl[256];
+
+ printf("#include <linux/raid/pq.h>\n");
+
+ /* Compute multiplication table */
+ printf("\nconst u8 __attribute__((aligned(256)))\n"
+ "raid6_gfmul[256][256] =\n"
+ "{\n");
+ for (i = 0; i < 256; i++) {
+ printf("\t{\n");
+ for (j = 0; j < 256; j += 8) {
+ printf("\t\t");
+ for (k = 0; k < 8; k++)
+ printf("0x%02x,%c", gfmul(i, j + k),
+ (k == 7) ? '\n' : ' ');
+ }
+ printf("\t},\n");
+ }
+ printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfmul);\n");
+ printf("#endif\n");
+
+ /* Compute power-of-2 table (exponent) */
+ v = 1;
+ printf("\nconst u8 __attribute__((aligned(256)))\n"
+ "raid6_gfexp[256] =\n" "{\n");
+ for (i = 0; i < 256; i += 8) {
+ printf("\t");
+ for (j = 0; j < 8; j++) {
+ exptbl[i + j] = v;
+ printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+ v = gfmul(v, 2);
+ if (v == 1)
+ v = 0; /* For entry 255, not a real entry */
+ }
+ }
+ printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfexp);\n");
+ printf("#endif\n");
+
+ /* Compute inverse table x^-1 == x^254 */
+ printf("\nconst u8 __attribute__((aligned(256)))\n"
+ "raid6_gfinv[256] =\n" "{\n");
+ for (i = 0; i < 256; i += 8) {
+ printf("\t");
+ for (j = 0; j < 8; j++) {
+ invtbl[i + j] = v = gfpow(i + j, 254);
+ printf("0x%02x,%c", v, (j == 7) ? '\n' : ' ');
+ }
+ }
+ printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfinv);\n");
+ printf("#endif\n");
+
+ /* Compute inv(2^x + 1) (exponent-xor-inverse) table */
+ printf("\nconst u8 __attribute__((aligned(256)))\n"
+ "raid6_gfexi[256] =\n" "{\n");
+ for (i = 0; i < 256; i += 8) {
+ printf("\t");
+ for (j = 0; j < 8; j++)
+ printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1],
+ (j == 7) ? '\n' : ' ');
+ }
+ printf("};\n");
+ printf("#ifdef __KERNEL__\n");
+ printf("EXPORT_SYMBOL(raid6_gfexi);\n");
+ printf("#endif\n");
+
+ return 0;
+}
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c
new file mode 100644
index 00000000..279347f2
--- /dev/null
+++ b/lib/raid6/mmx.c
@@ -0,0 +1,142 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/mmx.c
+ *
+ * MMX implementation of RAID-6 syndrome functions
+ */
+
+#if defined(__i386__) && !defined(__arch_um__)
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+/* Shared with raid6/sse1.c */
+const struct raid6_mmx_constants {
+ u64 x1d;
+} raid6_mmx_constants = {
+ 0x1d1d1d1d1d1d1d1dULL,
+};
+
+static int raid6_have_mmx(void)
+{
+ /* Not really "boot_cpu" but "all_cpus" */
+ return boot_cpu_has(X86_FEATURE_MMX);
+}
+
+/*
+ * Plain MMX implementation
+ */
+static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+ asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+
+ for ( d = 0 ; d < bytes ; d += 8 ) {
+ asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+ asm volatile("movq %mm2,%mm4"); /* Q[0] */
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
+ asm volatile("pcmpgtb %mm4,%mm5");
+ asm volatile("paddb %mm4,%mm4");
+ asm volatile("pand %mm0,%mm5");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm5,%mm5");
+ asm volatile("pxor %mm6,%mm2");
+ asm volatile("pxor %mm6,%mm4");
+ }
+ asm volatile("movq %%mm2,%0" : "=m" (p[d]));
+ asm volatile("pxor %mm2,%mm2");
+ asm volatile("movq %%mm4,%0" : "=m" (q[d]));
+ asm volatile("pxor %mm4,%mm4");
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_mmxx1 = {
+ raid6_mmx1_gen_syndrome,
+ raid6_have_mmx,
+ "mmxx1",
+ 0
+};
+
+/*
+ * Unrolled-by-2 MMX implementation
+ */
+static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+ asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+ asm volatile("pxor %mm7,%mm7"); /* Zero temp */
+
+ for ( d = 0 ; d < bytes ; d += 16 ) {
+ asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+ asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8]));
+ asm volatile("movq %mm2,%mm4"); /* Q[0] */
+ asm volatile("movq %mm3,%mm6"); /* Q[1] */
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ asm volatile("pcmpgtb %mm4,%mm5");
+ asm volatile("pcmpgtb %mm6,%mm7");
+ asm volatile("paddb %mm4,%mm4");
+ asm volatile("paddb %mm6,%mm6");
+ asm volatile("pand %mm0,%mm5");
+ asm volatile("pand %mm0,%mm7");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm7,%mm6");
+ asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
+ asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
+ asm volatile("pxor %mm5,%mm2");
+ asm volatile("pxor %mm7,%mm3");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm7,%mm6");
+ asm volatile("pxor %mm5,%mm5");
+ asm volatile("pxor %mm7,%mm7");
+ }
+ asm volatile("movq %%mm2,%0" : "=m" (p[d]));
+ asm volatile("movq %%mm3,%0" : "=m" (p[d+8]));
+ asm volatile("movq %%mm4,%0" : "=m" (q[d]));
+ asm volatile("movq %%mm6,%0" : "=m" (q[d+8]));
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_mmxx2 = {
+ raid6_mmx2_gen_syndrome,
+ raid6_have_mmx,
+ "mmxx2",
+ 0
+};
+
+#endif
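
Both variants above compute the same Horner-style recurrence, walking the data disks from highest index to lowest: P accumulates a plain XOR while Q is doubled in GF(2^8) before each disk is folded in, so Q ends up as the sum of 2^z * D_z. A scalar sketch of that loop, equivalent in effect to what the MMX registers hold (illustrative only; the kernel's portable version is generated from int.uc):

	static void gen_syndrome_ref(int disks, size_t bytes, void **ptrs)
	{
		u8 **dptr = (u8 **)ptrs;
		u8 *p = dptr[disks - 2];	/* XOR parity */
		u8 *q = dptr[disks - 1];	/* RS syndrome */
		int z0 = disks - 3;		/* highest data disk */
		size_t d;
		int z;

		for (d = 0; d < bytes; d++) {
			u8 wp = dptr[z0][d], wq = wp;

			for (z = z0 - 1; z >= 0; z--) {
				/* wq *= 2 in GF(2^8), then fold in disk z */
				wq = (wq << 1) ^ ((wq & 0x80) ? 0x1d : 0);
				wp ^= dptr[z][d];
				wq ^= dptr[z][d];
			}
			p[d] = wp;
			q[d] = wq;
		}
	}
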
diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c
new file mode 100644
index 00000000..8590d19c
--- /dev/null
+++ b/lib/raid6/recov.c
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 59 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/recov.c
+ *
+ * RAID-6 data recovery in dual failure mode. In single failure mode,
+ * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct
+ * the syndrome.)
+ */
+
+#include <linux/raid/pq.h>
+
+/* Recover two failed data blocks. */
+void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
+ void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ u8 px, qx, db;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data pages
+ Use the dead data pages as temporary storage for
+ delta p and delta q */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks-2] = p;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ px = *p ^ *dp;
+ qx = qmul[*q ^ *dq];
+ *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
+ *dp++ = db ^ px; /* Reconstructed A */
+ p++; q++;
+ }
+}
+EXPORT_SYMBOL_GPL(raid6_2data_recov);
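
A worked sketch of why these two table lookups suffice (all arithmetic in GF(2^8); "+" denotes field addition, i.e. XOR, and g = 2): let Pd = P + P' and Qd = Q + Q' be the deltas produced by the zero-page syndrome pass above. Then

	Pd = D_a + D_b
	Qd = g^a * D_a + g^b * D_b

and solving for the missing blocks gives

	D_b = (g^a * Pd + Qd) / (g^a + g^b)
	    = pbmul[Pd] + qmul[Qd]
	D_a = D_b + Pd

since g^a / (g^a + g^b) = 1 / (1 + g^(b-a)) — exactly the inv(2^x + 1) value that raid6_gfexi[failb-faila] selects — while qmul multiplies by (g^a + g^b)^(-1) via raid6_gfinv.
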
+
+/* Recover failure of one data block plus the P block */
+void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+
+ p = (u8 *)ptrs[disks-2];
+ q = (u8 *)ptrs[disks-1];
+
+ /* Compute syndrome with zero for the missing data page
+ Use the dead data page as temporary storage for delta q */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks-1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks-1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ /* Now do it... */
+ while ( bytes-- ) {
+ *p++ ^= *dq = qmul[*q ^ *dq];
+ q++; dq++;
+ }
+}
+EXPORT_SYMBOL_GPL(raid6_datap_recov);
+
+#ifndef __KERNEL__
+/* Testing only */
+
+/* Recover two failed blocks. */
+void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
+{
+ if ( faila > failb ) {
+ int tmp = faila;
+ faila = failb;
+ failb = tmp;
+ }
+
+ if ( failb == disks-1 ) {
+ if ( faila == disks-2 ) {
+ /* P+Q failure. Just rebuild the syndrome. */
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+ } else {
+ /* data+Q failure. Reconstruct data from P,
+ then rebuild syndrome. */
+ /* NOT IMPLEMENTED - equivalent to RAID-5 */
+ }
+ } else {
+ if ( failb == disks-2 ) {
+ /* data+P failure. */
+ raid6_datap_recov(disks, bytes, faila, ptrs);
+ } else {
+ /* data+data failure. */
+ raid6_2data_recov(disks, bytes, faila, failb, ptrs);
+ }
+ }
+}
+
+#endif
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c
new file mode 100644
index 00000000..10dd9194
--- /dev/null
+++ b/lib/raid6/sse1.c
@@ -0,0 +1,162 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 59 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/sse1.c
+ *
+ * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
+ *
+ * This is really an MMX implementation, but it requires SSE-1 or
+ * AMD MMXEXT for prefetch support and a few other features. The
+ * support for nontemporal memory accesses is enough to make this
+ * worthwhile as a separate implementation.
+ */
+
+#if defined(__i386__) && !defined(__arch_um__)
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+/* Defined in raid6/mmx.c */
+extern const struct raid6_mmx_constants {
+ u64 x1d;
+} raid6_mmx_constants;
+
+static int raid6_have_sse1_or_mmxext(void)
+{
+ /* Not really boot_cpu but "all_cpus" */
+ return boot_cpu_has(X86_FEATURE_MMX) &&
+ (boot_cpu_has(X86_FEATURE_XMM) ||
+ boot_cpu_has(X86_FEATURE_MMXEXT));
+}
+
+/*
+ * Plain SSE1 implementation
+ */
+static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+ asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+
+ for ( d = 0 ; d < bytes ; d += 8 ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+ asm volatile("movq %mm2,%mm4"); /* Q[0] */
+ asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
+ for ( z = z0-2 ; z >= 0 ; z-- ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("pcmpgtb %mm4,%mm5");
+ asm volatile("paddb %mm4,%mm4");
+ asm volatile("pand %mm0,%mm5");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm5,%mm5");
+ asm volatile("pxor %mm6,%mm2");
+ asm volatile("pxor %mm6,%mm4");
+ asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
+ }
+ asm volatile("pcmpgtb %mm4,%mm5");
+ asm volatile("paddb %mm4,%mm4");
+ asm volatile("pand %mm0,%mm5");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm5,%mm5");
+ asm volatile("pxor %mm6,%mm2");
+ asm volatile("pxor %mm6,%mm4");
+
+ asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
+ asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse1x1 = {
+ raid6_sse11_gen_syndrome,
+ raid6_have_sse1_or_mmxext,
+ "sse1x1",
+ 1 /* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 SSE1 implementation
+ */
+static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
+ asm volatile("pxor %mm5,%mm5"); /* Zero temp */
+ asm volatile("pxor %mm7,%mm7"); /* Zero temp */
+
+ /* We uniformly assume a single prefetch covers at least 16 bytes */
+ for ( d = 0 ; d < bytes ; d += 16 ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
+ asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
+ asm volatile("movq %mm2,%mm4"); /* Q[0] */
+ asm volatile("movq %mm3,%mm6"); /* Q[1] */
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("pcmpgtb %mm4,%mm5");
+ asm volatile("pcmpgtb %mm6,%mm7");
+ asm volatile("paddb %mm4,%mm4");
+ asm volatile("paddb %mm6,%mm6");
+ asm volatile("pand %mm0,%mm5");
+ asm volatile("pand %mm0,%mm7");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm7,%mm6");
+ asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
+ asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
+ asm volatile("pxor %mm5,%mm2");
+ asm volatile("pxor %mm7,%mm3");
+ asm volatile("pxor %mm5,%mm4");
+ asm volatile("pxor %mm7,%mm6");
+ asm volatile("pxor %mm5,%mm5");
+ asm volatile("pxor %mm7,%mm7");
+ }
+ asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
+ asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
+ asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
+ asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
+ }
+
+	asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse1x2 = {
+ raid6_sse12_gen_syndrome,
+ raid6_have_sse1_or_mmxext,
+ "sse1x2",
+ 1 /* Has cache hints */
+};
+
+#endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
new file mode 100644
index 00000000..bc2d57da
--- /dev/null
+++ b/lib/raid6/sse2.c
@@ -0,0 +1,262 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 59 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/sse2.c
+ *
+ * SSE-2 implementation of RAID-6 syndrome functions
+ *
+ */
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+#include <linux/raid/pq.h>
+#include "x86.h"
+
+static const struct raid6_sse_constants {
+ u64 x1d[2];
+} raid6_sse_constants __attribute__((aligned(16))) = {
+ { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
+};
+
+static int raid6_have_sse2(void)
+{
+ /* Not really boot_cpu but "all_cpus" */
+ return boot_cpu_has(X86_FEATURE_MMX) &&
+ boot_cpu_has(X86_FEATURE_FXSR) &&
+ boot_cpu_has(X86_FEATURE_XMM) &&
+ boot_cpu_has(X86_FEATURE_XMM2);
+}
+
+/*
+ * Plain SSE2 implementation
+ */
+static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+ asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
+
+ for ( d = 0 ; d < bytes ; d += 16 ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
+ asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
+ asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
+ for ( z = z0-2 ; z >= 0 ; z-- ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm6,%xmm2");
+ asm volatile("pxor %xmm6,%xmm4");
+ asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
+ }
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm6,%xmm2");
+ asm volatile("pxor %xmm6,%xmm4");
+
+ asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("pxor %xmm2,%xmm2");
+ asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("pxor %xmm4,%xmm4");
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse2x1 = {
+ raid6_sse21_gen_syndrome,
+ raid6_have_sse2,
+ "sse2x1",
+ 1 /* Has cache hints */
+};
+
+/*
+ * Unrolled-by-2 SSE2 implementation
+ */
+static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+ asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
+ asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
+
+ /* We uniformly assume a single prefetch covers at least 32 bytes */
+ for ( d = 0 ; d < bytes ; d += 32 ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
+ asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
+ asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
+ asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
+ asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
+ asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ }
+ asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+ asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse2x2 = {
+ raid6_sse22_gen_syndrome,
+ raid6_have_sse2,
+ "sse2x2",
+ 1 /* Has cache hints */
+};
+
+#endif
+
+#if defined(__x86_64__) && !defined(__arch_um__)
+
+/*
+ * Unrolled-by-4 SSE2 implementation
+ */
+static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+ asm volatile("pxor %xmm2,%xmm2"); /* P[0] */
+ asm volatile("pxor %xmm3,%xmm3"); /* P[1] */
+ asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */
+ asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
+ asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */
+ asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
+ asm volatile("pxor %xmm10,%xmm10"); /* P[2] */
+ asm volatile("pxor %xmm11,%xmm11"); /* P[3] */
+ asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */
+ asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */
+ asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */
+ asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */
+
+ for ( d = 0 ; d < bytes ; d += 64 ) {
+ for ( z = z0 ; z >= 0 ; z-- ) {
+ /* The second prefetch seems to improve performance... */
+ asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+ asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+ asm volatile("pcmpgtb %xmm4,%xmm5");
+ asm volatile("pcmpgtb %xmm6,%xmm7");
+ asm volatile("pcmpgtb %xmm12,%xmm13");
+ asm volatile("pcmpgtb %xmm14,%xmm15");
+ asm volatile("paddb %xmm4,%xmm4");
+ asm volatile("paddb %xmm6,%xmm6");
+ asm volatile("paddb %xmm12,%xmm12");
+ asm volatile("paddb %xmm14,%xmm14");
+ asm volatile("pand %xmm0,%xmm5");
+ asm volatile("pand %xmm0,%xmm7");
+ asm volatile("pand %xmm0,%xmm13");
+ asm volatile("pand %xmm0,%xmm15");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+ asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+ asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+ asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+ asm volatile("pxor %xmm5,%xmm2");
+ asm volatile("pxor %xmm7,%xmm3");
+ asm volatile("pxor %xmm13,%xmm10");
+ asm volatile("pxor %xmm15,%xmm11");
+ asm volatile("pxor %xmm5,%xmm4");
+ asm volatile("pxor %xmm7,%xmm6");
+ asm volatile("pxor %xmm13,%xmm12");
+ asm volatile("pxor %xmm15,%xmm14");
+ asm volatile("pxor %xmm5,%xmm5");
+ asm volatile("pxor %xmm7,%xmm7");
+ asm volatile("pxor %xmm13,%xmm13");
+ asm volatile("pxor %xmm15,%xmm15");
+ }
+ asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+ asm volatile("pxor %xmm2,%xmm2");
+ asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+ asm volatile("pxor %xmm3,%xmm3");
+ asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+ asm volatile("pxor %xmm10,%xmm10");
+ asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+ asm volatile("pxor %xmm11,%xmm11");
+ asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+ asm volatile("pxor %xmm4,%xmm4");
+ asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+ asm volatile("pxor %xmm6,%xmm6");
+ asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+ asm volatile("pxor %xmm12,%xmm12");
+ asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+ asm volatile("pxor %xmm14,%xmm14");
+ }
+
+ asm volatile("sfence" : : : "memory");
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_sse2x4 = {
+ raid6_sse24_gen_syndrome,
+ raid6_have_sse2,
+ "sse2x4",
+ 1 /* Has cache hints */
+};
+
+#endif
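
Each of these descriptor initializers fills a struct raid6_calls from <linux/raid/pq.h>; the final integer is the flag that the boot-time benchmark in algos.c uses to favor routines with cache hints. A sketch of the structure's shape (field comments paraphrased from memory, so treat as illustrative rather than authoritative):

	struct raid6_calls {
		void (*gen_syndrome)(int, size_t, void **);
		int (*valid)(void);	/* returns 1 if usable on this CPU */
		const char *name;	/* name reported by the benchmark */
		int prefer;		/* has cache hints / nontemporal stores */
	};
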
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
new file mode 100644
index 00000000..aa651697
--- /dev/null
+++ b/lib/raid6/test/Makefile
@@ -0,0 +1,72 @@
+#
+# This is a simple Makefile to test some of the RAID-6 code
+# from userspace.
+#
+
+CC = gcc
+OPTFLAGS = -O2 # Adjust as desired
+CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
+LD = ld
+AWK = awk -f
+AR = ar
+RANLIB = ranlib
+
+.c.o:
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+%.c: ../%.c
+ cp -f $< $@
+
+%.uc: ../%.uc
+ cp -f $< $@
+
+all: raid6.a raid6test
+
+raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \
+ altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \
+ tables.o
+ rm -f $@
+ $(AR) cq $@ $^
+ $(RANLIB) $@
+
+raid6test: test.c raid6.a
+ $(CC) $(CFLAGS) -o raid6test $^
+
+altivec1.c: altivec.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
+
+altivec2.c: altivec.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < altivec.uc > $@
+
+altivec4.c: altivec.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < altivec.uc > $@
+
+altivec8.c: altivec.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@
+
+int1.c: int.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < int.uc > $@
+
+int2.c: int.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < int.uc > $@
+
+int4.c: int.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < int.uc > $@
+
+int8.c: int.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < int.uc > $@
+
+int16.c: int.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=16 < int.uc > $@
+
+int32.c: int.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=32 < int.uc > $@
+
+tables.c: mktables
+ ./mktables > tables.c
+
+clean:
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
+
+spotless: clean
+ rm -f *~
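
With this Makefile, the recovery test is built and run entirely in userspace from lib/raid6/test; a typical session (output line illustrative, in the format printed by test_disks() in test.c below):

	$ make
	$ ./raid6test
	algo=int8     faila=  0(D) failb=  1(D) OK
	...
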
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c
new file mode 100644
index 00000000..7a930318
--- /dev/null
+++ b/lib/raid6/test/test.c
@@ -0,0 +1,124 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2 or (at your
+ * option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6test.c
+ *
+ * Test RAID-6 recovery with various algorithms
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/raid/pq.h>
+
+#define NDISKS 16 /* Including P and Q */
+
+const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
+struct raid6_calls raid6_call;
+
+char *dataptrs[NDISKS];
+char data[NDISKS][PAGE_SIZE];
+char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
+
+static void makedata(void)
+{
+ int i, j;
+
+ for (i = 0; i < NDISKS; i++) {
+ for (j = 0; j < PAGE_SIZE; j++)
+ data[i][j] = rand();
+
+ dataptrs[i] = data[i];
+ }
+}
+
+static char disk_type(int d)
+{
+ switch (d) {
+ case NDISKS-2:
+ return 'P';
+ case NDISKS-1:
+ return 'Q';
+ default:
+ return 'D';
+ }
+}
+
+static int test_disks(int i, int j)
+{
+ int erra, errb;
+
+ memset(recovi, 0xf0, PAGE_SIZE);
+ memset(recovj, 0xba, PAGE_SIZE);
+
+ dataptrs[i] = recovi;
+ dataptrs[j] = recovj;
+
+ raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs);
+
+ erra = memcmp(data[i], recovi, PAGE_SIZE);
+ errb = memcmp(data[j], recovj, PAGE_SIZE);
+
+ if (i < NDISKS-2 && j == NDISKS-1) {
+ /* We don't implement the DQ failure scenario, since it's
+ equivalent to a RAID-5 failure (XOR, then recompute Q) */
+ erra = errb = 0;
+ } else {
+ printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n",
+ raid6_call.name,
+ i, disk_type(i),
+ j, disk_type(j),
+ (!erra && !errb) ? "OK" :
+ !erra ? "ERRB" :
+ !errb ? "ERRA" : "ERRAB");
+ }
+
+ dataptrs[i] = data[i];
+ dataptrs[j] = data[j];
+
+ return erra || errb;
+}
+
+int main(int argc, char *argv[])
+{
+ const struct raid6_calls *const *algo;
+ int i, j;
+ int err = 0;
+
+ makedata();
+
+ for (algo = raid6_algos; *algo; algo++) {
+ if (!(*algo)->valid || (*algo)->valid()) {
+ raid6_call = **algo;
+
+ /* Nuke syndromes */
+ memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+ /* Generate assumed good syndrome */
+ raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+ (void **)&dataptrs);
+
+ for (i = 0; i < NDISKS-1; i++)
+ for (j = i+1; j < NDISKS; j++)
+ err += test_disks(i, j);
+ }
+ printf("\n");
+ }
+
+ printf("\n");
+	/* Also exercise the best-algorithm selection */
+ raid6_select_algo();
+
+ if (err)
+ printf("\n*** ERRORS FOUND ***\n");
+
+ return err;
+}
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
new file mode 100644
index 00000000..c6aa0363
--- /dev/null
+++ b/lib/raid6/unroll.awk
@@ -0,0 +1,20 @@
+
+# This filter requires one command line option of form -vN=n
+# where n must be a decimal number.
+#
+# Repeat each input line containing $$ n times, replacing $$ with 0...n-1.
+# Replace each $# with n, and each $* with a single $.
+
+BEGIN {
+ n = N + 0
+}
+{
+ if (/\$\$/) { rep = n } else { rep = 1 }
+ for (i = 0; i < rep; ++i) {
+ tmp = $0
+ gsub(/\$\$/, i, tmp)
+ gsub(/\$\#/, n, tmp)
+ gsub(/\$\*/, "$", tmp)
+ print tmp
+ }
+}
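
For example, invoked as "awk -f unroll.awk -vN=2", a hypothetical input line

	wp$$ ^= wd$$;

expands to

	wp0 ^= wd0;
	wp1 ^= wd1;

while any $# in the input would become 2, and $* would become a literal $.
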
diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h
new file mode 100644
index 00000000..cb2a8c91
--- /dev/null
+++ b/lib/raid6/x86.h
@@ -0,0 +1,61 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 59 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * raid6/x86.h
+ *
+ * Definitions common to x86 and x86-64 RAID-6 code only
+ */
+
+#ifndef LINUX_RAID_RAID6X86_H
+#define LINUX_RAID_RAID6X86_H
+
+#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
+
+#ifdef __KERNEL__ /* Real code */
+
+#include <asm/i387.h>
+
+#else /* Dummy code for user space testing */
+
+static inline void kernel_fpu_begin(void)
+{
+}
+
+static inline void kernel_fpu_end(void)
+{
+}
+
+#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions
+ * (fast save and restore) */
+#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
+
+/* Should work well enough on modern CPUs for testing */
+static inline int boot_cpu_has(int flag)
+{
+ u32 eax = (flag >> 5) ? 0x80000001 : 1;
+ u32 edx;
+
+ asm volatile("cpuid"
+ : "+a" (eax), "=d" (edx)
+ : : "ecx", "ebx");
+
+ return (edx >> (flag & 31)) & 1;
+}
+
+#endif /* ndef __KERNEL__ */
+
+#endif
+#endif
diff --git a/lib/random32.c b/lib/random32.c
new file mode 100644
index 00000000..fc3545a3
--- /dev/null
+++ b/lib/random32.c
@@ -0,0 +1,150 @@
+/*
+ This is a maximally equidistributed combined Tausworthe generator
+ based on code from GNU Scientific Library 1.5 (30 Jun 2004)
+
+ x_n = (s1_n ^ s2_n ^ s3_n)
+
+ s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19))
+ s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
+ s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
+
+ The period of this generator is about 2^88.
+
+ From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
+ Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
+
+ This is available on the net from L'Ecuyer's home page,
+
+ http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
+ ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
+
+ There is an erratum in the paper "Tables of Maximally
+ Equidistributed Combined LFSR Generators", Mathematics of
+ Computation, 68, 225 (1999), 261--269:
+ http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps
+
+ ... the k_j most significant bits of z_j must be non-
+ zero, for each j. (Note: this restriction also applies to the
+ computer code given in [4], but was mistakenly not mentioned in
+ that paper.)
+
+ This affects the seeding procedure by imposing the requirement
+ s1 > 1, s2 > 7, s3 > 15.
+
+*/
+
+#include <linux/types.h>
+#include <linux/percpu.h>
+#include <linux/module.h>
+#include <linux/jiffies.h>
+#include <linux/random.h>
+
+static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
+
+/**
+ * prandom32 - seeded pseudo-random number generator.
+ * @state: pointer to state structure holding seeded state.
+ *
+ * This is used for pseudo-randomness with no outside seeding.
+ * For more random results, use random32().
+ */
+u32 prandom32(struct rnd_state *state)
+{
+#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
+
+ state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12);
+ state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4);
+ state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17);
+
+ return (state->s1 ^ state->s2 ^ state->s3);
+}
+EXPORT_SYMBOL(prandom32);
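
The TAUSWORTHE macro is the header comment's recurrence x -> ((x & c) << d) ^ (((x << a) ^ x) >> b); the three calls above instantiate it with exactly the constants quoted there:

	word   a   b   c           d
	s1    13  19  4294967294  12
	s2     2  25  4294967288   4
	s3     3  11  4294967280  17
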
+
+/**
+ * random32 - pseudo random number generator
+ *
+ * A 32 bit pseudo-random number is generated using a fast
+ * algorithm suitable for simulation. This algorithm is NOT
+ * considered safe for cryptographic use.
+ */
+u32 random32(void)
+{
+ unsigned long r;
+ struct rnd_state *state = &get_cpu_var(net_rand_state);
+ r = prandom32(state);
+ put_cpu_var(state);
+ return r;
+}
+EXPORT_SYMBOL(random32);
+
+/**
+ * srandom32 - add entropy to pseudo random number generator
+ * @entropy: seed value
+ *
+ * Add some additional seeding to the random32() pool.
+ */
+void srandom32(u32 entropy)
+{
+ int i;
+ /*
+ * No locking on the CPUs, but then somewhat random results are, well,
+ * expected.
+ */
+ for_each_possible_cpu (i) {
+ struct rnd_state *state = &per_cpu(net_rand_state, i);
+ state->s1 = __seed(state->s1 ^ entropy, 1);
+ }
+}
+EXPORT_SYMBOL(srandom32);
+
+/*
+ * Generate some initially weak seeding values to allow
+ * to start the random32() engine.
+ */
+static int __init random32_init(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct rnd_state *state = &per_cpu(net_rand_state,i);
+
+#define LCG(x) ((x) * 69069) /* super-duper LCG */
+ state->s1 = __seed(LCG(i + jiffies), 1);
+ state->s2 = __seed(LCG(state->s1), 7);
+ state->s3 = __seed(LCG(state->s2), 15);
+
+ /* "warm it up" */
+ prandom32(state);
+ prandom32(state);
+ prandom32(state);
+ prandom32(state);
+ prandom32(state);
+ prandom32(state);
+ }
+ return 0;
+}
+core_initcall(random32_init);
+
+/*
+ * Generate better values after the random number generator
+ * is fully initialized.
+ */
+static int __init random32_reseed(void)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct rnd_state *state = &per_cpu(net_rand_state,i);
+ u32 seeds[3];
+
+ get_random_bytes(&seeds, sizeof(seeds));
+ state->s1 = __seed(seeds[0], 1);
+ state->s2 = __seed(seeds[1], 7);
+ state->s3 = __seed(seeds[2], 15);
+
+ /* mix it in */
+ prandom32(state);
+ }
+ return 0;
+}
+late_initcall(random32_reseed);
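
Both seeding paths rely on __seed() to enforce the s1 > 1, s2 > 7, s3 > 15 restriction quoted in the header comment. The helper lives in <linux/random.h>; roughly (reproduced from memory, so verify against the header):

	static inline u32 __seed(u32 x, u32 m)
	{
		/* Bump too-small seeds above the per-word minimum */
		return (x < m) ? x + m : x;
	}
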
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
new file mode 100644
index 00000000..027a03f4
--- /dev/null
+++ b/lib/ratelimit.c
@@ -0,0 +1,67 @@
+/*
+ * ratelimit.c - Generic rate-limiting helper.
+ *
+ * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com>
+ *
+ * 2008-05-01 rewrite the function and use a ratelimit_state data struct as
+ * parameter. Now every user can use their own standalone ratelimit_state.
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/ratelimit.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+
+/*
+ * ___ratelimit - rate limiting
+ * @rs: ratelimit_state data
+ * @func: name of calling function
+ *
+ * This enforces a rate limit: no more than @rs->burst callbacks
+ * in every @rs->interval.
+ *
+ * RETURNS:
+ * 0 means callbacks will be suppressed.
+ * 1 means go ahead and do it.
+ */
+int ___ratelimit(struct ratelimit_state *rs, const char *func)
+{
+ unsigned long flags;
+ int ret;
+
+ if (!rs->interval)
+ return 1;
+
+ /*
+ * If we contend on this state's lock then almost
+ * by definition we are too busy to print a message,
+ * in addition to the one that will be printed by
+ * the entity that is holding the lock already:
+ */
+ if (!spin_trylock_irqsave(&rs->lock, flags))
+ return 0;
+
+ if (!rs->begin)
+ rs->begin = jiffies;
+
+ if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (rs->missed)
+ printk(KERN_WARNING "%s: %d callbacks suppressed\n",
+ func, rs->missed);
+ rs->begin = 0;
+ rs->printed = 0;
+ rs->missed = 0;
+ }
+ if (rs->burst && rs->burst > rs->printed) {
+ rs->printed++;
+ ret = 1;
+ } else {
+ rs->missed++;
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&rs->lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(___ratelimit);
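
Callers normally reach this through the __ratelimit() wrapper macro, which supplies __func__ automatically. A minimal usage sketch with the helpers from <linux/ratelimit.h> (interval and burst values arbitrary):

	/* At most 10 messages per 5-second window. */
	static DEFINE_RATELIMIT_STATE(my_rs, 5 * HZ, 10);

	if (__ratelimit(&my_rs))
		printk(KERN_WARNING "something noisy happened\n");
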
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644
index 00000000..3ed247b8
--- /dev/null
+++ b/lib/rational.c
@@ -0,0 +1,63 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions for handling rational numbers
+ */
+
+#include <linux/rational.h>
+#include <linux/module.h>
+
+/*
+ * calculate best rational approximation for a given fraction
+ * taking into account restricted register size, e.g. to find
+ * appropriate values for a pll with 5 bit denominator and
+ * 8 bit numerator register fields, trying to set up with a
+ * frequency ratio of 3.1415, one would say:
+ *
+ * rational_best_approximation(31415, 10000,
+ * (1 << 8) - 1, (1 << 5) - 1, &n, &d);
+ *
+ * you may look at given_numerator as a fixed point number,
+ * with the fractional part size described in given_denominator.
+ *
+ * for theoretical background, see:
+ * http://en.wikipedia.org/wiki/Continued_fraction
+ */
+
+void rational_best_approximation(
+ unsigned long given_numerator, unsigned long given_denominator,
+ unsigned long max_numerator, unsigned long max_denominator,
+ unsigned long *best_numerator, unsigned long *best_denominator)
+{
+ unsigned long n, d, n0, d0, n1, d1;
+ n = given_numerator;
+ d = given_denominator;
+ n0 = d1 = 0;
+ n1 = d0 = 1;
+ for (;;) {
+ unsigned long t, a;
+ if ((n1 > max_numerator) || (d1 > max_denominator)) {
+ n1 = n0;
+ d1 = d0;
+ break;
+ }
+ if (d == 0)
+ break;
+ t = d;
+ a = n / d;
+ d = n % d;
+ n = t;
+ t = n0 + a * n1;
+ n0 = n1;
+ n1 = t;
+ t = d0 + a * d1;
+ d0 = d1;
+ d1 = t;
+ }
+ *best_numerator = n1;
+ *best_denominator = d1;
+}
+
+EXPORT_SYMBOL(rational_best_approximation);
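
Tracing the loop on the example from the comment: the continued-fraction expansion of 31415/10000 is [3; 7, 14, ...], giving convergents 3/1, 22/7 and 311/99. Since 311 exceeds the 8-bit numerator limit, the loop falls back to the previous convergent:

	unsigned long n, d;

	rational_best_approximation(31415, 10000, (1 << 8) - 1, (1 << 5) - 1,
				    &n, &d);
	/* n == 22, d == 7; 22/7 ~ 3.142857 */
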
diff --git a/lib/rbtree.c b/lib/rbtree.c
new file mode 100644
index 00000000..4693f791
--- /dev/null
+++ b/lib/rbtree.c
@@ -0,0 +1,459 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree.h>
+#include <linux/module.h>
+
+static void __rb_rotate_left(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *right = node->rb_right;
+ struct rb_node *parent = rb_parent(node);
+
+ if ((node->rb_right = right->rb_left))
+ rb_set_parent(right->rb_left, node);
+ right->rb_left = node;
+
+ rb_set_parent(right, parent);
+
+ if (parent)
+ {
+ if (node == parent->rb_left)
+ parent->rb_left = right;
+ else
+ parent->rb_right = right;
+ }
+ else
+ root->rb_node = right;
+ rb_set_parent(node, right);
+}
+
+static void __rb_rotate_right(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *left = node->rb_left;
+ struct rb_node *parent = rb_parent(node);
+
+ if ((node->rb_left = left->rb_right))
+ rb_set_parent(left->rb_right, node);
+ left->rb_right = node;
+
+ rb_set_parent(left, parent);
+
+ if (parent)
+ {
+ if (node == parent->rb_right)
+ parent->rb_right = left;
+ else
+ parent->rb_left = left;
+ }
+ else
+ root->rb_node = left;
+ rb_set_parent(node, left);
+}
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *parent, *gparent;
+
+ while ((parent = rb_parent(node)) && rb_is_red(parent))
+ {
+ gparent = rb_parent(parent);
+
+ if (parent == gparent->rb_left)
+ {
+ {
+ register struct rb_node *uncle = gparent->rb_right;
+ if (uncle && rb_is_red(uncle))
+ {
+ rb_set_black(uncle);
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_right == node)
+ {
+ register struct rb_node *tmp;
+ __rb_rotate_left(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ __rb_rotate_right(gparent, root);
+ } else {
+ {
+ register struct rb_node *uncle = gparent->rb_left;
+ if (uncle && rb_is_red(uncle))
+ {
+ rb_set_black(uncle);
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ node = gparent;
+ continue;
+ }
+ }
+
+ if (parent->rb_left == node)
+ {
+ register struct rb_node *tmp;
+ __rb_rotate_right(parent, root);
+ tmp = parent;
+ parent = node;
+ node = tmp;
+ }
+
+ rb_set_black(parent);
+ rb_set_red(gparent);
+ __rb_rotate_left(gparent, root);
+ }
+ }
+
+ rb_set_black(root->rb_node);
+}
+EXPORT_SYMBOL(rb_insert_color);
+
+static void __rb_erase_color(struct rb_node *node, struct rb_node *parent,
+ struct rb_root *root)
+{
+ struct rb_node *other;
+
+ while ((!node || rb_is_black(node)) && node != root->rb_node)
+ {
+ if (parent->rb_left == node)
+ {
+ other = parent->rb_right;
+ if (rb_is_red(other))
+ {
+ rb_set_black(other);
+ rb_set_red(parent);
+ __rb_rotate_left(parent, root);
+ other = parent->rb_right;
+ }
+ if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+ (!other->rb_right || rb_is_black(other->rb_right)))
+ {
+ rb_set_red(other);
+ node = parent;
+ parent = rb_parent(node);
+ }
+ else
+ {
+ if (!other->rb_right || rb_is_black(other->rb_right))
+ {
+ rb_set_black(other->rb_left);
+ rb_set_red(other);
+ __rb_rotate_right(other, root);
+ other = parent->rb_right;
+ }
+ rb_set_color(other, rb_color(parent));
+ rb_set_black(parent);
+ rb_set_black(other->rb_right);
+ __rb_rotate_left(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ else
+ {
+ other = parent->rb_left;
+ if (rb_is_red(other))
+ {
+ rb_set_black(other);
+ rb_set_red(parent);
+ __rb_rotate_right(parent, root);
+ other = parent->rb_left;
+ }
+ if ((!other->rb_left || rb_is_black(other->rb_left)) &&
+ (!other->rb_right || rb_is_black(other->rb_right)))
+ {
+ rb_set_red(other);
+ node = parent;
+ parent = rb_parent(node);
+ }
+ else
+ {
+ if (!other->rb_left || rb_is_black(other->rb_left))
+ {
+ rb_set_black(other->rb_right);
+ rb_set_red(other);
+ __rb_rotate_left(other, root);
+ other = parent->rb_left;
+ }
+ rb_set_color(other, rb_color(parent));
+ rb_set_black(parent);
+ rb_set_black(other->rb_left);
+ __rb_rotate_right(parent, root);
+ node = root->rb_node;
+ break;
+ }
+ }
+ }
+ if (node)
+ rb_set_black(node);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *child, *parent;
+ int color;
+
+ if (!node->rb_left)
+ child = node->rb_right;
+ else if (!node->rb_right)
+ child = node->rb_left;
+ else
+ {
+ struct rb_node *old = node, *left;
+
+ node = node->rb_right;
+ while ((left = node->rb_left) != NULL)
+ node = left;
+
+ if (rb_parent(old)) {
+ if (rb_parent(old)->rb_left == old)
+ rb_parent(old)->rb_left = node;
+ else
+ rb_parent(old)->rb_right = node;
+ } else
+ root->rb_node = node;
+
+ child = node->rb_right;
+ parent = rb_parent(node);
+ color = rb_color(node);
+
+ if (parent == old) {
+ parent = node;
+ } else {
+ if (child)
+ rb_set_parent(child, parent);
+ parent->rb_left = child;
+
+ node->rb_right = old->rb_right;
+ rb_set_parent(old->rb_right, node);
+ }
+
+ node->rb_parent_color = old->rb_parent_color;
+ node->rb_left = old->rb_left;
+ rb_set_parent(old->rb_left, node);
+
+ goto color;
+ }
+
+ parent = rb_parent(node);
+ color = rb_color(node);
+
+ if (child)
+ rb_set_parent(child, parent);
+ if (parent)
+ {
+ if (parent->rb_left == node)
+ parent->rb_left = child;
+ else
+ parent->rb_right = child;
+ }
+ else
+ root->rb_node = child;
+
+ color:
+ if (color == RB_BLACK)
+ __rb_erase_color(child, parent, root);
+}
+EXPORT_SYMBOL(rb_erase);
+
+static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
+{
+ struct rb_node *parent;
+
+up:
+ func(node, data);
+ parent = rb_parent(node);
+ if (!parent)
+ return;
+
+ if (node == parent->rb_left && parent->rb_right)
+ func(parent->rb_right, data);
+ else if (parent->rb_left)
+ func(parent->rb_left, data);
+
+ node = parent;
+ goto up;
+}
+
+/*
+ * after inserting @node into the tree, update the tree to account for
+ * both the new entry and any damage done by the rebalance
+ */
+void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
+{
+ if (node->rb_left)
+ node = node->rb_left;
+ else if (node->rb_right)
+ node = node->rb_right;
+
+ rb_augment_path(node, func, data);
+}
+
+/*
+ * before removing the node, find the deepest node on the rebalance path
+ * that will still be there after @node gets removed
+ */
+struct rb_node *rb_augment_erase_begin(struct rb_node *node)
+{
+ struct rb_node *deepest;
+
+ if (!node->rb_right && !node->rb_left)
+ deepest = rb_parent(node);
+ else if (!node->rb_right)
+ deepest = node->rb_left;
+ else if (!node->rb_left)
+ deepest = node->rb_right;
+ else {
+ deepest = rb_next(node);
+ if (deepest->rb_right)
+ deepest = deepest->rb_right;
+ else if (rb_parent(deepest) != node)
+ deepest = rb_parent(deepest);
+ }
+
+ return deepest;
+}
+
+/*
+ * after removal, update the tree to account for the removed entry
+ * and any rebalance damage.
+ */
+void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
+{
+ if (node)
+ rb_augment_path(node, func, data);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
+EXPORT_SYMBOL(rb_first);
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_right)
+ n = n->rb_right;
+ return n;
+}
+EXPORT_SYMBOL(rb_last);
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+ struct rb_node *parent;
+
+ if (rb_parent(node) == node)
+ return NULL;
+
+ /* If we have a right-hand child, go down and then left as far
+ as we can. */
+ if (node->rb_right) {
+ node = node->rb_right;
+ while (node->rb_left)
+ node=node->rb_left;
+ return (struct rb_node *)node;
+ }
+
+ /* No right-hand children. Everything down and left is
+ smaller than us, so any 'next' node must be in the general
+ direction of our parent. Go up the tree; any time the
+ ancestor is a right-hand child of its parent, keep going
+ up. First time it's a left-hand child of its parent, said
+ parent is our 'next' node. */
+ while ((parent = rb_parent(node)) && node == parent->rb_right)
+ node = parent;
+
+ return parent;
+}
+EXPORT_SYMBOL(rb_next);
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+ struct rb_node *parent;
+
+ if (rb_parent(node) == node)
+ return NULL;
+
+ /* If we have a left-hand child, go down and then right as far
+ as we can. */
+ if (node->rb_left) {
+ node = node->rb_left;
+ while (node->rb_right)
+ node=node->rb_right;
+ return (struct rb_node *)node;
+ }
+
+ /* No left-hand children. Go up till we find an ancestor which
+ is a right-hand child of its parent */
+ while ((parent = rb_parent(node)) && node == parent->rb_left)
+ node = parent;
+
+ return parent;
+}
+EXPORT_SYMBOL(rb_prev);
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root)
+{
+ struct rb_node *parent = rb_parent(victim);
+
+ /* Set the surrounding nodes to point to the replacement */
+ if (parent) {
+ if (victim == parent->rb_left)
+ parent->rb_left = new;
+ else
+ parent->rb_right = new;
+ } else {
+ root->rb_node = new;
+ }
+ if (victim->rb_left)
+ rb_set_parent(victim->rb_left, new);
+ if (victim->rb_right)
+ rb_set_parent(victim->rb_right, new);
+
+ /* Copy the pointers/colour from the victim to the replacement */
+ *new = *victim;
+}
+EXPORT_SYMBOL(rb_replace_node);
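
Search and insertion are intentionally left to the caller, who knows the key layout. The canonical pattern (container type hypothetical; rb_entry() and rb_link_node() come from <linux/rbtree.h>) walks to the insertion point, links the node, then lets rb_insert_color() rebalance:

	struct mynode {
		struct rb_node node;
		int key;
	};

	static int my_insert(struct rb_root *root, struct mynode *data)
	{
		struct rb_node **new = &root->rb_node, *parent = NULL;

		/* Descend to the leaf slot where the key belongs */
		while (*new) {
			struct mynode *this = rb_entry(*new, struct mynode, node);

			parent = *new;
			if (data->key < this->key)
				new = &(*new)->rb_left;
			else if (data->key > this->key)
				new = &(*new)->rb_right;
			else
				return 0;	/* duplicate key */
		}

		/* Link in the new node, then rebalance */
		rb_link_node(&data->node, parent, new);
		rb_insert_color(&data->node, root);
		return 1;
	}
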
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
new file mode 100644
index 00000000..6a3bd48f
--- /dev/null
+++ b/lib/reciprocal_div.c
@@ -0,0 +1,9 @@
+#include <asm/div64.h>
+#include <linux/reciprocal_div.h>
+
+u32 reciprocal_value(u32 k)
+{
+ u64 val = (1LL << 32) + (k - 1);
+ do_div(val, k);
+ return (u32)val;
+}
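
The constant computed here is ceil(2^32 / k), so a later division by k becomes a multiply and a 32-bit shift via the header's reciprocal_divide() helper (essentially (u32)(((u64)a * R) >> 32)); a usage sketch:

	u32 R = reciprocal_value(4);		/* precompute once per divisor */
	u32 q = reciprocal_divide(1000, R);	/* q == 250, no div instruction */
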
diff --git a/lib/reed_solomon/Makefile b/lib/reed_solomon/Makefile
new file mode 100644
index 00000000..c3d71368
--- /dev/null
+++ b/lib/reed_solomon/Makefile
@@ -0,0 +1,6 @@
+#
+# This is a modified version of the Reed-Solomon library.
+#
+
+obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o
+
diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c
new file mode 100644
index 00000000..0ec3f257
--- /dev/null
+++ b/lib/reed_solomon/decode_rs.c
@@ -0,0 +1,271 @@
+/*
+ * lib/reed_solomon/decode_rs.c
+ *
+ * Overview:
+ * Generic Reed Solomon encoder / decoder library
+ *
+ * Copyright 2002, Phil Karn, KA9Q
+ * May be used under the terms of the GNU General Public License (GPL)
+ *
+ * Adaptation to the kernel by Thomas Gleixner (tglx@linutronix.de)
+ *
+ * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
+ *
+ */
+
+/* Generic data width independent code which is included by the
+ * wrappers.
+ */
+{
+ int deg_lambda, el, deg_omega;
+ int i, j, r, k, pad;
+ int nn = rs->nn;
+ int nroots = rs->nroots;
+ int fcr = rs->fcr;
+ int prim = rs->prim;
+ int iprim = rs->iprim;
+ uint16_t *alpha_to = rs->alpha_to;
+ uint16_t *index_of = rs->index_of;
+ uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error;
+	/* Err+Eras locator poly and syndrome poly. The maximum value
+	 * of nroots is 8, so the necessary stack size will be about
+	 * 220 bytes max.
+ */
+ uint16_t lambda[nroots + 1], syn[nroots];
+ uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1];
+ uint16_t root[nroots], reg[nroots + 1], loc[nroots];
+ int count = 0;
+ uint16_t msk = (uint16_t) rs->nn;
+
+ /* Check length parameter for validity */
+ pad = nn - nroots - len;
+ BUG_ON(pad < 0 || pad >= nn);
+
+ /* Does the caller provide the syndrome ? */
+ if (s != NULL)
+ goto decode;
+
+ /* form the syndromes; i.e., evaluate data(x) at roots of
+ * g(x) */
+ for (i = 0; i < nroots; i++)
+ syn[i] = (((uint16_t) data[0]) ^ invmsk) & msk;
+
+ for (j = 1; j < len; j++) {
+ for (i = 0; i < nroots; i++) {
+ if (syn[i] == 0) {
+ syn[i] = (((uint16_t) data[j]) ^
+ invmsk) & msk;
+ } else {
+ syn[i] = ((((uint16_t) data[j]) ^
+ invmsk) & msk) ^
+ alpha_to[rs_modnn(rs, index_of[syn[i]] +
+ (fcr + i) * prim)];
+ }
+ }
+ }
+
+ for (j = 0; j < nroots; j++) {
+ for (i = 0; i < nroots; i++) {
+ if (syn[i] == 0) {
+ syn[i] = ((uint16_t) par[j]) & msk;
+ } else {
+ syn[i] = (((uint16_t) par[j]) & msk) ^
+ alpha_to[rs_modnn(rs, index_of[syn[i]] +
+ (fcr+i)*prim)];
+ }
+ }
+ }
+ s = syn;
+
+ /* Convert syndromes to index form, checking for nonzero condition */
+ syn_error = 0;
+ for (i = 0; i < nroots; i++) {
+ syn_error |= s[i];
+ s[i] = index_of[s[i]];
+ }
+
+ if (!syn_error) {
+ /* if syndrome is zero, data[] is a codeword and there are no
+ * errors to correct. So return data[] unmodified
+ */
+ count = 0;
+ goto finish;
+ }
+
+ decode:
+ memset(&lambda[1], 0, nroots * sizeof(lambda[0]));
+ lambda[0] = 1;
+
+ if (no_eras > 0) {
+ /* Init lambda to be the erasure locator polynomial */
+ lambda[1] = alpha_to[rs_modnn(rs,
+ prim * (nn - 1 - eras_pos[0]))];
+ for (i = 1; i < no_eras; i++) {
+ u = rs_modnn(rs, prim * (nn - 1 - eras_pos[i]));
+ for (j = i + 1; j > 0; j--) {
+ tmp = index_of[lambda[j - 1]];
+ if (tmp != nn) {
+ lambda[j] ^=
+ alpha_to[rs_modnn(rs, u + tmp)];
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < nroots + 1; i++)
+ b[i] = index_of[lambda[i]];
+
+ /*
+ * Begin Berlekamp-Massey algorithm to determine error+erasure
+ * locator polynomial
+ */
+ r = no_eras;
+ el = no_eras;
+ while (++r <= nroots) { /* r is the step number */
+ /* Compute discrepancy at the r-th step in poly-form */
+ discr_r = 0;
+ for (i = 0; i < r; i++) {
+ if ((lambda[i] != 0) && (s[r - i - 1] != nn)) {
+ discr_r ^=
+ alpha_to[rs_modnn(rs,
+ index_of[lambda[i]] +
+ s[r - i - 1])];
+ }
+ }
+ discr_r = index_of[discr_r]; /* Index form */
+ if (discr_r == nn) {
+ /* 2 lines below: B(x) <-- x*B(x) */
+ memmove (&b[1], b, nroots * sizeof (b[0]));
+ b[0] = nn;
+ } else {
+ /* 7 lines below: T(x) <-- lambda(x)-discr_r*x*b(x) */
+ t[0] = lambda[0];
+ for (i = 0; i < nroots; i++) {
+ if (b[i] != nn) {
+ t[i + 1] = lambda[i + 1] ^
+ alpha_to[rs_modnn(rs, discr_r +
+ b[i])];
+ } else
+ t[i + 1] = lambda[i + 1];
+ }
+ if (2 * el <= r + no_eras - 1) {
+ el = r + no_eras - el;
+ /*
+ * 2 lines below: B(x) <-- inv(discr_r) *
+ * lambda(x)
+ */
+ for (i = 0; i <= nroots; i++) {
+ b[i] = (lambda[i] == 0) ? nn :
+ rs_modnn(rs, index_of[lambda[i]]
+ - discr_r + nn);
+ }
+ } else {
+ /* 2 lines below: B(x) <-- x*B(x) */
+ memmove(&b[1], b, nroots * sizeof(b[0]));
+ b[0] = nn;
+ }
+ memcpy(lambda, t, (nroots + 1) * sizeof(t[0]));
+ }
+ }
+
+ /* Convert lambda to index form and compute deg(lambda(x)) */
+ deg_lambda = 0;
+ for (i = 0; i < nroots + 1; i++) {
+ lambda[i] = index_of[lambda[i]];
+ if (lambda[i] != nn)
+ deg_lambda = i;
+ }
+ /* Find roots of error+erasure locator polynomial by Chien search */
+ memcpy(&reg[1], &lambda[1], nroots * sizeof(reg[0]));
+ count = 0; /* Number of roots of lambda(x) */
+ for (i = 1, k = iprim - 1; i <= nn; i++, k = rs_modnn(rs, k + iprim)) {
+		q = 1;	/* lambda[0] is always 1, i.e. 0 in index form */
+ for (j = deg_lambda; j > 0; j--) {
+ if (reg[j] != nn) {
+ reg[j] = rs_modnn(rs, reg[j] + j);
+ q ^= alpha_to[reg[j]];
+ }
+ }
+ if (q != 0)
+ continue; /* Not a root */
+ /* store root (index-form) and error location number */
+ root[count] = i;
+ loc[count] = k;
+ /* If we've already found max possible roots,
+ * abort the search to save time
+ */
+ if (++count == deg_lambda)
+ break;
+ }
+ if (deg_lambda != count) {
+ /*
+ * deg(lambda) unequal to number of roots => uncorrectable
+ * error detected
+ */
+ count = -EBADMSG;
+ goto finish;
+ }
+ /*
+ * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
+	 * x**nroots), in index form. Also find deg(omega).
+ */
+ deg_omega = deg_lambda - 1;
+ for (i = 0; i <= deg_omega; i++) {
+ tmp = 0;
+ for (j = i; j >= 0; j--) {
+ if ((s[i - j] != nn) && (lambda[j] != nn))
+ tmp ^=
+ alpha_to[rs_modnn(rs, s[i - j] + lambda[j])];
+ }
+ omega[i] = index_of[tmp];
+ }
+
+ /*
+ * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
+ * inv(X(l))**(fcr-1) and den = lambda_pr(inv(X(l))) all in poly-form
+ */
+ for (j = count - 1; j >= 0; j--) {
+ num1 = 0;
+ for (i = deg_omega; i >= 0; i--) {
+ if (omega[i] != nn)
+ num1 ^= alpha_to[rs_modnn(rs, omega[i] +
+ i * root[j])];
+ }
+ num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)];
+ den = 0;
+
+ /* lambda[i+1] for i even is the formal derivative
+ * lambda_pr of lambda[i] */
+ for (i = min(deg_lambda, nroots - 1) & ~1; i >= 0; i -= 2) {
+ if (lambda[i + 1] != nn) {
+ den ^= alpha_to[rs_modnn(rs, lambda[i + 1] +
+ i * root[j])];
+ }
+ }
+ /* Apply error to data */
+ if (num1 != 0 && loc[j] >= pad) {
+ uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] +
+ index_of[num2] +
+ nn - index_of[den])];
+ /* Store the error correction pattern, if a
+ * correction buffer is available */
+ if (corr) {
+ corr[j] = cor;
+ } else {
+ /* If a data buffer is given and the
+ * error is inside the message,
+ * correct it */
+ if (data && (loc[j] < (nn - nroots)))
+ data[loc[j] - pad] ^= cor;
+ }
+ }
+ }
+
+finish:
+ if (eras_pos != NULL) {
+ for (i = 0; i < count; i++)
+ eras_pos[i] = loc[i] - pad;
+ }
+ return count;
+
+}
diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c
new file mode 100644
index 00000000..0b5b1a67
--- /dev/null
+++ b/lib/reed_solomon/encode_rs.c
@@ -0,0 +1,54 @@
+/*
+ * lib/reed_solomon/encode_rs.c
+ *
+ * Overview:
+ * Generic Reed Solomon encoder / decoder library
+ *
+ * Copyright 2002, Phil Karn, KA9Q
+ * May be used under the terms of the GNU General Public License (GPL)
+ *
+ * Adaptation to the kernel by Thomas Gleixner (tglx@linutronix.de)
+ *
+ * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $
+ *
+ */
+
+/* Generic data width independent code which is included by the
+ * wrappers.
+ * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par)
+ */
+{
+ int i, j, pad;
+ int nn = rs->nn;
+ int nroots = rs->nroots;
+ uint16_t *alpha_to = rs->alpha_to;
+ uint16_t *index_of = rs->index_of;
+ uint16_t *genpoly = rs->genpoly;
+ uint16_t fb;
+ uint16_t msk = (uint16_t) rs->nn;
+
+ /* Check length parameter for validity */
+ pad = nn - nroots - len;
+ if (pad < 0 || pad >= nn)
+ return -ERANGE;
+
+ for (i = 0; i < len; i++) {
+ fb = index_of[((((uint16_t) data[i])^invmsk) & msk) ^ par[0]];
+ /* feedback term is non-zero */
+ if (fb != nn) {
+ for (j = 1; j < nroots; j++) {
+ par[j] ^= alpha_to[rs_modnn(rs, fb +
+ genpoly[nroots - j])];
+ }
+ }
+ /* Shift */
+ memmove(&par[0], &par[1], sizeof(uint16_t) * (nroots - 1));
+ if (fb != nn) {
+ par[nroots - 1] = alpha_to[rs_modnn(rs,
+ fb + genpoly[0])];
+ } else {
+ par[nroots - 1] = 0;
+ }
+ }
+ return 0;
+}
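
This file, like decode_rs.c, is width-independent code that reed_solomon.c includes to build the encode_rs8()/decode_rs8() wrappers (and their 16-bit siblings). A round-trip sketch; the (8, 0x11d, 0, 1, 16) parameters and the RS(255,239) message length are illustrative, not taken from a particular driver:

	uint8_t data[239];
	uint16_t par[16] = {};		/* encoder accumulates into par[] */
	struct rs_control *rs;
	int err;

	rs = init_rs(8, 0x11d, 0, 1, 16);	/* GF(2^8), 16 parity symbols */
	encode_rs8(rs, data, 239, par, 0);
	/* ...data and par[] cross the noisy channel... */
	err = decode_rs8(rs, data, par, 239, NULL, 0, NULL, 0, NULL);
	/* err >= 0: number of corrected symbols; -EBADMSG: uncorrectable */

	free_rs(rs);
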
diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c
new file mode 100644
index 00000000..06d04cfa
--- /dev/null
+++ b/lib/reed_solomon/reed_solomon.c
@@ -0,0 +1,384 @@
+/*
+ * lib/reed_solomon/reed_solomon.c
+ *
+ * Overview:
+ * Generic Reed Solomon encoder / decoder library
+ *
+ * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de)
+ *
+ * Reed Solomon code lifted from reed solomon library written by Phil Karn
+ * Copyright 2002 Phil Karn, KA9Q
+ *
+ * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Description:
+ *
+ * The generic Reed Solomon library provides runtime configurable
+ * encoding / decoding of RS codes.
+ * Each user must call init_rs to get a pointer to a rs_control
+ * structure for the given rs parameters. This structure is either
+ * generated or an already available matching control structure is used.
+ * If a structure is generated then the polynomial arrays for
+ * fast encoding / decoding are built. This can take some time so
+ * make sure not to call this function from a time critical path.
+ * Usually a module / driver should initialize the necessary
+ * rs_control structure on module / driver init and release it
+ * on exit.
+ * The encoding puts the calculated syndrome into a given syndrome
+ * buffer.
+ * The decoding is a two step process. The first step calculates
+ * the syndrome over the received (data + syndrome) and calls the
+ * second stage, which does the decoding / error correction itself.
+ * Many hw encoders provide a syndrome calculation over the received
+ * data + syndrome and can call the second stage directly.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rslib.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+
+/* This list holds all currently allocated rs control structures */
+static LIST_HEAD (rslist);
+/* Protection for the list */
+static DEFINE_MUTEX(rslistlock);
+
+/**
+ * rs_init - Initialize a Reed-Solomon codec
+ * @symsize: symbol size, bits (1-8)
+ * @gfpoly: Field generator polynomial coefficients
+ * @gffunc: Field generator function
+ * @fcr: first root of RS code generator polynomial, index form
+ * @prim: primitive element to generate polynomial roots
+ * @nroots: RS code generator polynomial degree (number of roots)
+ *
+ * Allocate a control structure and the polynomial arrays for faster
+ * en/decoding. Fill the arrays according to the given parameters.
+ */
+static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int),
+ int fcr, int prim, int nroots)
+{
+ struct rs_control *rs;
+ int i, j, sr, root, iprim;
+
+ /* Allocate the control structure */
+ rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL);
+ if (rs == NULL)
+ return NULL;
+
+ INIT_LIST_HEAD(&rs->list);
+
+ rs->mm = symsize;
+ rs->nn = (1 << symsize) - 1;
+ rs->fcr = fcr;
+ rs->prim = prim;
+ rs->nroots = nroots;
+ rs->gfpoly = gfpoly;
+ rs->gffunc = gffunc;
+
+ /* Allocate the arrays */
+ rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+ if (rs->alpha_to == NULL)
+ goto errrs;
+
+ rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL);
+ if (rs->index_of == NULL)
+ goto erralp;
+
+ rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL);
+ if(rs->genpoly == NULL)
+ goto erridx;
+
+ /* Generate Galois field lookup tables */
+ rs->index_of[0] = rs->nn; /* log(zero) = -inf */
+ rs->alpha_to[rs->nn] = 0; /* alpha**-inf = 0 */
+ if (gfpoly) {
+ sr = 1;
+ for (i = 0; i < rs->nn; i++) {
+ rs->index_of[sr] = i;
+ rs->alpha_to[i] = sr;
+ sr <<= 1;
+ if (sr & (1 << symsize))
+ sr ^= gfpoly;
+ sr &= rs->nn;
+ }
+ } else {
+ sr = gffunc(0);
+ for (i = 0; i < rs->nn; i++) {
+ rs->index_of[sr] = i;
+ rs->alpha_to[i] = sr;
+ sr = gffunc(sr);
+ }
+ }
+ /* If it's not primitive, exit */
+ if (sr != rs->alpha_to[0])
+ goto errpol;
+
+ /* Find prim-th root of 1, used in decoding */
+ for (iprim = 1; (iprim % prim) != 0; iprim += rs->nn);
+ /* prim-th root of 1, index form */
+ rs->iprim = iprim / prim;
+
+ /* Form RS code generator polynomial from its roots */
+ rs->genpoly[0] = 1;
+ for (i = 0, root = fcr * prim; i < nroots; i++, root += prim) {
+ rs->genpoly[i + 1] = 1;
+ /* Multiply rs->genpoly[] by @**(root + x) */
+ for (j = i; j > 0; j--) {
+ if (rs->genpoly[j] != 0) {
+ rs->genpoly[j] = rs->genpoly[j -1] ^
+ rs->alpha_to[rs_modnn(rs,
+ rs->index_of[rs->genpoly[j]] + root)];
+ } else
+ rs->genpoly[j] = rs->genpoly[j - 1];
+ }
+ /* rs->genpoly[0] can never be zero */
+ rs->genpoly[0] =
+ rs->alpha_to[rs_modnn(rs,
+ rs->index_of[rs->genpoly[0]] + root)];
+ }
+ /* convert rs->genpoly[] to index form for quicker encoding */
+ for (i = 0; i <= nroots; i++)
+ rs->genpoly[i] = rs->index_of[rs->genpoly[i]];
+ return rs;
+
+ /* Error exit */
+errpol:
+ kfree(rs->genpoly);
+erridx:
+ kfree(rs->index_of);
+erralp:
+ kfree(rs->alpha_to);
+errrs:
+ kfree(rs);
+ return NULL;
+}
+
+
+/**
+ * free_rs - Free the rs control structure, if it is no longer used
+ * @rs: the control structure which is no longer used by the
+ * caller
+ */
+void free_rs(struct rs_control *rs)
+{
+ mutex_lock(&rslistlock);
+ rs->users--;
+ if (!rs->users) {
+ list_del(&rs->list);
+ kfree(rs->alpha_to);
+ kfree(rs->index_of);
+ kfree(rs->genpoly);
+ kfree(rs);
+ }
+ mutex_unlock(&rslistlock);
+}
+
+/**
+ * init_rs_internal - Find a matching or allocate a new rs control structure
+ * @symsize: the symbol size (number of bits)
+ * @gfpoly: the extended Galois field generator polynomial coefficients,
+ * with the 0th coefficient in the low order bit. The polynomial
+ * must be primitive.
+ * @gffunc: pointer to function to generate the next field element,
+ * or the multiplicative identity element if given 0. Used
+ * instead of gfpoly if gfpoly is 0
+ * @fcr: the first consecutive root of the rs code generator polynomial
+ * in index form
+ * @prim: primitive element to generate polynomial roots
+ * @nroots: RS code generator polynomial degree (number of roots)
+ */
+static struct rs_control *init_rs_internal(int symsize, int gfpoly,
+ int (*gffunc)(int), int fcr,
+ int prim, int nroots)
+{
+ struct list_head *tmp;
+ struct rs_control *rs;
+
+ /* Sanity checks */
+ if (symsize < 1)
+ return NULL;
+ if (fcr < 0 || fcr >= (1<<symsize))
+ return NULL;
+ if (prim <= 0 || prim >= (1<<symsize))
+ return NULL;
+ if (nroots < 0 || nroots >= (1<<symsize))
+ return NULL;
+
+ mutex_lock(&rslistlock);
+
+ /* Walk through the list and look for a matching entry */
+ list_for_each(tmp, &rslist) {
+ rs = list_entry(tmp, struct rs_control, list);
+ if (symsize != rs->mm)
+ continue;
+ if (gfpoly != rs->gfpoly)
+ continue;
+ if (gffunc != rs->gffunc)
+ continue;
+ if (fcr != rs->fcr)
+ continue;
+ if (prim != rs->prim)
+ continue;
+ if (nroots != rs->nroots)
+ continue;
+ /* We have a matching one already */
+ rs->users++;
+ goto out;
+ }
+
+ /* Create a new one */
+ rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots);
+ if (rs) {
+ rs->users = 1;
+ list_add(&rs->list, &rslist);
+ }
+out:
+ mutex_unlock(&rslistlock);
+ return rs;
+}
+
+/**
+ * init_rs - Find a matching or allocate a new rs control structure
+ * @symsize: the symbol size (number of bits)
+ * @gfpoly: the extended Galois field generator polynomial coefficients,
+ * with the 0th coefficient in the low order bit. The polynomial
+ * must be primitive.
+ * @fcr: the first consecutive root of the rs code generator polynomial
+ * in index form
+ * @prim: primitive element to generate polynomial roots
+ * @nroots: RS code generator polynomial degree (number of roots)
+ */
+struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim,
+ int nroots)
+{
+ return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots);
+}
+
+/**
+ * init_rs_non_canonical - Find a matching or allocate a new rs control
+ * structure, for fields with non-canonical
+ * representation
+ * @symsize: the symbol size (number of bits)
+ * @gffunc: pointer to function to generate the next field element,
+ * or the multiplicative identity element if given 0. Used
+ * instead of gfpoly if gfpoly is 0
+ * @fcr: the first consecutive root of the rs code generator polynomial
+ * in index form
+ * @prim: primitive element to generate polynomial roots
+ * @nroots: RS code generator polynomial degree (number of roots)
+ */
+struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int),
+ int fcr, int prim, int nroots)
+{
+ return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots);
+}
+
+#ifdef CONFIG_REED_SOLOMON_ENC8
+/**
+ * encode_rs8 - Calculate the parity for data values (8bit data width)
+ * @rs: the rs control structure
+ * @data: data field of a given type
+ * @len: data length
+ * @par: parity data, must be initialized by caller (usually all 0)
+ * @invmsk: invert data mask (will be xored on data)
+ *
+ * The parity uses a uint16_t data type to enable
+ * symbol size > 8. The calling code must take care of encoding of the
+ * syndrome result for storage itself.
+ */
+int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par,
+ uint16_t invmsk)
+{
+#include "encode_rs.c"
+}
+EXPORT_SYMBOL_GPL(encode_rs8);
+#endif
+
+#ifdef CONFIG_REED_SOLOMON_DEC8
+/**
+ * decode_rs8 - Decode codeword (8bit data width)
+ * @rs: the rs control structure
+ * @data: data field of a given type
+ * @par: received parity data field
+ * @len: data length
+ * @s: syndrome data field (if NULL, syndrome is calculated)
+ * @no_eras: number of erasures
+ * @eras_pos: position of erasures, can be NULL
+ * @invmsk: invert data mask (will be xored on data, not on parity!)
+ * @corr: buffer to store correction bitmask on eras_pos
+ *
+ * The syndrome and parity uses a uint16_t data type to enable
+ * symbol size > 8. The calling code must take care of decoding of the
+ * syndrome result and the received parity before calling this code.
+ * Returns the number of corrected symbols or -EBADMSG for uncorrectable errors.
+ */
+int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len,
+ uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
+ uint16_t *corr)
+{
+#include "decode_rs.c"
+}
+EXPORT_SYMBOL_GPL(decode_rs8);
+#endif
+
+#ifdef CONFIG_REED_SOLOMON_ENC16
+/**
+ * encode_rs16 - Calculate the parity for data values (16bit data width)
+ * @rs: the rs control structure
+ * @data: data field of a given type
+ * @len: data length
+ * @par: parity data, must be initialized by caller (usually all 0)
+ * @invmsk: invert data mask (will be xored on data, not on parity!)
+ *
+ * Each field in the data array contains up to symbol size bits of valid data.
+ */
+int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par,
+ uint16_t invmsk)
+{
+#include "encode_rs.c"
+}
+EXPORT_SYMBOL_GPL(encode_rs16);
+#endif
+
+#ifdef CONFIG_REED_SOLOMON_DEC16
+/**
+ * decode_rs16 - Decode codeword (16bit data width)
+ * @rs: the rs control structure
+ * @data: data field of a given type
+ * @par: received parity data field
+ * @len: data length
+ * @s: syndrome data field (if NULL, syndrome is calculated)
+ * @no_eras: number of erasures
+ * @eras_pos: position of erasures, can be NULL
+ * @invmsk: invert data mask (will be xored on data, not on parity!)
+ * @corr: buffer to store correction bitmask on eras_pos
+ *
+ * Each field in the data array contains up to symbol size bits of valid data.
+ * Returns the number of corrected symbols or -EBADMSG for uncorrectable errors.
+ */
+int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len,
+ uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk,
+ uint16_t *corr)
+{
+#include "decode_rs.c"
+}
+EXPORT_SYMBOL_GPL(decode_rs16);
+#endif
+
+EXPORT_SYMBOL_GPL(init_rs);
+EXPORT_SYMBOL_GPL(init_rs_non_canonical);
+EXPORT_SYMBOL_GPL(free_rs);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Reed Solomon encoder/decoder");
+MODULE_AUTHOR("Phil Karn, Thomas Gleixner");
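A minimal sketch of the init/encode-decode/free sequence the header comment describes, using a hypothetical driver; the RS parameters shown (10-bit symbols, field polynomial 0x409, fcr 0, prim 1, 4 roots) are illustrative values only:

	/* hypothetical driver sketch -- parameters are illustrative */
	static struct rs_control *rs_decoder;

	static int __init mydrv_init(void)
	{
		/* building the GF tables is slow: do it once at init time */
		rs_decoder = init_rs(10, 0x409, 0, 1, 4);
		return rs_decoder ? 0 : -ENOMEM;
	}

	static int mydrv_correct(uint8_t *data, int len, uint16_t *par)
	{
		/* s == NULL: decode_rs8 calculates the syndrome itself */
		return decode_rs8(rs_decoder, data, par, len,
				  NULL, 0, NULL, 0, NULL);
	}

	static void __exit mydrv_exit(void)
	{
		free_rs(rs_decoder);
	}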
+
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
new file mode 100644
index 00000000..ffc9fc7f
--- /dev/null
+++ b/lib/rwsem-spinlock.c
@@ -0,0 +1,323 @@
+/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
+ * generic spinlock implementation
+ *
+ * Copyright (c) 2001 David Howells (dhowells@redhat.com).
+ * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
+ * - Derived also from comments by Linus
+ */
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ unsigned int flags;
+#define RWSEM_WAITING_FOR_READ 0x00000001
+#define RWSEM_WAITING_FOR_WRITE 0x00000002
+};
+
+int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ int ret = 1;
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&sem->wait_lock, flags)) {
+ ret = (sem->activity != 0);
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rwsem_is_locked);
+
+/*
+ * initialise the semaphore
+ */
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held semaphore:
+ */
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+ lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
+ sem->activity = 0;
+ spin_lock_init(&sem->wait_lock);
+ INIT_LIST_HEAD(&sem->wait_list);
+}
+EXPORT_SYMBOL(__init_rwsem);
+
+/*
+ * handle the lock release when there are processes blocked on it that can now run
+ * - if we come here, then:
+ * - the 'active count' _reached_ zero
+ * - the 'waiting count' is non-zero
+ * - the spinlock must be held by the caller
+ * - woken process blocks are discarded from the list after having task zeroed
+ * - writers are only woken if wakewrite is non-zero
+ */
+static inline struct rw_semaphore *
+__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
+{
+ struct rwsem_waiter *waiter;
+ struct task_struct *tsk;
+ int woken;
+
+ waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+
+ if (!wakewrite) {
+ if (waiter->flags & RWSEM_WAITING_FOR_WRITE)
+ goto out;
+ goto dont_wake_writers;
+ }
+
+ /* if we are allowed to wake writers try to grant a single write lock
+ * if there's a writer at the front of the queue
+ * - we leave the 'waiting count' incremented to signify potential
+ * contention
+ */
+ if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
+ sem->activity = -1;
+ list_del(&waiter->list);
+ tsk = waiter->task;
+ /* Don't touch waiter after ->task has been NULLed */
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ goto out;
+ }
+
+ /* grant an infinite number of read locks to the front of the queue */
+ dont_wake_writers:
+ woken = 0;
+ while (waiter->flags & RWSEM_WAITING_FOR_READ) {
+ struct list_head *next = waiter->list.next;
+
+ list_del(&waiter->list);
+ tsk = waiter->task;
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ woken++;
+ if (list_empty(&sem->wait_list))
+ break;
+ waiter = list_entry(next, struct rwsem_waiter, list);
+ }
+
+ sem->activity += woken;
+
+ out:
+ return sem;
+}
+
+/*
+ * wake a single writer
+ */
+static inline struct rw_semaphore *
+__rwsem_wake_one_writer(struct rw_semaphore *sem)
+{
+ struct rwsem_waiter *waiter;
+ struct task_struct *tsk;
+
+ sem->activity = -1;
+
+ waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+ list_del(&waiter->list);
+
+ tsk = waiter->task;
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ return sem;
+}
+
+/*
+ * get a read lock on the semaphore
+ */
+void __sched __down_read(struct rw_semaphore *sem)
+{
+ struct rwsem_waiter waiter;
+ struct task_struct *tsk;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+ sem->activity++;
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+ goto out;
+ }
+
+ tsk = current;
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ waiter.flags = RWSEM_WAITING_FOR_READ;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the lock */
+ for (;;) {
+ if (!waiter.task)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+ out:
+ ;
+}
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+int __down_read_trylock(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+ sem->activity++;
+ ret = 1;
+ }
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return ret;
+}
+
+/*
+ * get a write lock on the semaphore
+ * - we increment the waiting count anyway to indicate an exclusive lock
+ */
+void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
+{
+ struct rwsem_waiter waiter;
+ struct task_struct *tsk;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (sem->activity == 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+ sem->activity = -1;
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+ goto out;
+ }
+
+ tsk = current;
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ /* set up my own style of waitqueue */
+ waiter.task = tsk;
+ waiter.flags = RWSEM_WAITING_FOR_WRITE;
+ get_task_struct(tsk);
+
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we don't need to touch the semaphore struct anymore */
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ /* wait to be given the lock */
+ for (;;) {
+ if (!waiter.task)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+ out:
+ ;
+}
+
+void __sched __down_write(struct rw_semaphore *sem)
+{
+ __down_write_nested(sem, 0);
+}
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+int __down_write_trylock(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (sem->activity == 0 && list_empty(&sem->wait_list)) {
+ /* granted */
+ sem->activity = -1;
+ ret = 1;
+ }
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return ret;
+}
+
+/*
+ * release a read lock on the semaphore
+ */
+void __up_read(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ if (--sem->activity == 0 && !list_empty(&sem->wait_list))
+ sem = __rwsem_wake_one_writer(sem);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+}
+
+/*
+ * release a write lock on the semaphore
+ */
+void __up_write(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ sem->activity = 0;
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, 1);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+}
+
+/*
+ * downgrade a write lock into a read lock
+ * - just wake up any readers at the front of the queue
+ */
+void __downgrade_write(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ sem->activity = 1;
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, 0);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+}
+
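For orientation, a sketch of how the generic rwsem API reaches these slow paths; sem->activity encodes the state (0 free, n > 0 for n readers, -1 for one writer). The callers below are hypothetical:

	static DECLARE_RWSEM(my_sem);

	static void my_reader(void)
	{
		down_read(&my_sem);	/* contended case enters __down_read() */
		/* ... read shared state; activity > 0 here ... */
		up_read(&my_sem);	/* __up_read(): --activity, maybe wake a writer */
	}

	static void my_writer(void)
	{
		down_write(&my_sem);	/* granted with activity == -1 */
		/* ... modify shared state ... */
		downgrade_write(&my_sem);	/* activity = 1, wakes queued readers */
		up_read(&my_sem);
	}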
diff --git a/lib/rwsem.c b/lib/rwsem.c
new file mode 100644
index 00000000..f236d7cd
--- /dev/null
+++ b/lib/rwsem.c
@@ -0,0 +1,284 @@
+/* rwsem.c: R/W semaphores: contention handling functions
+ *
+ * Written by David Howells (dhowells@redhat.com).
+ * Derived from arch/i386/kernel/semaphore.c
+ */
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+/*
+ * Initialize an rwsem:
+ */
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held semaphore:
+ */
+ debug_check_no_locks_freed((void *)sem, sizeof(*sem));
+ lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
+ sem->count = RWSEM_UNLOCKED_VALUE;
+ spin_lock_init(&sem->wait_lock);
+ INIT_LIST_HEAD(&sem->wait_list);
+}
+
+EXPORT_SYMBOL(__init_rwsem);
+
+struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ unsigned int flags;
+#define RWSEM_WAITING_FOR_READ 0x00000001
+#define RWSEM_WAITING_FOR_WRITE 0x00000002
+};
+
+/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and
+ * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held
+ * since the rwsem value was observed.
+ */
+#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */
+#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */
+#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */
+
+/*
+ * handle the lock release when there are processes blocked on it that can now run
+ * - if we come here from up_xxxx(), then:
+ * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
+ * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
+ * - there must be someone on the queue
+ * - the spinlock must be held by the caller
+ * - woken process blocks are discarded from the list after having task zeroed
+ * - writers are only woken if wake_type is not RWSEM_WAKE_READ_OWNED
+ */
+static struct rw_semaphore *
+__rwsem_do_wake(struct rw_semaphore *sem, int wake_type)
+{
+ struct rwsem_waiter *waiter;
+ struct task_struct *tsk;
+ struct list_head *next;
+ signed long oldcount, woken, loop, adjustment;
+
+ waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+ if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
+ goto readers_only;
+
+ if (wake_type == RWSEM_WAKE_READ_OWNED)
+ /* Another active reader was observed, so wakeup is not
+ * likely to succeed. Save the atomic op.
+ */
+ goto out;
+
+ /* There's a writer at the front of the queue - try to grant it the
+ * write lock. However, we only wake this writer if we can transition
+ * the active part of the count from 0 -> 1
+ */
+ adjustment = RWSEM_ACTIVE_WRITE_BIAS;
+ if (waiter->list.next == &sem->wait_list)
+ adjustment -= RWSEM_WAITING_BIAS;
+
+ try_again_write:
+ oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+ if (oldcount & RWSEM_ACTIVE_MASK)
+ /* Someone grabbed the sem already */
+ goto undo_write;
+
+ /* We must be careful not to touch 'waiter' after we set ->task = NULL.
+ * It is allocated on the waiter's stack and may become invalid at
+ * any time after that point (due to a wakeup from another source).
+ */
+ list_del(&waiter->list);
+ tsk = waiter->task;
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ goto out;
+
+ readers_only:
+ /* If we come here from up_xxxx(), another thread might have reached
+ * rwsem_down_failed_common() before we acquired the spinlock and
+ * woken up a waiter, making it now active. We prefer to check for
+ * this first in order to not spend too much time with the spinlock
+ * held if we're not going to be able to wake up readers in the end.
+ *
+ * Note that we do not need to update the rwsem count: any writer
+ * trying to acquire rwsem will run rwsem_down_write_failed() due
+ * to the waiting threads and block trying to acquire the spinlock.
+ *
+ * We use a dummy atomic update in order to acquire the cache line
+ * exclusively since we expect to succeed and run the final rwsem
+ * count adjustment pretty soon.
+ */
+ if (wake_type == RWSEM_WAKE_ANY &&
+ rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS)
+ /* Someone grabbed the sem for write already */
+ goto out;
+
+ /* Grant an infinite number of read locks to the readers at the front
+ * of the queue. Note we increment the 'active part' of the count by
+ * the number of readers before waking any processes up.
+ */
+ woken = 0;
+ do {
+ woken++;
+
+ if (waiter->list.next == &sem->wait_list)
+ break;
+
+ waiter = list_entry(waiter->list.next,
+ struct rwsem_waiter, list);
+
+ } while (waiter->flags & RWSEM_WAITING_FOR_READ);
+
+ adjustment = woken * RWSEM_ACTIVE_READ_BIAS;
+ if (waiter->flags & RWSEM_WAITING_FOR_READ)
+ /* hit end of list above */
+ adjustment -= RWSEM_WAITING_BIAS;
+
+ rwsem_atomic_add(adjustment, sem);
+
+ next = sem->wait_list.next;
+ for (loop = woken; loop > 0; loop--) {
+ waiter = list_entry(next, struct rwsem_waiter, list);
+ next = waiter->list.next;
+ tsk = waiter->task;
+ smp_mb();
+ waiter->task = NULL;
+ wake_up_process(tsk);
+ put_task_struct(tsk);
+ }
+
+ sem->wait_list.next = next;
+ next->prev = &sem->wait_list;
+
+ out:
+ return sem;
+
+ /* undo the change to the active count, but check for a transition
+ * 1->0 */
+ undo_write:
+ if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK)
+ goto out;
+ goto try_again_write;
+}
+
+/*
+ * wait for a lock to be granted
+ */
+static struct rw_semaphore __sched *
+rwsem_down_failed_common(struct rw_semaphore *sem,
+ unsigned int flags, signed long adjustment)
+{
+ struct rwsem_waiter waiter;
+ struct task_struct *tsk = current;
+ signed long count;
+
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+
+ /* set up my own style of waitqueue */
+ spin_lock_irq(&sem->wait_lock);
+ waiter.task = tsk;
+ waiter.flags = flags;
+ get_task_struct(tsk);
+
+ if (list_empty(&sem->wait_list))
+ adjustment += RWSEM_WAITING_BIAS;
+ list_add_tail(&waiter.list, &sem->wait_list);
+
+ /* we're now waiting on the lock, but no longer actively locking */
+ count = rwsem_atomic_update(adjustment, sem);
+
+ /* If there are no active locks, wake the front queued process(es) up.
+ *
+ * Alternatively, if we're called from a failed down_write(), there
+ * were already threads queued before us and there are no active
+ * writers, the lock must be read owned; so we try to wake any read
+ * locks that were queued ahead of us. */
+ if (count == RWSEM_WAITING_BIAS)
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE);
+ else if (count > RWSEM_WAITING_BIAS &&
+ adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+
+ spin_unlock_irq(&sem->wait_lock);
+
+ /* wait to be given the lock */
+ for (;;) {
+ if (!waiter.task)
+ break;
+ schedule();
+ set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ }
+
+ tsk->state = TASK_RUNNING;
+
+ return sem;
+}
+
+/*
+ * wait for the read lock to be granted
+ */
+asmregparm struct rw_semaphore __sched *
+rwsem_down_read_failed(struct rw_semaphore *sem)
+{
+ return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ,
+ -RWSEM_ACTIVE_READ_BIAS);
+}
+
+/*
+ * wait for the write lock to be granted
+ */
+asmregparm struct rw_semaphore __sched *
+rwsem_down_write_failed(struct rw_semaphore *sem)
+{
+ return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE,
+ -RWSEM_ACTIVE_WRITE_BIAS);
+}
+
+/*
+ * handle waking up a waiter on the semaphore
+ * - up_read/up_write has decremented the active part of count if we come here
+ */
+asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ /* do nothing if list empty */
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return sem;
+}
+
+/*
+ * downgrade a write lock into a read lock
+ * - caller incremented waiting part of count and discovered it still negative
+ * - just wake up any readers at the front of the queue
+ */
+asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&sem->wait_lock, flags);
+
+ /* do nothing if list empty */
+ if (!list_empty(&sem->wait_list))
+ sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+
+ spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return sem;
+}
+
+EXPORT_SYMBOL(rwsem_down_read_failed);
+EXPORT_SYMBOL(rwsem_down_write_failed);
+EXPORT_SYMBOL(rwsem_wake);
+EXPORT_SYMBOL(rwsem_downgrade_wake);
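The bias constants referenced above are arch-defined; as a sketch, the 32-bit values used by e.g. x86 give the following count encoding:

	#define RWSEM_UNLOCKED_VALUE		0x00000000
	#define RWSEM_ACTIVE_BIAS		0x00000001
	#define RWSEM_ACTIVE_MASK		0x0000ffff
	#define RWSEM_WAITING_BIAS		(-0x00010000)
	#define RWSEM_ACTIVE_READ_BIAS		RWSEM_ACTIVE_BIAS
	#define RWSEM_ACTIVE_WRITE_BIAS		(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

	/*
	 * Resulting count values:
	 *	0x00000000	unlocked
	 *	0x0000000N	N readers active, no waiters
	 *	0xffff0001	one writer active, no waiters
	 *	0xffff000N	N readers active, waiters queued
	 *	0xfffe0001	writer active, waiters queued
	 *	0xffff0000	nothing active, waiters queued (wakeup pending)
	 */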
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
new file mode 100644
index 00000000..4ceb05d7
--- /dev/null
+++ b/lib/scatterlist.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com>
+ *
+ * Scatterlist handling helpers.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <linux/kmemleak.h>
+
+/**
+ * sg_next - return the next scatterlist entry in a list
+ * @sg: The current sg entry
+ *
+ * Description:
+ * Usually the next entry will be @sg@ + 1, but if this sg element is part
+ * of a chained scatterlist, it could jump to the start of a new
+ * scatterlist array.
+ *
+ **/
+struct scatterlist *sg_next(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg->sg_magic != SG_MAGIC);
+#endif
+ if (sg_is_last(sg))
+ return NULL;
+
+ sg++;
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+
+ return sg;
+}
+EXPORT_SYMBOL(sg_next);
+
+/**
+ * sg_last - return the last scatterlist entry in a list
+ * @sgl: First entry in the scatterlist
+ * @nents: Number of entries in the scatterlist
+ *
+ * Description:
+ * Should only be used casually, it (currently) scans the entire list
+ * to get the last entry.
+ *
+ * Note that the @sgl@ pointer passed in need not be the first one,
+ * the important bit is that @nents@ denotes the number of entries that
+ * exist from @sgl@.
+ *
+ **/
+struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents)
+{
+#ifndef ARCH_HAS_SG_CHAIN
+ struct scatterlist *ret = &sgl[nents - 1];
+#else
+ struct scatterlist *sg, *ret = NULL;
+ unsigned int i;
+
+ for_each_sg(sgl, sg, nents, i)
+ ret = sg;
+
+#endif
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sgl[0].sg_magic != SG_MAGIC);
+ BUG_ON(!sg_is_last(ret));
+#endif
+ return ret;
+}
+EXPORT_SYMBOL(sg_last);
+
+/**
+ * sg_init_table - Initialize SG table
+ * @sgl: The SG table
+ * @nents: Number of entries in table
+ *
+ * Notes:
+ * If this is part of a chained sg table, sg_mark_end() should be
+ * used only on the last table part.
+ *
+ **/
+void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+#ifdef CONFIG_DEBUG_SG
+ {
+ unsigned int i;
+ for (i = 0; i < nents; i++)
+ sgl[i].sg_magic = SG_MAGIC;
+ }
+#endif
+ sg_mark_end(&sgl[nents - 1]);
+}
+EXPORT_SYMBOL(sg_init_table);
+
+/**
+ * sg_init_one - Initialize a single entry sg list
+ * @sg: SG entry
+ * @buf: Virtual address for IO
+ * @buflen: IO length
+ *
+ **/
+void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
+{
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+EXPORT_SYMBOL(sg_init_one);
+
+/*
+ * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree
+ * helpers.
+ */
+static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
+{
+ if (nents == SG_MAX_SINGLE_ALLOC) {
+ /*
+ * Kmemleak doesn't track page allocations as they are not
+ * commonly used (in a raw form) for kernel data structures.
+ * As we chain together a list of pages and then a normal
+ * kmalloc (tracked by kmemleak), in order for that last
+ * allocation not to become decoupled (and thus a
+ * false-positive) we need to inform kmemleak of all the
+ * intermediate allocations.
+ */
+ void *ptr = (void *) __get_free_page(gfp_mask);
+ kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
+ return ptr;
+ } else
+ return kmalloc(nents * sizeof(struct scatterlist), gfp_mask);
+}
+
+static void sg_kfree(struct scatterlist *sg, unsigned int nents)
+{
+ if (nents == SG_MAX_SINGLE_ALLOC) {
+ kmemleak_free(sg);
+ free_page((unsigned long) sg);
+ } else
+ kfree(sg);
+}
+
+/**
+ * __sg_free_table - Free a previously mapped sg table
+ * @table: The sg table header to use
+ * @max_ents: The maximum number of entries per single scatterlist
+ * @free_fn: Free function
+ *
+ * Description:
+ * Free an sg table previously allocated and setup with
+ * __sg_alloc_table(). The @max_ents value must be identical to
+ * that previously used with __sg_alloc_table().
+ *
+ **/
+void __sg_free_table(struct sg_table *table, unsigned int max_ents,
+ sg_free_fn *free_fn)
+{
+ struct scatterlist *sgl, *next;
+
+ if (unlikely(!table->sgl))
+ return;
+
+ sgl = table->sgl;
+ while (table->orig_nents) {
+ unsigned int alloc_size = table->orig_nents;
+ unsigned int sg_size;
+
+ /*
+ * If we have more than max_ents segments left,
+ * then assign 'next' to the sg table after the current one.
+ * sg_size is then one less than alloc size, since the last
+ * element is the chain pointer.
+ */
+ if (alloc_size > max_ents) {
+ next = sg_chain_ptr(&sgl[max_ents - 1]);
+ alloc_size = max_ents;
+ sg_size = alloc_size - 1;
+ } else {
+ sg_size = alloc_size;
+ next = NULL;
+ }
+
+ table->orig_nents -= sg_size;
+ free_fn(sgl, alloc_size);
+ sgl = next;
+ }
+
+ table->sgl = NULL;
+}
+EXPORT_SYMBOL(__sg_free_table);
+
+/**
+ * sg_free_table - Free a previously allocated sg table
+ * @table: The mapped sg table header
+ *
+ **/
+void sg_free_table(struct sg_table *table)
+{
+ __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+}
+EXPORT_SYMBOL(sg_free_table);
+
+/**
+ * __sg_alloc_table - Allocate and initialize an sg table with given allocator
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @max_ents: The maximum number of entries the allocator returns per call
+ * @gfp_mask: GFP allocation mask
+ * @alloc_fn: Allocator to use
+ *
+ * Description:
+ * This function returns a @table @nents long. The allocator is
+ * defined to return scatterlist chunks of maximum size @max_ents.
+ * Thus if @nents is bigger than @max_ents, the scatterlists will be
+ * chained in units of @max_ents.
+ *
+ * Notes:
+ * If this function returns non-0 (i.e. failure), the caller must call
+ * __sg_free_table() to clean up any leftover allocations.
+ *
+ **/
+int __sg_alloc_table(struct sg_table *table, unsigned int nents,
+ unsigned int max_ents, gfp_t gfp_mask,
+ sg_alloc_fn *alloc_fn)
+{
+ struct scatterlist *sg, *prv;
+ unsigned int left;
+
+#ifndef ARCH_HAS_SG_CHAIN
+ BUG_ON(nents > max_ents);
+#endif
+
+ memset(table, 0, sizeof(*table));
+
+ left = nents;
+ prv = NULL;
+ do {
+ unsigned int sg_size, alloc_size = left;
+
+ if (alloc_size > max_ents) {
+ alloc_size = max_ents;
+ sg_size = alloc_size - 1;
+ } else
+ sg_size = alloc_size;
+
+ left -= sg_size;
+
+ sg = alloc_fn(alloc_size, gfp_mask);
+ if (unlikely(!sg)) {
+ /*
+ * Adjust entry count to reflect that the last
+ * entry of the previous table won't be used for
+ * linkage. Without this, sg_kfree() may get
+ * confused.
+ */
+ if (prv)
+ table->nents = ++table->orig_nents;
+
+ return -ENOMEM;
+ }
+
+ sg_init_table(sg, alloc_size);
+ table->nents = table->orig_nents += sg_size;
+
+ /*
+ * If this is the first mapping, assign the sg table header.
+ * If this is not the first mapping, chain previous part.
+ */
+ if (prv)
+ sg_chain(prv, max_ents, sg);
+ else
+ table->sgl = sg;
+
+ /*
+ * If no more entries after this one, mark the end
+ */
+ if (!left)
+ sg_mark_end(&sg[sg_size - 1]);
+
+ /*
+ * only really needed for mempool backed sg allocations (like
+ * SCSI), a possible improvement here would be to pass the
+ * table pointer into the allocator and let that clear these
+ * flags
+ */
+ gfp_mask &= ~__GFP_WAIT;
+ gfp_mask |= __GFP_HIGH;
+ prv = sg;
+ } while (left);
+
+ return 0;
+}
+EXPORT_SYMBOL(__sg_alloc_table);
+
+/**
+ * sg_alloc_table - Allocate and initialize an sg table
+ * @table: The sg table header to use
+ * @nents: Number of entries in sg list
+ * @gfp_mask: GFP allocation mask
+ *
+ * Description:
+ * Allocate and initialize an sg table. If @nents@ is larger than
+ * SG_MAX_SINGLE_ALLOC a chained sg table will be setup.
+ *
+ **/
+int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
+{
+ int ret;
+
+ ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
+ gfp_mask, sg_kmalloc);
+ if (unlikely(ret))
+ __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree);
+
+ return ret;
+}
+EXPORT_SYMBOL(sg_alloc_table);
+
+/**
+ * sg_miter_start - start mapping iteration over a sg list
+ * @miter: sg mapping iter to be started
+ * @sgl: sg list to iterate over
+ * @nents: number of sg entries
+ *
+ * Description:
+ * Starts mapping iterator @miter.
+ *
+ * Context:
+ * Don't care.
+ */
+void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl,
+ unsigned int nents, unsigned int flags)
+{
+ memset(miter, 0, sizeof(struct sg_mapping_iter));
+
+ miter->__sg = sgl;
+ miter->__nents = nents;
+ miter->__offset = 0;
+ WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG)));
+ miter->__flags = flags;
+}
+EXPORT_SYMBOL(sg_miter_start);
+
+/**
+ * sg_miter_next - proceed mapping iterator to the next mapping
+ * @miter: sg mapping iter to proceed
+ *
+ * Description:
+ * Proceeds @miter@ to the next mapping. @miter@ should have been
+ * started using sg_miter_start(). On successful return,
+ * @miter@->page, @miter@->addr and @miter@->length point to the
+ * current mapping.
+ *
+ * Context:
+ * IRQ disabled if SG_MITER_ATOMIC. IRQ must stay disabled till
+ * @miter@ is stopped. May sleep if !SG_MITER_ATOMIC.
+ *
+ * Returns:
+ * true if @miter contains the next mapping. false if end of sg
+ * list is reached.
+ */
+bool sg_miter_next(struct sg_mapping_iter *miter)
+{
+ unsigned int off, len;
+
+ /* check for end and drop resources from the last iteration */
+ if (!miter->__nents)
+ return false;
+
+ sg_miter_stop(miter);
+
+ /* get to the next sg if necessary. __offset is adjusted by stop */
+ while (miter->__offset == miter->__sg->length) {
+ if (--miter->__nents) {
+ miter->__sg = sg_next(miter->__sg);
+ miter->__offset = 0;
+ } else
+ return false;
+ }
+
+ /* map the next page */
+ off = miter->__sg->offset + miter->__offset;
+ len = miter->__sg->length - miter->__offset;
+
+ miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT);
+ off &= ~PAGE_MASK;
+ miter->length = min_t(unsigned int, len, PAGE_SIZE - off);
+ miter->consumed = miter->length;
+
+ if (miter->__flags & SG_MITER_ATOMIC)
+ miter->addr = kmap_atomic(miter->page, KM_BIO_SRC_IRQ) + off;
+ else
+ miter->addr = kmap(miter->page) + off;
+
+ return true;
+}
+EXPORT_SYMBOL(sg_miter_next);
+
+/**
+ * sg_miter_stop - stop mapping iteration
+ * @miter: sg mapping iter to be stopped
+ *
+ * Description:
+ * Stops mapping iterator @miter. @miter should have been started
+ * using sg_miter_start(). A stopped iteration can be
+ * resumed by calling sg_miter_next() on it. This is useful when
+ * resources (kmap) need to be released during iteration.
+ *
+ * Context:
+ * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise.
+ */
+void sg_miter_stop(struct sg_mapping_iter *miter)
+{
+ WARN_ON(miter->consumed > miter->length);
+
+ /* drop resources from the last iteration */
+ if (miter->addr) {
+ miter->__offset += miter->consumed;
+
+ if (miter->__flags & SG_MITER_TO_SG)
+ flush_kernel_dcache_page(miter->page);
+
+ if (miter->__flags & SG_MITER_ATOMIC) {
+ WARN_ON(!irqs_disabled());
+ kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ);
+ } else
+ kunmap(miter->page);
+
+ miter->page = NULL;
+ miter->addr = NULL;
+ miter->length = 0;
+ miter->consumed = 0;
+ }
+}
+EXPORT_SYMBOL(sg_miter_stop);
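Putting the three miter calls together, a walk over a (possibly chained) list looks like the sketch below, where sgl and nents are assumed to come from the caller:

	struct sg_mapping_iter miter;
	size_t total = 0;

	sg_miter_start(&miter, sgl, nents, SG_MITER_FROM_SG);
	while (sg_miter_next(&miter)) {
		/* miter.addr maps the current page fragment */
		total += miter.length;
	}
	sg_miter_stop(&miter);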
+
+/**
+ * sg_copy_buffer - Copy data between a linear buffer and an SG list
+ * @sgl: The SG list
+ * @nents: Number of SG entries
+ * @buf: Where to copy from
+ * @buflen: The number of bytes to copy
+ * @to_buffer: transfer direction (non-zero == from an sg list to a
+ * buffer, 0 == from a buffer to an sg list)
+ *
+ * Returns the number of copied bytes.
+ *
+ **/
+static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
+ void *buf, size_t buflen, int to_buffer)
+{
+ unsigned int offset = 0;
+ struct sg_mapping_iter miter;
+ unsigned long flags;
+ unsigned int sg_flags = SG_MITER_ATOMIC;
+
+ if (to_buffer)
+ sg_flags |= SG_MITER_FROM_SG;
+ else
+ sg_flags |= SG_MITER_TO_SG;
+
+ sg_miter_start(&miter, sgl, nents, sg_flags);
+
+ local_irq_save(flags);
+
+ while (sg_miter_next(&miter) && offset < buflen) {
+ unsigned int len;
+
+ len = min(miter.length, buflen - offset);
+
+ if (to_buffer)
+ memcpy(buf + offset, miter.addr, len);
+ else
+ memcpy(miter.addr, buf + offset, len);
+
+ offset += len;
+ }
+
+ sg_miter_stop(&miter);
+
+ local_irq_restore(flags);
+ return offset;
+}
+
+/**
+ * sg_copy_from_buffer - Copy from a linear buffer to an SG list
+ * @sgl: The SG list
+ * @nents: Number of SG entries
+ * @buf: Where to copy from
+ * @buflen: The number of bytes to copy
+ *
+ * Returns the number of copied bytes.
+ *
+ **/
+size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
+ void *buf, size_t buflen)
+{
+ return sg_copy_buffer(sgl, nents, buf, buflen, 0);
+}
+EXPORT_SYMBOL(sg_copy_from_buffer);
+
+/**
+ * sg_copy_to_buffer - Copy from an SG list to a linear buffer
+ * @sgl: The SG list
+ * @nents: Number of SG entries
+ * @buf: Where to copy to
+ * @buflen: The number of bytes to copy
+ *
+ * Returns the number of copied bytes.
+ *
+ **/
+size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
+ void *buf, size_t buflen)
+{
+ return sg_copy_buffer(sgl, nents, buf, buflen, 1);
+}
+EXPORT_SYMBOL(sg_copy_to_buffer);
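A short hypothetical example of the allocation and copy helpers above: build a three-entry table, point each entry at a caller-provided buffer, and fill it from a linear source (names and sizes are illustrative):

	static int fill_three_buffers(void *bufs[3], void *src)
	{
		struct sg_table table;
		struct scatterlist *sg;
		int i, ret;

		ret = sg_alloc_table(&table, 3, GFP_KERNEL);
		if (ret)
			return ret;

		for_each_sg(table.sgl, sg, table.nents, i)
			sg_set_buf(sg, bufs[i], PAGE_SIZE);

		/* page mapping and chain traversal are handled internally */
		sg_copy_from_buffer(table.sgl, table.nents, src, 3 * PAGE_SIZE);

		sg_free_table(&table);
		return 0;
	}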
diff --git a/lib/sha1.c b/lib/sha1.c
new file mode 100644
index 00000000..4c45fd50
--- /dev/null
+++ b/lib/sha1.c
@@ -0,0 +1,95 @@
+/*
+ * SHA transform algorithm, originally taken from code written by
+ * Peter Gutmann, and placed in the public domain.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/cryptohash.h>
+
+/* The SHA f()-functions. */
+
+#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */
+#define f2(x,y,z) (x ^ y ^ z) /* XOR */
+#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */
+
+/* The SHA Mysterious Constants */
+
+#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */
+#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */
+#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */
+#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */
+
+/**
+ * sha_transform - single block SHA1 transform
+ *
+ * @digest: 160 bit digest to update
+ * @in: 512 bits of data to hash
+ * @W: 80 words of workspace (see note)
+ *
+ * This function generates a SHA1 digest for a single 512-bit block.
+ * Be warned: it does not handle padding and finalization, so do not
+ * confuse it with the full FIPS 180-1 digest algorithm for variable
+ * length messages.
+ *
+ * Note: If the hash is security sensitive, the caller should be sure
+ * to clear the workspace. This is left to the caller to avoid
+ * unnecessary clears between chained hashing operations.
+ */
+void sha_transform(__u32 *digest, const char *in, __u32 *W)
+{
+ __u32 a, b, c, d, e, t, i;
+
+ for (i = 0; i < 16; i++)
+ W[i] = be32_to_cpu(((const __be32 *)in)[i]);
+
+ for (i = 0; i < 64; i++)
+ W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1);
+
+ a = digest[0];
+ b = digest[1];
+ c = digest[2];
+ d = digest[3];
+ e = digest[4];
+
+ for (i = 0; i < 20; i++) {
+ t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ for (; i < 40; i ++) {
+ t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ for (; i < 60; i ++) {
+ t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ for (; i < 80; i ++) {
+ t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+}
+EXPORT_SYMBOL(sha_transform);
+
+/**
+ * sha_init - initialize the vectors for a SHA1 digest
+ * @buf: vector to initialize
+ */
+void sha_init(__u32 *buf)
+{
+ buf[0] = 0x67452301;
+ buf[1] = 0xefcdab89;
+ buf[2] = 0x98badcfe;
+ buf[3] = 0x10325476;
+ buf[4] = 0xc3d2e1f0;
+}
+
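A sketch of hashing exactly one pre-padded 512-bit block with the two helpers above; padding, as the comment warns, remains the caller's problem:

	static void sha1_single_block(const char block[64], __u32 digest[5])
	{
		__u32 workspace[SHA_WORKSPACE_WORDS];	/* 80 words */

		sha_init(digest);
		sha_transform(digest, block, workspace);
		/* clear the workspace if the hash is security sensitive */
		memset(workspace, 0, sizeof(workspace));
	}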
diff --git a/lib/show_mem.c b/lib/show_mem.c
new file mode 100644
index 00000000..fdc77c82
--- /dev/null
+++ b/lib/show_mem.c
@@ -0,0 +1,63 @@
+/*
+ * Generic show_mem() implementation
+ *
+ * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de>
+ * All code subject to the GPL version 2.
+ */
+
+#include <linux/mm.h>
+#include <linux/nmi.h>
+#include <linux/quicklist.h>
+
+void show_mem(void)
+{
+ pg_data_t *pgdat;
+ unsigned long total = 0, reserved = 0, shared = 0,
+ nonshared = 0, highmem = 0;
+
+ printk("Mem-Info:\n");
+ show_free_areas();
+
+ for_each_online_pgdat(pgdat) {
+ unsigned long i, flags;
+
+ pgdat_resize_lock(pgdat, &flags);
+ for (i = 0; i < pgdat->node_spanned_pages; i++) {
+ struct page *page;
+ unsigned long pfn = pgdat->node_start_pfn + i;
+
+ if (unlikely(!(i % MAX_ORDER_NR_PAGES)))
+ touch_nmi_watchdog();
+
+ if (!pfn_valid(pfn))
+ continue;
+
+ page = pfn_to_page(pfn);
+
+ if (PageHighMem(page))
+ highmem++;
+
+ if (PageReserved(page))
+ reserved++;
+ else if (page_count(page) == 1)
+ nonshared++;
+ else if (page_count(page) > 1)
+ shared += page_count(page) - 1;
+
+ total++;
+ }
+ pgdat_resize_unlock(pgdat, &flags);
+ }
+
+ printk("%lu pages RAM\n", total);
+#ifdef CONFIG_HIGHMEM
+ printk("%lu pages HighMem\n", highmem);
+#endif
+ printk("%lu pages reserved\n", reserved);
+ printk("%lu pages shared\n", shared);
+ printk("%lu pages non-shared\n", nonshared);
+#ifdef CONFIG_QUICKLIST
+ printk("%lu pages in pagetable cache\n",
+ quicklist_total_size());
+#endif
+}
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
new file mode 100644
index 00000000..4689cb07
--- /dev/null
+++ b/lib/smp_processor_id.c
@@ -0,0 +1,55 @@
+/*
+ * lib/smp_processor_id.c
+ *
+ * DEBUG_PREEMPT variant of smp_processor_id().
+ */
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+
+notrace unsigned int debug_smp_processor_id(void)
+{
+ unsigned long preempt_count = preempt_count();
+ int this_cpu = raw_smp_processor_id();
+
+ if (likely(preempt_count))
+ goto out;
+
+ if (irqs_disabled())
+ goto out;
+
+ /*
+ * Kernel threads bound to a single CPU can safely use
+ * smp_processor_id():
+ */
+ if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
+ goto out;
+
+ /*
+ * It is valid to assume CPU-locality during early bootup:
+ */
+ if (system_state != SYSTEM_RUNNING)
+ goto out;
+
+ /*
+ * Avoid recursion:
+ */
+ preempt_disable_notrace();
+
+ if (!printk_ratelimit())
+ goto out_enable;
+
+ printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] "
+ "code: %s/%d\n",
+ preempt_count() - 1, current->comm, current->pid);
+ print_symbol("caller is %s\n", (long)__builtin_return_address(0));
+ dump_stack();
+
+out_enable:
+ preempt_enable_no_resched_notrace();
+out:
+ return this_cpu;
+}
+
+EXPORT_SYMBOL(debug_smp_processor_id);
+
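The usual way to satisfy this check is to pin the task to a CPU before asking which one it is running on, e.g.:

	int cpu = get_cpu();	/* disables preemption */

	/* ... cpu and per-CPU data are safe to use here ... */

	put_cpu();		/* re-enables preemption */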
diff --git a/lib/sort.c b/lib/sort.c
new file mode 100644
index 00000000..926d0042
--- /dev/null
+++ b/lib/sort.c
@@ -0,0 +1,123 @@
+/*
+ * A fast, small, non-recursive O(n log n) sort for the Linux kernel
+ *
+ * Jan 23 2005 Matt Mackall <mpm@selenic.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sort.h>
+#include <linux/slab.h>
+
+static void u32_swap(void *a, void *b, int size)
+{
+ u32 t = *(u32 *)a;
+ *(u32 *)a = *(u32 *)b;
+ *(u32 *)b = t;
+}
+
+static void generic_swap(void *a, void *b, int size)
+{
+ char t;
+
+ do {
+ t = *(char *)a;
+ *(char *)a++ = *(char *)b;
+ *(char *)b++ = t;
+ } while (--size > 0);
+}
+
+/**
+ * sort - sort an array of elements
+ * @base: pointer to data to sort
+ * @num: number of elements
+ * @size: size of each element
+ * @cmp_func: pointer to comparison function
+ * @swap_func: pointer to swap function or NULL
+ *
+ * This function does a heapsort on the given array. You may provide a
+ * swap_func function optimized to your element type.
+ *
+ * Sorting time is O(n log n) both on average and worst-case. While
+ * qsort is about 20% faster on average, it suffers from exploitable
+ * O(n*n) worst-case behavior and extra memory requirements that make
+ * it less suitable for kernel use.
+ */
+
+void sort(void *base, size_t num, size_t size,
+ int (*cmp_func)(const void *, const void *),
+ void (*swap_func)(void *, void *, int size))
+{
+ /* pre-scale counters for performance */
+ int i = (num/2 - 1) * size, n = num * size, c, r;
+
+ if (!swap_func)
+ swap_func = (size == 4 ? u32_swap : generic_swap);
+
+ /* heapify */
+ for ( ; i >= 0; i -= size) {
+ for (r = i; r * 2 + size < n; r = c) {
+ c = r * 2 + size;
+ if (c < n - size &&
+ cmp_func(base + c, base + c + size) < 0)
+ c += size;
+ if (cmp_func(base + r, base + c) >= 0)
+ break;
+ swap_func(base + r, base + c, size);
+ }
+ }
+
+ /* sort */
+ for (i = n - size; i > 0; i -= size) {
+ swap_func(base, base + i, size);
+ for (r = 0; r * 2 + size < i; r = c) {
+ c = r * 2 + size;
+ if (c < i - size &&
+ cmp_func(base + c, base + c + size) < 0)
+ c += size;
+ if (cmp_func(base + r, base + c) >= 0)
+ break;
+ swap_func(base + r, base + c, size);
+ }
+ }
+}
+
+EXPORT_SYMBOL(sort);
+
+#if 0
+/* a simple boot-time regression test */
+
+int cmpint(const void *a, const void *b)
+{
+ return *(int *)a - *(int *)b;
+}
+
+static int sort_test(void)
+{
+ int *a, i, r = 1;
+
+ a = kmalloc(1000 * sizeof(int), GFP_KERNEL);
+ BUG_ON(!a);
+
+ printk("testing sort()\n");
+
+ for (i = 0; i < 1000; i++) {
+ r = (r * 725861) % 6599;
+ a[i] = r;
+ }
+
+ sort(a, 1000, sizeof(int), cmpint, NULL);
+
+ for (i = 0; i < 999; i++)
+ if (a[i] > a[i+1]) {
+ printk("sort() failed!\n");
+ break;
+ }
+
+ kfree(a);
+
+ return 0;
+}
+
+module_init(sort_test);
+#endif
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
new file mode 100644
index 00000000..4755b98b
--- /dev/null
+++ b/lib/spinlock_debug.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2005, Red Hat, Inc., Ingo Molnar
+ * Released under the General Public License (GPL).
+ *
+ * This file contains the spinlock/rwlock implementations for
+ * DEBUG_SPINLOCK.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/nmi.h>
+#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+
+void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+ lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
+ lock->magic = SPINLOCK_MAGIC;
+ lock->owner = SPINLOCK_OWNER_INIT;
+ lock->owner_cpu = -1;
+}
+
+EXPORT_SYMBOL(__raw_spin_lock_init);
+
+void __rwlock_init(rwlock_t *lock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
+ lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
+ lock->magic = RWLOCK_MAGIC;
+ lock->owner = SPINLOCK_OWNER_INIT;
+ lock->owner_cpu = -1;
+}
+
+EXPORT_SYMBOL(__rwlock_init);
+
+static void spin_bug(raw_spinlock_t *lock, const char *msg)
+{
+ struct task_struct *owner = NULL;
+
+ if (!debug_locks_off())
+ return;
+
+ if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
+ owner = lock->owner;
+ printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
+ msg, raw_smp_processor_id(),
+ current->comm, task_pid_nr(current));
+ printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, "
+ ".owner_cpu: %d\n",
+ lock, lock->magic,
+ owner ? owner->comm : "<none>",
+ owner ? task_pid_nr(owner) : -1,
+ lock->owner_cpu);
+ dump_stack();
+}
+
+#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
+
+static inline void
+debug_spin_lock_before(raw_spinlock_t *lock)
+{
+ SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
+ SPIN_BUG_ON(lock->owner == current, lock, "recursion");
+ SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ lock, "cpu recursion");
+}
+
+static inline void debug_spin_lock_after(raw_spinlock_t *lock)
+{
+ lock->owner_cpu = raw_smp_processor_id();
+ lock->owner = current;
+}
+
+static inline void debug_spin_unlock(raw_spinlock_t *lock)
+{
+ SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
+ SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
+ SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
+ SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
+ lock, "wrong CPU");
+ lock->owner = SPINLOCK_OWNER_INIT;
+ lock->owner_cpu = -1;
+}
+
+static void __spin_lock_debug(raw_spinlock_t *lock)
+{
+ u64 i;
+ u64 loops = loops_per_jiffy * HZ;
+ int print_once = 1;
+
+ for (;;) {
+ for (i = 0; i < loops; i++) {
+ if (arch_spin_trylock(&lock->raw_lock))
+ return;
+ __delay(1);
+ }
+ /* lockup suspected: */
+ if (print_once) {
+ print_once = 0;
+ printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, "
+ "%s/%d, %p\n",
+ raw_smp_processor_id(), current->comm,
+ task_pid_nr(current), lock);
+ dump_stack();
+#ifdef CONFIG_SMP
+ trigger_all_cpu_backtrace();
+#endif
+ }
+ }
+}
+
+void do_raw_spin_lock(raw_spinlock_t *lock)
+{
+ debug_spin_lock_before(lock);
+ if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
+ __spin_lock_debug(lock);
+ debug_spin_lock_after(lock);
+}
+
+int do_raw_spin_trylock(raw_spinlock_t *lock)
+{
+ int ret = arch_spin_trylock(&lock->raw_lock);
+
+ if (ret)
+ debug_spin_lock_after(lock);
+#ifndef CONFIG_SMP
+ /*
+ * Must not happen on UP:
+ */
+ SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
+#endif
+ return ret;
+}
+
+void do_raw_spin_unlock(raw_spinlock_t *lock)
+{
+ debug_spin_unlock(lock);
+ arch_spin_unlock(&lock->raw_lock);
+}
+
+static void rwlock_bug(rwlock_t *lock, const char *msg)
+{
+ if (!debug_locks_off())
+ return;
+
+ printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
+ msg, raw_smp_processor_id(), current->comm,
+ task_pid_nr(current), lock);
+ dump_stack();
+}
+
+#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
+
+#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
+static void __read_lock_debug(rwlock_t *lock)
+{
+ u64 i;
+ u64 loops = loops_per_jiffy * HZ;
+ int print_once = 1;
+
+ for (;;) {
+ for (i = 0; i < loops; i++) {
+ if (arch_read_trylock(&lock->raw_lock))
+ return;
+ __delay(1);
+ }
+ /* lockup suspected: */
+ if (print_once) {
+ print_once = 0;
+ printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
+ "%s/%d, %p\n",
+ raw_smp_processor_id(), current->comm,
+ current->pid, lock);
+ dump_stack();
+ }
+ }
+}
+#endif
+
+void do_raw_read_lock(rwlock_t *lock)
+{
+ RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
+ arch_read_lock(&lock->raw_lock);
+}
+
+int do_raw_read_trylock(rwlock_t *lock)
+{
+ int ret = arch_read_trylock(&lock->raw_lock);
+
+#ifndef CONFIG_SMP
+ /*
+ * Must not happen on UP:
+ */
+ RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
+#endif
+ return ret;
+}
+
+void do_raw_read_unlock(rwlock_t *lock)
+{
+ RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
+ arch_read_unlock(&lock->raw_lock);
+}
+
+static inline void debug_write_lock_before(rwlock_t *lock)
+{
+ RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
+ RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
+ RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ lock, "cpu recursion");
+}
+
+static inline void debug_write_lock_after(rwlock_t *lock)
+{
+ lock->owner_cpu = raw_smp_processor_id();
+ lock->owner = current;
+}
+
+static inline void debug_write_unlock(rwlock_t *lock)
+{
+ RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
+ RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
+ RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
+ lock, "wrong CPU");
+ lock->owner = SPINLOCK_OWNER_INIT;
+ lock->owner_cpu = -1;
+}
+
+#if 0 /* This can cause lockups */
+static void __write_lock_debug(rwlock_t *lock)
+{
+ u64 i;
+ u64 loops = loops_per_jiffy * HZ;
+ int print_once = 1;
+
+ for (;;) {
+ for (i = 0; i < loops; i++) {
+ if (arch_write_trylock(&lock->raw_lock))
+ return;
+ __delay(1);
+ }
+ /* lockup suspected: */
+ if (print_once) {
+ print_once = 0;
+ printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
+ "%s/%d, %p\n",
+ raw_smp_processor_id(), current->comm,
+ current->pid, lock);
+ dump_stack();
+ }
+ }
+}
+#endif
+
+void do_raw_write_lock(rwlock_t *lock)
+{
+ debug_write_lock_before(lock);
+ arch_write_lock(&lock->raw_lock);
+ debug_write_lock_after(lock);
+}
+
+int do_raw_write_trylock(rwlock_t *lock)
+{
+ int ret = arch_write_trylock(&lock->raw_lock);
+
+ if (ret)
+ debug_write_lock_after(lock);
+#ifndef CONFIG_SMP
+ /*
+ * Must not happen on UP:
+ */
+ RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
+#endif
+ return ret;
+}
+
+void do_raw_write_unlock(rwlock_t *lock)
+{
+ debug_write_unlock(lock);
+ arch_write_unlock(&lock->raw_lock);
+}
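A hypothetical sketch of the bug patterns these checks catch (intentionally broken code, for illustration only):

	static DEFINE_SPINLOCK(my_lock);

	static void broken(void)
	{
		spin_unlock(&my_lock);	/* fires "already unlocked" */

		spin_lock(&my_lock);
		spin_lock(&my_lock);	/* "recursion", then the lockup report */
	}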
diff --git a/lib/string.c b/lib/string.c
new file mode 100644
index 00000000..f71bead1
--- /dev/null
+++ b/lib/string.c
@@ -0,0 +1,729 @@
+/*
+ * linux/lib/string.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/*
+ * stupid library routines.. The optimized versions should generally be found
+ * as inline code in <asm-xx/string.h>
+ *
+ * These are buggy as well..
+ *
+ * * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de>
+ * - Added strsep() which will replace strtok() soon (because strsep() is
+ * reentrant and should be faster). Use only strsep() in new code, please.
+ *
+ * * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>,
+ * Matthew Hawkins <matt@mh.dropbear.id.au>
+ * - Kissed strtok() goodbye
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/module.h>
+
+#ifndef __HAVE_ARCH_STRNICMP
+/**
+ * strnicmp - Case insensitive, length-limited string comparison
+ * @s1: One string
+ * @s2: The other string
+ * @len: the maximum number of characters to compare
+ */
+int strnicmp(const char *s1, const char *s2, size_t len)
+{
+ /* Yes, Virginia, it had better be unsigned */
+ unsigned char c1, c2;
+
+ if (!len)
+ return 0;
+
+ do {
+ c1 = *s1++;
+ c2 = *s2++;
+ if (!c1 || !c2)
+ break;
+ if (c1 == c2)
+ continue;
+ c1 = tolower(c1);
+ c2 = tolower(c2);
+ if (c1 != c2)
+ break;
+ } while (--len);
+ return (int)c1 - (int)c2;
+}
+EXPORT_SYMBOL(strnicmp);
+#endif
+
+#ifndef __HAVE_ARCH_STRCASECMP
+int strcasecmp(const char *s1, const char *s2)
+{
+ int c1, c2;
+
+ do {
+ c1 = tolower(*s1++);
+ c2 = tolower(*s2++);
+ } while (c1 == c2 && c1 != 0);
+ return c1 - c2;
+}
+EXPORT_SYMBOL(strcasecmp);
+#endif
+
+#ifndef __HAVE_ARCH_STRNCASECMP
+int strncasecmp(const char *s1, const char *s2, size_t n)
+{
+ int c1, c2;
+
+ do {
+ c1 = tolower(*s1++);
+ c2 = tolower(*s2++);
+ } while ((--n > 0) && c1 == c2 && c1 != 0);
+ return c1 - c2;
+}
+EXPORT_SYMBOL(strncasecmp);
+#endif
+
+#ifndef __HAVE_ARCH_STRCPY
+/**
+ * strcpy - Copy a %NUL terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ */
+#undef strcpy
+char *strcpy(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+}
+EXPORT_SYMBOL(strcpy);
+#endif
+
+#ifndef __HAVE_ARCH_STRNCPY
+/**
+ * strncpy - Copy a length-limited, %NUL-terminated string
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @count: The maximum number of bytes to copy
+ *
+ * The result is not %NUL-terminated if the source exceeds
+ * @count bytes.
+ *
+ * In the case where the length of @src is less than that of
+ * @count, the remainder of @dest will be padded with %NUL.
+ *
+ */
+char *strncpy(char *dest, const char *src, size_t count)
+{
+ char *tmp = dest;
+
+ while (count) {
+ if ((*tmp = *src) != 0)
+ src++;
+ tmp++;
+ count--;
+ }
+ return dest;
+}
+EXPORT_SYMBOL(strncpy);
+#endif
+
+#ifndef __HAVE_ARCH_STRLCPY
+/**
+ * strlcpy - Copy a %NUL terminated string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ */
+size_t strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+EXPORT_SYMBOL(strlcpy);
+#endif
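The return value (the full length of @src) makes truncation easy to detect; a sketch with a hypothetical src:

	char name[16];

	if (strlcpy(name, src, sizeof(name)) >= sizeof(name))
		return -ENAMETOOLONG;	/* src did not fit; name is truncated but terminated */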
+
+#ifndef __HAVE_ARCH_STRCAT
+/**
+ * strcat - Append one %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ */
+#undef strcat
+char *strcat(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while (*dest)
+ dest++;
+ while ((*dest++ = *src++) != '\0')
+ ;
+ return tmp;
+}
+EXPORT_SYMBOL(strcat);
+#endif
+
+#ifndef __HAVE_ARCH_STRNCAT
+/**
+ * strncat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @count: The maximum numbers of bytes to copy
+ *
+ * Note that in contrast to strncpy(), strncat() ensures the result is
+ * terminated.
+ */
+char *strncat(char *dest, const char *src, size_t count)
+{
+ char *tmp = dest;
+
+ if (count) {
+ while (*dest)
+ dest++;
+ while ((*dest++ = *src++) != 0) {
+ if (--count == 0) {
+ *dest = '\0';
+ break;
+ }
+ }
+ }
+ return tmp;
+}
+EXPORT_SYMBOL(strncat);
+#endif
+
+#ifndef __HAVE_ARCH_STRLCAT
+/**
+ * strlcat - Append a length-limited, %NUL-terminated string to another
+ * @dest: The string to be appended to
+ * @src: The string to append to it
+ * @count: The size of the destination buffer.
+ */
+size_t strlcat(char *dest, const char *src, size_t count)
+{
+ size_t dsize = strlen(dest);
+ size_t len = strlen(src);
+ size_t res = dsize + len;
+
+ /* This would be a bug */
+ BUG_ON(dsize >= count);
+
+ dest += dsize;
+ count -= dsize;
+ if (len >= count)
+ len = count-1;
+ memcpy(dest, src, len);
+ dest[len] = 0;
+ return res;
+}
+EXPORT_SYMBOL(strlcat);
+#endif
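+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * assembling a string in a fixed buffer. As with strlcpy(), a return
+ * value >= the buffer size signals truncation. All identifiers below
+ * are hypothetical.
+ *
+ *	char path[32];
+ *
+ *	strlcpy(path, "/sys/class/", sizeof(path));
+ *	if (strlcat(path, devname, sizeof(path)) >= sizeof(path))
+ *		printk(KERN_WARNING "path was truncated\n");
+ */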
+
+#ifndef __HAVE_ARCH_STRCMP
+/**
+ * strcmp - Compare two strings
+ * @cs: One string
+ * @ct: Another string
+ */
+#undef strcmp
+int strcmp(const char *cs, const char *ct)
+{
+ unsigned char c1, c2;
+
+ while (1) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(strcmp);
+#endif
+
+#ifndef __HAVE_ARCH_STRNCMP
+/**
+ * strncmp - Compare two length-limited strings
+ * @cs: One string
+ * @ct: Another string
+ * @count: The maximum number of bytes to compare
+ */
+int strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(strncmp);
+#endif
+
+#ifndef __HAVE_ARCH_STRCHR
+/**
+ * strchr - Find the first occurrence of a character in a string
+ * @s: The string to be searched
+ * @c: The character to search for
+ */
+char *strchr(const char *s, int c)
+{
+ for (; *s != (char)c; ++s)
+ if (*s == '\0')
+ return NULL;
+ return (char *)s;
+}
+EXPORT_SYMBOL(strchr);
+#endif
+
+#ifndef __HAVE_ARCH_STRRCHR
+/**
+ * strrchr - Find the last occurrence of a character in a string
+ * @s: The string to be searched
+ * @c: The character to search for
+ */
+char *strrchr(const char *s, int c)
+{
+ const char *p = s + strlen(s);
+ do {
+ if (*p == (char)c)
+ return (char *)p;
+ } while (--p >= s);
+ return NULL;
+}
+EXPORT_SYMBOL(strrchr);
+#endif
+
+#ifndef __HAVE_ARCH_STRNCHR
+/**
+ * strnchr - Find a character in a length limited string
+ * @s: The string to be searched
+ * @count: The number of characters to be searched
+ * @c: The character to search for
+ */
+char *strnchr(const char *s, size_t count, int c)
+{
+ for (; count-- && *s != '\0'; ++s)
+ if (*s == (char)c)
+ return (char *)s;
+ return NULL;
+}
+EXPORT_SYMBOL(strnchr);
+#endif
+
+/**
+ * skip_spaces - Removes leading whitespace from @str.
+ * @str: The string to be stripped.
+ *
+ * Returns a pointer to the first non-whitespace character in @str.
+ */
+char *skip_spaces(const char *str)
+{
+ while (isspace(*str))
+ ++str;
+ return (char *)str;
+}
+EXPORT_SYMBOL(skip_spaces);
+
+/**
+ * strim - Removes leading and trailing whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Note that the first trailing whitespace is replaced with a %NUL-terminator
+ * in the given string @s. Returns a pointer to the first non-whitespace
+ * character in @s.
+ */
+char *strim(char *s)
+{
+ size_t size;
+ char *end;
+
+ s = skip_spaces(s);
+ size = strlen(s);
+ if (!size)
+ return s;
+
+ end = s + size - 1;
+ while (end >= s && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+
+ return s;
+}
+EXPORT_SYMBOL(strim);
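+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * strim() modifies the buffer in place and returns a pointer into it,
+ * so it must only be called on writable storage.
+ *
+ *	char buf[] = "  hello world \n";
+ *	char *s = strim(buf);	(s now points at "hello world")
+ */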
+
+#ifndef __HAVE_ARCH_STRLEN
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+EXPORT_SYMBOL(strlen);
+#endif
+
+#ifndef __HAVE_ARCH_STRNLEN
+/**
+ * strnlen - Find the length of a length-limited string
+ * @s: The string to be sized
+ * @count: The maximum number of bytes to search
+ */
+size_t strnlen(const char *s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+EXPORT_SYMBOL(strnlen);
+#endif
+
+#ifndef __HAVE_ARCH_STRSPN
+/**
+ * strspn - Calculate the length of the initial substring of @s which only contains characters in @accept
+ * @s: The string to be searched
+ * @accept: The string to search for
+ */
+size_t strspn(const char *s, const char *accept)
+{
+ const char *p;
+ const char *a;
+ size_t count = 0;
+
+ for (p = s; *p != '\0'; ++p) {
+ for (a = accept; *a != '\0'; ++a) {
+ if (*p == *a)
+ break;
+ }
+ if (*a == '\0')
+ return count;
+ ++count;
+ }
+ return count;
+}
+EXPORT_SYMBOL(strspn);
+#endif
+
+#ifndef __HAVE_ARCH_STRCSPN
+/**
+ * strcspn - Calculate the length of the initial substring of @s which does not contain characters in @reject
+ * @s: The string to be searched
+ * @reject: The string to avoid
+ */
+size_t strcspn(const char *s, const char *reject)
+{
+ const char *p;
+ const char *r;
+ size_t count = 0;
+
+ for (p = s; *p != '\0'; ++p) {
+ for (r = reject; *r != '\0'; ++r) {
+ if (*p == *r)
+ return count;
+ }
+ ++count;
+ }
+ return count;
+}
+EXPORT_SYMBOL(strcspn);
+#endif
+
+#ifndef __HAVE_ARCH_STRPBRK
+/**
+ * strpbrk - Find the first occurrence of a set of characters
+ * @cs: The string to be searched
+ * @ct: The characters to search for
+ */
+char *strpbrk(const char *cs, const char *ct)
+{
+ const char *sc1, *sc2;
+
+ for (sc1 = cs; *sc1 != '\0'; ++sc1) {
+ for (sc2 = ct; *sc2 != '\0'; ++sc2) {
+ if (*sc1 == *sc2)
+ return (char *)sc1;
+ }
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(strpbrk);
+#endif
+
+#ifndef __HAVE_ARCH_STRSEP
+/**
+ * strsep - Split a string into tokens
+ * @s: The string to be searched
+ * @ct: The characters to search for
+ *
+ * strsep() updates @s to point after the token, ready for the next call.
+ *
+ * It returns empty tokens, too, behaving exactly like the libc function
+ * of that name. In fact, it was stolen from glibc2 and de-fancy-fied.
+ * Same semantics, slimmer shape. ;)
+ */
+char *strsep(char **s, const char *ct)
+{
+ char *sbegin = *s;
+ char *end;
+
+ if (sbegin == NULL)
+ return NULL;
+
+ end = strpbrk(sbegin, ct);
+ if (end)
+ *end++ = '\0';
+ *s = end;
+ return sbegin;
+}
+EXPORT_SYMBOL(strsep);
+#endif
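+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * splitting a writable, comma-separated string. strsep() consumes the
+ * cursor, so keep a separate pointer to free the original buffer. Note
+ * that the double comma yields an empty token.
+ *
+ *	char *opts = kstrdup("a,b,,c", GFP_KERNEL);
+ *	char *p = opts, *tok;
+ *
+ *	while ((tok = strsep(&p, ",")) != NULL)
+ *		printk(KERN_DEBUG "token: '%s'\n", tok);
+ *	kfree(opts);
+ */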
+
+/**
+ * sysfs_streq - return true if strings are equal, modulo trailing newline
+ * @s1: one string
+ * @s2: another string
+ *
+ * This routine returns true iff two strings are equal, treating both
+ * NUL and newline-then-NUL as equivalent string terminations. It's
+ * geared for use with sysfs input strings, which generally terminate
+ * with newlines but are compared against values without newlines.
+ */
+bool sysfs_streq(const char *s1, const char *s2)
+{
+ while (*s1 && *s1 == *s2) {
+ s1++;
+ s2++;
+ }
+
+ if (*s1 == *s2)
+ return true;
+ if (!*s1 && *s2 == '\n' && !s2[1])
+ return true;
+ if (*s1 == '\n' && !s1[1] && !*s2)
+ return true;
+ return false;
+}
+EXPORT_SYMBOL(sysfs_streq);
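+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * in a sysfs store callback, @buf usually ends in '\n', so a plain
+ * strcmp() against "enabled" would fail where sysfs_streq() matches.
+ * The callback context and do_enable() are hypothetical.
+ *
+ *	if (sysfs_streq(buf, "enabled"))
+ *		do_enable();
+ */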
+
+#ifndef __HAVE_ARCH_MEMSET
+/**
+ * memset - Fill a region of memory with the given value
+ * @s: Pointer to the start of the area.
+ * @c: The byte to fill the area with
+ * @count: The size of the area.
+ *
+ * Do not use memset() to access IO space, use memset_io() instead.
+ */
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
+EXPORT_SYMBOL(memset);
+#endif
+
+#ifndef __HAVE_ARCH_MEMCPY
+/**
+ * memcpy - Copy one area of memory to another
+ * @dest: Where to copy to
+ * @src: Where to copy from
+ * @count: The size of the area.
+ *
+ * You should not use this function to access IO space, use memcpy_toio()
+ * or memcpy_fromio() instead.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+EXPORT_SYMBOL(memcpy);
+#endif
+
+#ifndef __HAVE_ARCH_MEMMOVE
+/**
+ * memmove - Copy one area of memory to another
+ * @dest: Where to copy to
+ * @src: Where to copy from
+ * @count: The size of the area.
+ *
+ * Unlike memcpy(), memmove() copes with overlapping areas.
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+ char *tmp;
+ const char *s;
+
+ if (dest <= src) {
+ tmp = dest;
+ s = src;
+ while (count--)
+ *tmp++ = *s++;
+ } else {
+ tmp = dest;
+ tmp += count;
+ s = src;
+ s += count;
+ while (count--)
+ *--tmp = *--s;
+ }
+ return dest;
+}
+EXPORT_SYMBOL(memmove);
+#endif
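+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * shifting data within a single buffer, which is exactly the
+ * overlapping case that memcpy() does not handle.
+ *
+ *	char buf[] = "abcdef";
+ *
+ *	memmove(buf, buf + 2, 5);	(buf now holds "cdef")
+ */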
+
+#ifndef __HAVE_ARCH_MEMCMP
+/**
+ * memcmp - Compare two areas of memory
+ * @cs: One area of memory
+ * @ct: Another area of memory
+ * @count: The size of the area.
+ */
+#undef memcmp
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+EXPORT_SYMBOL(memcmp);
+#endif
+
+#ifndef __HAVE_ARCH_MEMSCAN
+/**
+ * memscan - Find a character in an area of memory.
+ * @addr: The memory area
+ * @c: The byte to search for
+ * @size: The size of the area.
+ *
+ * Returns the address of the first occurrence of @c, or one byte past
+ * the area if @c is not found.
+ */
+void *memscan(void *addr, int c, size_t size)
+{
+ unsigned char *p = addr;
+
+ while (size) {
+ if (*p == c)
+ return (void *)p;
+ p++;
+ size--;
+ }
+ return (void *)p;
+}
+EXPORT_SYMBOL(memscan);
+#endif
+
+#ifndef __HAVE_ARCH_STRSTR
+/**
+ * strstr - Find the first substring in a %NUL terminated string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ */
+char *strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(strstr);
+#endif
+
+#ifndef __HAVE_ARCH_STRNSTR
+/**
+ * strnstr - Find the first substring in a length-limited string
+ * @s1: The string to be searched
+ * @s2: The string to search for
+ * @len: the maximum number of characters to search
+ */
+char *strnstr(const char *s1, const char *s2, size_t len)
+{
+ size_t l2;
+
+ l2 = strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ while (len >= l2) {
+ len--;
+ if (!memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(strnstr);
+#endif
+
+#ifndef __HAVE_ARCH_MEMCHR
+/**
+ * memchr - Find a character in an area of memory.
+ * @s: The memory area
+ * @c: The byte to search for
+ * @n: The size of the area.
+ *
+ * Returns the address of the first occurrence of @c, or %NULL
+ * if @c is not found.
+ */
+void *memchr(const void *s, int c, size_t n)
+{
+ const unsigned char *p = s;
+ while (n-- != 0) {
+ if ((unsigned char)c == *p++) {
+ return (void *)(p - 1);
+ }
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(memchr);
+#endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
new file mode 100644
index 00000000..ab431d4c
--- /dev/null
+++ b/lib/string_helpers.c
@@ -0,0 +1,68 @@
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ */
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size: The size to be converted
+ * @units: units to use (powers of 1000 or 1024)
+ * @buf: buffer to format to
+ * @len: length of buffer
+ *
+ * This function formats a string to 3 significant figures giving the
+ * size in the required units. It currently always returns 0. @buf is
+ * always %NUL-terminated.
+ *
+ */
+int string_get_size(u64 size, const enum string_size_units units,
+ char *buf, int len)
+{
+ const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB",
+ "EB", "ZB", "YB", NULL};
+ const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
+ "EiB", "ZiB", "YiB", NULL };
+ const char **units_str[] = {
+ [STRING_UNITS_10] = units_10,
+ [STRING_UNITS_2] = units_2,
+ };
+ const unsigned int divisor[] = {
+ [STRING_UNITS_10] = 1000,
+ [STRING_UNITS_2] = 1024,
+ };
+ int i, j;
+ u64 remainder = 0, sf_cap;
+ char tmp[8];
+
+ tmp[0] = '\0';
+ i = 0;
+ if (size >= divisor[units]) {
+ while (size >= divisor[units] && units_str[units][i]) {
+ remainder = do_div(size, divisor[units]);
+ i++;
+ }
+
+		/* work out how many fractional digits are needed to show
+		 * three significant figures in total */
+		sf_cap = size;
+		for (j = 0; sf_cap*10 < 1000; j++)
+			sf_cap *= 10;
+
+ if (j) {
+ remainder *= 1000;
+ do_div(remainder, divisor[units]);
+ snprintf(tmp, sizeof(tmp), ".%03lld",
+ (unsigned long long)remainder);
+ tmp[j+1] = '\0';
+ }
+ }
+
+ snprintf(buf, len, "%lld%s %s", (unsigned long long)size,
+ tmp, units_str[units][i]);
+
+ return 0;
+}
+EXPORT_SYMBOL(string_get_size);
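+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file),
+ * formatting the same byte count in both unit systems:
+ *
+ *	char buf[10];
+ *
+ *	string_get_size(1048576ULL, STRING_UNITS_2, buf, sizeof(buf));
+ *	(buf now holds "1.00 MiB")
+ *	string_get_size(1048576ULL, STRING_UNITS_10, buf, sizeof(buf));
+ *	(buf now holds "1.04 MB")
+ */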
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
new file mode 100644
index 00000000..34e30826
--- /dev/null
+++ b/lib/swiotlb.c
@@ -0,0 +1,923 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is a fallback for platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
+ * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
+ * unnecessary i-cache flushing.
+ * 04/07/.. ak Better overflow handling. Assorted fixes.
+ * 05/09/10 linville Add support for syncing ranges, support syncing for
+ * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
+ * 08/12/11 beckyb Add highmem support
+ */
+
+#include <linux/cache.h>
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/swiotlb.h>
+#include <linux/pfn.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+
+#include <asm/io.h>
+#include <asm/dma.h>
+#include <asm/scatterlist.h>
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/iommu-helper.h>
+
+#define OFFSET(val,align) ((unsigned long) \
+ ( (val) & ( (align) - 1)))
+
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+
+/*
+ * Minimum IO TLB size to bother booting with. Systems with mainly
+ * 64bit capable cards will only lightly use the swiotlb. If we can't
+ * allocate a contiguous 1MB, we're probably in trouble anyway.
+ */
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+
+int swiotlb_force;
+
+/*
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static char *io_tlb_start, *io_tlb_end;
+
+/*
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
+ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
+ */
+static unsigned long io_tlb_nslabs;
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+void *io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+static phys_addr_t *io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+static int late_alloc;
+
+static int __init
+setup_io_tlb_npages(char *str)
+{
+ if (isdigit(*str)) {
+ io_tlb_nslabs = simple_strtoul(str, &str, 0);
+ /* avoid tail segment of size < IO_TLB_SEGSIZE */
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+ if (*str == ',')
+ ++str;
+ if (!strcmp(str, "force"))
+ swiotlb_force = 1;
+
+ return 1;
+}
+__setup("swiotlb=", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
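+
+/*
+ * Illustrative kernel command line examples (not part of the original
+ * file); the parser above accepts a slab count, "force", or both.
+ * With the usual IO_TLB_SHIFT of 11, each slab covers 2KB:
+ *
+ *	swiotlb=65536		(reserve 65536 slabs, i.e. 128MB)
+ *	swiotlb=force		(bounce-buffer even for DMA-capable devices)
+ *	swiotlb=65536,force	(both)
+ */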
+
+/* Note that this doesn't work with highmem pages */
+static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
+ volatile void *address)
+{
+ return phys_to_dma(hwdev, virt_to_phys(address));
+}
+
+void swiotlb_print_info(void)
+{
+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ phys_addr_t pstart, pend;
+
+ pstart = virt_to_phys(io_tlb_start);
+ pend = virt_to_phys(io_tlb_end);
+
+ printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n",
+ bytes >> 20, io_tlb_start, io_tlb_end);
+ printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n",
+ (unsigned long long)pstart,
+ (unsigned long long)pend);
+}
+
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+{
+ unsigned long i, bytes;
+
+ bytes = nslabs << IO_TLB_SHIFT;
+
+ io_tlb_nslabs = nslabs;
+ io_tlb_start = tlb;
+ io_tlb_end = io_tlb_start + bytes;
+
+ /*
+ * Allocate and initialize the free list array. This array is used
+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+ * between io_tlb_start and io_tlb_end.
+ */
+ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ for (i = 0; i < io_tlb_nslabs; i++)
+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_index = 0;
+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+
+ /*
+ * Get the overflow emergency buffer
+ */
+ io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
+ if (!io_tlb_overflow_buffer)
+ panic("Cannot allocate SWIOTLB overflow buffer!\n");
+ if (verbose)
+ swiotlb_print_info();
+}
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+ unsigned long bytes;
+
+ if (!io_tlb_nslabs) {
+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ io_tlb_start = alloc_bootmem_low_pages(bytes);
+ if (!io_tlb_start)
+ panic("Cannot allocate SWIOTLB buffer");
+
+ swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+}
+
+void __init
+swiotlb_init(int verbose)
+{
+ swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */
+}
+
+/*
+ * Systems with larger DMA zones (those that don't support ISA) can
+ * initialize the swiotlb later using the slab allocator if needed.
+ * This should be just like above, but with some error catching.
+ */
+int
+swiotlb_late_init_with_default_size(size_t default_size)
+{
+ unsigned long i, bytes, req_nslabs = io_tlb_nslabs;
+ unsigned int order;
+
+ if (!io_tlb_nslabs) {
+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
+ io_tlb_nslabs = SLABS_PER_PAGE << order;
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+ io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+ order);
+ if (io_tlb_start)
+ break;
+ order--;
+ }
+
+ if (!io_tlb_start)
+ goto cleanup1;
+
+ if (order != get_order(bytes)) {
+ printk(KERN_WARNING "Warning: only able to allocate %ld MB "
+ "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+ io_tlb_nslabs = SLABS_PER_PAGE << order;
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ }
+ io_tlb_end = io_tlb_start + bytes;
+ memset(io_tlb_start, 0, bytes);
+
+ /*
+ * Allocate and initialize the free list array. This array is used
+ * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+ * between io_tlb_start and io_tlb_end.
+ */
+ io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs * sizeof(int)));
+ if (!io_tlb_list)
+ goto cleanup2;
+
+ for (i = 0; i < io_tlb_nslabs; i++)
+ io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_index = 0;
+
+ io_tlb_orig_addr = (phys_addr_t *)
+ __get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs *
+ sizeof(phys_addr_t)));
+ if (!io_tlb_orig_addr)
+ goto cleanup3;
+
+ memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t));
+
+ /*
+ * Get the overflow emergency buffer
+ */
+ io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+ get_order(io_tlb_overflow));
+ if (!io_tlb_overflow_buffer)
+ goto cleanup4;
+
+ swiotlb_print_info();
+
+ late_alloc = 1;
+
+ return 0;
+
+cleanup4:
+ free_pages((unsigned long)io_tlb_orig_addr,
+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
+ io_tlb_orig_addr = NULL;
+cleanup3:
+ free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+ sizeof(int)));
+ io_tlb_list = NULL;
+cleanup2:
+ io_tlb_end = NULL;
+ free_pages((unsigned long)io_tlb_start, order);
+ io_tlb_start = NULL;
+cleanup1:
+ io_tlb_nslabs = req_nslabs;
+ return -ENOMEM;
+}
+
+void __init swiotlb_free(void)
+{
+ if (!io_tlb_overflow_buffer)
+ return;
+
+ if (late_alloc) {
+ free_pages((unsigned long)io_tlb_overflow_buffer,
+ get_order(io_tlb_overflow));
+ free_pages((unsigned long)io_tlb_orig_addr,
+ get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
+ free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+ sizeof(int)));
+ free_pages((unsigned long)io_tlb_start,
+ get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+ } else {
+ free_bootmem_late(__pa(io_tlb_overflow_buffer),
+ io_tlb_overflow);
+ free_bootmem_late(__pa(io_tlb_orig_addr),
+ io_tlb_nslabs * sizeof(phys_addr_t));
+ free_bootmem_late(__pa(io_tlb_list),
+ io_tlb_nslabs * sizeof(int));
+ free_bootmem_late(__pa(io_tlb_start),
+ io_tlb_nslabs << IO_TLB_SHIFT);
+ }
+}
+
+static int is_swiotlb_buffer(phys_addr_t paddr)
+{
+ return paddr >= virt_to_phys(io_tlb_start) &&
+ paddr < virt_to_phys(io_tlb_end);
+}
+
+/*
+ * Bounce: copy the swiotlb buffer from or back to the original dma location
+ */
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ unsigned long pfn = PFN_DOWN(phys);
+
+ if (PageHighMem(pfn_to_page(pfn))) {
+ /* The buffer does not have a mapping. Map it in and copy */
+ unsigned int offset = phys & ~PAGE_MASK;
+ char *buffer;
+ unsigned int sz = 0;
+ unsigned long flags;
+
+ while (size) {
+ sz = min_t(size_t, PAGE_SIZE - offset, size);
+
+ local_irq_save(flags);
+ buffer = kmap_atomic(pfn_to_page(pfn),
+ KM_BOUNCE_READ);
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, buffer + offset, sz);
+ else
+ memcpy(buffer + offset, dma_addr, sz);
+ kunmap_atomic(buffer, KM_BOUNCE_READ);
+ local_irq_restore(flags);
+
+ size -= sz;
+ pfn++;
+ dma_addr += sz;
+ offset = 0;
+ }
+ } else {
+ if (dir == DMA_TO_DEVICE)
+ memcpy(dma_addr, phys_to_virt(phys), size);
+ else
+ memcpy(phys_to_virt(phys), dma_addr, size);
+ }
+}
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
+
+void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
+ phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
+{
+ unsigned long flags;
+ char *dma_addr;
+ unsigned int nslots, stride, index, wrap;
+ int i;
+ unsigned long mask;
+ unsigned long offset_slots;
+ unsigned long max_slots;
+
+ mask = dma_get_seg_boundary(hwdev);
+
+ tbl_dma_addr &= mask;
+
+ offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+
+ /*
+ * Carefully handle integer overflow which can occur when mask == ~0UL.
+ */
+ max_slots = mask + 1
+ ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
+ : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+
+ /*
+ * For mappings greater than a page, we limit the stride (and
+ * hence alignment) to a page size.
+ */
+ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ if (size > PAGE_SIZE)
+ stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+ else
+ stride = 1;
+
+ BUG_ON(!nslots);
+
+ /*
+	 * Find a suitable number of IO TLB entries that will fit this
+ * request and allocate a buffer from that IO TLB pool.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ index = ALIGN(io_tlb_index, stride);
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ wrap = index;
+
+ do {
+ while (iommu_is_span_boundary(index, nslots, offset_slots,
+ max_slots)) {
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ if (index == wrap)
+ goto not_found;
+ }
+
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot
+ * and mark the entries as '0' indicating unavailable.
+ */
+ if (io_tlb_list[index] >= nslots) {
+ int count = 0;
+
+ for (i = index; i < (int) (index + nslots); i++)
+ io_tlb_list[i] = 0;
+ for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+
+ /*
+ * Update the indices to avoid searching in the next
+ * round.
+ */
+ io_tlb_index = ((index + nslots) < io_tlb_nslabs
+ ? (index + nslots) : 0);
+
+ goto found;
+ }
+ index += stride;
+ if (index >= io_tlb_nslabs)
+ index = 0;
+ } while (index != wrap);
+
+not_found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return NULL;
+found:
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+ * Save away the mapping from the original address to the DMA address.
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed.
+ */
+ for (i = 0; i < nslots; i++)
+ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+
+ return dma_addr;
+}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+static void *
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
+{
+ dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+
+ return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+}
+
+/*
+ * dma_addr is the kernel virtual address of the bounce buffer to unmap.
+ */
+void
+swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
+{
+ unsigned long flags;
+ int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t phys = io_tlb_orig_addr[index];
+
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+
+ /*
+ * Return the buffer to the free list by setting the corresponding
+ * entries to indicate the number of contiguous entries available.
+ * While returning the entries to the free list, we merge the entries
+ * with slots below and above the pool being returned.
+ */
+ spin_lock_irqsave(&io_tlb_lock, flags);
+ {
+ count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+ io_tlb_list[index + nslots] : 0);
+ /*
+ * Step 1: return the slots to the free list, merging the
+		 * slots with succeeding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--)
+ io_tlb_list[i] = ++count;
+ /*
+ * Step 2: merge the returned slots with the preceding slots,
+ * if available (non zero)
+ */
+		for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+ }
+ spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
+
+void
+swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir,
+ enum dma_sync_target target)
+{
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ phys_addr_t phys = io_tlb_orig_addr[index];
+
+ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
+
+ switch (target) {
+ case SYNC_FOR_CPU:
+ if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
+ else
+ BUG_ON(dir != DMA_TO_DEVICE);
+ break;
+ case SYNC_FOR_DEVICE:
+ if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
+ else
+ BUG_ON(dir != DMA_FROM_DEVICE);
+ break;
+ default:
+ BUG();
+ }
+}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
+
+void *
+swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags)
+{
+ dma_addr_t dev_addr;
+ void *ret;
+ int order = get_order(size);
+ u64 dma_mask = DMA_BIT_MASK(32);
+
+ if (hwdev && hwdev->coherent_dma_mask)
+ dma_mask = hwdev->coherent_dma_mask;
+
+ ret = (void *)__get_free_pages(flags, order);
+ if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) {
+ /*
+ * The allocated memory isn't reachable by the device.
+ */
+ free_pages((unsigned long) ret, order);
+ ret = NULL;
+ }
+ if (!ret) {
+ /*
+ * We are either out of memory or the device can't DMA to
+ * GFP_DMA memory; fall back on map_single(), which
+ * will grab memory from the lowest available address range.
+ */
+ ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
+ if (!ret)
+ return NULL;
+ }
+
+ memset(ret, 0, size);
+ dev_addr = swiotlb_virt_to_bus(hwdev, ret);
+
+ /* Confirm address can be DMA'd by device */
+ if (dev_addr + size - 1 > dma_mask) {
+ printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+ (unsigned long long)dma_mask,
+ (unsigned long long)dev_addr);
+
+ /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
+ swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+ return NULL;
+ }
+ *dma_handle = dev_addr;
+ return ret;
+}
+EXPORT_SYMBOL(swiotlb_alloc_coherent);
+
+void
+swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
+ dma_addr_t dev_addr)
+{
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
+ WARN_ON(irqs_disabled());
+ if (!is_swiotlb_buffer(paddr))
+ free_pages((unsigned long)vaddr, get_order(size));
+ else
+ /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */
+ swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+}
+EXPORT_SYMBOL(swiotlb_free_coherent);
+
+static void
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+ int do_panic)
+{
+ /*
+ * Ran out of IOMMU space for this operation. This is very bad.
+	 * Unfortunately the drivers cannot handle this operation properly
+	 * unless they check for dma_mapping_error() (most don't).
+ * When the mapping is small enough return a static buffer to limit
+ * the damage, or panic when the transfer is too big.
+ */
+ printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at "
+ "device %s\n", size, dev ? dev_name(dev) : "?");
+
+ if (size <= io_tlb_overflow || !do_panic)
+ return;
+
+ if (dir == DMA_BIDIRECTIONAL)
+ panic("DMA: Random memory could be DMA accessed\n");
+ if (dir == DMA_FROM_DEVICE)
+ panic("DMA: Random memory could be DMA written\n");
+ if (dir == DMA_TO_DEVICE)
+ panic("DMA: Random memory could be DMA read\n");
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode. The
+ * DMA address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_page() or swiotlb_sync_single_for_cpu() is performed.
+ */
+dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ phys_addr_t phys = page_to_phys(page) + offset;
+ dma_addr_t dev_addr = phys_to_dma(dev, phys);
+ void *map;
+
+ BUG_ON(dir == DMA_NONE);
+ /*
+ * If the address happens to be in the device's DMA window,
+ * we can safely return the device addr and not worry about bounce
+ * buffering it.
+ */
+ if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
+ return dev_addr;
+
+ /*
+ * Oh well, have to allocate and map a bounce buffer.
+ */
+ map = map_single(dev, phys, size, dir);
+ if (!map) {
+ swiotlb_full(dev, size, dir, 1);
+ map = io_tlb_overflow_buffer;
+ }
+
+ dev_addr = swiotlb_virt_to_bus(dev, map);
+
+ /*
+ * Ensure that the address returned is DMA'ble
+ */
+ if (!dma_capable(dev, dev_addr, size))
+ panic("map_single: bounce buffer is not DMA'ble");
+
+ return dev_addr;
+}
+EXPORT_SYMBOL_GPL(swiotlb_map_page);
+
+/*
+ * Unmap a single streaming mode DMA translation. The dma_addr and size must
+ * match what was provided in a previous swiotlb_map_page() call. All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
+ BUG_ON(dir == DMA_NONE);
+
+ if (is_swiotlb_buffer(paddr)) {
+ swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+ return;
+ }
+
+ if (dir != DMA_FROM_DEVICE)
+ return;
+
+ /*
+	 * phys_to_virt doesn't work with highmem pages, but we could
+	 * call dma_mark_clean() with a highmem page here. However, we
+	 * are fine since dma_mark_clean() is a no-op on POWERPC. We can
+ * make dma_mark_clean() take a physical address if necessary.
+ */
+ dma_mark_clean(phys_to_virt(paddr), size);
+}
+
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ unmap_single(hwdev, dev_addr, size, dir);
+}
+EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to tear down the dma mapping, you must
+ * call this function before doing so. Before you next give the dma
+ * address back to the card, you must first perform a
+ * swiotlb_sync_single_for_device(), and then the device again owns the buffer.
+ */
+static void
+swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ enum dma_sync_target target)
+{
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
+ BUG_ON(dir == DMA_NONE);
+
+ if (is_swiotlb_buffer(paddr)) {
+ swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
+ target);
+ return;
+ }
+
+ if (dir != DMA_FROM_DEVICE)
+ return;
+
+ dma_mark_clean(phys_to_virt(paddr), size);
+}
+
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
+}
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
+
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
+}
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_page
+ * interface. Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ * DMA address/length pairs than there are SG table elements.
+ * (for example via virtual mapping capabilities)
+ * The routine returns the number of addr/length pairs actually
+ * used, at most nents.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
+ * same here.
+ */
+int
+swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(dir == DMA_NONE);
+
+ for_each_sg(sgl, sg, nelems, i) {
+ phys_addr_t paddr = sg_phys(sg);
+ dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
+
+ if (swiotlb_force ||
+ !dma_capable(hwdev, dev_addr, sg->length)) {
+ void *map = map_single(hwdev, sg_phys(sg),
+ sg->length, dir);
+ if (!map) {
+ /* Don't panic here, we expect map_sg users
+ to do proper error handling. */
+ swiotlb_full(hwdev, sg->length, dir, 0);
+ swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
+ attrs);
+ sgl[0].dma_length = 0;
+ return 0;
+ }
+ sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
+ } else
+ sg->dma_address = dev_addr;
+ sg->dma_length = sg->length;
+ }
+ return nelems;
+}
+EXPORT_SYMBOL(swiotlb_map_sg_attrs);
+
+int
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+}
+EXPORT_SYMBOL(swiotlb_map_sg);
+
+/*
+ * Unmap a set of streaming mode DMA translations. Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
+ */
+void
+swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+ struct scatterlist *sg;
+ int i;
+
+ BUG_ON(dir == DMA_NONE);
+
+ for_each_sg(sgl, sg, nelems, i)
+ unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
+}
+EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
+
+void
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
+ enum dma_data_direction dir)
+{
+ return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
+}
+EXPORT_SYMBOL(swiotlb_unmap_sg);
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.
+ */
+static void
+swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
+ int nelems, enum dma_data_direction dir,
+ enum dma_sync_target target)
+{
+ struct scatterlist *sg;
+ int i;
+
+ for_each_sg(sgl, sg, nelems, i)
+ swiotlb_sync_single(hwdev, sg->dma_address,
+ sg->dma_length, dir, target);
+}
+
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
+{
+ swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
+}
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
+
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
+{
+ swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
+}
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
+
+int
+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
+{
+ return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
+}
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly. For example, if your device can only drive the low 24 bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ */
+int
+swiotlb_dma_supported(struct device *hwdev, u64 mask)
+{
+ return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask;
+}
+EXPORT_SYMBOL(swiotlb_dma_supported);
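+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * a driver for an ISA-style device that can only address 24 bits could
+ * probe support like this (the device pointer is hypothetical):
+ *
+ *	if (!swiotlb_dma_supported(dev, DMA_BIT_MASK(24)))
+ *		return -EIO;
+ */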
diff --git a/lib/syscall.c b/lib/syscall.c
new file mode 100644
index 00000000..a4f7067f
--- /dev/null
+++ b/lib/syscall.c
@@ -0,0 +1,75 @@
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <asm/syscall.h>
+
+static int collect_syscall(struct task_struct *target, long *callno,
+ unsigned long args[6], unsigned int maxargs,
+ unsigned long *sp, unsigned long *pc)
+{
+ struct pt_regs *regs = task_pt_regs(target);
+ if (unlikely(!regs))
+ return -EAGAIN;
+
+ *sp = user_stack_pointer(regs);
+ *pc = instruction_pointer(regs);
+
+ *callno = syscall_get_nr(target, regs);
+ if (*callno != -1L && maxargs > 0)
+ syscall_get_arguments(target, regs, 0, maxargs, args);
+
+ return 0;
+}
+
+/**
+ * task_current_syscall - Discover what a blocked task is doing.
+ * @target: thread to examine
+ * @callno: filled with system call number or -1
+ * @args: filled with @maxargs system call arguments
+ * @maxargs: number of elements in @args to fill
+ * @sp: filled with user stack pointer
+ * @pc: filled with user PC
+ *
+ * If @target is blocked in a system call, returns zero with *@callno
+ * set to the call's number and @args filled in with its arguments.
+ * Registers not used for system call arguments may not be available and
+ * it is not kosher to use &struct user_regset calls while the system
+ * call is still in progress. Note we may get this result if @target
+ * has finished its system call but not yet returned to user mode, such
+ * as when it's stopped for signal handling or syscall exit tracing.
+ *
+ * If @target is blocked in the kernel during a fault or exception,
+ * returns zero with *@callno set to -1 and does not fill in @args.
+ * If so, it's now safe to examine @target using &struct user_regset
+ * get() calls as long as we're sure @target won't return to user mode.
+ *
+ * Returns -%EAGAIN if @target does not remain blocked.
+ *
+ * Returns -%EINVAL if @maxargs is too large (maximum is six).
+ */
+int task_current_syscall(struct task_struct *target, long *callno,
+ unsigned long args[6], unsigned int maxargs,
+ unsigned long *sp, unsigned long *pc)
+{
+ long state;
+ unsigned long ncsw;
+
+ if (unlikely(maxargs > 6))
+ return -EINVAL;
+
+ if (target == current)
+ return collect_syscall(target, callno, args, maxargs, sp, pc);
+
+ state = target->state;
+ if (unlikely(!state))
+ return -EAGAIN;
+
+ ncsw = wait_task_inactive(target, state);
+ if (unlikely(!ncsw) ||
+ unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
+ unlikely(wait_task_inactive(target, state) != ncsw))
+ return -EAGAIN;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(task_current_syscall);
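+
+/*
+ * A minimal usage sketch (illustrative, not part of the original file):
+ * reporting what a blocked task is waiting on from a debugging helper.
+ * All locals are hypothetical.
+ *
+ *	long nr;
+ *	unsigned long args[6], sp, pc;
+ *
+ *	if (!task_current_syscall(task, &nr, args, 6, &sp, &pc) && nr != -1L)
+ *		printk(KERN_DEBUG "blocked in syscall %ld\n", nr);
+ */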
diff --git a/lib/textsearch.c b/lib/textsearch.c
new file mode 100644
index 00000000..d608331b
--- /dev/null
+++ b/lib/textsearch.c
@@ -0,0 +1,324 @@
+/*
+ * lib/textsearch.c Generic text search interface
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ * Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * ==========================================================================
+ *
+ * INTRODUCTION
+ *
+ * The textsearch infrastructure provides text searching facilities for
+ * both linear and non-linear data. Individual search algorithms are
+ * implemented in modules and chosen by the user.
+ *
+ * ARCHITECTURE
+ *
+ * User
+ * +----------------+
+ * | finish()|<--------------(6)-----------------+
+ * |get_next_block()|<--------------(5)---------------+ |
+ * | | Algorithm | |
+ * | | +------------------------------+
+ * | | | init() find() destroy() |
+ * | | +------------------------------+
+ * | | Core API ^ ^ ^
+ * | | +---------------+ (2) (4) (8)
+ * | (1)|----->| prepare() |---+ | |
+ * | (3)|----->| find()/next() |-----------+ |
+ * | (7)|----->| destroy() |----------------------+
+ * +----------------+ +---------------+
+ *
+ * (1) User configures a search by calling _prepare() specifying the
+ * search parameters such as the pattern and algorithm name.
+ * (2) Core requests the algorithm to allocate and initialize a search
+ * configuration according to the specified parameters.
+ * (3) User starts the search(es) by calling _find() or _next() to
+ * fetch subsequent occurrences. A state variable is provided
+ * to the algorithm to store persistent variables.
+ * (4) Core eventually resets the search offset and forwards the find()
+ * request to the algorithm.
+ *   (5) Algorithm calls get_next_block() provided by the user continuously
+ *       to fetch the data to be searched, block by block.
+ * (6) Algorithm invokes finish() after the last call to get_next_block
+ * to clean up any leftovers from get_next_block. (Optional)
+ * (7) User destroys the configuration by calling _destroy().
+ * (8) Core notifies the algorithm to destroy algorithm specific
+ * allocations. (Optional)
+ *
+ * USAGE
+ *
+ * Before a search can be performed, a configuration must be created
+ * by calling textsearch_prepare() specifying the searching algorithm,
+ * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE
+ * to perform case insensitive matching. But it might slow down
+ * performance of algorithm, so you should use it at own your risk.
+ * The returned configuration may then be used for an arbitary
+ * amount of times and even in parallel as long as a separate struct
+ * ts_state variable is provided to every instance.
+ *
+ * The actual search is performed either by calling
+ * textsearch_find_continuous() for linear data or by providing your own
+ * get_next_block() implementation and calling textsearch_find(). Both
+ * functions return the position of the first occurrence of the pattern
+ * or UINT_MAX if no match was found. Subsequent occurrences can be found
+ * by calling textsearch_next() regardless of the linearity of the data.
+ *
+ * Once you're done using a configuration, it must be given back via
+ * textsearch_destroy().
+ *
+ * EXAMPLE
+ *
+ * int pos;
+ * struct ts_config *conf;
+ * struct ts_state state;
+ * const char *pattern = "chicken";
+ * const char *example = "We dance the funky chicken";
+ *
+ * conf = textsearch_prepare("kmp", pattern, strlen(pattern),
+ * GFP_KERNEL, TS_AUTOLOAD);
+ * if (IS_ERR(conf)) {
+ * err = PTR_ERR(conf);
+ * goto errout;
+ * }
+ *
+ * pos = textsearch_find_continuous(conf, &state, example, strlen(example));
+ * if (pos != UINT_MAX)
+ * panic("Oh my god, dancing chickens at %d\n", pos);
+ *
+ * textsearch_destroy(conf);
+ * ==========================================================================
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/err.h>
+#include <linux/textsearch.h>
+#include <linux/slab.h>
+
+static LIST_HEAD(ts_ops);
+static DEFINE_SPINLOCK(ts_mod_lock);
+
+static inline struct ts_ops *lookup_ts_algo(const char *name)
+{
+ struct ts_ops *o;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(o, &ts_ops, list) {
+ if (!strcmp(name, o->name)) {
+ if (!try_module_get(o->owner))
+ o = NULL;
+ rcu_read_unlock();
+ return o;
+ }
+ }
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+/**
+ * textsearch_register - register a textsearch module
+ * @ops: operations lookup table
+ *
+ * This function must be called by textsearch modules to announce
+ * their presence. The specified &@ops must have %name set to a
+ * unique identifier and the callbacks find(), init(), get_pattern(),
+ * and get_pattern_len() must be implemented.
+ *
+ * Returns 0 or -EEXIST if another module has already registered
+ * with the same name.
+ */
+int textsearch_register(struct ts_ops *ops)
+{
+ int err = -EEXIST;
+ struct ts_ops *o;
+
+ if (ops->name == NULL || ops->find == NULL || ops->init == NULL ||
+ ops->get_pattern == NULL || ops->get_pattern_len == NULL)
+ return -EINVAL;
+
+ spin_lock(&ts_mod_lock);
+ list_for_each_entry(o, &ts_ops, list) {
+ if (!strcmp(ops->name, o->name))
+ goto errout;
+ }
+
+ list_add_tail_rcu(&ops->list, &ts_ops);
+ err = 0;
+errout:
+ spin_unlock(&ts_mod_lock);
+ return err;
+}
+
+/**
+ * textsearch_unregister - unregister a textsearch module
+ * @ops: operations lookup table
+ *
+ * This function must be called by textsearch modules to announce
+ * their disappearance, for example when the module gets unloaded.
+ * The &ops parameter must be the same as the one during the
+ * registration.
+ *
+ * Returns 0 on success or -ENOENT if no matching textsearch
+ * registration was found.
+ */
+int textsearch_unregister(struct ts_ops *ops)
+{
+ int err = 0;
+ struct ts_ops *o;
+
+ spin_lock(&ts_mod_lock);
+ list_for_each_entry(o, &ts_ops, list) {
+ if (o == ops) {
+ list_del_rcu(&o->list);
+ goto out;
+ }
+ }
+
+ err = -ENOENT;
+out:
+ spin_unlock(&ts_mod_lock);
+ return err;
+}
+
+struct ts_linear_state
+{
+ unsigned int len;
+ const void *data;
+};
+
+static unsigned int get_linear_data(unsigned int consumed, const u8 **dst,
+ struct ts_config *conf,
+ struct ts_state *state)
+{
+ struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
+
+ if (likely(consumed < st->len)) {
+ *dst = st->data + consumed;
+ return st->len - consumed;
+ }
+
+ return 0;
+}
+
+/**
+ * textsearch_find_continuous - search a pattern in continuous/linear data
+ * @conf: search configuration
+ * @state: search state
+ * @data: data to search in
+ * @len: length of data
+ *
+ * A simplified version of textsearch_find() for continuous/linear data.
+ * Call textsearch_next() to retrieve subsequent matches.
+ *
+ * Returns the position of first occurrence of the pattern or
+ * %UINT_MAX if no occurrence was found.
+ */
+unsigned int textsearch_find_continuous(struct ts_config *conf,
+ struct ts_state *state,
+ const void *data, unsigned int len)
+{
+ struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
+
+ conf->get_next_block = get_linear_data;
+ st->data = data;
+ st->len = len;
+
+ return textsearch_find(conf, state);
+}
+
+/**
+ * textsearch_prepare - Prepare a search
+ * @algo: name of search algorithm
+ * @pattern: pattern data
+ * @len: length of pattern
+ * @gfp_mask: allocation mask
+ * @flags: search flags
+ *
+ * Looks up the search algorithm module and creates a new textsearch
+ * configuration for the specified pattern. Upon completion all
+ * necessary refcnts are held and the configuration must be released
+ * using textsearch_destroy() after usage.
+ *
+ * Note: The format of the pattern may not be compatible between
+ * the various search algorithms.
+ *
+ * Returns a new textsearch configuration according to the specified
+ * parameters or an ERR_PTR(). If a zero length pattern is passed, this
+ * function returns ERR_PTR(-EINVAL).
+ */
+struct ts_config *textsearch_prepare(const char *algo, const void *pattern,
+ unsigned int len, gfp_t gfp_mask, int flags)
+{
+ int err = -ENOENT;
+ struct ts_config *conf;
+ struct ts_ops *ops;
+
+ if (len == 0)
+ return ERR_PTR(-EINVAL);
+
+ ops = lookup_ts_algo(algo);
+#ifdef CONFIG_MODULES
+ /*
+ * Why not always autoload you may ask. Some users are
+ * in a situation where requesting a module may deadlock,
+ * especially when the module is located on a NFS mount.
+ */
+ if (ops == NULL && flags & TS_AUTOLOAD) {
+ request_module("ts_%s", algo);
+ ops = lookup_ts_algo(algo);
+ }
+#endif
+
+ if (ops == NULL)
+ goto errout;
+
+ conf = ops->init(pattern, len, gfp_mask, flags);
+ if (IS_ERR(conf)) {
+ err = PTR_ERR(conf);
+ goto errout;
+ }
+
+ conf->ops = ops;
+ return conf;
+
+errout:
+ if (ops)
+ module_put(ops->owner);
+
+ return ERR_PTR(err);
+}
+
+/**
+ * textsearch_destroy - destroy a search configuration
+ * @conf: search configuration
+ *
+ * Releases all references of the configuration and frees
+ * up the memory.
+ */
+void textsearch_destroy(struct ts_config *conf)
+{
+ if (conf->ops) {
+ if (conf->ops->destroy)
+ conf->ops->destroy(conf);
+ module_put(conf->ops->owner);
+ }
+
+ kfree(conf);
+}
+
+EXPORT_SYMBOL(textsearch_register);
+EXPORT_SYMBOL(textsearch_unregister);
+EXPORT_SYMBOL(textsearch_prepare);
+EXPORT_SYMBOL(textsearch_find_continuous);
+EXPORT_SYMBOL(textsearch_destroy);
diff --git a/lib/ts_bm.c b/lib/ts_bm.c
new file mode 100644
index 00000000..9e66ee40
--- /dev/null
+++ b/lib/ts_bm.c
@@ -0,0 +1,207 @@
+/*
+ * lib/ts_bm.c Boyer-Moore text search implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * ==========================================================================
+ *
+ * Implements Boyer-Moore string matching algorithm:
+ *
+ * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore.
+ * Communications of the Association for Computing Machinery,
+ * 20(10), 1977, pp. 762-772.
+ * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
+ *
+ * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
+ * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
+ *
+ * Note: Since Boyer-Moore (BM) performs searches for matches from right
+ * to left, it is possible that a match could be spread over multiple
+ * blocks; in that case this algorithm won't find it.
+ *
+ * If you need to ensure that such a thing can't happen, use the
+ * Knuth-Morris-Pratt (KMP) implementation instead. In short, choose
+ * the proper string search algorithm depending on your setting.
+ *
+ * Say you're using the textsearch infrastructure for filtering, NIDS or
+ * any similar security focused purpose, then go with KMP. Otherwise, if
+ * you really care about performance, say you're classifying packets to
+ * apply Quality of Service (QoS) policies, and you don't mind possibly
+ * missing matches spread over multiple fragments, then go with BM.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/textsearch.h>
+
+/* Alphabet size, use ASCII */
+#define ASIZE 256
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(args, format...)
+#endif
+
+struct ts_bm
+{
+ u8 * pattern;
+ unsigned int patlen;
+ unsigned int bad_shift[ASIZE];
+ unsigned int good_shift[0];
+};
+
+static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
+{
+ struct ts_bm *bm = ts_config_priv(conf);
+ unsigned int i, text_len, consumed = state->offset;
+ const u8 *text;
+ int shift = bm->patlen - 1, bs;
+ const u8 icase = conf->flags & TS_IGNORECASE;
+
+ for (;;) {
+ text_len = conf->get_next_block(consumed, &text, conf, state);
+
+ if (unlikely(text_len == 0))
+ break;
+
+ while (shift < text_len) {
+ DEBUGP("Searching in position %d (%c)\n",
+ shift, text[shift]);
+ for (i = 0; i < bm->patlen; i++)
+ if ((icase ? toupper(text[shift-i])
+ : text[shift-i])
+ != bm->pattern[bm->patlen-1-i])
+ goto next;
+
+ /* London calling... */
+ DEBUGP("found!\n");
+ return consumed += (shift-(bm->patlen-1));
+
+next: bs = bm->bad_shift[text[shift-i]];
+
+ /* Now jumping to... */
+ shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]);
+ }
+ consumed += text_len;
+ }
+
+ return UINT_MAX;
+}
+
+static int subpattern(u8 *pattern, int i, int j, int g)
+{
+ int x = i+g-1, y = j+g-1, ret = 0;
+
+ while(pattern[x--] == pattern[y--]) {
+ if (y < 0) {
+ ret = 1;
+ break;
+ }
+ if (--g == 0) {
+ ret = pattern[i-1] != pattern[j-1];
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void compute_prefix_tbl(struct ts_bm *bm, int flags)
+{
+ int i, j, g;
+
+ for (i = 0; i < ASIZE; i++)
+ bm->bad_shift[i] = bm->patlen;
+ for (i = 0; i < bm->patlen - 1; i++) {
+ bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
+ if (flags & TS_IGNORECASE)
+ bm->bad_shift[tolower(bm->pattern[i])]
+ = bm->patlen - 1 - i;
+ }
+
+	/* Compute the good shift array, used to match reoccurrences
+	 * of a subpattern */
+ bm->good_shift[0] = 1;
+ for (i = 1; i < bm->patlen; i++)
+ bm->good_shift[i] = bm->patlen;
+ for (i = bm->patlen-1, g = 1; i > 0; g++, i--) {
+ for (j = i-1; j >= 1-g ; j--)
+ if (subpattern(bm->pattern, i, j, g)) {
+ bm->good_shift[g] = bm->patlen-j-g;
+ break;
+ }
+ }
+}
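+
+/*
+ * Worked example (illustrative, not part of the original file): for the
+ * pattern "chicken" (patlen 7) the loop above yields bad_shift['c'] = 3
+ * (the distance from the rightmost 'c' before the end to the end),
+ * bad_shift['e'] = 1, and bad_shift[x] = 7 for any byte x not in the
+ * pattern, so a mismatch on an unrelated byte skips a whole pattern
+ * length.
+ */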
+
+static struct ts_config *bm_init(const void *pattern, unsigned int len,
+ gfp_t gfp_mask, int flags)
+{
+ struct ts_config *conf;
+ struct ts_bm *bm;
+ int i;
+ unsigned int prefix_tbl_len = len * sizeof(unsigned int);
+ size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
+
+ conf = alloc_ts_config(priv_size, gfp_mask);
+ if (IS_ERR(conf))
+ return conf;
+
+ conf->flags = flags;
+ bm = ts_config_priv(conf);
+ bm->patlen = len;
+ bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
+ if (flags & TS_IGNORECASE)
+ for (i = 0; i < len; i++)
+ bm->pattern[i] = toupper(((u8 *)pattern)[i]);
+ else
+ memcpy(bm->pattern, pattern, len);
+ compute_prefix_tbl(bm, flags);
+
+ return conf;
+}
+
+static void *bm_get_pattern(struct ts_config *conf)
+{
+ struct ts_bm *bm = ts_config_priv(conf);
+ return bm->pattern;
+}
+
+static unsigned int bm_get_pattern_len(struct ts_config *conf)
+{
+ struct ts_bm *bm = ts_config_priv(conf);
+ return bm->patlen;
+}
+
+static struct ts_ops bm_ops = {
+ .name = "bm",
+ .find = bm_find,
+ .init = bm_init,
+ .get_pattern = bm_get_pattern,
+ .get_pattern_len = bm_get_pattern_len,
+ .owner = THIS_MODULE,
+ .list = LIST_HEAD_INIT(bm_ops.list)
+};
+
+static int __init init_bm(void)
+{
+ return textsearch_register(&bm_ops);
+}
+
+static void __exit exit_bm(void)
+{
+ textsearch_unregister(&bm_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_bm);
+module_exit(exit_bm);
diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c
new file mode 100644
index 00000000..5696a351
--- /dev/null
+++ b/lib/ts_fsm.c
@@ -0,0 +1,341 @@
+/*
+ * lib/ts_fsm.c A naive finite state machine text search approach
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * A finite state machine consists of n states (struct ts_fsm_token)
+ * representing the pattern as a finite automaton. The data is read
+ * sequentially on an octet basis. Every state token specifies the number
+ * of recurrences and the type of value accepted, which can be either a
+ * specific character or a ctype-based set of characters. The available
+ * recurrence types are 1, (0|1), [0 n], and [1 n].
+ *
+ * The algorithm distinguishes between strict and non-strict mode, which
+ * determines whether the pattern has to start at the first octet. Strict mode
+ * is enabled by default and can be disabled by inserting
+ * TS_FSM_HEAD_IGNORE as the first token in the chain.
+ *
+ * The runtime performance of the algorithm should be around O(n);
+ * in strict mode, however, the average runtime can be better.
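+ *
+ * A pattern-construction sketch (the token array is illustrative;
+ * field names follow struct ts_fsm_token): skip any leading garbage,
+ * then match "ok" followed by one or more digits:
+ *
+ *	struct ts_fsm_token tokens[] = {
+ *		{ .type = TS_FSM_WILDCARD, .recur = TS_FSM_HEAD_IGNORE },
+ *		{ .type = TS_FSM_SPECIFIC, .recur = TS_FSM_SINGLE, .value = 'o' },
+ *		{ .type = TS_FSM_SPECIFIC, .recur = TS_FSM_SINGLE, .value = 'k' },
+ *		{ .type = TS_FSM_DIGIT,    .recur = TS_FSM_MULTI },
+ *	};
+ *
+ *	conf = textsearch_prepare("fsm", tokens, sizeof(tokens),
+ *				  GFP_KERNEL, 0);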
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/textsearch.h>
+#include <linux/textsearch_fsm.h>
+
+struct ts_fsm
+{
+ unsigned int ntokens;
+ struct ts_fsm_token tokens[0];
+};
+
+/* other values derived from ctype.h */
+#define _A 0x100 /* ascii */
+#define _W 0x200 /* wildcard */
+
+/* Map to _ctype flags and some magic numbers */
+static const u16 token_map[TS_FSM_TYPE_MAX+1] = {
+ [TS_FSM_SPECIFIC] = 0,
+ [TS_FSM_WILDCARD] = _W,
+ [TS_FSM_CNTRL] = _C,
+ [TS_FSM_LOWER] = _L,
+ [TS_FSM_UPPER] = _U,
+ [TS_FSM_PUNCT] = _P,
+ [TS_FSM_SPACE] = _S,
+ [TS_FSM_DIGIT] = _D,
+ [TS_FSM_XDIGIT] = _D | _X,
+ [TS_FSM_ALPHA] = _U | _L,
+ [TS_FSM_ALNUM] = _U | _L | _D,
+ [TS_FSM_PRINT] = _P | _U | _L | _D | _SP,
+ [TS_FSM_GRAPH] = _P | _U | _L | _D,
+ [TS_FSM_ASCII] = _A,
+};
+
+static const u16 token_lookup_tbl[256] = {
+_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */
+_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */
+_W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */
+_W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */
+_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */
+_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */
+_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */
+_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */
+_W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */
+_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */
+_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */
+_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */
+_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */
+_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */
+_W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */
+_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */
+_W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */
+_W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */
+_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */
+_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */
+_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */
+_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */
+_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */
+_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */
+_W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */
+_W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */
+_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */
+_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */
+_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */
+_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */
+_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */
+_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */
+_W, _W, _W, _W, /* 128-131 */
+_W, _W, _W, _W, /* 132-135 */
+_W, _W, _W, _W, /* 136-139 */
+_W, _W, _W, _W, /* 140-143 */
+_W, _W, _W, _W, /* 144-147 */
+_W, _W, _W, _W, /* 148-151 */
+_W, _W, _W, _W, /* 152-155 */
+_W, _W, _W, _W, /* 156-159 */
+_W|_S|_SP, _W|_P, _W|_P, _W|_P, /* 160-163 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 164-167 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 168-171 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 172-175 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 176-179 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 180-183 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 184-187 */
+_W|_P, _W|_P, _W|_P, _W|_P, /* 188-191 */
+_W|_U, _W|_U, _W|_U, _W|_U, /* 192-195 */
+_W|_U, _W|_U, _W|_U, _W|_U, /* 196-199 */
+_W|_U, _W|_U, _W|_U, _W|_U, /* 200-203 */
+_W|_U, _W|_U, _W|_U, _W|_U, /* 204-207 */
+_W|_U, _W|_U, _W|_U, _W|_U, /* 208-211 */
+_W|_U, _W|_U, _W|_U, _W|_P, /* 212-215 */
+_W|_U, _W|_U, _W|_U, _W|_U, /* 216-219 */
+_W|_U, _W|_U, _W|_U, _W|_L, /* 220-223 */
+_W|_L, _W|_L, _W|_L, _W|_L, /* 224-227 */
+_W|_L, _W|_L, _W|_L, _W|_L, /* 228-231 */
+_W|_L, _W|_L, _W|_L, _W|_L, /* 232-235 */
+_W|_L, _W|_L, _W|_L, _W|_L, /* 236-239 */
+_W|_L, _W|_L, _W|_L, _W|_L, /* 240-243 */
+_W|_L, _W|_L, _W|_L, _W|_P, /* 244-247 */
+_W|_L, _W|_L, _W|_L, _W|_L, /* 248-251 */
+_W|_L, _W|_L, _W|_L, _W|_L}; /* 252-255 */
+
+static inline int match_token(struct ts_fsm_token *t, u8 d)
+{
+ if (t->type)
+ return (token_lookup_tbl[d] & t->type) != 0;
+ else
+ return t->value == d;
+}
+
+static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state)
+{
+ struct ts_fsm *fsm = ts_config_priv(conf);
+ struct ts_fsm_token *cur = NULL, *next;
+ unsigned int match_start, block_idx = 0, tok_idx;
+ unsigned block_len = 0, strict, consumed = state->offset;
+ const u8 *data;
+
+#define GET_NEXT_BLOCK() \
+({ consumed += block_idx; \
+ block_idx = 0; \
+ block_len = conf->get_next_block(consumed, &data, conf, state); })
+
+#define TOKEN_MISMATCH() \
+ do { \
+ if (strict) \
+ goto no_match; \
+ block_idx++; \
+ goto startover; \
+ } while(0)
+
+#define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK())
+
+ if (end_of_data())
+ goto no_match;
+
+ strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE;
+
+startover:
+ match_start = consumed + block_idx;
+
+ for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) {
+ cur = &fsm->tokens[tok_idx];
+
+ if (likely(tok_idx < (fsm->ntokens - 1)))
+ next = &fsm->tokens[tok_idx + 1];
+ else
+ next = NULL;
+
+ switch (cur->recur) {
+ case TS_FSM_SINGLE:
+ if (end_of_data())
+ goto no_match;
+
+ if (!match_token(cur, data[block_idx]))
+ TOKEN_MISMATCH();
+ break;
+
+ case TS_FSM_PERHAPS:
+ if (end_of_data() ||
+ !match_token(cur, data[block_idx]))
+ continue;
+ break;
+
+ case TS_FSM_MULTI:
+ if (end_of_data())
+ goto no_match;
+
+ if (!match_token(cur, data[block_idx]))
+ TOKEN_MISMATCH();
+
+ block_idx++;
+ /* fall through */
+
+ case TS_FSM_ANY:
+ if (next == NULL)
+ goto found_match;
+
+ if (end_of_data())
+ continue;
+
+ while (!match_token(next, data[block_idx])) {
+ if (!match_token(cur, data[block_idx]))
+ TOKEN_MISMATCH();
+ block_idx++;
+ if (end_of_data())
+ goto no_match;
+ }
+ continue;
+
+ /*
+ * Optimization: Prefer small local loop over jumping
+ * back and forth until garbage at head is munched.
+ */
+ case TS_FSM_HEAD_IGNORE:
+ if (end_of_data())
+ continue;
+
+ while (!match_token(next, data[block_idx])) {
+ /*
+ * Special case, don't start over upon
+ * a mismatch, give the user the
+ * chance to specify the type of data
+ * allowed to be ignored.
+ */
+ if (!match_token(cur, data[block_idx]))
+ goto no_match;
+
+ block_idx++;
+ if (end_of_data())
+ goto no_match;
+ }
+
+ match_start = consumed + block_idx;
+ continue;
+ }
+
+ block_idx++;
+ }
+
+ if (end_of_data())
+ goto found_match;
+
+no_match:
+ return UINT_MAX;
+
+found_match:
+ state->offset = consumed + block_idx;
+ return match_start;
+}
+
+static struct ts_config *fsm_init(const void *pattern, unsigned int len,
+ gfp_t gfp_mask, int flags)
+{
+ int i, err = -EINVAL;
+ struct ts_config *conf;
+ struct ts_fsm *fsm;
+ struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern;
+ unsigned int ntokens = len / sizeof(*tokens);
+ size_t priv_size = sizeof(*fsm) + len;
+
+ if (len % sizeof(struct ts_fsm_token) || ntokens < 1)
+ goto errout;
+
+ if (flags & TS_IGNORECASE)
+ goto errout;
+
+ for (i = 0; i < ntokens; i++) {
+ struct ts_fsm_token *t = &tokens[i];
+
+ if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX)
+ goto errout;
+
+ if (t->recur == TS_FSM_HEAD_IGNORE &&
+ (i != 0 || i == (ntokens - 1)))
+ goto errout;
+ }
+
+ conf = alloc_ts_config(priv_size, gfp_mask);
+ if (IS_ERR(conf))
+ return conf;
+
+ conf->flags = flags;
+ fsm = ts_config_priv(conf);
+ fsm->ntokens = ntokens;
+ memcpy(fsm->tokens, pattern, len);
+
+ for (i = 0; i < fsm->ntokens; i++) {
+ struct ts_fsm_token *t = &fsm->tokens[i];
+ t->type = token_map[t->type];
+ }
+
+ return conf;
+
+errout:
+ return ERR_PTR(err);
+}
+
+static void *fsm_get_pattern(struct ts_config *conf)
+{
+ struct ts_fsm *fsm = ts_config_priv(conf);
+ return fsm->tokens;
+}
+
+static unsigned int fsm_get_pattern_len(struct ts_config *conf)
+{
+ struct ts_fsm *fsm = ts_config_priv(conf);
+ return fsm->ntokens * sizeof(struct ts_fsm_token);
+}
+
+static struct ts_ops fsm_ops = {
+ .name = "fsm",
+ .find = fsm_find,
+ .init = fsm_init,
+ .get_pattern = fsm_get_pattern,
+ .get_pattern_len = fsm_get_pattern_len,
+ .owner = THIS_MODULE,
+ .list = LIST_HEAD_INIT(fsm_ops.list)
+};
+
+static int __init init_fsm(void)
+{
+ return textsearch_register(&fsm_ops);
+}
+
+static void __exit exit_fsm(void)
+{
+ textsearch_unregister(&fsm_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_fsm);
+module_exit(exit_fsm);
diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c
new file mode 100644
index 00000000..632f783e
--- /dev/null
+++ b/lib/ts_kmp.c
@@ -0,0 +1,157 @@
+/*
+ * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Thomas Graf <tgraf@suug.ch>
+ *
+ * ==========================================================================
+ *
+ * Implements a linear-time string-matching algorithm due to Knuth,
+ * Morris, and Pratt [1]. Their algorithm avoids the explicit
+ * computation of the transition function DELTA altogether. Its
+ * matching time is O(n), for n being length(text), using just an
+ * auxiliary function PI[1..m], for m being length(pattern),
+ * precomputed from the pattern in time O(m). The array PI allows
+ * the transition function DELTA to be computed efficiently
+ * "on the fly" as needed. Roughly speaking, for any state
+ * "q" = 0,1,...,m and any character "a" in SIGMA, the value
+ * PI["q"] contains the information that is independent of "a" and
+ * is needed to compute DELTA("q", "a") [2]. Since the array PI
+ * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
+ * save a factor of |SIGMA| in the preprocessing time by computing
+ * PI rather than DELTA.
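+ *
+ * For example (the CLRS sample pattern), "ababaca" yields
+ * PI = [0, 0, 1, 2, 3, 0, 1]: a mismatch after matching "abab"
+ * falls back to state PI[4] = 2, i.e. the prefix "ab" is already
+ * known to be matched.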
+ *
+ * [1] Cormen, Leiserson, Rivest, Stein
+ *     Introduction to Algorithms, 2nd Edition, MIT Press
+ * [2] See finite automata theory
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/textsearch.h>
+
+struct ts_kmp
+{
+ u8 * pattern;
+ unsigned int pattern_len;
+ unsigned int prefix_tbl[0];
+};
+
+static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
+{
+ struct ts_kmp *kmp = ts_config_priv(conf);
+ unsigned int i, q = 0, text_len, consumed = state->offset;
+ const u8 *text;
+ const int icase = conf->flags & TS_IGNORECASE;
+
+ for (;;) {
+ text_len = conf->get_next_block(consumed, &text, conf, state);
+
+ if (unlikely(text_len == 0))
+ break;
+
+ for (i = 0; i < text_len; i++) {
+ while (q > 0 && kmp->pattern[q]
+ != (icase ? toupper(text[i]) : text[i]))
+ q = kmp->prefix_tbl[q - 1];
+ if (kmp->pattern[q]
+ == (icase ? toupper(text[i]) : text[i]))
+ q++;
+ if (unlikely(q == kmp->pattern_len)) {
+ state->offset = consumed + i + 1;
+ return state->offset - kmp->pattern_len;
+ }
+ }
+
+ consumed += text_len;
+ }
+
+ return UINT_MAX;
+}
+
+static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
+ unsigned int *prefix_tbl, int flags)
+{
+ unsigned int k, q;
+ const u8 icase = flags & TS_IGNORECASE;
+
+ for (k = 0, q = 1; q < len; q++) {
+ while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
+ != (icase ? toupper(pattern[q]) : pattern[q]))
+ k = prefix_tbl[k-1];
+ if ((icase ? toupper(pattern[k]) : pattern[k])
+ == (icase ? toupper(pattern[q]) : pattern[q]))
+ k++;
+ prefix_tbl[q] = k;
+ }
+}
+
+static struct ts_config *kmp_init(const void *pattern, unsigned int len,
+ gfp_t gfp_mask, int flags)
+{
+ struct ts_config *conf;
+ struct ts_kmp *kmp;
+ int i;
+ unsigned int prefix_tbl_len = len * sizeof(unsigned int);
+ size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
+
+ conf = alloc_ts_config(priv_size, gfp_mask);
+ if (IS_ERR(conf))
+ return conf;
+
+ conf->flags = flags;
+ kmp = ts_config_priv(conf);
+ kmp->pattern_len = len;
+ compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
+ kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
+ if (flags & TS_IGNORECASE)
+ for (i = 0; i < len; i++)
+ kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
+ else
+ memcpy(kmp->pattern, pattern, len);
+
+ return conf;
+}
+
+static void *kmp_get_pattern(struct ts_config *conf)
+{
+ struct ts_kmp *kmp = ts_config_priv(conf);
+ return kmp->pattern;
+}
+
+static unsigned int kmp_get_pattern_len(struct ts_config *conf)
+{
+ struct ts_kmp *kmp = ts_config_priv(conf);
+ return kmp->pattern_len;
+}
+
+static struct ts_ops kmp_ops = {
+ .name = "kmp",
+ .find = kmp_find,
+ .init = kmp_init,
+ .get_pattern = kmp_get_pattern,
+ .get_pattern_len = kmp_get_pattern_len,
+ .owner = THIS_MODULE,
+ .list = LIST_HEAD_INIT(kmp_ops.list)
+};
+
+static int __init init_kmp(void)
+{
+ return textsearch_register(&kmp_ops);
+}
+
+static void __exit exit_kmp(void)
+{
+ textsearch_unregister(&kmp_ops);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_kmp);
+module_exit(exit_kmp);
diff --git a/lib/uuid.c b/lib/uuid.c
new file mode 100644
index 00000000..8fadd7ce
--- /dev/null
+++ b/lib/uuid.c
@@ -0,0 +1,53 @@
+/*
+ * Unified UUID/GUID definition
+ *
+ * Copyright (C) 2009, Intel Corp.
+ * Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uuid.h>
+#include <linux/random.h>
+
+static void __uuid_gen_common(__u8 b[16])
+{
+ int i;
+ u32 r;
+
+ for (i = 0; i < 4; i++) {
+ r = random32();
+ memcpy(b + i * 4, &r, 4);
+ }
+	/* variant bits 0b10, see RFC 4122 */
+ b[8] = (b[8] & 0x3F) | 0x80;
+}
+
+void uuid_le_gen(uuid_le *lu)
+{
+ __uuid_gen_common(lu->b);
+ /* version 4 : random generation */
+ lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_le_gen);
+
+void uuid_be_gen(uuid_be *bu)
+{
+ __uuid_gen_common(bu->b);
+ /* version 4 : random generation */
+ bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
+}
+EXPORT_SYMBOL_GPL(uuid_be_gen);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
new file mode 100644
index 00000000..7af9d841
--- /dev/null
+++ b/lib/vsprintf.c
@@ -0,0 +1,2152 @@
+/*
+ * linux/lib/vsprintf.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */
+/*
+ * Wirzenius wrote this portably, Torvalds fucked it up :-)
+ */
+
+/*
+ * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
+ * - changed to provide snprintf and vsnprintf functions
+ * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de>
+ * - scnprintf and vscnprintf
+ */
+
+#include <stdarg.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/ioport.h>
+#include <net/addrconf.h>
+
+#include <asm/page.h> /* for PAGE_SIZE */
+#include <asm/div64.h>
+#include <asm/sections.h> /* for dereference_function_descriptor() */
+
+/* Works only for digits and letters, but small and fast */
+#define TOLOWER(x) ((x) | 0x20)
+
+static unsigned int simple_guess_base(const char *cp)
+{
+ if (cp[0] == '0') {
+ if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2]))
+ return 16;
+ else
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+/**
+ * simple_strtoull - convert a string to an unsigned long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
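+ *
+ * Base-guessing examples (illustrative): with @base == 0,
+ * simple_strtoull("0x1a", &end, 0) returns 26 and
+ * simple_strtoull("017", &end, 0) returns 15 (octal).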
+ */
+unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base)
+{
+ unsigned long long result = 0;
+
+ if (!base)
+ base = simple_guess_base(cp);
+
+ if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x')
+ cp += 2;
+
+ while (isxdigit(*cp)) {
+ unsigned int value;
+
+ value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10;
+ if (value >= base)
+ break;
+ result = result * base + value;
+ cp++;
+ }
+ if (endp)
+ *endp = (char *)cp;
+
+ return result;
+}
+EXPORT_SYMBOL(simple_strtoull);
+
+/**
+ * simple_strtoul - convert a string to an unsigned long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base)
+{
+ return simple_strtoull(cp, endp, base);
+}
+EXPORT_SYMBOL(simple_strtoul);
+
+/**
+ * simple_strtol - convert a string to a signed long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long simple_strtol(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -simple_strtoul(cp + 1, endp, base);
+
+ return simple_strtoul(cp, endp, base);
+}
+EXPORT_SYMBOL(simple_strtol);
+
+/**
+ * simple_strtoll - convert a string to a signed long long
+ * @cp: The start of the string
+ * @endp: A pointer to the end of the parsed string will be placed here
+ * @base: The number base to use
+ */
+long long simple_strtoll(const char *cp, char **endp, unsigned int base)
+{
+ if (*cp == '-')
+ return -simple_strtoull(cp + 1, endp, base);
+
+ return simple_strtoull(cp, endp, base);
+}
+EXPORT_SYMBOL(simple_strtoll);
+
+/**
+ * strict_strtoul - convert a string to an unsigned long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtoul converts a string to an unsigned long only if the
+ * string is really an unsigned long string; any string containing
+ * an invalid char at the tail will be rejected and -EINVAL returned.
+ * Only a newline char at the tail is acceptable because people generally
+ * change a module parameter in the following way:
+ *
+ * echo 1024 > /sys/module/e1000/parameters/copybreak
+ *
+ * echo will append a newline to the tail.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ *
+ * simple_strtoul just ignores any trailing invalid characters and
+ * returns the converted value of the prefix part of the string.
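+ *
+ * For example (illustrative):
+ *
+ *	strict_strtoul("1024\n", 10, &val) returns 0, val == 1024
+ *	strict_strtoul("1024K", 10, &val) returns -EINVAL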
+ */
+int strict_strtoul(const char *cp, unsigned int base, unsigned long *res)
+{
+ char *tail;
+ unsigned long val;
+
+ *res = 0;
+ if (!*cp)
+ return -EINVAL;
+
+ val = simple_strtoul(cp, &tail, base);
+ if (tail == cp)
+ return -EINVAL;
+
+ if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
+ *res = val;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(strict_strtoul);
+
+/**
+ * strict_strtol - convert a string to a long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtol is similar to strict_strtoul, but it allows the first
+ * character of the string to be '-'.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ */
+int strict_strtol(const char *cp, unsigned int base, long *res)
+{
+ int ret;
+ if (*cp == '-') {
+ ret = strict_strtoul(cp + 1, base, (unsigned long *)res);
+ if (!ret)
+ *res = -(*res);
+ } else {
+ ret = strict_strtoul(cp, base, (unsigned long *)res);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(strict_strtol);
+
+/**
+ * strict_strtoull - convert a string to an unsigned long long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtoull converts a string to an unsigned long long only if the
+ * string is really an unsigned long long string; any string containing
+ * an invalid char at the tail will be rejected and -EINVAL returned.
+ * Only a newline char at the tail is acceptable because people generally
+ * change a module parameter in the following way:
+ *
+ * echo 1024 > /sys/module/e1000/parameters/copybreak
+ *
+ * echo will append a newline to the tail of the string.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ *
+ * simple_strtoull just ignores any trailing invalid characters and
+ * returns the converted value of the prefix part of the string.
+ */
+int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res)
+{
+ char *tail;
+ unsigned long long val;
+
+ *res = 0;
+ if (!*cp)
+ return -EINVAL;
+
+ val = simple_strtoull(cp, &tail, base);
+ if (tail == cp)
+ return -EINVAL;
+ if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) {
+ *res = val;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL(strict_strtoull);
+
+/**
+ * strict_strtoll - convert a string to a long long strictly
+ * @cp: The string to be converted
+ * @base: The number base to use
+ * @res: The converted result value
+ *
+ * strict_strtoll is similar to strict_strtoull, but it allows the first
+ * character of the string to be '-'.
+ *
+ * It returns 0 if conversion is successful and *res is set to the converted
+ * value, otherwise it returns -EINVAL and *res is set to 0.
+ */
+int strict_strtoll(const char *cp, unsigned int base, long long *res)
+{
+ int ret;
+ if (*cp == '-') {
+ ret = strict_strtoull(cp + 1, base, (unsigned long long *)res);
+ if (!ret)
+ *res = -(*res);
+ } else {
+ ret = strict_strtoull(cp, base, (unsigned long long *)res);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(strict_strtoll);
+
+static noinline_for_stack
+int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i*10 + *((*s)++) - '0';
+
+ return i;
+}
+
+/* Decimal conversion is by far the most typical, and is used
+ * for /proc and /sys data. This directly impacts e.g. top performance
+ * with many processes running. We optimize it for speed
+ * using code from
+ * http://www.cs.uiowa.edu/~jones/bcd/decimal.html
+ * (with permission from the author, Douglas W. Jones). */
+
+/* Correctly formats any integer in [0, 99999].
+ * Outputs from one to five digits depending on input.
+ * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
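+/* Worked example (illustrative): q = 1234 = 0x4d2 gives d3 = 0, d2 = 4,
+ * d1 = 13 and d0 = 6*(0 + 4 + 13) + 2 = 104; (104 * 0xcd) >> 11 = 10, so
+ * the first emitted digit is 104 - 100 = 4. Carrying q = 10 into d1 gives
+ * 10 + 9*0 + 5*4 + 13 = 43, digit 3, and so on, emitting the digits of
+ * 1234 least-significant first. */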
+static noinline_for_stack
+char *put_dec_trunc(char *buf, unsigned q)
+{
+ unsigned d3, d2, d1, d0;
+ d1 = (q>>4) & 0xf;
+ d2 = (q>>8) & 0xf;
+ d3 = (q>>12);
+
+ d0 = 6*(d3 + d2 + d1) + (q & 0xf);
+ q = (d0 * 0xcd) >> 11;
+ d0 = d0 - 10*q;
+ *buf++ = d0 + '0'; /* least significant digit */
+ d1 = q + 9*d3 + 5*d2 + d1;
+ if (d1 != 0) {
+ q = (d1 * 0xcd) >> 11;
+ d1 = d1 - 10*q;
+ *buf++ = d1 + '0'; /* next digit */
+
+ d2 = q + 2*d2;
+ if ((d2 != 0) || (d3 != 0)) {
+ q = (d2 * 0xd) >> 7;
+ d2 = d2 - 10*q;
+ *buf++ = d2 + '0'; /* next digit */
+
+ d3 = q + 4*d3;
+ if (d3 != 0) {
+ q = (d3 * 0xcd) >> 11;
+ d3 = d3 - 10*q;
+ *buf++ = d3 + '0'; /* next digit */
+ if (q != 0)
+ *buf++ = q + '0'; /* most sign. digit */
+ }
+ }
+ }
+
+ return buf;
+}
+/* Same with if's removed. Always emits five digits */
+static noinline_for_stack
+char *put_dec_full(char *buf, unsigned q)
+{
+ /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
+ /* but anyway, gcc produces better code with full-sized ints */
+ unsigned d3, d2, d1, d0;
+ d1 = (q>>4) & 0xf;
+ d2 = (q>>8) & 0xf;
+ d3 = (q>>12);
+
+ /*
+ * Possible ways to approx. divide by 10
+ * gcc -O2 replaces multiply with shifts and adds
+ * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
+ * (x * 0x67) >> 10: 1100111
+ * (x * 0x34) >> 9: 110100 - same
+ * (x * 0x1a) >> 8: 11010 - same
+ * (x * 0x0d) >> 7: 1101 - same, shortest code (on i386)
+ */
+ d0 = 6*(d3 + d2 + d1) + (q & 0xf);
+ q = (d0 * 0xcd) >> 11;
+ d0 = d0 - 10*q;
+ *buf++ = d0 + '0';
+ d1 = q + 9*d3 + 5*d2 + d1;
+ q = (d1 * 0xcd) >> 11;
+ d1 = d1 - 10*q;
+ *buf++ = d1 + '0';
+
+ d2 = q + 2*d2;
+ q = (d2 * 0xd) >> 7;
+ d2 = d2 - 10*q;
+ *buf++ = d2 + '0';
+
+ d3 = q + 4*d3;
+ q = (d3 * 0xcd) >> 11; /* - shorter code */
+ /* q = (d3 * 0x67) >> 10; - would also work */
+ d3 = d3 - 10*q;
+ *buf++ = d3 + '0';
+ *buf++ = q + '0';
+
+ return buf;
+}
+/* No inlining helps gcc to use registers better */
+static noinline_for_stack
+char *put_dec(char *buf, unsigned long long num)
+{
+ while (1) {
+ unsigned rem;
+ if (num < 100000)
+ return put_dec_trunc(buf, num);
+ rem = do_div(num, 100000);
+ buf = put_dec_full(buf, rem);
+ }
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */
+#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */
+
+enum format_type {
+ FORMAT_TYPE_NONE, /* Just a string part */
+ FORMAT_TYPE_WIDTH,
+ FORMAT_TYPE_PRECISION,
+ FORMAT_TYPE_CHAR,
+ FORMAT_TYPE_STR,
+ FORMAT_TYPE_PTR,
+ FORMAT_TYPE_PERCENT_CHAR,
+ FORMAT_TYPE_INVALID,
+ FORMAT_TYPE_LONG_LONG,
+ FORMAT_TYPE_ULONG,
+ FORMAT_TYPE_LONG,
+ FORMAT_TYPE_UBYTE,
+ FORMAT_TYPE_BYTE,
+ FORMAT_TYPE_USHORT,
+ FORMAT_TYPE_SHORT,
+ FORMAT_TYPE_UINT,
+ FORMAT_TYPE_INT,
+ FORMAT_TYPE_NRCHARS,
+ FORMAT_TYPE_SIZE_T,
+ FORMAT_TYPE_PTRDIFF
+};
+
+struct printf_spec {
+ u8 type; /* format_type enum */
+ u8 flags; /* flags to number() */
+ u8 base; /* number base, 8, 10 or 16 only */
+ u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */
+ s16 field_width; /* width of output field */
+ s16 precision; /* # of digits/chars */
+};
+
+static noinline_for_stack
+char *number(char *buf, char *end, unsigned long long num,
+ struct printf_spec spec)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char sign;
+ char locase;
+ int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
+ int i;
+
+	/* locase = 0 or 0x20. ORing digits or letters with 'locase'
+	 * produces the same digits or (maybe lowercased) letters */
+ locase = (spec.flags & SMALL);
+ if (spec.flags & LEFT)
+ spec.flags &= ~ZEROPAD;
+ sign = 0;
+ if (spec.flags & SIGN) {
+ if ((signed long long)num < 0) {
+ sign = '-';
+ num = -(signed long long)num;
+ spec.field_width--;
+ } else if (spec.flags & PLUS) {
+ sign = '+';
+ spec.field_width--;
+ } else if (spec.flags & SPACE) {
+ sign = ' ';
+ spec.field_width--;
+ }
+ }
+ if (need_pfx) {
+ spec.field_width--;
+ if (spec.base == 16)
+ spec.field_width--;
+ }
+
+ /* generate full string in tmp[], in reverse order */
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ /* Generic code, for any base:
+ else do {
+ tmp[i++] = (digits[do_div(num,base)] | locase);
+ } while (num != 0);
+ */
+ else if (spec.base != 10) { /* 8 or 16 */
+ int mask = spec.base - 1;
+ int shift = 3;
+
+ if (spec.base == 16)
+ shift = 4;
+ do {
+ tmp[i++] = (digits[((unsigned char)num) & mask] | locase);
+ num >>= shift;
+ } while (num);
+ } else { /* base 10 */
+ i = put_dec(tmp, num) - tmp;
+ }
+
+ /* printing 100 using %2d gives "100", not "00" */
+ if (i > spec.precision)
+ spec.precision = i;
+ /* leading space padding */
+ spec.field_width -= spec.precision;
+ if (!(spec.flags & (ZEROPAD+LEFT))) {
+ while (--spec.field_width >= 0) {
+ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ /* sign */
+ if (sign) {
+ if (buf < end)
+ *buf = sign;
+ ++buf;
+ }
+ /* "0x" / "0" prefix */
+ if (need_pfx) {
+ if (buf < end)
+ *buf = '0';
+ ++buf;
+ if (spec.base == 16) {
+ if (buf < end)
+ *buf = ('X' | locase);
+ ++buf;
+ }
+ }
+ /* zero or space padding */
+ if (!(spec.flags & LEFT)) {
+ char c = (spec.flags & ZEROPAD) ? '0' : ' ';
+ while (--spec.field_width >= 0) {
+ if (buf < end)
+ *buf = c;
+ ++buf;
+ }
+ }
+ /* hmm even more zero padding? */
+ while (i <= --spec.precision) {
+ if (buf < end)
+ *buf = '0';
+ ++buf;
+ }
+ /* actual digits of result */
+ while (--i >= 0) {
+ if (buf < end)
+ *buf = tmp[i];
+ ++buf;
+ }
+ /* trailing space padding */
+ while (--spec.field_width >= 0) {
+ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+
+ return buf;
+}
+
+static noinline_for_stack
+char *string(char *buf, char *end, const char *s, struct printf_spec spec)
+{
+ int len, i;
+
+ if ((unsigned long)s < PAGE_SIZE)
+ s = "(null)";
+
+ len = strnlen(s, spec.precision);
+
+ if (!(spec.flags & LEFT)) {
+ while (len < spec.field_width--) {
+ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+ }
+ for (i = 0; i < len; ++i) {
+ if (buf < end)
+ *buf = *s;
+ ++buf; ++s;
+ }
+ while (len < spec.field_width--) {
+ if (buf < end)
+ *buf = ' ';
+ ++buf;
+ }
+
+ return buf;
+}
+
+static noinline_for_stack
+char *symbol_string(char *buf, char *end, void *ptr,
+ struct printf_spec spec, char ext)
+{
+ unsigned long value = (unsigned long) ptr;
+#ifdef CONFIG_KALLSYMS
+ char sym[KSYM_SYMBOL_LEN];
+ if (ext != 'f' && ext != 's')
+ sprint_symbol(sym, value);
+ else
+ kallsyms_lookup(value, NULL, NULL, NULL, sym);
+
+ return string(buf, end, sym, spec);
+#else
+ spec.field_width = 2 * sizeof(void *);
+ spec.flags |= SPECIAL | SMALL | ZEROPAD;
+ spec.base = 16;
+
+ return number(buf, end, value, spec);
+#endif
+}
+
+static noinline_for_stack
+char *resource_string(char *buf, char *end, struct resource *res,
+ struct printf_spec spec, const char *fmt)
+{
+#ifndef IO_RSRC_PRINTK_SIZE
+#define IO_RSRC_PRINTK_SIZE 6
+#endif
+
+#ifndef MEM_RSRC_PRINTK_SIZE
+#define MEM_RSRC_PRINTK_SIZE 10
+#endif
+ static const struct printf_spec io_spec = {
+ .base = 16,
+ .field_width = IO_RSRC_PRINTK_SIZE,
+ .precision = -1,
+ .flags = SPECIAL | SMALL | ZEROPAD,
+ };
+ static const struct printf_spec mem_spec = {
+ .base = 16,
+ .field_width = MEM_RSRC_PRINTK_SIZE,
+ .precision = -1,
+ .flags = SPECIAL | SMALL | ZEROPAD,
+ };
+ static const struct printf_spec bus_spec = {
+ .base = 16,
+ .field_width = 2,
+ .precision = -1,
+ .flags = SMALL | ZEROPAD,
+ };
+ static const struct printf_spec dec_spec = {
+ .base = 10,
+ .precision = -1,
+ .flags = 0,
+ };
+ static const struct printf_spec str_spec = {
+ .field_width = -1,
+ .precision = 10,
+ .flags = LEFT,
+ };
+ static const struct printf_spec flag_spec = {
+ .base = 16,
+ .precision = -1,
+ .flags = SPECIAL | SMALL,
+ };
+
+ /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8)
+ * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */
+#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4)
+#define FLAG_BUF_SIZE (2 * sizeof(res->flags))
+#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]")
+#define RAW_BUF_SIZE sizeof("[mem - flags 0x]")
+ char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE,
+ 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)];
+
+ char *p = sym, *pend = sym + sizeof(sym);
+ int decode = (fmt[0] == 'R') ? 1 : 0;
+ const struct printf_spec *specp;
+
+ *p++ = '[';
+ if (res->flags & IORESOURCE_IO) {
+ p = string(p, pend, "io ", str_spec);
+ specp = &io_spec;
+ } else if (res->flags & IORESOURCE_MEM) {
+ p = string(p, pend, "mem ", str_spec);
+ specp = &mem_spec;
+ } else if (res->flags & IORESOURCE_IRQ) {
+ p = string(p, pend, "irq ", str_spec);
+ specp = &dec_spec;
+ } else if (res->flags & IORESOURCE_DMA) {
+ p = string(p, pend, "dma ", str_spec);
+ specp = &dec_spec;
+ } else if (res->flags & IORESOURCE_BUS) {
+ p = string(p, pend, "bus ", str_spec);
+ specp = &bus_spec;
+ } else {
+ p = string(p, pend, "??? ", str_spec);
+ specp = &mem_spec;
+ decode = 0;
+ }
+ p = number(p, pend, res->start, *specp);
+ if (res->start != res->end) {
+ *p++ = '-';
+ p = number(p, pend, res->end, *specp);
+ }
+ if (decode) {
+ if (res->flags & IORESOURCE_MEM_64)
+ p = string(p, pend, " 64bit", str_spec);
+ if (res->flags & IORESOURCE_PREFETCH)
+ p = string(p, pend, " pref", str_spec);
+ if (res->flags & IORESOURCE_WINDOW)
+ p = string(p, pend, " window", str_spec);
+ if (res->flags & IORESOURCE_DISABLED)
+ p = string(p, pend, " disabled", str_spec);
+ } else {
+ p = string(p, pend, " flags ", str_spec);
+ p = number(p, pend, res->flags, flag_spec);
+ }
+ *p++ = ']';
+ *p = '\0';
+
+ return string(buf, end, sym, spec);
+}
+
+static noinline_for_stack
+char *mac_address_string(char *buf, char *end, u8 *addr,
+ struct printf_spec spec, const char *fmt)
+{
+ char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")];
+ char *p = mac_addr;
+ int i;
+ char separator;
+
+ if (fmt[1] == 'F') { /* FDDI canonical format */
+ separator = '-';
+ } else {
+ separator = ':';
+ }
+
+ for (i = 0; i < 6; i++) {
+ p = pack_hex_byte(p, addr[i]);
+ if (fmt[0] == 'M' && i != 5)
+ *p++ = separator;
+ }
+ *p = '\0';
+
+ return string(buf, end, mac_addr, spec);
+}
+
+static noinline_for_stack
+char *ip4_string(char *p, const u8 *addr, const char *fmt)
+{
+ int i;
+ bool leading_zeros = (fmt[0] == 'i');
+ int index;
+ int step;
+
+ switch (fmt[2]) {
+ case 'h':
+#ifdef __BIG_ENDIAN
+ index = 0;
+ step = 1;
+#else
+ index = 3;
+ step = -1;
+#endif
+ break;
+ case 'l':
+ index = 3;
+ step = -1;
+ break;
+ case 'n':
+ case 'b':
+ default:
+ index = 0;
+ step = 1;
+ break;
+ }
+ for (i = 0; i < 4; i++) {
+ char temp[3]; /* hold each IP quad in reverse order */
+ int digits = put_dec_trunc(temp, addr[index]) - temp;
+ if (leading_zeros) {
+ if (digits < 3)
+ *p++ = '0';
+ if (digits < 2)
+ *p++ = '0';
+ }
+ /* reverse the digits in the quad */
+ while (digits--)
+ *p++ = temp[digits];
+ if (i < 3)
+ *p++ = '.';
+ index += step;
+ }
+ *p = '\0';
+
+ return p;
+}
+
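+/* Emit a compressed IPv6 address as described by the 6man text
+ * representation draft: the longest run of zero 16-bit words collapses
+ * to "::" and leading zeros are dropped, e.g. (illustrative)
+ * 2001:0db8:0000:0000:0000:0000:0000:0001 becomes "2001:db8::1".
+ */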
+static noinline_for_stack
+char *ip6_compressed_string(char *p, const char *addr)
+{
+ int i, j, range;
+ unsigned char zerolength[8];
+ int longest = 1;
+ int colonpos = -1;
+ u16 word;
+ u8 hi, lo;
+ bool needcolon = false;
+ bool useIPv4;
+ struct in6_addr in6;
+
+ memcpy(&in6, addr, sizeof(struct in6_addr));
+
+ useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
+
+ memset(zerolength, 0, sizeof(zerolength));
+
+ if (useIPv4)
+ range = 6;
+ else
+ range = 8;
+
+ /* find position of longest 0 run */
+ for (i = 0; i < range; i++) {
+ for (j = i; j < range; j++) {
+ if (in6.s6_addr16[j] != 0)
+ break;
+ zerolength[i]++;
+ }
+ }
+ for (i = 0; i < range; i++) {
+ if (zerolength[i] > longest) {
+ longest = zerolength[i];
+ colonpos = i;
+ }
+ }
+
+ /* emit address */
+ for (i = 0; i < range; i++) {
+ if (i == colonpos) {
+ if (needcolon || i == 0)
+ *p++ = ':';
+ *p++ = ':';
+ needcolon = false;
+ i += longest - 1;
+ continue;
+ }
+ if (needcolon) {
+ *p++ = ':';
+ needcolon = false;
+ }
+ /* hex u16 without leading 0s */
+ word = ntohs(in6.s6_addr16[i]);
+ hi = word >> 8;
+ lo = word & 0xff;
+ if (hi) {
+ if (hi > 0x0f)
+ p = pack_hex_byte(p, hi);
+ else
+ *p++ = hex_asc_lo(hi);
+ p = pack_hex_byte(p, lo);
+ }
+ else if (lo > 0x0f)
+ p = pack_hex_byte(p, lo);
+ else
+ *p++ = hex_asc_lo(lo);
+ needcolon = true;
+ }
+
+ if (useIPv4) {
+ if (needcolon)
+ *p++ = ':';
+ p = ip4_string(p, &in6.s6_addr[12], "I4");
+ }
+ *p = '\0';
+
+ return p;
+}
+
+static noinline_for_stack
+char *ip6_string(char *p, const char *addr, const char *fmt)
+{
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ p = pack_hex_byte(p, *addr++);
+ p = pack_hex_byte(p, *addr++);
+ if (fmt[0] == 'I' && i != 7)
+ *p++ = ':';
+ }
+ *p = '\0';
+
+ return p;
+}
+
+static noinline_for_stack
+char *ip6_addr_string(char *buf, char *end, const u8 *addr,
+ struct printf_spec spec, const char *fmt)
+{
+ char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")];
+
+ if (fmt[0] == 'I' && fmt[2] == 'c')
+ ip6_compressed_string(ip6_addr, addr);
+ else
+ ip6_string(ip6_addr, addr, fmt);
+
+ return string(buf, end, ip6_addr, spec);
+}
+
+static noinline_for_stack
+char *ip4_addr_string(char *buf, char *end, const u8 *addr,
+ struct printf_spec spec, const char *fmt)
+{
+ char ip4_addr[sizeof("255.255.255.255")];
+
+ ip4_string(ip4_addr, addr, fmt);
+
+ return string(buf, end, ip4_addr, spec);
+}
+
+static noinline_for_stack
+char *uuid_string(char *buf, char *end, const u8 *addr,
+ struct printf_spec spec, const char *fmt)
+{
+ char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")];
+ char *p = uuid;
+ int i;
+ static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+ const u8 *index = be;
+ bool uc = false;
+
+ switch (*(++fmt)) {
+ case 'L':
+ uc = true; /* fall-through */
+ case 'l':
+ index = le;
+ break;
+ case 'B':
+ uc = true;
+ break;
+ }
+
+ for (i = 0; i < 16; i++) {
+ p = pack_hex_byte(p, addr[index[i]]);
+ switch (i) {
+ case 3:
+ case 5:
+ case 7:
+ case 9:
+ *p++ = '-';
+ break;
+ }
+ }
+
+ *p = 0;
+
+ if (uc) {
+ p = uuid;
+ do {
+ *p = toupper(*p);
+ } while (*(++p));
+ }
+
+ return string(buf, end, uuid, spec);
+}
+
+/*
+ * Show a '%p' thing. A kernel extension is that the '%p' is followed
+ * by an extra set of alphanumeric characters that are extended format
+ * specifiers.
+ *
+ * Right now we handle:
+ *
+ * - 'F' For symbolic function descriptor pointers with offset
+ * - 'f' For simple symbolic function names without offset
+ * - 'S' For symbolic direct pointers with offset
+ * - 's' For symbolic direct pointers without offset
+ * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
+ * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
+ * - 'M' For a 6-byte MAC address, it prints the address in the
+ * usual colon-separated hex notation
+ * - 'm' For a 6-byte MAC address, it prints the hex address without colons
+ * - 'MF' For a 6-byte MAC FDDI address, it prints the address
+ * with a dash-separated hex notation
+ * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way
+ * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4)
+ * IPv6 uses colon separated network-order 16 bit hex with leading 0's
+ * - 'i' [46] for 'raw' IPv4/IPv6 addresses
+ * IPv6 omits the colons (01020304...0f)
+ * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006)
+ * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order
+ * - 'I6c' for IPv6 addresses printed as specified by
+ * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
+ * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form
+ * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
+ * Options for %pU are:
+ * b big endian lower case hex (default)
+ * B big endian UPPER case hex
+ * l little endian lower case hex
+ * L little endian UPPER case hex
+ * big endian output byte order is:
+ * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15]
+ * little endian output byte order is:
+ * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15]
+ * - 'V' For a struct va_format which contains a format string * and va_list *,
+ * call vsnprintf(->format, *->va_list).
+ * Implements a "recursive vsnprintf".
+ * Do not use this feature without some mechanism to verify the
+ * correctness of the format string and va_list arguments.
+ *
+ * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
+ * function pointers are really function descriptors, which contain a
+ * pointer to the real address.
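+ *
+ * A usage sketch (dev, addr, ip and res below are illustrative):
+ *
+ *	printk(KERN_INFO "%s: mac %pM ip %pI4 region %pR\n",
+ *	       dev->name, addr, &ip, res);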
+ */
+static noinline_for_stack
+char *pointer(const char *fmt, char *buf, char *end, void *ptr,
+ struct printf_spec spec)
+{
+ if (!ptr)
+ return string(buf, end, "(null)", spec);
+
+ switch (*fmt) {
+ case 'F':
+ case 'f':
+ ptr = dereference_function_descriptor(ptr);
+ /* Fallthrough */
+ case 'S':
+ case 's':
+ return symbol_string(buf, end, ptr, spec, *fmt);
+ case 'R':
+ case 'r':
+ return resource_string(buf, end, ptr, spec, fmt);
+ case 'M': /* Colon separated: 00:01:02:03:04:05 */
+ case 'm': /* Contiguous: 000102030405 */
+ /* [mM]F (FDDI, bit reversed) */
+ return mac_address_string(buf, end, ptr, spec, fmt);
+ case 'I': /* Formatted IP supported
+ * 4: 1.2.3.4
+ * 6: 0001:0203:...:0708
+ * 6c: 1::708 or 1::1.2.3.4
+ */
+ case 'i': /* Contiguous:
+ * 4: 001.002.003.004
+ * 6: 000102...0f
+ */
+ switch (fmt[1]) {
+ case '6':
+ return ip6_addr_string(buf, end, ptr, spec, fmt);
+ case '4':
+ return ip4_addr_string(buf, end, ptr, spec, fmt);
+ }
+ break;
+ case 'U':
+ return uuid_string(buf, end, ptr, spec, fmt);
+ case 'V':
+ return buf + vsnprintf(buf, end - buf,
+ ((struct va_format *)ptr)->fmt,
+ *(((struct va_format *)ptr)->va));
+ }
+ spec.flags |= SMALL;
+ if (spec.field_width == -1) {
+ spec.field_width = 2*sizeof(void *);
+ spec.flags |= ZEROPAD;
+ }
+ spec.base = 16;
+
+ return number(buf, end, (unsigned long) ptr, spec);
+}
+
+/*
+ * Helper function to decode printf-style format.
+ * Each call decodes a token from the format and returns the
+ * number of characters read (or likely the delta to where it wants
+ * to go on the next call).
+ * The decoded token is returned through the parameters.
+ *
+ * 'h', 'l', or 'L' for integer fields
+ * 'z' support added 23/7/1999 S.H.
+ * 'z' changed to 'Z' --davidm 1/25/99
+ * 't' added for ptrdiff_t
+ *
+ * @fmt: the format string
+ * @type: the type of the token returned
+ * @flags: various flags such as +, -, # tokens..
+ * @field_width: overwritten width
+ * @base: base of the number (octal, hex, ...)
+ * @precision: precision of a number
+ * @qualifier: qualifier of a number (long, size_t, ...)
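+ *
+ * For example (illustrative), decoding "%-8.3lx" fills the spec with
+ * the LEFT and SMALL flags, field_width 8, precision 3, qualifier 'l',
+ * base 16 and type FORMAT_TYPE_ULONG.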
+ */
+static noinline_for_stack
+int format_decode(const char *fmt, struct printf_spec *spec)
+{
+ const char *start = fmt;
+
+ /* we finished early by reading the field width */
+ if (spec->type == FORMAT_TYPE_WIDTH) {
+ if (spec->field_width < 0) {
+ spec->field_width = -spec->field_width;
+ spec->flags |= LEFT;
+ }
+ spec->type = FORMAT_TYPE_NONE;
+ goto precision;
+ }
+
+ /* we finished early by reading the precision */
+ if (spec->type == FORMAT_TYPE_PRECISION) {
+ if (spec->precision < 0)
+ spec->precision = 0;
+
+ spec->type = FORMAT_TYPE_NONE;
+ goto qualifier;
+ }
+
+ /* By default */
+ spec->type = FORMAT_TYPE_NONE;
+
+ for (; *fmt ; ++fmt) {
+ if (*fmt == '%')
+ break;
+ }
+
+ /* Return the current non-format string */
+ if (fmt != start || !*fmt)
+ return fmt - start;
+
+ /* Process flags */
+ spec->flags = 0;
+
+ while (1) { /* this also skips first '%' */
+ bool found = true;
+
+ ++fmt;
+
+ switch (*fmt) {
+ case '-': spec->flags |= LEFT; break;
+ case '+': spec->flags |= PLUS; break;
+ case ' ': spec->flags |= SPACE; break;
+ case '#': spec->flags |= SPECIAL; break;
+ case '0': spec->flags |= ZEROPAD; break;
+ default: found = false;
+ }
+
+ if (!found)
+ break;
+ }
+
+ /* get field width */
+ spec->field_width = -1;
+
+ if (isdigit(*fmt))
+ spec->field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ /* it's the next argument */
+ spec->type = FORMAT_TYPE_WIDTH;
+ return ++fmt - start;
+ }
+
+precision:
+ /* get the precision */
+ spec->precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt)) {
+ spec->precision = skip_atoi(&fmt);
+ if (spec->precision < 0)
+ spec->precision = 0;
+ } else if (*fmt == '*') {
+ /* it's the next argument */
+ spec->type = FORMAT_TYPE_PRECISION;
+ return ++fmt - start;
+ }
+ }
+
+qualifier:
+ /* get the conversion qualifier */
+ spec->qualifier = -1;
+ if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
+ TOLOWER(*fmt) == 'z' || *fmt == 't') {
+ spec->qualifier = *fmt++;
+ if (unlikely(spec->qualifier == *fmt)) {
+ if (spec->qualifier == 'l') {
+ spec->qualifier = 'L';
+ ++fmt;
+ } else if (spec->qualifier == 'h') {
+ spec->qualifier = 'H';
+ ++fmt;
+ }
+ }
+ }
+
+ /* default base */
+ spec->base = 10;
+ switch (*fmt) {
+ case 'c':
+ spec->type = FORMAT_TYPE_CHAR;
+ return ++fmt - start;
+
+ case 's':
+ spec->type = FORMAT_TYPE_STR;
+ return ++fmt - start;
+
+ case 'p':
+ spec->type = FORMAT_TYPE_PTR;
+ return fmt - start;
+ /* skip alnum */
+
+ case 'n':
+ spec->type = FORMAT_TYPE_NRCHARS;
+ return ++fmt - start;
+
+ case '%':
+ spec->type = FORMAT_TYPE_PERCENT_CHAR;
+ return ++fmt - start;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ spec->base = 8;
+ break;
+
+ case 'x':
+ spec->flags |= SMALL;
+
+ case 'X':
+ spec->base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ spec->flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ spec->type = FORMAT_TYPE_INVALID;
+ return fmt - start;
+ }
+
+ if (spec->qualifier == 'L')
+ spec->type = FORMAT_TYPE_LONG_LONG;
+ else if (spec->qualifier == 'l') {
+ if (spec->flags & SIGN)
+ spec->type = FORMAT_TYPE_LONG;
+ else
+ spec->type = FORMAT_TYPE_ULONG;
+ } else if (TOLOWER(spec->qualifier) == 'z') {
+ spec->type = FORMAT_TYPE_SIZE_T;
+ } else if (spec->qualifier == 't') {
+ spec->type = FORMAT_TYPE_PTRDIFF;
+ } else if (spec->qualifier == 'H') {
+ if (spec->flags & SIGN)
+ spec->type = FORMAT_TYPE_BYTE;
+ else
+ spec->type = FORMAT_TYPE_UBYTE;
+ } else if (spec->qualifier == 'h') {
+ if (spec->flags & SIGN)
+ spec->type = FORMAT_TYPE_SHORT;
+ else
+ spec->type = FORMAT_TYPE_USHORT;
+ } else {
+ if (spec->flags & SIGN)
+ spec->type = FORMAT_TYPE_INT;
+ else
+ spec->type = FORMAT_TYPE_UINT;
+ }
+
+ return ++fmt - start;
+}
+
+/**
+ * vsnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * This function follows C99 vsnprintf, but has some extensions:
+ * %pS output the name of a text symbol with offset
+ * %ps output the name of a text symbol without offset
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
+ * %pR output the address range in a struct resource with decoded flags
+ * %pr output the address range in a struct resource with raw flags
+ * %pM output a 6-byte MAC address with colons
+ * %pm output a 6-byte MAC address without colons
+ * %pI4 print an IPv4 address without leading zeros
+ * %pi4 print an IPv4 address with leading zeros
+ * %pI6 print an IPv6 address with colons
+ * %pi6 print an IPv6 address without colons
+ * %pI6c print an IPv6 address as specified by
+ * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00
+ * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper
+ * case.
+ * %n is ignored
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf(). If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want snprintf() instead.
+ */
+int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ unsigned long long num;
+ char *str, *end;
+ struct printf_spec spec = {0};
+
+ /* Reject out-of-range values early. Large positive sizes are
+ used for unknown buffer sizes. */
+ if (WARN_ON_ONCE((int) size < 0))
+ return 0;
+
+ str = buf;
+ end = buf + size;
+
+ /* Make sure end is always >= buf */
+ if (end < buf) {
+ end = ((void *)-1);
+ size = end - buf;
+ }
+
+ while (*fmt) {
+ const char *old_fmt = fmt;
+ int read = format_decode(fmt, &spec);
+
+ fmt += read;
+
+ switch (spec.type) {
+ case FORMAT_TYPE_NONE: {
+ int copy = read;
+ if (str < end) {
+ if (copy > end - str)
+ copy = end - str;
+ memcpy(str, old_fmt, copy);
+ }
+ str += read;
+ break;
+ }
+
+ case FORMAT_TYPE_WIDTH:
+ spec.field_width = va_arg(args, int);
+ break;
+
+ case FORMAT_TYPE_PRECISION:
+ spec.precision = va_arg(args, int);
+ break;
+
+ case FORMAT_TYPE_CHAR: {
+ char c;
+
+ if (!(spec.flags & LEFT)) {
+ while (--spec.field_width > 0) {
+ if (str < end)
+ *str = ' ';
+ ++str;
+
+ }
+ }
+ c = (unsigned char) va_arg(args, int);
+ if (str < end)
+ *str = c;
+ ++str;
+ while (--spec.field_width > 0) {
+ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+ break;
+ }
+
+ case FORMAT_TYPE_STR:
+ str = string(str, end, va_arg(args, char *), spec);
+ break;
+
+ case FORMAT_TYPE_PTR:
+ str = pointer(fmt+1, str, end, va_arg(args, void *),
+ spec);
+ while (isalnum(*fmt))
+ fmt++;
+ break;
+
+ case FORMAT_TYPE_PERCENT_CHAR:
+ if (str < end)
+ *str = '%';
+ ++str;
+ break;
+
+ case FORMAT_TYPE_INVALID:
+ if (str < end)
+ *str = '%';
+ ++str;
+ break;
+
+ case FORMAT_TYPE_NRCHARS: {
+ u8 qualifier = spec.qualifier;
+
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else if (TOLOWER(qualifier) == 'z') {
+ size_t *ip = va_arg(args, size_t *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ break;
+ }
+
+ default:
+ switch (spec.type) {
+ case FORMAT_TYPE_LONG_LONG:
+ num = va_arg(args, long long);
+ break;
+ case FORMAT_TYPE_ULONG:
+ num = va_arg(args, unsigned long);
+ break;
+ case FORMAT_TYPE_LONG:
+ num = va_arg(args, long);
+ break;
+ case FORMAT_TYPE_SIZE_T:
+ num = va_arg(args, size_t);
+ break;
+ case FORMAT_TYPE_PTRDIFF:
+ num = va_arg(args, ptrdiff_t);
+ break;
+ case FORMAT_TYPE_UBYTE:
+ num = (unsigned char) va_arg(args, int);
+ break;
+ case FORMAT_TYPE_BYTE:
+ num = (signed char) va_arg(args, int);
+ break;
+ case FORMAT_TYPE_USHORT:
+ num = (unsigned short) va_arg(args, int);
+ break;
+ case FORMAT_TYPE_SHORT:
+ num = (short) va_arg(args, int);
+ break;
+ case FORMAT_TYPE_INT:
+ num = (int) va_arg(args, int);
+ break;
+ default:
+ num = va_arg(args, unsigned int);
+ }
+
+ str = number(str, end, num, spec);
+ }
+ }
+
+ if (size > 0) {
+ if (str < end)
+ *str = '\0';
+ else
+ end[-1] = '\0';
+ }
+
+ /* the trailing null byte doesn't count towards the total */
+ return str-buf;
+
+}
+EXPORT_SYMBOL(vsnprintf);
+
+/**
+ * vscnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The return value is the number of characters which have been written into
+ * the @buf not including the trailing '\0'. If @size is <= 0 the function
+ * returns 0.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want scnprintf() instead.
+ *
+ * See the vsnprintf() documentation for format string extensions over C99.
+ */
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int i;
+
+ i = vsnprintf(buf, size, fmt, args);
+
+ return (i >= size) ? (size - 1) : i;
+}
+EXPORT_SYMBOL(vscnprintf);
+
+/**
+ * snprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing null,
+ * as per ISO C99. If the return is greater than or equal to
+ * @size, the resulting string is truncated.
+ *
+ * See the vsnprintf() documentation for format string extensions over C99.
+ */
+int snprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return i;
+}
+EXPORT_SYMBOL(snprintf);
+
+/**
+ * scnprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The return value is the number of characters written into @buf not including
+ * the trailing '\0'. If @size is <= 0 the function returns 0.
+ */
+
+int scnprintf(char *buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return (i >= size) ? (size - 1) : i;
+}
+EXPORT_SYMBOL(scnprintf);
+
+/**
+ * vsprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The function returns the number of characters written
+ * into @buf. Use vsnprintf() or vscnprintf() in order to avoid
+ * buffer overflows.
+ *
+ * Call this function if you are already dealing with a va_list.
+ * You probably want sprintf() instead.
+ *
+ * See the vsnprintf() documentation for format string extensions over C99.
+ */
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+ return vsnprintf(buf, INT_MAX, fmt, args);
+}
+EXPORT_SYMBOL(vsprintf);
+
+/**
+ * sprintf - Format a string and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The function returns the number of characters written
+ * into @buf. Use snprintf() or scnprintf() in order to avoid
+ * buffer overflows.
+ *
+ * See the vsnprintf() documentation for format string extensions over C99.
+ */
+int sprintf(char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, INT_MAX, fmt, args);
+ va_end(args);
+
+ return i;
+}
+EXPORT_SYMBOL(sprintf);
+
+#ifdef CONFIG_BINARY_PRINTF
+/*
+ * bprintf service:
+ * vbin_printf() - VA arguments to binary data
+ * bstr_printf() - Binary data to text string
+ */
+
+/**
+ * vbin_printf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer (in 32-bit words, not characters)
+ * @fmt: The format string to use
+ * @args: Arguments for the format string
+ *
+ * The format follows C99 vsnprintf, except %n is ignored and its argument
+ * is skipped.
+ *
+ * The return value is the number of 32-bit words which would be generated
+ * for the given input.
+ *
+ * NOTE:
+ * If the return value is greater than @size, the resulting bin_buf is NOT
+ * valid for bstr_printf().
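+ *
+ * A round-trip sketch (bin_buf, out, fmt and args are illustrative):
+ *
+ *	u32 bin_buf[64];
+ *	char out[128];
+ *
+ *	vbin_printf(bin_buf, ARRAY_SIZE(bin_buf), fmt, args);
+ *	bstr_printf(out, sizeof(out), fmt, bin_buf);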
+ */
+int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args)
+{
+ struct printf_spec spec = {0};
+ char *str, *end;
+
+ str = (char *)bin_buf;
+ end = (char *)(bin_buf + size);
+
+#define save_arg(type) \
+do { \
+ if (sizeof(type) == 8) { \
+ unsigned long long value; \
+ str = PTR_ALIGN(str, sizeof(u32)); \
+ value = va_arg(args, unsigned long long); \
+ if (str + sizeof(type) <= end) { \
+ *(u32 *)str = *(u32 *)&value; \
+ *(u32 *)(str + 4) = *((u32 *)&value + 1); \
+ } \
+ } else { \
+ unsigned long value; \
+ str = PTR_ALIGN(str, sizeof(type)); \
+ value = va_arg(args, int); \
+ if (str + sizeof(type) <= end) \
+ *(typeof(type) *)str = (type)value; \
+ } \
+ str += sizeof(type); \
+} while (0)
+
+ while (*fmt) {
+ int read = format_decode(fmt, &spec);
+
+ fmt += read;
+
+ switch (spec.type) {
+ case FORMAT_TYPE_NONE:
+ case FORMAT_TYPE_INVALID:
+ case FORMAT_TYPE_PERCENT_CHAR:
+ break;
+
+ case FORMAT_TYPE_WIDTH:
+ case FORMAT_TYPE_PRECISION:
+ save_arg(int);
+ break;
+
+ case FORMAT_TYPE_CHAR:
+ save_arg(char);
+ break;
+
+ case FORMAT_TYPE_STR: {
+ const char *save_str = va_arg(args, char *);
+ size_t len;
+
+ if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE
+ || (unsigned long)save_str < PAGE_SIZE)
+ save_str = "(null)";
+ len = strlen(save_str) + 1;
+ if (str + len < end)
+ memcpy(str, save_str, len);
+ str += len;
+ break;
+ }
+
+ case FORMAT_TYPE_PTR:
+ save_arg(void *);
+ /* skip all alphanumeric pointer suffixes */
+ while (isalnum(*fmt))
+ fmt++;
+ break;
+
+ case FORMAT_TYPE_NRCHARS: {
+ /* skip %n's argument */
+ u8 qualifier = spec.qualifier;
+ void *skip_arg;
+ if (qualifier == 'l')
+ skip_arg = va_arg(args, long *);
+ else if (TOLOWER(qualifier) == 'z')
+ skip_arg = va_arg(args, size_t *);
+ else
+ skip_arg = va_arg(args, int *);
+ break;
+ }
+
+ default:
+ switch (spec.type) {
+
+ case FORMAT_TYPE_LONG_LONG:
+ save_arg(long long);
+ break;
+ case FORMAT_TYPE_ULONG:
+ case FORMAT_TYPE_LONG:
+ save_arg(unsigned long);
+ break;
+ case FORMAT_TYPE_SIZE_T:
+ save_arg(size_t);
+ break;
+ case FORMAT_TYPE_PTRDIFF:
+ save_arg(ptrdiff_t);
+ break;
+ case FORMAT_TYPE_UBYTE:
+ case FORMAT_TYPE_BYTE:
+ save_arg(char);
+ break;
+ case FORMAT_TYPE_USHORT:
+ case FORMAT_TYPE_SHORT:
+ save_arg(short);
+ break;
+ default:
+ save_arg(int);
+ }
+ }
+ }
+
+ return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf;
+#undef save_arg
+}
+EXPORT_SYMBOL_GPL(vbin_printf);
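
To make the packing concrete, here is an assumed walk-through of what the
code above stores for a three-argument format, invoked via bprintf()
(defined below); offsets and the little-endian rendering are illustrative:

	/*
	 * bprintf(buf, n, "x=%d s=%s v=%llu", 7, "hi", 5ULL)
	 *
	 *  byte 0: 00000007            int, one aligned word
	 *  byte 4: 'h' 'i' '\0'        string copied inline, NUL included
	 *  byte 7: pad to the next 4-byte boundary
	 *  byte 8: 00000005 00000000   long long, two words (little endian)
	 *
	 * The return value is in 32-bit words, so this reports 4.
	 */
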
+
+/**
+ * bstr_printf - Format a string from binary arguments and place it in a buffer
+ * @buf: The buffer to place the result into
+ * @size: The size of the buffer, including the trailing null space
+ * @fmt: The format string to use
+ * @bin_buf: Binary arguments for the format string
+ *
+ * This function is like C99 vsnprintf, except that vsnprintf gets its
+ * arguments from the stack, while bstr_printf gets them from @bin_buf,
+ * a binary buffer previously generated by vbin_printf().
+ *
+ * The format follows C99 vsnprintf, but has some extensions:
+ * see vsnprintf comment for details.
+ *
+ * The return value is the number of characters which would
+ * be generated for the given input, excluding the trailing
+ * '\0', as per ISO C99. If you want to have the exact
+ * number of characters written into @buf as return value
+ * (not including the trailing '\0'), use vscnprintf(). If the
+ * return is greater than or equal to @size, the resulting
+ * string is truncated.
+ */
+int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
+{
+ struct printf_spec spec = {0};
+ char *str, *end;
+ const char *args = (const char *)bin_buf;
+
+ if (WARN_ON_ONCE((int) size < 0))
+ return 0;
+
+ str = buf;
+ end = buf + size;
+
+#define get_arg(type) \
+({ \
+ typeof(type) value; \
+ if (sizeof(type) == 8) { \
+ args = PTR_ALIGN(args, sizeof(u32)); \
+ *(u32 *)&value = *(u32 *)args; \
+ *((u32 *)&value + 1) = *(u32 *)(args + 4); \
+ } else { \
+ args = PTR_ALIGN(args, sizeof(type)); \
+ value = *(typeof(type) *)args; \
+ } \
+ args += sizeof(type); \
+ value; \
+})
+
+ /* Make sure end is always >= buf */
+ if (end < buf) {
+ end = ((void *)-1);
+ size = end - buf;
+ }
+
+ while (*fmt) {
+ const char *old_fmt = fmt;
+ int read = format_decode(fmt, &spec);
+
+ fmt += read;
+
+ switch (spec.type) {
+ case FORMAT_TYPE_NONE: {
+ int copy = read;
+ if (str < end) {
+ if (copy > end - str)
+ copy = end - str;
+ memcpy(str, old_fmt, copy);
+ }
+ str += read;
+ break;
+ }
+
+ case FORMAT_TYPE_WIDTH:
+ spec.field_width = get_arg(int);
+ break;
+
+ case FORMAT_TYPE_PRECISION:
+ spec.precision = get_arg(int);
+ break;
+
+ case FORMAT_TYPE_CHAR: {
+ char c;
+
+ if (!(spec.flags & LEFT)) {
+ while (--spec.field_width > 0) {
+ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+ }
+ c = (unsigned char) get_arg(char);
+ if (str < end)
+ *str = c;
+ ++str;
+ while (--spec.field_width > 0) {
+ if (str < end)
+ *str = ' ';
+ ++str;
+ }
+ break;
+ }
+
+ case FORMAT_TYPE_STR: {
+ const char *str_arg = args;
+ args += strlen(str_arg) + 1;
+ str = string(str, end, (char *)str_arg, spec);
+ break;
+ }
+
+ case FORMAT_TYPE_PTR:
+ str = pointer(fmt+1, str, end, get_arg(void *), spec);
+ while (isalnum(*fmt))
+ fmt++;
+ break;
+
+ case FORMAT_TYPE_PERCENT_CHAR:
+ case FORMAT_TYPE_INVALID:
+ if (str < end)
+ *str = '%';
+ ++str;
+ break;
+
+ case FORMAT_TYPE_NRCHARS:
+ /* skip */
+ break;
+
+ default: {
+ unsigned long long num;
+
+ switch (spec.type) {
+
+ case FORMAT_TYPE_LONG_LONG:
+ num = get_arg(long long);
+ break;
+ case FORMAT_TYPE_ULONG:
+ case FORMAT_TYPE_LONG:
+ num = get_arg(unsigned long);
+ break;
+ case FORMAT_TYPE_SIZE_T:
+ num = get_arg(size_t);
+ break;
+ case FORMAT_TYPE_PTRDIFF:
+ num = get_arg(ptrdiff_t);
+ break;
+ case FORMAT_TYPE_UBYTE:
+ num = get_arg(unsigned char);
+ break;
+ case FORMAT_TYPE_BYTE:
+ num = get_arg(signed char);
+ break;
+ case FORMAT_TYPE_USHORT:
+ num = get_arg(unsigned short);
+ break;
+ case FORMAT_TYPE_SHORT:
+ num = get_arg(short);
+ break;
+ case FORMAT_TYPE_UINT:
+ num = get_arg(unsigned int);
+ break;
+ default:
+ num = get_arg(int);
+ }
+
+ str = number(str, end, num, spec);
+ } /* default: */
+ } /* switch(spec.type) */
+ } /* while(*fmt) */
+
+ if (size > 0) {
+ if (str < end)
+ *str = '\0';
+ else
+ end[-1] = '\0';
+ }
+
+#undef get_arg
+
+ /* the trailing null byte doesn't count towards the total */
+ return str - buf;
+}
+EXPORT_SYMBOL_GPL(bstr_printf);
+
+/**
+ * bprintf - Parse a format string and place args' binary value in a buffer
+ * @bin_buf: The buffer to place args' binary value
+ * @size: The size of the buffer (in 32-bit words, not characters)
+ * @fmt: The format string to use
+ * @...: Arguments for the format string
+ *
+ * The function returns the number of u32 words written
+ * into @bin_buf.
+ */
+int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vbin_printf(bin_buf, size, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(bprintf);
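
A hedged round-trip sketch tying the two halves together; the argument
names are assumptions, and the format string must be byte-identical in
both calls because bstr_printf() re-parses it to locate the arguments:

	u32 bin[64];
	char out[128];
	int words;

	words = bprintf(bin, ARRAY_SIZE(bin), "pid=%d comm=%s", pid, comm);
	if (words <= ARRAY_SIZE(bin))	/* else bin is not valid input */
		bstr_printf(out, sizeof(out), "pid=%d comm=%s", bin);
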
+
+#endif /* CONFIG_BINARY_PRINTF */
+
+/**
+ * vsscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @args: arguments
+ */
+int vsscanf(const char *buf, const char *fmt, va_list args)
+{
+ const char *str = buf;
+ char *next;
+ char digit;
+ int num = 0;
+ u8 qualifier;
+ u8 base;
+ s16 field_width;
+ bool is_sign;
+
+ while (*fmt && *str) {
+ /* skip any white space in format */
+ /* white space in format matches any amount of
+ * white space, including none, in the input.
+ */
+ if (isspace(*fmt)) {
+ fmt = skip_spaces(++fmt);
+ str = skip_spaces(str);
+ }
+
+ /* anything that is not a conversion must match exactly */
+ if (*fmt != '%' && *fmt) {
+ if (*fmt++ != *str++)
+ break;
+ continue;
+ }
+
+ if (!*fmt)
+ break;
+ ++fmt;
+
+ /* skip this conversion.
+ * advance both strings to next white space
+ */
+ if (*fmt == '*') {
+ while (!isspace(*fmt) && *fmt != '%' && *fmt)
+ fmt++;
+ while (!isspace(*str) && *str)
+ str++;
+ continue;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+
+ /* get conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || TOLOWER(*fmt) == 'l' ||
+ TOLOWER(*fmt) == 'z') {
+ qualifier = *fmt++;
+ if (unlikely(qualifier == *fmt)) {
+ if (qualifier == 'h') {
+ qualifier = 'H';
+ fmt++;
+ } else if (qualifier == 'l') {
+ qualifier = 'L';
+ fmt++;
+ }
+ }
+ }
+
+ if (!*fmt || !*str)
+ break;
+
+ base = 10;
+ is_sign = 0;
+
+ switch (*fmt++) {
+ case 'c':
+ {
+ char *s = (char *)va_arg(args, char*);
+ if (field_width == -1)
+ field_width = 1;
+ do {
+ *s++ = *str++;
+ } while (--field_width > 0 && *str);
+ num++;
+ }
+ continue;
+ case 's':
+ {
+ char *s = (char *)va_arg(args, char *);
+ if (field_width == -1)
+ field_width = SHRT_MAX;
+ /* first, skip leading white space in buffer */
+ str = skip_spaces(str);
+
+ /* now copy until next white space */
+ while (*str && !isspace(*str) && field_width--)
+ *s++ = *str++;
+ *s = '\0';
+ num++;
+ }
+ continue;
+ case 'n':
+ /* return number of characters read so far */
+ {
+ int *i = (int *)va_arg(args, int*);
+ *i = str - buf;
+ }
+ continue;
+ case 'o':
+ base = 8;
+ break;
+ case 'x':
+ case 'X':
+ base = 16;
+ break;
+ case 'i':
+ base = 0;
+ /* fall through */
+ case 'd':
+ is_sign = 1;
+ /* fall through */
+ case 'u':
+ break;
+ case '%':
+ /* looking for '%' in str */
+ if (*str++ != '%')
+ return num;
+ continue;
+ default:
+ /* invalid format; stop here */
+ return num;
+ }
+
+ /* have some sort of integer conversion.
+ * first, skip white space in buffer.
+ */
+ str = skip_spaces(str);
+
+ digit = *str;
+ if (is_sign && digit == '-')
+ digit = *(str + 1);
+
+ if (!digit
+ || (base == 16 && !isxdigit(digit))
+ || (base == 10 && !isdigit(digit))
+ || (base == 8 && (!isdigit(digit) || digit > '7'))
+ || (base == 0 && !isdigit(digit)))
+ break;
+
+ switch (qualifier) {
+ case 'H': /* that's 'hh' in format */
+ if (is_sign) {
+ signed char *s = (signed char *)va_arg(args, signed char *);
+ *s = (signed char)simple_strtol(str, &next, base);
+ } else {
+ unsigned char *s = (unsigned char *)va_arg(args, unsigned char *);
+ *s = (unsigned char)simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'h':
+ if (is_sign) {
+ short *s = (short *)va_arg(args, short *);
+ *s = (short)simple_strtol(str, &next, base);
+ } else {
+ unsigned short *s = (unsigned short *)va_arg(args, unsigned short *);
+ *s = (unsigned short)simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'l':
+ if (is_sign) {
+ long *l = (long *)va_arg(args, long *);
+ *l = simple_strtol(str, &next, base);
+ } else {
+ unsigned long *l = (unsigned long *)va_arg(args, unsigned long *);
+ *l = simple_strtoul(str, &next, base);
+ }
+ break;
+ case 'L':
+ if (is_sign) {
+ long long *l = (long long *)va_arg(args, long long *);
+ *l = simple_strtoll(str, &next, base);
+ } else {
+ unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *);
+ *l = simple_strtoull(str, &next, base);
+ }
+ break;
+ case 'Z':
+ case 'z':
+ {
+ size_t *s = (size_t *)va_arg(args, size_t *);
+ *s = (size_t)simple_strtoul(str, &next, base);
+ }
+ break;
+ default:
+ if (is_sign) {
+ int *i = (int *)va_arg(args, int *);
+ *i = (int)simple_strtol(str, &next, base);
+ } else {
+ unsigned int *i = (unsigned int *)va_arg(args, unsigned int*);
+ *i = (unsigned int)simple_strtoul(str, &next, base);
+ }
+ break;
+ }
+ num++;
+
+ if (!next)
+ break;
+ str = next;
+ }
+
+ /*
+ * Now we've come all the way through so either the input string or the
+ * format ended. In the former case, there can be a %n at the current
+ * position in the format that needs to be filled.
+ */
+ if (*fmt == '%' && *(fmt + 1) == 'n') {
+ int *p = (int *)va_arg(args, int *);
+ *p = str - buf;
+ }
+
+ return num;
+}
+EXPORT_SYMBOL(vsscanf);
+
+/**
+ * sscanf - Unformat a buffer into a list of arguments
+ * @buf: input buffer
+ * @fmt: format of buffer
+ * @...: resulting arguments
+ */
+int sscanf(const char *buf, const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsscanf(buf, fmt, args);
+ va_end(args);
+
+ return i;
+}
+EXPORT_SYMBOL(sscanf);
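
A short usage sketch (identifier names assumed): the return value counts
successful conversions, so callers test it rather than the parsed values:

	unsigned int major, minor;

	if (sscanf(input, "%u:%u", &major, &minor) != 2)
		return -EINVAL;	/* both fields must parse */
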
diff --git a/lib/zlib_deflate/Makefile b/lib/zlib_deflate/Makefile
new file mode 100644
index 00000000..86275e3f
--- /dev/null
+++ b/lib/zlib_deflate/Makefile
@@ -0,0 +1,11 @@
+#
+# This is a modified version of zlib, which does all memory
+# allocation ahead of time.
+#
+# This is the compression code, see zlib_inflate for the
+# decompression code.
+#
+
+obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate.o
+
+zlib_deflate-objs := deflate.o deftree.o deflate_syms.o
diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c
new file mode 100644
index 00000000..46a31e5f
--- /dev/null
+++ b/lib/zlib_deflate/deflate.c
@@ -0,0 +1,1253 @@
+/* +++ deflate.c */
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-1996 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * ALGORITHM
+ *
+ * The "deflation" process depends on being able to identify portions
+ * of the input text which are identical to earlier input (within a
+ * sliding window trailing behind the input currently being processed).
+ *
+ * The most straightforward technique turns out to be the fastest for
+ * most input files: try all possible matches and select the longest.
+ * The key feature of this algorithm is that insertions into the string
+ * dictionary are very simple and thus fast, and deletions are avoided
+ * completely. Insertions are performed at each input character, whereas
+ * string matches are performed only when the previous match ends. So it
+ * is preferable to spend more time in matches to allow very fast string
+ * insertions and avoid deletions. The matching algorithm for small
+ * strings is inspired by that of Rabin & Karp. A brute force approach
+ * is used to find longer strings when a small match has been found.
+ * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ * (by Leonid Broukhis).
+ * A previous version of this file used a more sophisticated algorithm
+ * (by Fiala and Greene) which is guaranteed to run in linear amortized
+ * time, but has a larger average cost, uses more memory and is patented.
+ * However the F&G algorithm may be faster for some highly redundant
+ * files if the parameter max_chain_length (described below) is too large.
+ *
+ * ACKNOWLEDGEMENTS
+ *
+ * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ * I found it in 'freeze' written by Leonid Broukhis.
+ * Thanks to many people for bug reports and testing.
+ *
+ * REFERENCES
+ *
+ * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ * Available in ftp://ds.internic.net/rfc/rfc1951.txt
+ *
+ * A description of the Rabin and Karp algorithm is given in the book
+ * "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ * Fiala,E.R., and Greene,D.H.
+ * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-505
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/zutil.h>
+#include "defutil.h"
+
+
+/* ===========================================================================
+ * Function prototypes.
+ */
+typedef enum {
+ need_more, /* block not completed, need more input or more output */
+ block_done, /* block flush performed */
+ finish_started, /* finish started, need only more output at next deflate */
+ finish_done /* finish done, accept no more input or output */
+} block_state;
+
+typedef block_state (*compress_func) (deflate_state *s, int flush);
+/* Compression function. Returns the block state after the call. */
+
+static void fill_window (deflate_state *s);
+static block_state deflate_stored (deflate_state *s, int flush);
+static block_state deflate_fast (deflate_state *s, int flush);
+static block_state deflate_slow (deflate_state *s, int flush);
+static void lm_init (deflate_state *s);
+static void putShortMSB (deflate_state *s, uInt b);
+static void flush_pending (z_streamp strm);
+static int read_buf (z_streamp strm, Byte *buf, unsigned size);
+static uInt longest_match (deflate_state *s, IPos cur_match);
+
+#ifdef DEBUG_ZLIB
+static void check_match (deflate_state *s, IPos start, IPos match,
+ int length);
+#endif
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains */
+
+#ifndef TOO_FAR
+# define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+typedef struct config_s {
+ ush good_length; /* reduce lazy search above this match length */
+ ush max_lazy; /* do not perform lazy search above this match length */
+ ush nice_length; /* quit search above this match length */
+ ush max_chain;
+ compress_func func;
+} config;
+
+static const config configuration_table[10] = {
+/* good lazy nice chain */
+/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */
+/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */
+/* 2 */ {4, 5, 16, 8, deflate_fast},
+/* 3 */ {4, 6, 32, 32, deflate_fast},
+
+/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */
+/* 5 */ {8, 16, 32, 32, deflate_slow},
+/* 6 */ {8, 16, 128, 128, deflate_slow},
+/* 7 */ {8, 32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+/* ===========================================================================
+ * Update a hash value with the given input byte
+ * IN assertion: all calls to UPDATE_HASH are made with consecutive
+ * input characters, so that a running hash key can be computed from the
+ * previous key instead of complete recalculation each time.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * IN assertion: all calls to INSERT_STRING are made with consecutive
+ * input characters and the first MIN_MATCH bytes of str are valid
+ * (except for the last MIN_MATCH-1 bytes of the input file).
+ */
+#define INSERT_STRING(s, str, match_head) \
+ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+ s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \
+ s->head[s->ins_h] = (Pos)(str))
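
A worked illustration of the rolling hash, with assumed parameters
(hash_bits = 15, hence hash_shift = 5 and hash_mask = 0x7fff): after
MIN_MATCH updates the oldest byte has been shifted entirely above the
mask, so the key depends only on the last three input bytes:

	h = 0;
	h = ((h << 5) ^ a) & 0x7fff;	/* key covers a       */
	h = ((h << 5) ^ b) & 0x7fff;	/* key covers a,b     */
	h = ((h << 5) ^ c) & 0x7fff;	/* key covers a,b,c   */
	h = ((h << 5) ^ d) & 0x7fff;	/* a gone: key b,c,d  */
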
+
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+ s->head[s->hash_size-1] = NIL; \
+ memset((char *)s->head, 0, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ========================================================================= */
+int zlib_deflateInit2(
+ z_streamp strm,
+ int level,
+ int method,
+ int windowBits,
+ int memLevel,
+ int strategy
+)
+{
+ deflate_state *s;
+ int noheader = 0;
+ deflate_workspace *mem;
+
+ ush *overlay;
+ /* We overlay pending_buf and d_buf+l_buf. This works since the average
+ * output size for (length,distance) codes is <= 24 bits.
+ */
+
+ if (strm == NULL) return Z_STREAM_ERROR;
+
+ strm->msg = NULL;
+
+ if (level == Z_DEFAULT_COMPRESSION) level = 6;
+
+ mem = (deflate_workspace *) strm->workspace;
+
+ if (windowBits < 0) { /* undocumented feature: suppress zlib header */
+ noheader = 1;
+ windowBits = -windowBits;
+ }
+ if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+ windowBits < 9 || windowBits > 15 || level < 0 || level > 9 ||
+ strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
+ return Z_STREAM_ERROR;
+ }
+ s = (deflate_state *) &(mem->deflate_memory);
+ strm->state = (struct internal_state *)s;
+ s->strm = strm;
+
+ s->noheader = noheader;
+ s->w_bits = windowBits;
+ s->w_size = 1 << s->w_bits;
+ s->w_mask = s->w_size - 1;
+
+ s->hash_bits = memLevel + 7;
+ s->hash_size = 1 << s->hash_bits;
+ s->hash_mask = s->hash_size - 1;
+ s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
+
+ s->window = (Byte *) mem->window_memory;
+ s->prev = (Pos *) mem->prev_memory;
+ s->head = (Pos *) mem->head_memory;
+
+ s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+
+ overlay = (ush *) mem->overlay_memory;
+ s->pending_buf = (uch *) overlay;
+ s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+ s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
+ s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+
+ s->level = level;
+ s->strategy = strategy;
+ s->method = (Byte)method;
+
+ return zlib_deflateReset(strm);
+}
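
Worked numbers for the common defaults (windowBits = 15, memLevel = 8),
derived from the assignments above; all of it lives inside the
caller-supplied workspace:

	w_size      = 1 << 15           /* 32K byte sliding window       */
	hash_size   = 1 << (8 + 7)      /* 32K hash chain heads          */
	hash_shift  = (15 + 3 - 1) / 3  /* = 5 bits fed in per byte      */
	lit_bufsize = 1 << (8 + 6)      /* 16K buffered literals/matches */
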
+
+/* ========================================================================= */
+#if 0
+int zlib_deflateSetDictionary(
+ z_streamp strm,
+ const Byte *dictionary,
+ uInt dictLength
+)
+{
+ deflate_state *s;
+ uInt length = dictLength;
+ uInt n;
+ IPos hash_head = 0;
+
+ if (strm == NULL || strm->state == NULL || dictionary == NULL)
+ return Z_STREAM_ERROR;
+
+ s = (deflate_state *) strm->state;
+ if (s->status != INIT_STATE) return Z_STREAM_ERROR;
+
+ strm->adler = zlib_adler32(strm->adler, dictionary, dictLength);
+
+ if (length < MIN_MATCH) return Z_OK;
+ if (length > MAX_DIST(s)) {
+ length = MAX_DIST(s);
+#ifndef USE_DICT_HEAD
+ dictionary += dictLength - length; /* use the tail of the dictionary */
+#endif
+ }
+ memcpy((char *)s->window, dictionary, length);
+ s->strstart = length;
+ s->block_start = (long)length;
+
+ /* Insert all strings in the hash table (except for the last two bytes).
+ * s->lookahead stays null, so s->ins_h will be recomputed at the next
+ * call of fill_window.
+ */
+ s->ins_h = s->window[0];
+ UPDATE_HASH(s, s->ins_h, s->window[1]);
+ for (n = 0; n <= length - MIN_MATCH; n++) {
+ INSERT_STRING(s, n, hash_head);
+ }
+ if (hash_head) hash_head = 0; /* to make compiler happy */
+ return Z_OK;
+}
+#endif /* 0 */
+
+/* ========================================================================= */
+int zlib_deflateReset(
+ z_streamp strm
+)
+{
+ deflate_state *s;
+
+ if (strm == NULL || strm->state == NULL)
+ return Z_STREAM_ERROR;
+
+ strm->total_in = strm->total_out = 0;
+ strm->msg = NULL;
+ strm->data_type = Z_UNKNOWN;
+
+ s = (deflate_state *)strm->state;
+ s->pending = 0;
+ s->pending_out = s->pending_buf;
+
+ if (s->noheader < 0) {
+ s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */
+ }
+ s->status = s->noheader ? BUSY_STATE : INIT_STATE;
+ strm->adler = 1;
+ s->last_flush = Z_NO_FLUSH;
+
+ zlib_tr_init(s);
+ lm_init(s);
+
+ return Z_OK;
+}
+
+/* ========================================================================= */
+#if 0
+int zlib_deflateParams(
+ z_streamp strm,
+ int level,
+ int strategy
+)
+{
+ deflate_state *s;
+ compress_func func;
+ int err = Z_OK;
+
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ s = (deflate_state *) strm->state;
+
+ if (level == Z_DEFAULT_COMPRESSION) {
+ level = 6;
+ }
+ if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) {
+ return Z_STREAM_ERROR;
+ }
+ func = configuration_table[s->level].func;
+
+ if (func != configuration_table[level].func && strm->total_in != 0) {
+ /* Flush the last buffer: */
+ err = zlib_deflate(strm, Z_PARTIAL_FLUSH);
+ }
+ if (s->level != level) {
+ s->level = level;
+ s->max_lazy_match = configuration_table[level].max_lazy;
+ s->good_match = configuration_table[level].good_length;
+ s->nice_match = configuration_table[level].nice_length;
+ s->max_chain_length = configuration_table[level].max_chain;
+ }
+ s->strategy = strategy;
+ return err;
+}
+#endif /* 0 */
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+static void putShortMSB(
+ deflate_state *s,
+ uInt b
+)
+{
+ put_byte(s, (Byte)(b >> 8));
+ put_byte(s, (Byte)(b & 0xff));
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output goes
+ * through this function so some applications may wish to modify it
+ * to avoid allocating a large strm->next_out buffer and copying into it.
+ * (See also read_buf()).
+ */
+static void flush_pending(
+ z_streamp strm
+)
+{
+ deflate_state *s = (deflate_state *) strm->state;
+ unsigned len = s->pending;
+
+ if (len > strm->avail_out) len = strm->avail_out;
+ if (len == 0) return;
+
+ if (strm->next_out != NULL) {
+ memcpy(strm->next_out, s->pending_out, len);
+ strm->next_out += len;
+ }
+ s->pending_out += len;
+ strm->total_out += len;
+ strm->avail_out -= len;
+ s->pending -= len;
+ if (s->pending == 0) {
+ s->pending_out = s->pending_buf;
+ }
+}
+
+/* ========================================================================= */
+int zlib_deflate(
+ z_streamp strm,
+ int flush
+)
+{
+ int old_flush; /* value of flush param for previous deflate call */
+ deflate_state *s;
+
+ if (strm == NULL || strm->state == NULL ||
+ flush > Z_FINISH || flush < 0) {
+ return Z_STREAM_ERROR;
+ }
+ s = (deflate_state *) strm->state;
+
+ if ((strm->next_in == NULL && strm->avail_in != 0) ||
+ (s->status == FINISH_STATE && flush != Z_FINISH)) {
+ return Z_STREAM_ERROR;
+ }
+ if (strm->avail_out == 0) return Z_BUF_ERROR;
+
+ s->strm = strm; /* just in case */
+ old_flush = s->last_flush;
+ s->last_flush = flush;
+
+ /* Write the zlib header */
+ if (s->status == INIT_STATE) {
+
+ uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+ uInt level_flags = (s->level-1) >> 1;
+
+ if (level_flags > 3) level_flags = 3;
+ header |= (level_flags << 6);
+ if (s->strstart != 0) header |= PRESET_DICT;
+ header += 31 - (header % 31);
+
+ s->status = BUSY_STATE;
+ putShortMSB(s, header);
+
+ /* Save the adler32 of the preset dictionary: */
+ if (s->strstart != 0) {
+ putShortMSB(s, (uInt)(strm->adler >> 16));
+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
+ }
+ strm->adler = 1L;
+ }
+
+ /* Flush as much pending output as possible */
+ if (s->pending != 0) {
+ flush_pending(strm);
+ if (strm->avail_out == 0) {
+ /* Since avail_out is 0, deflate will be called again with
+ * more output space, but possibly with both pending and
+ * avail_in equal to zero. There won't be anything to do,
+ * but this is not an error situation so make sure we
+ * return OK instead of BUF_ERROR at next call of deflate:
+ */
+ s->last_flush = -1;
+ return Z_OK;
+ }
+
+ /* Make sure there is something to do and avoid duplicate consecutive
+ * flushes. For repeated and useless calls with Z_FINISH, we keep
+ * returning Z_STREAM_END instead of Z_BUF_ERROR.
+ */
+ } else if (strm->avail_in == 0 && flush <= old_flush &&
+ flush != Z_FINISH) {
+ return Z_BUF_ERROR;
+ }
+
+ /* User must not provide more input after the first FINISH: */
+ if (s->status == FINISH_STATE && strm->avail_in != 0) {
+ return Z_BUF_ERROR;
+ }
+
+ /* Start a new block or continue the current one.
+ */
+ if (strm->avail_in != 0 || s->lookahead != 0 ||
+ (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+ block_state bstate;
+
+ bstate = (*(configuration_table[s->level].func))(s, flush);
+
+ if (bstate == finish_started || bstate == finish_done) {
+ s->status = FINISH_STATE;
+ }
+ if (bstate == need_more || bstate == finish_started) {
+ if (strm->avail_out == 0) {
+ s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+ }
+ return Z_OK;
+ /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+ * of deflate should use the same flush parameter to make sure
+ * that the flush is complete. So we don't have to output an
+ * empty block here, this will be done at next call. This also
+ * ensures that for a very small output buffer, we emit at most
+ * one empty block.
+ */
+ }
+ if (bstate == block_done) {
+ if (flush == Z_PARTIAL_FLUSH) {
+ zlib_tr_align(s);
+ } else if (flush == Z_PACKET_FLUSH) {
+ /* Output just the 3-bit `stored' block type value,
+ but not a zero length. */
+ zlib_tr_stored_type_only(s);
+ } else { /* FULL_FLUSH or SYNC_FLUSH */
+ zlib_tr_stored_block(s, (char*)0, 0L, 0);
+ /* For a full flush, this empty block will be recognized
+ * as a special marker by inflate_sync().
+ */
+ if (flush == Z_FULL_FLUSH) {
+ CLEAR_HASH(s); /* forget history */
+ }
+ }
+ flush_pending(strm);
+ if (strm->avail_out == 0) {
+ s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+ return Z_OK;
+ }
+ }
+ }
+ Assert(strm->avail_out > 0, "bug2");
+
+ if (flush != Z_FINISH) return Z_OK;
+ if (s->noheader) return Z_STREAM_END;
+
+ /* Write the zlib trailer (adler32) */
+ putShortMSB(s, (uInt)(strm->adler >> 16));
+ putShortMSB(s, (uInt)(strm->adler & 0xffff));
+ flush_pending(strm);
+ /* If avail_out is zero, the application will call deflate again
+ * to flush the rest.
+ */
+ s->noheader = -1; /* write the trailer only once! */
+ return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* ========================================================================= */
+int zlib_deflateEnd(
+ z_streamp strm
+)
+{
+ int status;
+ deflate_state *s;
+
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ s = (deflate_state *) strm->state;
+
+ status = s->status;
+ if (status != INIT_STATE && status != BUSY_STATE &&
+ status != FINISH_STATE) {
+ return Z_STREAM_ERROR;
+ }
+
+ strm->state = NULL;
+
+ return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ */
+#if 0
+int zlib_deflateCopy (
+ z_streamp dest,
+ z_streamp source
+)
+{
+#ifdef MAXSEG_64K
+ return Z_STREAM_ERROR;
+#else
+ deflate_state *ds;
+ deflate_state *ss;
+ ush *overlay;
+ deflate_workspace *mem;
+
+
+ if (source == NULL || dest == NULL || source->state == NULL) {
+ return Z_STREAM_ERROR;
+ }
+
+ ss = (deflate_state *) source->state;
+
+ *dest = *source;
+
+ mem = (deflate_workspace *) dest->workspace;
+
+ ds = &(mem->deflate_memory);
+
+ dest->state = (struct internal_state *) ds;
+ *ds = *ss;
+ ds->strm = dest;
+
+ ds->window = (Byte *) mem->window_memory;
+ ds->prev = (Pos *) mem->prev_memory;
+ ds->head = (Pos *) mem->head_memory;
+ overlay = (ush *) mem->overlay_memory;
+ ds->pending_buf = (uch *) overlay;
+
+ memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+ memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos));
+ memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos));
+ memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+ ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+ ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+ ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+ ds->l_desc.dyn_tree = ds->dyn_ltree;
+ ds->d_desc.dyn_tree = ds->dyn_dtree;
+ ds->bl_desc.dyn_tree = ds->bl_tree;
+
+ return Z_OK;
+#endif
+}
+#endif /* 0 */
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read. All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+static int read_buf(
+ z_streamp strm,
+ Byte *buf,
+ unsigned size
+)
+{
+ unsigned len = strm->avail_in;
+
+ if (len > size) len = size;
+ if (len == 0) return 0;
+
+ strm->avail_in -= len;
+
+ if (!((deflate_state *)(strm->state))->noheader) {
+ strm->adler = zlib_adler32(strm->adler, strm->next_in, len);
+ }
+ memcpy(buf, strm->next_in, len);
+ strm->next_in += len;
+ strm->total_in += len;
+
+ return (int)len;
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+static void lm_init(
+ deflate_state *s
+)
+{
+ s->window_size = (ulg)2L*s->w_size;
+
+ CLEAR_HASH(s);
+
+ /* Set the default configuration parameters:
+ */
+ s->max_lazy_match = configuration_table[s->level].max_lazy;
+ s->good_match = configuration_table[s->level].good_length;
+ s->nice_match = configuration_table[s->level].nice_length;
+ s->max_chain_length = configuration_table[s->level].max_chain;
+
+ s->strstart = 0;
+ s->block_start = 0L;
+ s->lookahead = 0;
+ s->match_length = s->prev_length = MIN_MATCH-1;
+ s->match_available = 0;
+ s->ins_h = 0;
+}
+
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+static uInt longest_match(
+ deflate_state *s,
+ IPos cur_match /* current match */
+)
+{
+ unsigned chain_length = s->max_chain_length;/* max hash chain length */
+ register Byte *scan = s->window + s->strstart; /* current string */
+ register Byte *match; /* matched string */
+ register int len; /* length of current match */
+ int best_len = s->prev_length; /* best match length so far */
+ int nice_match = s->nice_match; /* stop if match long enough */
+ IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+ s->strstart - (IPos)MAX_DIST(s) : NIL;
+ /* Stop when cur_match becomes <= limit. To simplify the code,
+ * we prevent matches with the string of window index 0.
+ */
+ Pos *prev = s->prev;
+ uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+ /* Compare two bytes at a time. Note: this is not always beneficial.
+ * Try with and without -DUNALIGNED_OK to check.
+ */
+ register Byte *strend = s->window + s->strstart + MAX_MATCH - 1;
+ register ush scan_start = *(ush*)scan;
+ register ush scan_end = *(ush*)(scan+best_len-1);
+#else
+ register Byte *strend = s->window + s->strstart + MAX_MATCH;
+ register Byte scan_end1 = scan[best_len-1];
+ register Byte scan_end = scan[best_len];
+#endif
+
+ /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+ * It is easy to get rid of this optimization if necessary.
+ */
+ Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+ /* Do not waste too much time if we already have a good match: */
+ if (s->prev_length >= s->good_match) {
+ chain_length >>= 2;
+ }
+ /* Do not look for matches beyond the end of the input. This is necessary
+ * to make deflate deterministic.
+ */
+ if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+ do {
+ Assert(cur_match < s->strstart, "no future");
+ match = s->window + cur_match;
+
+ /* Skip to next match if the match length cannot increase
+ * or if the match length is less than 2:
+ */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+ /* This code assumes sizeof(unsigned short) == 2. Do not use
+ * UNALIGNED_OK if your compiler uses a different size.
+ */
+ if (*(ush*)(match+best_len-1) != scan_end ||
+ *(ush*)match != scan_start) continue;
+
+ /* It is not necessary to compare scan[2] and match[2] since they are
+ * always equal when the other bytes match, given that the hash keys
+ * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+ * strstart+3, +5, ... up to strstart+257. We check for insufficient
+ * lookahead only every 4th comparison; the 128th check will be made
+ * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+ * necessary to put more guard bytes at the end of the window, or
+ * to check more often for insufficient lookahead.
+ */
+ Assert(scan[2] == match[2], "scan[2]?");
+ scan++, match++;
+ do {
+ } while (*(ush*)(scan+=2) == *(ush*)(match+=2) &&
+ *(ush*)(scan+=2) == *(ush*)(match+=2) &&
+ *(ush*)(scan+=2) == *(ush*)(match+=2) &&
+ *(ush*)(scan+=2) == *(ush*)(match+=2) &&
+ scan < strend);
+ /* The funny "do {}" generates better code on most compilers */
+
+ /* Here, scan <= window+strstart+257 */
+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+ if (*scan == *match) scan++;
+
+ len = (MAX_MATCH - 1) - (int)(strend-scan);
+ scan = strend - (MAX_MATCH-1);
+
+#else /* UNALIGNED_OK */
+
+ if (match[best_len] != scan_end ||
+ match[best_len-1] != scan_end1 ||
+ *match != *scan ||
+ *++match != scan[1]) continue;
+
+ /* The check at best_len-1 can be removed because it will be made
+ * again later. (This heuristic is not always a win.)
+ * It is not necessary to compare scan[2] and match[2] since they
+ * are always equal when the other bytes match, given that
+ * the hash keys are equal and that HASH_BITS >= 8.
+ */
+ scan += 2, match++;
+ Assert(*scan == *match, "match[2]?");
+
+ /* We check for insufficient lookahead only every 8th comparison;
+ * the 256th check will be made at strstart+258.
+ */
+ do {
+ } while (*++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ *++scan == *++match && *++scan == *++match &&
+ scan < strend);
+
+ Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+ len = MAX_MATCH - (int)(strend - scan);
+ scan = strend - MAX_MATCH;
+
+#endif /* UNALIGNED_OK */
+
+ if (len > best_len) {
+ s->match_start = cur_match;
+ best_len = len;
+ if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+ scan_end = *(ush*)(scan+best_len-1);
+#else
+ scan_end1 = scan[best_len-1];
+ scan_end = scan[best_len];
+#endif
+ }
+ } while ((cur_match = prev[cur_match & wmask]) > limit
+ && --chain_length != 0);
+
+ if ((uInt)best_len <= s->lookahead) return best_len;
+ return s->lookahead;
+}
+
+#ifdef DEBUG_ZLIB
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+static void check_match(
+ deflate_state *s,
+ IPos start,
+ IPos match,
+ int length
+)
+{
+ /* check that the match is indeed a match */
+ if (memcmp((char *)s->window + match,
+ (char *)s->window + start, length) != EQUAL) {
+ fprintf(stderr, " start %u, match %u, length %d\n",
+ start, match, length);
+ do {
+ fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+ } while (--length != 0);
+ z_error("invalid match");
+ }
+ if (z_verbose > 1) {
+ fprintf(stderr,"\\[%d,%d]", start-match, length);
+ do { putc(s->window[start++], stderr); } while (--length != 0);
+ }
+}
+#else
+# define check_match(s, start, match, length)
+#endif
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ * At least one byte has been read, or avail_in == 0; reads are
+ * performed for at least two bytes (required for the zip translate_eol
+ * option -- not supported here).
+ */
+static void fill_window(
+ deflate_state *s
+)
+{
+ register unsigned n, m;
+ register Pos *p;
+ unsigned more; /* Amount of free space at the end of the window. */
+ uInt wsize = s->w_size;
+
+ do {
+ more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+ /* Deal with !@#$% 64K limit: */
+ if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+ more = wsize;
+
+ } else if (more == (unsigned)(-1)) {
+ /* Very unlikely, but possible on a 16 bit machine if strstart == 0
+ * and lookahead == 1 (input done one byte at a time)
+ */
+ more--;
+
+ /* If the window is almost full and there is insufficient lookahead,
+ * move the upper half to the lower one to make room in the upper half.
+ */
+ } else if (s->strstart >= wsize+MAX_DIST(s)) {
+
+ memcpy((char *)s->window, (char *)s->window+wsize,
+ (unsigned)wsize);
+ s->match_start -= wsize;
+ s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
+ s->block_start -= (long) wsize;
+
+ /* Slide the hash table (could be avoided with 32 bit values
+ at the expense of memory usage). We slide even when level == 0
+ to keep the hash table consistent if we switch back to level > 0
+ later. (Using level 0 permanently is not an optimal usage of
+ zlib, so we don't care about this pathological case.)
+ */
+ n = s->hash_size;
+ p = &s->head[n];
+ do {
+ m = *--p;
+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
+ } while (--n);
+
+ n = wsize;
+ p = &s->prev[n];
+ do {
+ m = *--p;
+ *p = (Pos)(m >= wsize ? m-wsize : NIL);
+ /* If n is not on any hash chain, prev[n] is garbage but
+ * its value will never be used.
+ */
+ } while (--n);
+ more += wsize;
+ }
+ if (s->strm->avail_in == 0) return;
+
+ /* If there was no sliding:
+ * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+ * more == window_size - lookahead - strstart
+ * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+ * => more >= window_size - 2*WSIZE + 2
+ * In the BIG_MEM or MMAP case (not yet supported),
+ * window_size == input_size + MIN_LOOKAHEAD &&
+ * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+ * Otherwise, window_size == 2*WSIZE so more >= 2.
+ * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+ */
+ Assert(more >= 2, "more < 2");
+
+ n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+ s->lookahead += n;
+
+ /* Initialize the hash value now that we have some input: */
+ if (s->lookahead >= MIN_MATCH) {
+ s->ins_h = s->window[s->strstart];
+ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+ Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+ }
+ /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+ * but this is not important since only literal bytes will be emitted.
+ */
+
+ } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ */
+#define FLUSH_BLOCK_ONLY(s, eof) { \
+ zlib_tr_flush_block(s, (s->block_start >= 0L ? \
+ (char *)&s->window[(unsigned)s->block_start] : \
+ NULL), \
+ (ulg)((long)s->strstart - s->block_start), \
+ (eof)); \
+ s->block_start = s->strstart; \
+ flush_pending(s->strm); \
+ Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit if necessary. */
+#define FLUSH_BLOCK(s, eof) { \
+ FLUSH_BLOCK_ONLY(s, eof); \
+ if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \
+}
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ * This function does not insert new strings in the dictionary since
+ * incompressible data is probably not useful. This function is used
+ * only for the level=0 compression option.
+ * NOTE: this function should be optimized to avoid extra copying from
+ * window to pending_buf.
+ */
+static block_state deflate_stored(
+ deflate_state *s,
+ int flush
+)
+{
+ /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
+ * to pending_buf_size, and each stored block has a 5 byte header:
+ */
+ ulg max_block_size = 0xffff;
+ ulg max_start;
+
+ if (max_block_size > s->pending_buf_size - 5) {
+ max_block_size = s->pending_buf_size - 5;
+ }
+
+ /* Copy as much as possible from input to output: */
+ for (;;) {
+ /* Fill the window as much as possible: */
+ if (s->lookahead <= 1) {
+
+ Assert(s->strstart < s->w_size+MAX_DIST(s) ||
+ s->block_start >= (long)s->w_size, "slide too late");
+
+ fill_window(s);
+ if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+ Assert(s->block_start >= 0L, "block gone");
+
+ s->strstart += s->lookahead;
+ s->lookahead = 0;
+
+ /* Emit a stored block if pending_buf will be full: */
+ max_start = s->block_start + max_block_size;
+ if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
+ /* strstart == 0 is possible when wraparound on 16-bit machine */
+ s->lookahead = (uInt)(s->strstart - max_start);
+ s->strstart = (uInt)max_start;
+ FLUSH_BLOCK(s, 0);
+ }
+ /* Flush if we may have to slide, otherwise block_start may become
+ * negative and the data will be gone:
+ */
+ if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
+ FLUSH_BLOCK(s, 0);
+ }
+ }
+ FLUSH_BLOCK(s, flush == Z_FINISH);
+ return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state.
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+static block_state deflate_fast(
+ deflate_state *s,
+ int flush
+)
+{
+ IPos hash_head = NIL; /* head of the hash chain */
+ int bflush; /* set if current block must be flushed */
+
+ for (;;) {
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the next match, plus MIN_MATCH bytes to insert the
+ * string following the next match.
+ */
+ if (s->lookahead < MIN_LOOKAHEAD) {
+ fill_window(s);
+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+ return need_more;
+ }
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+
+ /* Insert the string window[strstart .. strstart+2] in the
+ * dictionary, and set hash_head to the head of the hash chain:
+ */
+ if (s->lookahead >= MIN_MATCH) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+
+ /* Find the longest match, discarding those <= prev_length.
+ * At this point we have always match_length < MIN_MATCH
+ */
+ if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+ /* To simplify the code, we prevent matches with the string
+ * of window index 0 (in particular we have to avoid a match
+ * of the string with itself at the start of the input file).
+ */
+ if (s->strategy != Z_HUFFMAN_ONLY) {
+ s->match_length = longest_match (s, hash_head);
+ }
+ /* longest_match() sets match_start */
+ }
+ if (s->match_length >= MIN_MATCH) {
+ check_match(s, s->strstart, s->match_start, s->match_length);
+
+ bflush = zlib_tr_tally(s, s->strstart - s->match_start,
+ s->match_length - MIN_MATCH);
+
+ s->lookahead -= s->match_length;
+
+ /* Insert new strings in the hash table only if the match length
+ * is not too large. This saves time but degrades compression.
+ */
+ if (s->match_length <= s->max_insert_length &&
+ s->lookahead >= MIN_MATCH) {
+ s->match_length--; /* string at strstart already in hash table */
+ do {
+ s->strstart++;
+ INSERT_STRING(s, s->strstart, hash_head);
+ /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+ * always MIN_MATCH bytes ahead.
+ */
+ } while (--s->match_length != 0);
+ s->strstart++;
+ } else {
+ s->strstart += s->match_length;
+ s->match_length = 0;
+ s->ins_h = s->window[s->strstart];
+ UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+ Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+ /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+ * matter since it will be recomputed at next deflate call.
+ */
+ }
+ } else {
+ /* No match, output a literal byte */
+ Tracevv((stderr,"%c", s->window[s->strstart]));
+ bflush = zlib_tr_tally (s, 0, s->window[s->strstart]);
+ s->lookahead--;
+ s->strstart++;
+ }
+ if (bflush) FLUSH_BLOCK(s, 0);
+ }
+ FLUSH_BLOCK(s, flush == Z_FINISH);
+ return flush == Z_FINISH ? finish_done : block_done;
+}
+
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+static block_state deflate_slow(
+ deflate_state *s,
+ int flush
+)
+{
+ IPos hash_head = NIL; /* head of hash chain */
+ int bflush; /* set if current block must be flushed */
+
+ /* Process the input block. */
+ for (;;) {
+ /* Make sure that we always have enough lookahead, except
+ * at the end of the input file. We need MAX_MATCH bytes
+ * for the next match, plus MIN_MATCH bytes to insert the
+ * string following the next match.
+ */
+ if (s->lookahead < MIN_LOOKAHEAD) {
+ fill_window(s);
+ if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+ return need_more;
+ }
+ if (s->lookahead == 0) break; /* flush the current block */
+ }
+
+ /* Insert the string window[strstart .. strstart+2] in the
+ * dictionary, and set hash_head to the head of the hash chain:
+ */
+ if (s->lookahead >= MIN_MATCH) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+
+ /* Find the longest match, discarding those <= prev_length.
+ */
+ s->prev_length = s->match_length, s->prev_match = s->match_start;
+ s->match_length = MIN_MATCH-1;
+
+ if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+ s->strstart - hash_head <= MAX_DIST(s)) {
+ /* To simplify the code, we prevent matches with the string
+ * of window index 0 (in particular we have to avoid a match
+ * of the string with itself at the start of the input file).
+ */
+ if (s->strategy != Z_HUFFMAN_ONLY) {
+ s->match_length = longest_match (s, hash_head);
+ }
+ /* longest_match() sets match_start */
+
+ if (s->match_length <= 5 && (s->strategy == Z_FILTERED ||
+ (s->match_length == MIN_MATCH &&
+ s->strstart - s->match_start > TOO_FAR))) {
+
+ /* If prev_match is also MIN_MATCH, match_start is garbage
+ * but we will ignore the current match anyway.
+ */
+ s->match_length = MIN_MATCH-1;
+ }
+ }
+ /* If there was a match at the previous step and the current
+ * match is not better, output the previous match:
+ */
+ if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+ uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+ /* Do not insert strings in hash table beyond this. */
+
+ check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+ bflush = zlib_tr_tally(s, s->strstart -1 - s->prev_match,
+ s->prev_length - MIN_MATCH);
+
+ /* Insert in hash table all strings up to the end of the match.
+ * strstart-1 and strstart are already inserted. If there is not
+ * enough lookahead, the last two strings are not inserted in
+ * the hash table.
+ */
+ s->lookahead -= s->prev_length-1;
+ s->prev_length -= 2;
+ do {
+ if (++s->strstart <= max_insert) {
+ INSERT_STRING(s, s->strstart, hash_head);
+ }
+ } while (--s->prev_length != 0);
+ s->match_available = 0;
+ s->match_length = MIN_MATCH-1;
+ s->strstart++;
+
+ if (bflush) FLUSH_BLOCK(s, 0);
+
+ } else if (s->match_available) {
+ /* If there was no match at the previous position, output a
+ * single literal. If there was a match but the current match
+ * is longer, truncate the previous match to a single literal.
+ */
+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
+ if (zlib_tr_tally (s, 0, s->window[s->strstart-1])) {
+ FLUSH_BLOCK_ONLY(s, 0);
+ }
+ s->strstart++;
+ s->lookahead--;
+ if (s->strm->avail_out == 0) return need_more;
+ } else {
+ /* There is no previous match to compare with, wait for
+ * the next step to decide.
+ */
+ s->match_available = 1;
+ s->strstart++;
+ s->lookahead--;
+ }
+ }
+ Assert (flush != Z_NO_FLUSH, "no flush?");
+ if (s->match_available) {
+ Tracevv((stderr,"%c", s->window[s->strstart-1]));
+ zlib_tr_tally (s, 0, s->window[s->strstart-1]);
+ s->match_available = 0;
+ }
+ FLUSH_BLOCK(s, flush == Z_FINISH);
+ return flush == Z_FINISH ? finish_done : block_done;
+}
+
+int zlib_deflate_workspacesize(void)
+{
+ return sizeof(deflate_workspace);
+}
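
A sketch of the calling convention this API expects; the function name,
buffers, and error handling are all illustrative. zlib_deflateInit() is
the convenience wrapper linux/zlib.h layers over zlib_deflateInit2().
The caller allocates the workspace up front, which is the whole point of
this no-allocation variant of zlib:

	#include <linux/vmalloc.h>
	#include <linux/zlib.h>

	static int compress_buf(void *src, unsigned int slen,
				void *dst, unsigned int dlen)
	{
		struct z_stream_s stream;
		int ret = -EINVAL;

		stream.workspace = vmalloc(zlib_deflate_workspacesize());
		if (!stream.workspace)
			return -ENOMEM;

		if (zlib_deflateInit(&stream, Z_DEFAULT_COMPRESSION) != Z_OK)
			goto out;

		stream.next_in = src;
		stream.avail_in = slen;
		stream.next_out = dst;
		stream.avail_out = dlen;

		/* single shot: all input and output space supplied at once */
		if (zlib_deflate(&stream, Z_FINISH) == Z_STREAM_END)
			ret = stream.total_out;
		zlib_deflateEnd(&stream);
	out:
		vfree(stream.workspace);
		return ret;
	}
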
diff --git a/lib/zlib_deflate/deflate_syms.c b/lib/zlib_deflate/deflate_syms.c
new file mode 100644
index 00000000..ccfe25f3
--- /dev/null
+++ b/lib/zlib_deflate/deflate_syms.c
@@ -0,0 +1,18 @@
+/*
+ * linux/lib/zlib_deflate/deflate_syms.c
+ *
+ * Exported symbols for the deflate functionality.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/zlib.h>
+
+EXPORT_SYMBOL(zlib_deflate_workspacesize);
+EXPORT_SYMBOL(zlib_deflate);
+EXPORT_SYMBOL(zlib_deflateInit2);
+EXPORT_SYMBOL(zlib_deflateEnd);
+EXPORT_SYMBOL(zlib_deflateReset);
+MODULE_LICENSE("GPL");
diff --git a/lib/zlib_deflate/deftree.c b/lib/zlib_deflate/deftree.c
new file mode 100644
index 00000000..ddf34829
--- /dev/null
+++ b/lib/zlib_deflate/deftree.c
@@ -0,0 +1,1113 @@
+/* +++ trees.c */
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-1996 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * ALGORITHM
+ *
+ * The "deflation" process uses several Huffman trees. The more
+ * common source values are represented by shorter bit sequences.
+ *
+ * Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values). The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ * REFERENCES
+ *
+ * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ * Storer, James A.
+ * Data Compression: Methods and Theory, pp. 49-50.
+ * Computer Science Press, 1988. ISBN 0-7167-8156-5.
+ *
+ * Sedgewick, R.
+ * Algorithms, p290.
+ * Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */
+
+/* #include "deflate.h" */
+
+#include <linux/zutil.h>
+#include "defutil.h"
+
+#ifdef DEBUG_ZLIB
+# include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6 16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10 17
+/* repeat a zero length 3-10 times (3 bits of repeat count) */
+
+#define REPZ_11_138 18
+/* repeat a zero length 11-138 times (7 bits of repeat count) */
+
+static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+static const int extra_dbits[D_CODES] /* extra bits for each distance code */
+ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+static const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+static const uch bl_order[BL_CODES]
+ = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+#define Buf_size (8 * 2*sizeof(char))
+/* Number of bits used within bi_buf. (bi_buf might be implemented on
+ * more than 16 bits on some systems.)
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+static ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However,
+ * the codes 286 and 287 are needed to build a canonical tree (see zlib_tr_init
+ * below).
+ */
+
+static ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+static uch dist_code[512];
+/* distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+static uch length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+static int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+static int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+struct static_tree_desc_s {
+ const ct_data *static_tree; /* static tree or NULL */
+ const int *extra_bits; /* extra bits for each code or NULL */
+ int extra_base; /* base index for extra_bits */
+ int elems; /* max number of elements in the tree */
+ int max_length; /* max bit length for the codes */
+};
+
+static static_tree_desc static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+static static_tree_desc static_d_desc =
+{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
+
+static static_tree_desc static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+static void tr_static_init (void);
+static void init_block (deflate_state *s);
+static void pqdownheap (deflate_state *s, ct_data *tree, int k);
+static void gen_bitlen (deflate_state *s, tree_desc *desc);
+static void gen_codes (ct_data *tree, int max_code, ush *bl_count);
+static void build_tree (deflate_state *s, tree_desc *desc);
+static void scan_tree (deflate_state *s, ct_data *tree, int max_code);
+static void send_tree (deflate_state *s, ct_data *tree, int max_code);
+static int build_bl_tree (deflate_state *s);
+static void send_all_trees (deflate_state *s, int lcodes, int dcodes,
+ int blcodes);
+static void compress_block (deflate_state *s, ct_data *ltree,
+ ct_data *dtree);
+static void set_data_type (deflate_state *s);
+static unsigned bi_reverse (unsigned value, int length);
+static void bi_windup (deflate_state *s);
+static void bi_flush (deflate_state *s);
+static void copy_block (deflate_state *s, char *buf, unsigned len,
+ int header);
+
+#ifndef DEBUG_ZLIB
+# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+ /* Send a code of the given tree. c and tree must not have side effects */
+
+#else /* DEBUG_ZLIB */
+# define send_code(s, c, tree) \
+ { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+ send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+#define d_code(dist) \
+ ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects. dist_code[256] and dist_code[257] are never
+ * used.
+ */
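+
+/* Worked example (illustrative, not part of the original sources): for a
+ * match distance of 300, the caller passes dist == 299 after the "- 1"
+ * adjustment, so d_code(299) evaluates dist_code[256 + (299 >> 7)], i.e.
+ * dist_code[258], the entry indexed by the top bits of the distance.
+ */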
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef DEBUG_ZLIB
+static void send_bits (deflate_state *s, int value, int length);
+
+static void send_bits(
+ deflate_state *s,
+ int value, /* value to send */
+ int length /* number of bits */
+)
+{
+ Tracevv((stderr," l %2d v %4x ", length, value));
+ Assert(length > 0 && length <= 15, "invalid length");
+ s->bits_sent += (ulg)length;
+
+ /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+ * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
+ * unused bits in value.
+ */
+ if (s->bi_valid > (int)Buf_size - length) {
+ s->bi_buf |= (value << s->bi_valid);
+ put_short(s, s->bi_buf);
+ s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+ s->bi_valid += length - Buf_size;
+ } else {
+ s->bi_buf |= value << s->bi_valid;
+ s->bi_valid += length;
+ }
+}
+#else /* !DEBUG_ZLIB */
+
+#define send_bits(s, value, length) \
+{ int len = length;\
+ if (s->bi_valid > (int)Buf_size - len) {\
+ int val = value;\
+ s->bi_buf |= (val << s->bi_valid);\
+ put_short(s, s->bi_buf);\
+ s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+ s->bi_valid += len - Buf_size;\
+ } else {\
+ s->bi_buf |= (value) << s->bi_valid;\
+ s->bi_valid += len;\
+ }\
+}
+#endif /* DEBUG_ZLIB */
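+
+/* Worked example (illustrative, assumed values): with bi_valid == 13 and
+ * Buf_size == 16, send_bits(s, v, 5) takes the overflow branch: the low
+ * 3 bits of v top up bi_buf, the full 16-bit buffer is flushed with
+ * put_short(), and the remaining 2 bits of v become the new bi_buf with
+ * bi_valid == 13 + 5 - 16 == 2.
+ */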
+
+/* ===========================================================================
+ * Initialize the various 'constant' tables. In a multi-threaded environment,
+ * this function may be called by two threads concurrently, but this is
+ * harmless since both invocations do exactly the same thing.
+ */
+static void tr_static_init(void)
+{
+ static int static_init_done;
+ int n; /* iterates over tree elements */
+ int bits; /* bit counter */
+ int length; /* length value */
+ int code; /* code value */
+ int dist; /* distance index */
+ ush bl_count[MAX_BITS+1];
+ /* number of codes at each bit length for an optimal tree */
+
+ if (static_init_done) return;
+
+ /* Initialize the mapping length (0..255) -> length code (0..28) */
+ length = 0;
+ for (code = 0; code < LENGTH_CODES-1; code++) {
+ base_length[code] = length;
+ for (n = 0; n < (1<<extra_lbits[code]); n++) {
+ length_code[length++] = (uch)code;
+ }
+ }
+ Assert (length == 256, "tr_static_init: length != 256");
+ /* Note that the length 255 (match length 258) can be represented
+ * in two different ways: code 284 + 5 bits or code 285, so we
+ * overwrite length_code[255] to use the best encoding:
+ */
+ length_code[length-1] = (uch)code;
+
+ /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+ dist = 0;
+ for (code = 0 ; code < 16; code++) {
+ base_dist[code] = dist;
+ for (n = 0; n < (1<<extra_dbits[code]); n++) {
+ dist_code[dist++] = (uch)code;
+ }
+ }
+ Assert (dist == 256, "tr_static_init: dist != 256");
+ dist >>= 7; /* from now on, all distances are divided by 128 */
+ for ( ; code < D_CODES; code++) {
+ base_dist[code] = dist << 7;
+ for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+ dist_code[256 + dist++] = (uch)code;
+ }
+ }
+ Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+ /* Construct the codes of the static literal tree */
+ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+ n = 0;
+ while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+ while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+ while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+ while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+ /* Codes 286 and 287 do not exist, but we must include them in the
+ * tree construction to get a canonical Huffman tree (longest code
+ * all ones)
+ */
+ gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+ /* The static distance tree is trivial: */
+ for (n = 0; n < D_CODES; n++) {
+ static_dtree[n].Len = 5;
+ static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+ }
+ static_init_done = 1;
+}
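+
+/* Worked example of the tables built above (illustrative): a match of
+ * length 11 yields lc == 11 - MIN_MATCH == 8. Codes 0..7 each cover one
+ * length, so length_code[8] == 8 with base_length[8] == 8 and
+ * extra_lbits[8] == 1 (the extra bit is 0 here). The symbol emitted is
+ * length_code[lc] + LITERALS + 1 == 265, which is the deflate
+ * specification's code for match length 11.
+ */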
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void zlib_tr_init(
+ deflate_state *s
+)
+{
+ tr_static_init();
+
+ s->compressed_len = 0L;
+
+ s->l_desc.dyn_tree = s->dyn_ltree;
+ s->l_desc.stat_desc = &static_l_desc;
+
+ s->d_desc.dyn_tree = s->dyn_dtree;
+ s->d_desc.stat_desc = &static_d_desc;
+
+ s->bl_desc.dyn_tree = s->bl_tree;
+ s->bl_desc.stat_desc = &static_bl_desc;
+
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+ s->last_eob_len = 8; /* enough lookahead for inflate */
+#ifdef DEBUG_ZLIB
+ s->bits_sent = 0L;
+#endif
+
+ /* Initialize the first block of the first file: */
+ init_block(s);
+}
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+static void init_block(
+ deflate_state *s
+)
+{
+ int n; /* iterates over tree elements */
+
+ /* Initialize the trees. */
+ for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
+ for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
+ for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+ s->dyn_ltree[END_BLOCK].Freq = 1;
+ s->opt_len = s->static_len = 0L;
+ s->last_lit = s->matches = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Remove the smallest element from the heap and recreate the heap with
+ * one less element. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+ top = s->heap[SMALLEST]; \
+ s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+ pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares two subtrees, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+ (tree[n].Freq < tree[m].Freq || \
+ (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons).
+ */
+static void pqdownheap(
+ deflate_state *s,
+ ct_data *tree, /* the tree to restore */
+ int k /* node to move down */
+)
+{
+ int v = s->heap[k];
+ int j = k << 1; /* left son of k */
+ while (j <= s->heap_len) {
+ /* Set j to the smallest of the two sons: */
+ if (j < s->heap_len &&
+ smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
+ j++;
+ }
+ /* Exit if v is smaller than both sons */
+ if (smaller(tree, v, s->heap[j], s->depth)) break;
+
+ /* Exchange v with the smallest son */
+ s->heap[k] = s->heap[j]; k = j;
+
+ /* And continue down the tree, setting j to the left son of k */
+ j <<= 1;
+ }
+ s->heap[k] = v;
+}
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ * above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ * array bl_count contains the frequencies for each bit length.
+ * The length opt_len is updated; static_len is also updated if stree is
+ * not null.
+ */
+static void gen_bitlen(
+ deflate_state *s,
+ tree_desc *desc /* the tree descriptor */
+)
+{
+ ct_data *tree = desc->dyn_tree;
+ int max_code = desc->max_code;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ const int *extra = desc->stat_desc->extra_bits;
+ int base = desc->stat_desc->extra_base;
+ int max_length = desc->stat_desc->max_length;
+ int h; /* heap index */
+ int n, m; /* iterate over the tree elements */
+ int bits; /* bit length */
+ int xbits; /* extra bits */
+ ush f; /* frequency */
+ int overflow = 0; /* number of elements with bit length too large */
+
+ for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+ /* In a first pass, compute the optimal bit lengths (which may
+ * overflow in the case of the bit length tree).
+ */
+ tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+ for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+ n = s->heap[h];
+ bits = tree[tree[n].Dad].Len + 1;
+ if (bits > max_length) bits = max_length, overflow++;
+ tree[n].Len = (ush)bits;
+ /* We overwrite tree[n].Dad which is no longer needed */
+
+ if (n > max_code) continue; /* not a leaf node */
+
+ s->bl_count[bits]++;
+ xbits = 0;
+ if (n >= base) xbits = extra[n-base];
+ f = tree[n].Freq;
+ s->opt_len += (ulg)f * (bits + xbits);
+ if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+ }
+ if (overflow == 0) return;
+
+ Trace((stderr,"\nbit length overflow\n"));
+ /* This happens for example on obj2 and pic of the Calgary corpus */
+
+ /* Find the first bit length which could increase: */
+ do {
+ bits = max_length-1;
+ while (s->bl_count[bits] == 0) bits--;
+ s->bl_count[bits]--; /* move one leaf down the tree */
+ s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+ s->bl_count[max_length]--;
+ /* The brother of the overflow item also moves one step up,
+ * but this does not affect bl_count[max_length]
+ */
+ overflow -= 2;
+ } while (overflow > 0);
+
+ /* Now recompute all bit lengths, scanning in increasing frequency.
+ * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+ * lengths instead of fixing only the wrong ones. This idea is taken
+ * from 'ar' written by Haruhiko Okumura.)
+ */
+ for (bits = max_length; bits != 0; bits--) {
+ n = s->bl_count[bits];
+ while (n != 0) {
+ m = s->heap[--h];
+ if (m > max_code) continue;
+ if (tree[m].Len != (unsigned) bits) {
+ Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+ s->opt_len += ((long)bits - (long)tree[m].Len)
+ *(long)tree[m].Freq;
+ tree[m].Len = (ush)bits;
+ }
+ n--;
+ }
+ }
+}
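+
+/* Illustration of the repair loop above (assumed numbers): with
+ * max_length == 7 and a spare leaf at depth 6, one pass performs
+ * bl_count[6]--, bl_count[7] += 2 and bl_count[7]--, i.e. the 6-bit leaf
+ * is pushed down one level to make room for an overflowed code, and
+ * overflow decreases by 2 per pass.
+ */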
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ * zero code length.
+ */
+static void gen_codes(
+ ct_data *tree, /* the tree to decorate */
+ int max_code, /* largest code with non zero frequency */
+ ush *bl_count /* number of codes at each bit length */
+)
+{
+ ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+ ush code = 0; /* running code value */
+ int bits; /* bit index */
+ int n; /* code index */
+
+ /* The distribution counts are first used to generate the code values
+ * without bit reversal.
+ */
+ for (bits = 1; bits <= MAX_BITS; bits++) {
+ next_code[bits] = code = (code + bl_count[bits-1]) << 1;
+ }
+ /* Check that the bit counts in bl_count are consistent. The last code
+ * must be all ones.
+ */
+ Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+ "inconsistent bit counts");
+ Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+ for (n = 0; n <= max_code; n++) {
+ int len = tree[n].Len;
+ if (len == 0) continue;
+ /* Now reverse the bits */
+ tree[n].Code = bi_reverse(next_code[len]++, len);
+
+ Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+ n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+ }
+}
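+
+/* Illustration (the example from RFC 1951, not part of these sources):
+ * for code lengths (3,3,3,3,3,2,4,4), bl_count is {0,0,1,5,2}, giving
+ * next_code[2] == 0, next_code[3] == 2 and next_code[4] == 14; the 2-bit
+ * symbol gets code 00, the 3-bit symbols 010..110 and the 4-bit symbols
+ * 1110 and 1111 (before the bit reversal performed above).
+ */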
+
+/* ===========================================================================
+ * Construct one Huffman tree and assign the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ * and corresponding code. The length opt_len is updated; static_len is
+ * also updated if stree is not null. The field max_code is set.
+ */
+static void build_tree(
+ deflate_state *s,
+ tree_desc *desc /* the tree descriptor */
+)
+{
+ ct_data *tree = desc->dyn_tree;
+ const ct_data *stree = desc->stat_desc->static_tree;
+ int elems = desc->stat_desc->elems;
+ int n, m; /* iterate over heap elements */
+ int max_code = -1; /* largest code with non zero frequency */
+ int node; /* new node being created */
+
+ /* Construct the initial heap, with least frequent element in
+ * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+ * heap[0] is not used.
+ */
+ s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+ for (n = 0; n < elems; n++) {
+ if (tree[n].Freq != 0) {
+ s->heap[++(s->heap_len)] = max_code = n;
+ s->depth[n] = 0;
+ } else {
+ tree[n].Len = 0;
+ }
+ }
+
+ /* The pkzip format requires that at least one distance code exists,
+ * and that at least one bit should be sent even if there is only one
+ * possible code. So to avoid special checks later on we force at least
+ * two codes of non zero frequency.
+ */
+ while (s->heap_len < 2) {
+ node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+ tree[node].Freq = 1;
+ s->depth[node] = 0;
+ s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+ /* node is 0 or 1 so it does not have extra bits */
+ }
+ desc->max_code = max_code;
+
+ /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+ * establish sub-heaps of increasing lengths:
+ */
+ for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+ /* Construct the Huffman tree by repeatedly combining the least two
+ * frequent nodes.
+ */
+ node = elems; /* next internal node of the tree */
+ do {
+ pqremove(s, tree, n); /* n = node of least frequency */
+ m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+ s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+ s->heap[--(s->heap_max)] = m;
+
+ /* Create a new node father of n and m */
+ tree[node].Freq = tree[n].Freq + tree[m].Freq;
+ s->depth[node] = (uch) (max(s->depth[n], s->depth[m]) + 1);
+ tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+ if (tree == s->bl_tree) {
+ fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+ node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+ }
+#endif
+ /* and insert the new node in the heap */
+ s->heap[SMALLEST] = node++;
+ pqdownheap(s, tree, SMALLEST);
+
+ } while (s->heap_len >= 2);
+
+ s->heap[--(s->heap_max)] = s->heap[SMALLEST];
+
+ /* At this point, the fields freq and dad are set. We can now
+ * generate the bit lengths.
+ */
+ gen_bitlen(s, (tree_desc *)desc);
+
+ /* The field len is now set, we can generate the bit codes */
+ gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+static void scan_tree(
+ deflate_state *s,
+ ct_data *tree, /* the tree to be scanned */
+ int max_code /* and its largest code of non zero frequency */
+)
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = tree[0].Len; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ if (nextlen == 0) max_count = 138, min_count = 3;
+ tree[max_code+1].Len = (ush)0xffff; /* guard */
+
+ for (n = 0; n <= max_code; n++) {
+ curlen = nextlen; nextlen = tree[n+1].Len;
+ if (++count < max_count && curlen == nextlen) {
+ continue;
+ } else if (count < min_count) {
+ s->bl_tree[curlen].Freq += count;
+ } else if (curlen != 0) {
+ if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+ s->bl_tree[REP_3_6].Freq++;
+ } else if (count <= 10) {
+ s->bl_tree[REPZ_3_10].Freq++;
+ } else {
+ s->bl_tree[REPZ_11_138].Freq++;
+ }
+ count = 0; prevlen = curlen;
+ if (nextlen == 0) {
+ max_count = 138, min_count = 3;
+ } else if (curlen == nextlen) {
+ max_count = 6, min_count = 3;
+ } else {
+ max_count = 7, min_count = 4;
+ }
+ }
+}
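+
+/* Illustration (assumed input): a run of 30 consecutive zero lengths
+ * falls through to the final branch above and is tallied as a single
+ * REPZ_11_138 code; send_tree() below later emits that code followed by
+ * the 7-bit repeat count 30 - 11 == 19.
+ */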
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+static void send_tree(
+ deflate_state *s,
+ ct_data *tree, /* the tree to be scanned */
+ int max_code /* and its largest code of non zero frequency */
+)
+{
+ int n; /* iterates over all tree elements */
+ int prevlen = -1; /* last emitted length */
+ int curlen; /* length of current code */
+ int nextlen = tree[0].Len; /* length of next code */
+ int count = 0; /* repeat count of the current code */
+ int max_count = 7; /* max repeat count */
+ int min_count = 4; /* min repeat count */
+
+ /* tree[max_code+1].Len = -1; */ /* guard already set */
+ if (nextlen == 0) max_count = 138, min_count = 3;
+
+ for (n = 0; n <= max_code; n++) {
+ curlen = nextlen; nextlen = tree[n+1].Len;
+ if (++count < max_count && curlen == nextlen) {
+ continue;
+ } else if (count < min_count) {
+ do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+ } else if (curlen != 0) {
+ if (curlen != prevlen) {
+ send_code(s, curlen, s->bl_tree); count--;
+ }
+ Assert(count >= 3 && count <= 6, " 3_6?");
+ send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+ } else if (count <= 10) {
+ send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+ } else {
+ send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+ }
+ count = 0; prevlen = curlen;
+ if (nextlen == 0) {
+ max_count = 138, min_count = 3;
+ } else if (curlen == nextlen) {
+ max_count = 6, min_count = 3;
+ } else {
+ max_count = 7, min_count = 4;
+ }
+ }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+static int build_bl_tree(
+ deflate_state *s
+)
+{
+ int max_blindex; /* index of last bit length code of non zero freq */
+
+ /* Determine the bit length frequencies for literal and distance trees */
+ scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+ scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+ /* Build the bit length tree: */
+ build_tree(s, (tree_desc *)(&(s->bl_desc)));
+ /* opt_len now includes the length of the tree representations, except
+ * the lengths of the bit length codes and the 5+5+4 bits for the counts.
+ */
+
+ /* Determine the number of bit length codes to send. The pkzip format
+ * requires that at least 4 bit length codes be sent. (appnote.txt says
+ * 3 but the actual value used is 4.)
+ */
+ for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
+ if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
+ }
+ /* Update opt_len to include the bit length tree and counts */
+ s->opt_len += 3*(max_blindex+1) + 5+5+4;
+ Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+ s->opt_len, s->static_len));
+
+ return max_blindex;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+static void send_all_trees(
+ deflate_state *s,
+ int lcodes, /* number of literal/length codes */
+ int dcodes, /* number of distance codes */
+ int blcodes /* number of bit length codes */
+)
+{
+ int rank; /* index in bl_order */
+
+ Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+ Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+ "too many codes");
+ Tracev((stderr, "\nbl counts: "));
+ send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+ send_bits(s, dcodes-1, 5);
+ send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
+ for (rank = 0; rank < blcodes; rank++) {
+ Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+ send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
+ }
+ Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+ send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+ Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+ send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+ Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block
+ */
+void zlib_tr_stored_block(
+ deflate_state *s,
+ char *buf, /* input block */
+ ulg stored_len, /* length of input block */
+ int eof /* true if this is the last block for a file */
+)
+{
+ send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
+ s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+ s->compressed_len += (stored_len + 4) << 3;
+
+ copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
+}
+
+/* Send just the `stored block' type code without any length bytes or data.
+ */
+void zlib_tr_stored_type_only(
+ deflate_state *s
+)
+{
+ send_bits(s, (STORED_BLOCK << 1), 3);
+ bi_windup(s);
+ s->compressed_len = (s->compressed_len + 3) & ~7L;
+}
+
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ * The current inflate code requires 9 bits of lookahead. If the
+ * last two codes for the previous block (real code plus EOB) were coded
+ * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
+ * the last real code. In this case we send two empty static blocks instead
+ * of one. (There are no problems if the previous block is stored or fixed.)
+ * To simplify the code, we assume the worst case of last real code encoded
+ * on one bit only.
+ */
+void zlib_tr_align(
+ deflate_state *s
+)
+{
+ send_bits(s, STATIC_TREES<<1, 3);
+ send_code(s, END_BLOCK, static_ltree);
+ s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+ bi_flush(s);
+ /* Of the 10 bits for the empty block, we have already sent
+ * (10 - bi_valid) bits. The lookahead for the last real code (before
+ * the EOB of the previous block) was thus at least one plus the length
+ * of the EOB plus what we have just sent of the empty static block.
+ */
+ if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
+ send_bits(s, STATIC_TREES<<1, 3);
+ send_code(s, END_BLOCK, static_ltree);
+ s->compressed_len += 10L;
+ bi_flush(s);
+ }
+ s->last_eob_len = 7;
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and output the encoded block to the zip file. This function
+ * returns the total compressed length for the file so far.
+ */
+ulg zlib_tr_flush_block(
+ deflate_state *s,
+ char *buf, /* input block, or NULL if too old */
+ ulg stored_len, /* length of input block */
+ int eof /* true if this is the last block for a file */
+)
+{
+ ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+ int max_blindex = 0; /* index of last bit length code of non zero freq */
+
+ /* Build the Huffman trees unless a stored block is forced */
+ if (s->level > 0) {
+
+ /* Check if the file is ascii or binary */
+ if (s->data_type == Z_UNKNOWN) set_data_type(s);
+
+ /* Construct the literal and distance trees */
+ build_tree(s, (tree_desc *)(&(s->l_desc)));
+ Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+ s->static_len));
+
+ build_tree(s, (tree_desc *)(&(s->d_desc)));
+ Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+ s->static_len));
+ /* At this point, opt_len and static_len are the total bit lengths of
+ * the compressed block data, excluding the tree representations.
+ */
+
+ /* Build the bit length tree for the above two trees, and get the index
+ * in bl_order of the last bit length code to send.
+ */
+ max_blindex = build_bl_tree(s);
+
+ /* Determine the best encoding. Compute first the block length in bytes*/
+ opt_lenb = (s->opt_len+3+7)>>3;
+ static_lenb = (s->static_len+3+7)>>3;
+
+ Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+ opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+ s->last_lit));
+
+ if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+
+ } else {
+ Assert(buf != (char*)0, "lost buf");
+ opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+ }
+
+ /* If compression failed and this is the first and last block,
+ * and if the .zip file is seekable (so the local header can be rewritten),
+ * the whole file is transformed into a stored file:
+ */
+#ifdef STORED_FILE_OK
+# ifdef FORCE_STORED_FILE
+ if (eof && s->compressed_len == 0L) { /* force stored file */
+# else
+ if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) {
+# endif
+ /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */
+ if (buf == (char*)0) error ("block vanished");
+
+ copy_block(s, buf, (unsigned)stored_len, 0); /* without header */
+ s->compressed_len = stored_len << 3;
+ s->method = STORED;
+ } else
+#endif /* STORED_FILE_OK */
+
+#ifdef FORCE_STORED
+ if (buf != (char*)0) { /* force stored block */
+#else
+ if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+ /* 4: two words for the lengths */
+#endif
+ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+ * Otherwise we can't have processed more than WSIZE input bytes since
+ * the last block flush, because compression would have been
+ * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+ * transform a block into a stored block.
+ */
+ zlib_tr_stored_block(s, buf, stored_len, eof);
+
+#ifdef FORCE_STATIC
+ } else if (static_lenb >= 0) { /* force static trees */
+#else
+ } else if (static_lenb == opt_lenb) {
+#endif
+ send_bits(s, (STATIC_TREES<<1)+eof, 3);
+ compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
+ s->compressed_len += 3 + s->static_len;
+ } else {
+ send_bits(s, (DYN_TREES<<1)+eof, 3);
+ send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+ max_blindex+1);
+ compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
+ s->compressed_len += 3 + s->opt_len;
+ }
+ Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+ init_block(s);
+
+ if (eof) {
+ bi_windup(s);
+ s->compressed_len += 7; /* align on byte boundary */
+ }
+ Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+ s->compressed_len-7*eof));
+
+ return s->compressed_len >> 3;
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */
+int zlib_tr_tally(
+ deflate_state *s,
+ unsigned dist, /* distance of matched string */
+ unsigned lc /* match length-MIN_MATCH or unmatched char (if dist==0) */
+)
+{
+ s->d_buf[s->last_lit] = (ush)dist;
+ s->l_buf[s->last_lit++] = (uch)lc;
+ if (dist == 0) {
+ /* lc is the unmatched char */
+ s->dyn_ltree[lc].Freq++;
+ } else {
+ s->matches++;
+ /* Here, lc is the match length - MIN_MATCH */
+ dist--; /* dist = match distance - 1 */
+ Assert((ush)dist < (ush)MAX_DIST(s) &&
+ (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+ (ush)d_code(dist) < (ush)D_CODES, "zlib_tr_tally: bad match");
+
+ s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++;
+ s->dyn_dtree[d_code(dist)].Freq++;
+ }
+
+ /* Try to guess if it is profitable to stop the current block here */
+ if ((s->last_lit & 0xfff) == 0 && s->level > 2) {
+ /* Compute an upper bound for the compressed length */
+ ulg out_length = (ulg)s->last_lit*8L;
+ ulg in_length = (ulg)((long)s->strstart - s->block_start);
+ int dcode;
+ for (dcode = 0; dcode < D_CODES; dcode++) {
+ out_length += (ulg)s->dyn_dtree[dcode].Freq *
+ (5L+extra_dbits[dcode]);
+ }
+ out_length >>= 3;
+ Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+ s->last_lit, in_length, out_length,
+ 100L - out_length*100L/in_length));
+ if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+ }
+ return (s->last_lit == s->lit_bufsize-1);
+ /* We avoid equality with lit_bufsize because of wraparound at 64K
+ * on 16 bit machines and because stored blocks are restricted to
+ * 64K-1 bytes.
+ */
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+static void compress_block(
+ deflate_state *s,
+ ct_data *ltree, /* literal tree */
+ ct_data *dtree /* distance tree */
+)
+{
+ unsigned dist; /* distance of matched string */
+ int lc; /* match length or unmatched char (if dist == 0) */
+ unsigned lx = 0; /* running index in l_buf */
+ unsigned code; /* the code to send */
+ int extra; /* number of extra bits to send */
+
+ if (s->last_lit != 0) do {
+ dist = s->d_buf[lx];
+ lc = s->l_buf[lx++];
+ if (dist == 0) {
+ send_code(s, lc, ltree); /* send a literal byte */
+ Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+ } else {
+ /* Here, lc is the match length - MIN_MATCH */
+ code = length_code[lc];
+ send_code(s, code+LITERALS+1, ltree); /* send the length code */
+ extra = extra_lbits[code];
+ if (extra != 0) {
+ lc -= base_length[code];
+ send_bits(s, lc, extra); /* send the extra length bits */
+ }
+ dist--; /* dist is now the match distance - 1 */
+ code = d_code(dist);
+ Assert (code < D_CODES, "bad d_code");
+
+ send_code(s, code, dtree); /* send the distance code */
+ extra = extra_dbits[code];
+ if (extra != 0) {
+ dist -= base_dist[code];
+ send_bits(s, dist, extra); /* send the extra distance bits */
+ }
+ } /* literal or match pair ? */
+
+ /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+ Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
+
+ } while (lx < s->last_lit);
+
+ send_code(s, END_BLOCK, ltree);
+ s->last_eob_len = ltree[END_BLOCK].Len;
+}
+
+/* ===========================================================================
+ * Set the data type to ASCII or BINARY, using a crude approximation:
+ * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
+ * IN assertion: the fields freq of dyn_ltree are set and the total of all
+ * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
+ */
+static void set_data_type(
+ deflate_state *s
+)
+{
+ int n = 0;
+ unsigned ascii_freq = 0;
+ unsigned bin_freq = 0;
+ while (n < 7) bin_freq += s->dyn_ltree[n++].Freq;
+ while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq;
+ while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
+ s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
+}
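+
+/* Illustration (assumed counts): with 100 tallied bytes of which 21 are
+ * "binary" (<= 6 or >= 128), bin_freq == 21 > (79 >> 2) == 19, so the
+ * data is classified Z_BINARY -- the "more than 20%" rule above. With
+ * exactly 20 binary bytes, 20 > (80 >> 2) == 20 is false, giving Z_ASCII.
+ */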
+
+/* ===========================================================================
+ * Copy a stored block, storing first the length and its
+ * one's complement if requested.
+ */
+static void copy_block(
+ deflate_state *s,
+ char *buf, /* the input data */
+ unsigned len, /* its length */
+ int header /* true if block header must be written */
+)
+{
+ bi_windup(s); /* align on byte boundary */
+ s->last_eob_len = 8; /* enough lookahead for inflate */
+
+ if (header) {
+ put_short(s, (ush)len);
+ put_short(s, (ush)~len);
+#ifdef DEBUG_ZLIB
+ s->bits_sent += 2*16;
+#endif
+ }
+#ifdef DEBUG_ZLIB
+ s->bits_sent += (ulg)len<<3;
+#endif
+ /* bundle up the put_byte(s, *buf++) calls */
+ memcpy(&s->pending_buf[s->pending], buf, len);
+ s->pending += len;
+}
+
diff --git a/lib/zlib_deflate/defutil.h b/lib/zlib_deflate/defutil.h
new file mode 100644
index 00000000..6b15a909
--- /dev/null
+++ b/lib/zlib_deflate/defutil.h
@@ -0,0 +1,334 @@
+
+
+
+#define Assert(err, str)
+#define Trace(dummy)
+#define Tracev(dummy)
+#define Tracecv(err, dummy)
+#define Tracevv(dummy)
+
+
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS 256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES 30
+/* number of distance codes */
+
+#define BL_CODES 19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* All codes must not exceed MAX_BITS bits */
+
+#define INIT_STATE 42
+#define BUSY_STATE 113
+#define FINISH_STATE 666
+/* Stream status */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+ union {
+ ush freq; /* frequency count */
+ ush code; /* bit string */
+ } fc;
+ union {
+ ush dad; /* father node in Huffman tree */
+ ush len; /* length of bit string */
+ } dl;
+} ct_data;
+
+#define Freq fc.freq
+#define Code fc.code
+#define Dad dl.dad
+#define Len dl.len
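+
+/* Note (illustrative): the unions above work because the two uses do not
+ * overlap in time: tree[n].Freq and tree[n].Dad are live while the tree
+ * is being built, and the same storage is reused as tree[n].Code and
+ * tree[n].Len once gen_bitlen() and gen_codes() have run.
+ */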
+
+typedef struct static_tree_desc_s static_tree_desc;
+
+typedef struct tree_desc_s {
+ ct_data *dyn_tree; /* the dynamic tree */
+ int max_code; /* largest code with non zero frequency */
+ static_tree_desc *stat_desc; /* the corresponding static tree */
+} tree_desc;
+
+typedef ush Pos;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
+typedef struct deflate_state {
+ z_streamp strm; /* pointer back to this zlib stream */
+ int status; /* as the name implies */
+ Byte *pending_buf; /* output still pending */
+ ulg pending_buf_size; /* size of pending_buf */
+ Byte *pending_out; /* next pending byte to output to the stream */
+ int pending; /* nb of bytes in the pending buffer */
+ int noheader; /* suppress zlib header and adler32 */
+ Byte data_type; /* UNKNOWN, BINARY or ASCII */
+ Byte method; /* STORED (for zip only) or DEFLATED */
+ int last_flush; /* value of flush param for previous deflate call */
+
+ /* used by deflate.c: */
+
+ uInt w_size; /* LZ77 window size (32K by default) */
+ uInt w_bits; /* log2(w_size) (8..16) */
+ uInt w_mask; /* w_size - 1 */
+
+ Byte *window;
+ /* Sliding window. Input bytes are read into the second half of the window,
+ * and move to the first half later to keep a dictionary of at least wSize
+ * bytes. With this organization, matches are limited to a distance of
+ * wSize-MAX_MATCH bytes, but this ensures that IO is always
+ * performed with a length multiple of the block size. Also, it limits
+ * the window size to 64K, which is quite useful on MSDOS.
+ * To do: use the user input buffer as sliding window.
+ */
+
+ ulg window_size;
+ /* Actual size of window: 2*wSize, except when the user input buffer
+ * is directly used as sliding window.
+ */
+
+ Pos *prev;
+ /* Link to older string with same hash index. To limit the size of this
+ * array to 64K, this link is maintained only for the last 32K strings.
+ * An index in this array is thus a window index modulo 32K.
+ */
+
+ Pos *head; /* Heads of the hash chains or NIL. */
+
+ uInt ins_h; /* hash index of string to be inserted */
+ uInt hash_size; /* number of elements in hash table */
+ uInt hash_bits; /* log2(hash_size) */
+ uInt hash_mask; /* hash_size-1 */
+
+ uInt hash_shift;
+ /* Number of bits by which ins_h must be shifted at each input
+ * step. It must be such that after MIN_MATCH steps, the oldest
+ * byte no longer takes part in the hash key, that is:
+ * hash_shift * MIN_MATCH >= hash_bits
+ */
+
+ long block_start;
+ /* Window position at the beginning of the current output block. Gets
+ * negative when the window is moved backwards.
+ */
+
+ uInt match_length; /* length of best match */
+ IPos prev_match; /* previous match */
+ int match_available; /* set if previous match exists */
+ uInt strstart; /* start of string to insert */
+ uInt match_start; /* start of matching string */
+ uInt lookahead; /* number of valid bytes ahead in window */
+
+ uInt prev_length;
+ /* Length of the best match at previous step. Matches not greater than this
+ * are discarded. This is used in the lazy match evaluation.
+ */
+
+ uInt max_chain_length;
+ /* To speed up deflation, hash chains are never searched beyond this
+ * length. A higher limit improves compression ratio but degrades the
+ * speed.
+ */
+
+ uInt max_lazy_match;
+ /* Attempt to find a better match only when the current match is strictly
+ * smaller than this value. This mechanism is used only for compression
+ * levels >= 4.
+ */
+# define max_insert_length max_lazy_match
+ /* Insert new strings in the hash table only if the match length is not
+ * greater than this length. This saves time but degrades compression.
+ * max_insert_length is used only for compression levels <= 3.
+ */
+
+ int level; /* compression level (1..9) */
+ int strategy; /* favor or force Huffman coding*/
+
+ uInt good_match;
+ /* Use a faster search when the previous match is longer than this */
+
+ int nice_match; /* Stop searching when current match exceeds this */
+
+ /* used by trees.c: */
+ /* Didn't use ct_data typedef below to suppress compiler warning */
+ struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
+ struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+ struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
+
+ struct tree_desc_s l_desc; /* desc. for literal tree */
+ struct tree_desc_s d_desc; /* desc. for distance tree */
+ struct tree_desc_s bl_desc; /* desc. for bit length tree */
+
+ ush bl_count[MAX_BITS+1];
+ /* number of codes at each bit length for an optimal tree */
+
+ int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
+ int heap_len; /* number of elements in the heap */
+ int heap_max; /* element of largest frequency */
+ /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+ * The same heap array is used to build all trees.
+ */
+
+ uch depth[2*L_CODES+1];
+ /* Depth of each subtree used as tie breaker for trees of equal frequency
+ */
+
+ uch *l_buf; /* buffer for literals or lengths */
+
+ uInt lit_bufsize;
+ /* Size of match buffer for literals/lengths. There are 4 reasons for
+ * limiting lit_bufsize to 64K:
+ * - frequencies can be kept in 16 bit counters
+ * - if compression is not successful for the first block, all input
+ * data is still in the window so we can still emit a stored block even
+ * when input comes from standard input. (This can also be done for
+ * all blocks if lit_bufsize is not greater than 32K.)
+ * - if compression is not successful for a file smaller than 64K, we can
+ * even emit a stored file instead of a stored block (saving 5 bytes).
+ * This is applicable only for zip (not gzip or zlib).
+ * - creating new Huffman trees less frequently may not provide fast
+ * adaptation to changes in the input data statistics. (Take for
+ * example a binary file with poorly compressible code followed by
+ * a highly compressible string table.) Smaller buffer sizes give
+ * fast adaptation but have of course the overhead of transmitting
+ * trees more frequently.
+ * - I can't count above 4
+ */
+
+ uInt last_lit; /* running index in l_buf */
+
+ ush *d_buf;
+ /* Buffer for distances. To simplify the code, d_buf and l_buf have
+ * the same number of elements. To use different lengths, an extra flag
+ * array would be necessary.
+ */
+
+ ulg opt_len; /* bit length of current block with optimal trees */
+ ulg static_len; /* bit length of current block with static trees */
+ ulg compressed_len; /* total bit length of compressed file */
+ uInt matches; /* number of string matches in current block */
+ int last_eob_len; /* bit length of EOB code for last block */
+
+#ifdef DEBUG_ZLIB
+ ulg bits_sent; /* bit length of the compressed data */
+#endif
+
+ ush bi_buf;
+ /* Output buffer. bits are inserted starting at the bottom (least
+ * significant bits).
+ */
+ int bi_valid;
+ /* Number of valid bits in bi_buf. All bits above the last valid bit
+ * are always zero.
+ */
+
+} deflate_state;
+
+typedef struct deflate_workspace {
+ /* State memory for the deflator */
+ deflate_state deflate_memory;
+ Byte window_memory[2 * (1 << MAX_WBITS)];
+ Pos prev_memory[1 << MAX_WBITS];
+ Pos head_memory[1 << (MAX_MEM_LEVEL + 7)];
+ char overlay_memory[(1 << (MAX_MEM_LEVEL + 6)) * (sizeof(ush)+2)];
+} deflate_workspace;
+
+/* Output a byte on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */
+
+ /* in trees.c */
+void zlib_tr_init (deflate_state *s);
+int zlib_tr_tally (deflate_state *s, unsigned dist, unsigned lc);
+ulg zlib_tr_flush_block (deflate_state *s, char *buf, ulg stored_len,
+ int eof);
+void zlib_tr_align (deflate_state *s);
+void zlib_tr_stored_block (deflate_state *s, char *buf, ulg stored_len,
+ int eof);
+void zlib_tr_stored_type_only (deflate_state *);
+
+
+/* ===========================================================================
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+ put_byte(s, (uch)((w) & 0xff)); \
+ put_byte(s, (uch)((ush)(w) >> 8)); \
+}
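+
+/* Example (illustrative): put_short(s, 0x1234) appends the byte 0x34 and
+ * then 0x12, i.e. little-endian order, as required for the stored-block
+ * LEN/NLEN fields and for flushed bit-buffer contents.
+ */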
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+static inline unsigned bi_reverse(unsigned code, /* the value to invert */
+ int len) /* its bit length */
+{
+ register unsigned res = 0;
+ do {
+ res |= code & 1;
+ code >>= 1, res <<= 1;
+ } while (--len > 0);
+ return res >> 1;
+}
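+
+/* Example (illustrative): bi_reverse(0x4, 3) returns 0x1 -- binary 100
+ * read back as 001. The deflate bit stream stores Huffman codes least
+ * significant bit first, hence the reversal applied in gen_codes() and
+ * tr_static_init().
+ */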
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+static inline void bi_flush(deflate_state *s)
+{
+ if (s->bi_valid == 16) {
+ put_short(s, s->bi_buf);
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+ } else if (s->bi_valid >= 8) {
+ put_byte(s, (Byte)s->bi_buf);
+ s->bi_buf >>= 8;
+ s->bi_valid -= 8;
+ }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+static inline void bi_windup(deflate_state *s)
+{
+ if (s->bi_valid > 8) {
+ put_short(s, s->bi_buf);
+ } else if (s->bi_valid > 0) {
+ put_byte(s, (Byte)s->bi_buf);
+ }
+ s->bi_buf = 0;
+ s->bi_valid = 0;
+#ifdef DEBUG_ZLIB
+ s->bits_sent = (s->bits_sent+7) & ~7;
+#endif
+}
+
diff --git a/lib/zlib_inflate/Makefile b/lib/zlib_inflate/Makefile
new file mode 100644
index 00000000..49f8ce57
--- /dev/null
+++ b/lib/zlib_inflate/Makefile
@@ -0,0 +1,19 @@
+#
+# This is a modified version of zlib, which does all memory
+# allocation ahead of time.
+#
+# This is only the decompression; see zlib_deflate for
+# the compression.
+#
+# Decompression needs to be serialized for each memory
+# allocation.
+#
+# (The upsides of the simplification are that you can't get in
+# any nasty situations wrt memory management, and that the
+# uncompression can be done without blocking on allocation).
+#
+
+obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate.o
+
+zlib_inflate-objs := inffast.o inflate.o infutil.o \
+ inftrees.o inflate_syms.o
diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c
new file mode 100644
index 00000000..2c13ecc5
--- /dev/null
+++ b/lib/zlib_inflate/inffast.c
@@ -0,0 +1,363 @@
+/* inffast.c -- fast decoding
+ * Copyright (C) 1995-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include <linux/zutil.h>
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifndef ASMINF
+
+/* Allow machine dependent optimization for post-increment or pre-increment.
+ Based on testing to date,
+ Pre-increment preferred for:
+ - PowerPC G3 (Adler)
+ - MIPS R5000 (Randers-Pehrson)
+ Post-increment preferred for:
+ - none
+ No measurable difference:
+ - Pentium III (Anderson)
+ - M68060 (Nikl)
+ */
+union uu {
+ unsigned short us;
+ unsigned char b[2];
+};
+
+/* Endian-independent version */
+static inline unsigned short
+get_unaligned16(const unsigned short *p)
+{
+ union uu mm;
+ unsigned char *b = (unsigned char *)p;
+
+ mm.b[0] = b[0];
+ mm.b[1] = b[1];
+ return mm.us;
+}
+
+#ifdef POSTINC
+# define OFF 0
+# define PUP(a) *(a)++
+# define UP_UNALIGNED(a) get_unaligned16((a)++)
+#else
+# define OFF 1
+# define PUP(a) *++(a)
+# define UP_UNALIGNED(a) get_unaligned16(++(a))
+#endif
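+
+/* Illustration (not from the original sources): with the default
+ * pre-increment variant, in == strm->next_in - 1 and each PUP(in)
+ * advances the pointer before dereferencing, so the first byte read is
+ * the original *strm->next_in; OFF compensates for this initial offset
+ * when the pointers are copied back to the stream at the end.
+ */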
+
+/*
+ Decode literal, length, and distance codes and write out the resulting
+ literal and match bytes until either not enough input or output is
+ available, an end-of-block is encountered, or a data error is encountered.
+ When large enough input and output buffers are supplied to inflate(), for
+ example, a 16K input buffer and a 64K output buffer, more than 95% of the
+ inflate execution time is spent in this routine.
+
+ Entry assumptions:
+
+ state->mode == LEN
+ strm->avail_in >= 6
+ strm->avail_out >= 258
+ start >= strm->avail_out
+ state->bits < 8
+
+ On return, state->mode is one of:
+
+ LEN -- ran out of enough output space or enough available input
+ TYPE -- reached end of block code, inflate() to interpret next block
+ BAD -- error in block data
+
+ Notes:
+
+ - The maximum input bits used by a length/distance pair is 15 bits for the
+ length code, 5 bits for the length extra, 15 bits for the distance code,
+ and 13 bits for the distance extra. This totals 48 bits, or six bytes.
+ Therefore if strm->avail_in >= 6, then there is enough input to avoid
+ checking for available input while decoding.
+
+ - The maximum bytes that a single length/distance pair can output is 258
+ bytes, which is the maximum length that can be coded. inflate_fast()
+ requires strm->avail_out >= 258 for each loop to avoid checking for
+ output space.
+
+ - @start: inflate()'s starting value for strm->avail_out
+ */
+void inflate_fast(z_streamp strm, unsigned start)
+{
+ struct inflate_state *state;
+ const unsigned char *in; /* local strm->next_in */
+ const unsigned char *last; /* while in < last, enough input available */
+ unsigned char *out; /* local strm->next_out */
+ unsigned char *beg; /* inflate()'s initial strm->next_out */
+ unsigned char *end; /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+ unsigned dmax; /* maximum distance from zlib header */
+#endif
+ unsigned wsize; /* window size or zero if not using window */
+ unsigned whave; /* valid bytes in the window */
+ unsigned write; /* window write index */
+ unsigned char *window; /* allocated sliding window, if wsize != 0 */
+ unsigned long hold; /* local strm->hold */
+ unsigned bits; /* local strm->bits */
+ code const *lcode; /* local strm->lencode */
+ code const *dcode; /* local strm->distcode */
+ unsigned lmask; /* mask for first level of length codes */
+ unsigned dmask; /* mask for first level of distance codes */
+ code this; /* retrieved table entry */
+ unsigned op; /* code bits, operation, extra bits, or */
+ /* window position, window bytes to copy */
+ unsigned len; /* match length, unused bytes */
+ unsigned dist; /* match distance */
+ unsigned char *from; /* where to copy match from */
+
+ /* copy state to local variables */
+ state = (struct inflate_state *)strm->state;
+ in = strm->next_in - OFF;
+ last = in + (strm->avail_in - 5);
+ out = strm->next_out - OFF;
+ beg = out - (start - strm->avail_out);
+ end = out + (strm->avail_out - 257);
+#ifdef INFLATE_STRICT
+ dmax = state->dmax;
+#endif
+ wsize = state->wsize;
+ whave = state->whave;
+ write = state->write;
+ window = state->window;
+ hold = state->hold;
+ bits = state->bits;
+ lcode = state->lencode;
+ dcode = state->distcode;
+ lmask = (1U << state->lenbits) - 1;
+ dmask = (1U << state->distbits) - 1;
+
+ /* decode literals and length/distances until end-of-block or not enough
+ input data or output space */
+ do {
+ if (bits < 15) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ this = lcode[hold & lmask];
+ dolen:
+ op = (unsigned)(this.bits);
+ hold >>= op;
+ bits -= op;
+ op = (unsigned)(this.op);
+ if (op == 0) { /* literal */
+ PUP(out) = (unsigned char)(this.val);
+ }
+ else if (op & 16) { /* length base */
+ len = (unsigned)(this.val);
+ op &= 15; /* number of extra bits */
+ if (op) {
+ if (bits < op) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ len += (unsigned)hold & ((1U << op) - 1);
+ hold >>= op;
+ bits -= op;
+ }
+ if (bits < 15) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ this = dcode[hold & dmask];
+ dodist:
+ op = (unsigned)(this.bits);
+ hold >>= op;
+ bits -= op;
+ op = (unsigned)(this.op);
+ if (op & 16) { /* distance base */
+ dist = (unsigned)(this.val);
+ op &= 15; /* number of extra bits */
+ if (bits < op) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ if (bits < op) {
+ hold += (unsigned long)(PUP(in)) << bits;
+ bits += 8;
+ }
+ }
+ dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+ if (dist > dmax) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+#endif
+ hold >>= op;
+ bits -= op;
+ op = (unsigned)(out - beg); /* max distance in output */
+ if (dist > op) { /* see if copy from window */
+ op = dist - op; /* distance back in window */
+ if (op > whave) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+ from = window - OFF;
+ if (write == 0) { /* very common case */
+ from += wsize - op;
+ if (op < len) { /* some from window */
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = out - dist; /* rest from output */
+ }
+ }
+ else if (write < op) { /* wrap around window */
+ from += wsize + write - op;
+ op -= write;
+ if (op < len) { /* some from end of window */
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = window - OFF;
+ if (write < len) { /* some from start of window */
+ op = write;
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = out - dist; /* rest from output */
+ }
+ }
+ }
+ else { /* contiguous in window */
+ from += write - op;
+ if (op < len) { /* some from window */
+ len -= op;
+ do {
+ PUP(out) = PUP(from);
+ } while (--op);
+ from = out - dist; /* rest from output */
+ }
+ }
+ while (len > 2) {
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ PUP(out) = PUP(from);
+ len -= 3;
+ }
+ if (len) {
+ PUP(out) = PUP(from);
+ if (len > 1)
+ PUP(out) = PUP(from);
+ }
+ }
+ else {
+ unsigned short *sout;
+ unsigned long loops;
+
+ from = out - dist; /* copy direct from output */
+ /* minimum length is three */
+ /* Align out addr */
+ if (!((long)(out - 1 + OFF) & 1)) {
+ PUP(out) = PUP(from);
+ len--;
+ }
+ sout = (unsigned short *)(out - OFF);
+ if (dist > 2) {
+ unsigned short *sfrom;
+
+ sfrom = (unsigned short *)(from - OFF);
+ loops = len >> 1;
+ do
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ PUP(sout) = PUP(sfrom);
+#else
+ PUP(sout) = UP_UNALIGNED(sfrom);
+#endif
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ from = (unsigned char *)sfrom + OFF;
+ } else { /* dist == 1 or dist == 2 */
+ unsigned short pat16;
+
+ pat16 = *(sout-1+OFF);
+ if (dist == 1) {
+ union uu mm;
+ /* copy one char pattern to both bytes */
+ mm.us = pat16;
+ mm.b[0] = mm.b[1];
+ pat16 = mm.us;
+ }
+ loops = len >> 1;
+ do
+ PUP(sout) = pat16;
+ while (--loops);
+ out = (unsigned char *)sout + OFF;
+ }
+ if (len & 1)
+ PUP(out) = PUP(from);
+ }
+ }
+ else if ((op & 64) == 0) { /* 2nd level distance code */
+ this = dcode[this.val + (hold & ((1U << op) - 1))];
+ goto dodist;
+ }
+ else {
+ strm->msg = (char *)"invalid distance code";
+ state->mode = BAD;
+ break;
+ }
+ }
+ else if ((op & 64) == 0) { /* 2nd level length code */
+ this = lcode[this.val + (hold & ((1U << op) - 1))];
+ goto dolen;
+ }
+ else if (op & 32) { /* end-of-block */
+ state->mode = TYPE;
+ break;
+ }
+ else {
+ strm->msg = (char *)"invalid literal/length code";
+ state->mode = BAD;
+ break;
+ }
+ } while (in < last && out < end);
+
+ /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+ len = bits >> 3;
+ in -= len;
+ bits -= len << 3;
+ hold &= (1U << bits) - 1;
+
+ /* update state and return */
+ strm->next_in = in + OFF;
+ strm->next_out = out + OFF;
+ strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+ strm->avail_out = (unsigned)(out < end ?
+ 257 + (end - out) : 257 - (out - end));
+ state->hold = hold;
+ state->bits = bits;
+ return;
+}
+
+/*
+ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+ - Using bit fields for code structure
+ - Different op definition to avoid & for extra bits (do & for table bits)
+ - Three separate decoding do-loops for direct, window, and write == 0
+ - Special case for distance > 1 copies to do overlapped load and store copy
+ - Explicit branch predictions (based on measured branch probabilities)
+ - Deferring the match copy and interspersing it with the decoding of subsequent codes
+ - Swapping literal/length else
+ - Swapping window/direct else
+ - Larger unrolled copy loops (three is about right)
+ - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
diff --git a/lib/zlib_inflate/inffast.h b/lib/zlib_inflate/inffast.h
new file mode 100644
index 00000000..40315d9f
--- /dev/null
+++ b/lib/zlib_inflate/inffast.h
@@ -0,0 +1,11 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+void inflate_fast (z_streamp strm, unsigned start);
diff --git a/lib/zlib_inflate/inffixed.h b/lib/zlib_inflate/inffixed.h
new file mode 100644
index 00000000..75ed4b59
--- /dev/null
+++ b/lib/zlib_inflate/inffixed.h
@@ -0,0 +1,94 @@
+ /* inffixed.h -- table for decoding fixed codes
+ * Generated automatically by makefixed().
+ */
+
+ /* WARNING: this file should *not* be used by applications. It
+ is part of the implementation of the compression library and
+ is subject to change. Applications should only use zlib.h.
+ */
+
+ static const code lenfix[512] = {
+ {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
+ {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
+ {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
+ {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
+ {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
+ {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
+ {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
+ {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
+ {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
+ {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
+ {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
+ {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
+ {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
+ {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
+ {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
+ {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
+ {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
+ {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
+ {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
+ {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
+ {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
+ {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
+ {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
+ {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
+ {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
+ {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
+ {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
+ {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
+ {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
+ {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
+ {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
+ {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
+ {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
+ {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
+ {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
+ {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
+ {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
+ {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
+ {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
+ {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
+ {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
+ {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
+ {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
+ {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
+ {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
+ {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
+ {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
+ {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
+ {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
+ {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
+ {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
+ {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
+ {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
+ {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
+ {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
+ {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
+ {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
+ {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
+ {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
+ {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
+ {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
+ {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
+ {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
+ {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
+ {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
+ {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
+ {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
+ {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
+ {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
+ {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
+ {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
+ {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
+ {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
+ {0,9,255}
+ };
+
+ static const code distfix[32] = {
+ {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
+ {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
+ {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
+ {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
+ {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
+ {22,5,193},{64,5,0}
+ };
diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c
new file mode 100644
index 00000000..f5ce87b0
--- /dev/null
+++ b/lib/zlib_inflate/inflate.c
@@ -0,0 +1,918 @@
+/* inflate.c -- zlib decompression
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Based on zlib 1.2.3 but modified for the Linux Kernel by
+ * Richard Purdie <richard@openedhand.com>
+ *
+ * Changes mainly for static instead of dynamic memory allocation
+ *
+ */
+
+#include <linux/zutil.h>
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+#include "infutil.h"
+
+int zlib_inflate_workspacesize(void)
+{
+ return sizeof(struct inflate_workspace);
+}
+
+int zlib_inflateReset(z_streamp strm)
+{
+ struct inflate_state *state;
+
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state *)strm->state;
+ strm->total_in = strm->total_out = state->total = 0;
+ strm->msg = NULL;
+ strm->adler = 1; /* to support ill-conceived Java test suite */
+ state->mode = HEAD;
+ state->last = 0;
+ state->havedict = 0;
+ state->dmax = 32768U;
+ state->hold = 0;
+ state->bits = 0;
+ state->lencode = state->distcode = state->next = state->codes;
+
+ /* Initialise Window */
+ state->wsize = 1U << state->wbits;
+ state->write = 0;
+ state->whave = 0;
+
+ return Z_OK;
+}
+
+#if 0
+int zlib_inflatePrime(z_streamp strm, int bits, int value)
+{
+ struct inflate_state *state;
+
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state *)strm->state;
+ if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
+ value &= (1L << bits) - 1;
+ state->hold += value << state->bits;
+ state->bits += bits;
+ return Z_OK;
+}
+#endif
+
+int zlib_inflateInit2(z_streamp strm, int windowBits)
+{
+ struct inflate_state *state;
+
+ if (strm == NULL) return Z_STREAM_ERROR;
+ strm->msg = NULL; /* in case we return an error */
+
+ state = &WS(strm)->inflate_state;
+ strm->state = (struct internal_state *)state;
+
+ if (windowBits < 0) {
+ state->wrap = 0;
+ windowBits = -windowBits;
+ }
+ else {
+ state->wrap = (windowBits >> 4) + 1;
+ }
+ if (windowBits < 8 || windowBits > 15) {
+ return Z_STREAM_ERROR;
+ }
+ state->wbits = (unsigned)windowBits;
+ state->window = &WS(strm)->working_window[0];
+
+ return zlib_inflateReset(strm);
+}
+
+/*
+ Return state with length and distance decoding tables and index sizes set to
+ fixed code decoding. This returns fixed tables from inffixed.h.
+ */
+static void zlib_fixedtables(struct inflate_state *state)
+{
+# include "inffixed.h"
+ state->lencode = lenfix;
+ state->lenbits = 9;
+ state->distcode = distfix;
+ state->distbits = 5;
+}
+
+
+/*
+ Update the window with the last wsize (normally 32K) bytes written before
+ returning. This is only called when a window is already in use, or when
+ output has been written during this inflate call, but the end of the deflate
+ stream has not been reached yet. It is also called to copy dictionary data
+ into the window when a dictionary is loaded.
+
+ Providing output buffers larger than 32K to inflate() should provide a speed
+ advantage, since only the last 32K of output is copied to the sliding window
+ upon return from inflate(), and since all distances after the first 32K of
+ output will fall in the output data, making match copies simpler and faster.
+ The advantage may be dependent on the size of the processor's data caches.
+ */
+static void zlib_updatewindow(z_streamp strm, unsigned out)
+{
+ struct inflate_state *state;
+ unsigned copy, dist;
+
+ state = (struct inflate_state *)strm->state;
+
+ /* copy state->wsize or less output bytes into the circular window */
+ copy = out - strm->avail_out;
+ if (copy >= state->wsize) {
+ memcpy(state->window, strm->next_out - state->wsize, state->wsize);
+ state->write = 0;
+ state->whave = state->wsize;
+ }
+ else {
+ dist = state->wsize - state->write;
+ if (dist > copy) dist = copy;
+ memcpy(state->window + state->write, strm->next_out - copy, dist);
+ copy -= dist;
+ if (copy) {
+ memcpy(state->window, strm->next_out - copy, copy);
+ state->write = copy;
+ state->whave = state->wsize;
+ }
+ else {
+ state->write += dist;
+ if (state->write == state->wsize) state->write = 0;
+ if (state->whave < state->wsize) state->whave += dist;
+ }
+ }
+}
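The else branch is an ordinary circular-buffer append split at the wrap point. A minimal standalone version of that bookkeeping (toy 8-byte window, hypothetical names):

#include <stdio.h>
#include <string.h>

#define WSIZE 8                 /* tiny window, for illustration only */

static unsigned char window[WSIZE];
static unsigned wr;             /* next write index (state->write) */
static unsigned have;           /* valid bytes so far (state->whave) */

/* Append len recent output bytes to the circular history, keeping
 * only the most recent WSIZE; mirrors the else branch above. */
static void ring_write(const unsigned char *buf, unsigned len)
{
    unsigned dist;

    if (len >= WSIZE) {         /* whole window replaced at once */
        memcpy(window, buf + len - WSIZE, WSIZE);
        wr = 0;
        have = WSIZE;
        return;
    }
    dist = WSIZE - wr;          /* room left before the wrap point */
    if (dist > len)
        dist = len;
    memcpy(window + wr, buf, dist);
    if (len - dist) {           /* wrapped: remainder goes to front */
        memcpy(window, buf + dist, len - dist);
        wr = len - dist;
        have = WSIZE;
    } else {
        wr = (wr + dist) % WSIZE;
        if (have < WSIZE)
            have += dist;
    }
}

int main(void)
{
    ring_write((const unsigned char *)"abcdef", 6);
    ring_write((const unsigned char *)"XYZ", 3);
    printf("%.8s wr=%u have=%u\n", window, wr, have);
    return 0;                   /* prints "ZbcdefXY wr=1 have=8" */
}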
+
+
+/*
+ * At the end of a Deflate-compressed PPP packet, we expect to have seen
+ * a `stored' block type value but not the (zero) length bytes.
+ */
+/*
+ Returns true if inflate is currently at the end of a block generated by
+ Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
+ implementation to provide an additional safety check. PPP uses
+ Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
+ block. When decompressing, PPP checks that at the end of input packet,
+ inflate is waiting for these length bytes.
+ */
+static int zlib_inflateSyncPacket(z_streamp strm)
+{
+ struct inflate_state *state;
+
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state *)strm->state;
+
+ if (state->mode == STORED && state->bits == 0) {
+ state->mode = TYPE;
+ return Z_OK;
+ }
+ return Z_DATA_ERROR;
+}
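In other words, Z_SYNC_FLUSH terminates a PPP packet with an empty stored block whose four length bytes (00 00 ff ff) are stripped on the wire, so a well-formed packet leaves inflate parked in STORED with an empty bit buffer. A hedged caller sketch using the kernel's Z_PACKET_FLUSH extension (hypothetical wrapper):

#include <linux/zlib.h>

/* Hypothetical PPP-style caller. With Z_PACKET_FLUSH (tested near the
 * bottom of zlib_inflate()), running out of input with output space
 * left hands control to zlib_inflateSyncPacket(), which succeeds only
 * if the stream stopped exactly where the stripped length bytes of
 * the empty stored block would begin. */
static int ppp_inflate_packet(z_streamp strm, unsigned char *in,
                              unsigned inlen, unsigned char *out,
                              unsigned outlen)
{
    strm->next_in = in;
    strm->avail_in = inlen;
    strm->next_out = out;
    strm->avail_out = outlen;

    if (zlib_inflate(strm, Z_PACKET_FLUSH) != Z_OK)
        return -1;                      /* truncated or corrupt */
    return outlen - strm->avail_out;    /* decompressed length */
}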
+
+/* Macros for inflate(): */
+
+/* check function to use adler32() for zlib or crc32() for gzip */
+#define UPDATE(check, buf, len) zlib_adler32(check, buf, len)
+
+/* Load registers with state in inflate() for speed */
+#define LOAD() \
+ do { \
+ put = strm->next_out; \
+ left = strm->avail_out; \
+ next = strm->next_in; \
+ have = strm->avail_in; \
+ hold = state->hold; \
+ bits = state->bits; \
+ } while (0)
+
+/* Restore state from registers in inflate() */
+#define RESTORE() \
+ do { \
+ strm->next_out = put; \
+ strm->avail_out = left; \
+ strm->next_in = next; \
+ strm->avail_in = have; \
+ state->hold = hold; \
+ state->bits = bits; \
+ } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+ do { \
+ hold = 0; \
+ bits = 0; \
+ } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflate()
+ if there is no input available. */
+#define PULLBYTE() \
+ do { \
+ if (have == 0) goto inf_leave; \
+ have--; \
+ hold += (unsigned long)(*next++) << bits; \
+ bits += 8; \
+ } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator. If there is
+ not enough available input to do that, then return from inflate(). */
+#define NEEDBITS(n) \
+ do { \
+ while (bits < (unsigned)(n)) \
+ PULLBYTE(); \
+ } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+ ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+ do { \
+ hold >>= (n); \
+ bits -= (unsigned)(n); \
+ } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+ do { \
+ hold >>= bits & 7; \
+ bits -= bits & 7; \
+ } while (0)
+
+/* Reverse the bytes in a 32-bit value */
+#define REVERSE(q) \
+ ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+ (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
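These macros manage an LSB-first accumulator: PULLBYTE() shifts each new byte in above the bits already held, and BITS()/DROPBITS() peel fields off the bottom. A self-contained demonstration of the same discipline (hypothetical two-byte input):

#include <stdio.h>

int main(void)
{
    /* two input bytes: 0b10110100, 0b00000110 */
    const unsigned char data[] = { 0xb4, 0x06 };
    unsigned long hold = 0;     /* bit buffer, as in inflate() */
    unsigned bits = 0;          /* bits currently in hold */
    unsigned next = 0;

    /* NEEDBITS(11): pull whole bytes until 11 bits are available */
    while (bits < 11) {
        hold += (unsigned long)data[next++] << bits;
        bits += 8;
    }

    /* BITS(3): low three bits, here 0b100 = 4 */
    printf("3-bit field: %lu\n", hold & 7);

    /* DROPBITS(3), then BITS(8): next eight bits, 0b11010110 = 0xd6 */
    hold >>= 3;
    bits -= 3;
    printf("8-bit field: 0x%02lx\n", hold & 0xff);
    return 0;
}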
+
+/*
+ inflate() uses a state machine to process as much input data and generate as
+ much output data as possible before returning. The state machine is
+ structured roughly as follows:
+
+ for (;;) switch (state) {
+ ...
+ case STATEn:
+ if (not enough input data or output space to make progress)
+ return;
+ ... make progress ...
+ state = STATEm;
+ break;
+ ...
+ }
+
+ so when inflate() is called again, the same case is attempted again, and
+ if the appropriate resources are provided, the machine proceeds to the
+ next state. The NEEDBITS() macro is usually the way the state evaluates
+ whether it can proceed or should return. NEEDBITS() does the return if
+ the requested bits are not available. The typical use of the BITS macros
+ is:
+
+ NEEDBITS(n);
+ ... do something with BITS(n) ...
+ DROPBITS(n);
+
+ where NEEDBITS(n) either returns from inflate() if there isn't enough
+ input left to load n bits into the accumulator, or it continues. BITS(n)
+ gives the low n bits in the accumulator. When done, DROPBITS(n) drops
+ the low n bits off the accumulator. INITBITS() clears the accumulator
+ and sets the number of available bits to zero. BYTEBITS() discards just
+ enough bits to put the accumulator on a byte boundary. After BYTEBITS()
+ and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+ NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+ if there is no input available. The decoding of variable length codes uses
+ PULLBYTE() directly in order to pull just enough bytes to decode the next
+ code, and no more.
+
+ Some states loop until they get enough input, making sure that enough
+ state information is maintained to continue the loop where it left off
+ if NEEDBITS() returns in the loop. For example, want, need, and keep
+ would all have to actually be part of the saved state in case NEEDBITS()
+ returns:
+
+ case STATEw:
+ while (want < need) {
+ NEEDBITS(n);
+ keep[want++] = BITS(n);
+ DROPBITS(n);
+ }
+ state = STATEx;
+ case STATEx:
+
+ As shown above, if the next state is also the next case, then the break
+ is omitted.
+
+ A state may also return if there is not enough output space available to
+ complete that state. Those states are copying stored data, writing a
+ literal byte, and copying a matching string.
+
+ When returning, a "goto inf_leave" is used to update the total counters,
+ update the check value, and determine whether any progress has been made
+ during that inflate() call in order to return the proper return code.
+ Progress is defined as a change in either strm->avail_in or strm->avail_out.
+ When there is a window, goto inf_leave will update the window with the last
+ output written. If a goto inf_leave occurs in the middle of decompression
+ and there is no window currently, goto inf_leave will create one and copy
+ output to the window for the next call of inflate().
+
+ In this implementation, the flush parameter of inflate() only affects the
+ return code (per zlib.h). inflate() always writes as much as possible to
+ strm->next_out, given the space available and the provided input--the effect
+ documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
+ the allocation of and copying into a sliding window until necessary, which
+ provides the effect documented in zlib.h for Z_FINISH when the entire input
+ stream is available. So the only thing the flush parameter actually does is:
+ when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
+ will return Z_BUF_ERROR if it has not reached the end of the stream.
+ */
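Stripped of the deflate specifics, the resumable structure described above reduces to a few lines. A generic sketch (illustrative names, not the kernel function):

/* Generic sketch of the resumable state machine described above.
 * State lives in a struct so a call can return mid-parse and the
 * next call resumes at the same case. */
enum pstate { WANT_HDR, WANT_BODY, FINISHED };

struct parser {
    enum pstate mode;   /* which case resumes on the next call */
    unsigned need;      /* bytes still owed to the current state */
};

/* Returns 0 when a record is fully parsed, 1 when more input is
 * needed; *in/*len are advanced past whatever was consumed. */
static int parse(struct parser *p, const unsigned char **in, unsigned *len)
{
    for (;;)
        switch (p->mode) {
        case WANT_HDR:
            if (*len == 0)
                return 1;           /* no input: resume here later */
            p->need = *(*in)++;     /* header byte = body length */
            (*len)--;
            p->mode = WANT_BODY;
            break;
        case WANT_BODY:
            while (p->need) {       /* `need` is the saved loop state */
                if (*len == 0)
                    return 1;       /* mid-body: resume here later */
                (*in)++; (*len)--;
                p->need--;
            }
            p->mode = FINISHED;
            /* fall through, as when the next state is the next case */
        case FINISHED:
            return 0;
        }
}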
+
+int zlib_inflate(z_streamp strm, int flush)
+{
+ struct inflate_state *state;
+ const unsigned char *next; /* next input */
+ unsigned char *put; /* next output */
+ unsigned have, left; /* available input and output */
+ unsigned long hold; /* bit buffer */
+ unsigned bits; /* bits in bit buffer */
+ unsigned in, out; /* save starting available input and output */
+ unsigned copy; /* number of stored or match bytes to copy */
+ unsigned char *from; /* where to copy match bytes from */
+ code this; /* current decoding table entry */
+ code last; /* parent table entry */
+ unsigned len; /* length to copy for repeats, bits to drop */
+ int ret; /* return code */
+ static const unsigned short order[19] = /* permutation of code lengths */
+ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+ /* Do not check for strm->next_out == NULL here as ppc zImage
+ inflates to strm->next_out = 0 */
+
+ if (strm == NULL || strm->state == NULL ||
+ (strm->next_in == NULL && strm->avail_in != 0))
+ return Z_STREAM_ERROR;
+
+ state = (struct inflate_state *)strm->state;
+
+ if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */
+ LOAD();
+ in = have;
+ out = left;
+ ret = Z_OK;
+ for (;;)
+ switch (state->mode) {
+ case HEAD:
+ if (state->wrap == 0) {
+ state->mode = TYPEDO;
+ break;
+ }
+ NEEDBITS(16);
+ if (((BITS(8) << 8) + (hold >> 8)) % 31) {
+ strm->msg = (char *)"incorrect header check";
+ state->mode = BAD;
+ break;
+ }
+ if (BITS(4) != Z_DEFLATED) {
+ strm->msg = (char *)"unknown compression method";
+ state->mode = BAD;
+ break;
+ }
+ DROPBITS(4);
+ len = BITS(4) + 8;
+ if (len > state->wbits) {
+ strm->msg = (char *)"invalid window size";
+ state->mode = BAD;
+ break;
+ }
+ state->dmax = 1U << len;
+ strm->adler = state->check = zlib_adler32(0L, NULL, 0);
+ state->mode = hold & 0x200 ? DICTID : TYPE;
+ INITBITS();
+ break;
+ case DICTID:
+ NEEDBITS(32);
+ strm->adler = state->check = REVERSE(hold);
+ INITBITS();
+ state->mode = DICT;
+ case DICT:
+ if (state->havedict == 0) {
+ RESTORE();
+ return Z_NEED_DICT;
+ }
+ strm->adler = state->check = zlib_adler32(0L, NULL, 0);
+ state->mode = TYPE;
+ case TYPE:
+ if (flush == Z_BLOCK) goto inf_leave;
+ case TYPEDO:
+ if (state->last) {
+ BYTEBITS();
+ state->mode = CHECK;
+ break;
+ }
+ NEEDBITS(3);
+ state->last = BITS(1);
+ DROPBITS(1);
+ switch (BITS(2)) {
+ case 0: /* stored block */
+ state->mode = STORED;
+ break;
+ case 1: /* fixed block */
+ zlib_fixedtables(state);
+ state->mode = LEN; /* decode codes */
+ break;
+ case 2: /* dynamic block */
+ state->mode = TABLE;
+ break;
+ case 3:
+ strm->msg = (char *)"invalid block type";
+ state->mode = BAD;
+ }
+ DROPBITS(2);
+ break;
+ case STORED:
+ BYTEBITS(); /* go to byte boundary */
+ NEEDBITS(32);
+ if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+ strm->msg = (char *)"invalid stored block lengths";
+ state->mode = BAD;
+ break;
+ }
+ state->length = (unsigned)hold & 0xffff;
+ INITBITS();
+ state->mode = COPY;
+ case COPY:
+ copy = state->length;
+ if (copy) {
+ if (copy > have) copy = have;
+ if (copy > left) copy = left;
+ if (copy == 0) goto inf_leave;
+ memcpy(put, next, copy);
+ have -= copy;
+ next += copy;
+ left -= copy;
+ put += copy;
+ state->length -= copy;
+ break;
+ }
+ state->mode = TYPE;
+ break;
+ case TABLE:
+ NEEDBITS(14);
+ state->nlen = BITS(5) + 257;
+ DROPBITS(5);
+ state->ndist = BITS(5) + 1;
+ DROPBITS(5);
+ state->ncode = BITS(4) + 4;
+ DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+ if (state->nlen > 286 || state->ndist > 30) {
+ strm->msg = (char *)"too many length or distance symbols";
+ state->mode = BAD;
+ break;
+ }
+#endif
+ state->have = 0;
+ state->mode = LENLENS;
+ case LENLENS:
+ while (state->have < state->ncode) {
+ NEEDBITS(3);
+ state->lens[order[state->have++]] = (unsigned short)BITS(3);
+ DROPBITS(3);
+ }
+ while (state->have < 19)
+ state->lens[order[state->have++]] = 0;
+ state->next = state->codes;
+ state->lencode = (code const *)(state->next);
+ state->lenbits = 7;
+ ret = zlib_inflate_table(CODES, state->lens, 19, &(state->next),
+ &(state->lenbits), state->work);
+ if (ret) {
+ strm->msg = (char *)"invalid code lengths set";
+ state->mode = BAD;
+ break;
+ }
+ state->have = 0;
+ state->mode = CODELENS;
+ case CODELENS:
+ while (state->have < state->nlen + state->ndist) {
+ for (;;) {
+ this = state->lencode[BITS(state->lenbits)];
+ if ((unsigned)(this.bits) <= bits) break;
+ PULLBYTE();
+ }
+ if (this.val < 16) {
+ NEEDBITS(this.bits);
+ DROPBITS(this.bits);
+ state->lens[state->have++] = this.val;
+ }
+ else {
+ if (this.val == 16) {
+ NEEDBITS(this.bits + 2);
+ DROPBITS(this.bits);
+ if (state->have == 0) {
+ strm->msg = (char *)"invalid bit length repeat";
+ state->mode = BAD;
+ break;
+ }
+ len = state->lens[state->have - 1];
+ copy = 3 + BITS(2);
+ DROPBITS(2);
+ }
+ else if (this.val == 17) {
+ NEEDBITS(this.bits + 3);
+ DROPBITS(this.bits);
+ len = 0;
+ copy = 3 + BITS(3);
+ DROPBITS(3);
+ }
+ else {
+ NEEDBITS(this.bits + 7);
+ DROPBITS(this.bits);
+ len = 0;
+ copy = 11 + BITS(7);
+ DROPBITS(7);
+ }
+ if (state->have + copy > state->nlen + state->ndist) {
+ strm->msg = (char *)"invalid bit length repeat";
+ state->mode = BAD;
+ break;
+ }
+ while (copy--)
+ state->lens[state->have++] = (unsigned short)len;
+ }
+ }
+
+ /* handle error breaks in while */
+ if (state->mode == BAD) break;
+
+ /* build code tables */
+ state->next = state->codes;
+ state->lencode = (code const *)(state->next);
+ state->lenbits = 9;
+ ret = zlib_inflate_table(LENS, state->lens, state->nlen, &(state->next),
+ &(state->lenbits), state->work);
+ if (ret) {
+ strm->msg = (char *)"invalid literal/lengths set";
+ state->mode = BAD;
+ break;
+ }
+ state->distcode = (code const *)(state->next);
+ state->distbits = 6;
+ ret = zlib_inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+ &(state->next), &(state->distbits), state->work);
+ if (ret) {
+ strm->msg = (char *)"invalid distances set";
+ state->mode = BAD;
+ break;
+ }
+ state->mode = LEN;
+ case LEN:
+ if (have >= 6 && left >= 258) {
+ RESTORE();
+ inflate_fast(strm, out);
+ LOAD();
+ break;
+ }
+ for (;;) {
+ this = state->lencode[BITS(state->lenbits)];
+ if ((unsigned)(this.bits) <= bits) break;
+ PULLBYTE();
+ }
+ if (this.op && (this.op & 0xf0) == 0) {
+ last = this;
+ for (;;) {
+ this = state->lencode[last.val +
+ (BITS(last.bits + last.op) >> last.bits)];
+ if ((unsigned)(last.bits + this.bits) <= bits) break;
+ PULLBYTE();
+ }
+ DROPBITS(last.bits);
+ }
+ DROPBITS(this.bits);
+ state->length = (unsigned)this.val;
+ if ((int)(this.op) == 0) {
+ state->mode = LIT;
+ break;
+ }
+ if (this.op & 32) {
+ state->mode = TYPE;
+ break;
+ }
+ if (this.op & 64) {
+ strm->msg = (char *)"invalid literal/length code";
+ state->mode = BAD;
+ break;
+ }
+ state->extra = (unsigned)(this.op) & 15;
+ state->mode = LENEXT;
+ case LENEXT:
+ if (state->extra) {
+ NEEDBITS(state->extra);
+ state->length += BITS(state->extra);
+ DROPBITS(state->extra);
+ }
+ state->mode = DIST;
+ case DIST:
+ for (;;) {
+ this = state->distcode[BITS(state->distbits)];
+ if ((unsigned)(this.bits) <= bits) break;
+ PULLBYTE();
+ }
+ if ((this.op & 0xf0) == 0) {
+ last = this;
+ for (;;) {
+ this = state->distcode[last.val +
+ (BITS(last.bits + last.op) >> last.bits)];
+ if ((unsigned)(last.bits + this.bits) <= bits) break;
+ PULLBYTE();
+ }
+ DROPBITS(last.bits);
+ }
+ DROPBITS(this.bits);
+ if (this.op & 64) {
+ strm->msg = (char *)"invalid distance code";
+ state->mode = BAD;
+ break;
+ }
+ state->offset = (unsigned)this.val;
+ state->extra = (unsigned)(this.op) & 15;
+ state->mode = DISTEXT;
+ case DISTEXT:
+ if (state->extra) {
+ NEEDBITS(state->extra);
+ state->offset += BITS(state->extra);
+ DROPBITS(state->extra);
+ }
+#ifdef INFLATE_STRICT
+ if (state->offset > state->dmax) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+#endif
+ if (state->offset > state->whave + out - left) {
+ strm->msg = (char *)"invalid distance too far back";
+ state->mode = BAD;
+ break;
+ }
+ state->mode = MATCH;
+ case MATCH:
+ if (left == 0) goto inf_leave;
+ copy = out - left;
+ if (state->offset > copy) { /* copy from window */
+ copy = state->offset - copy;
+ if (copy > state->write) {
+ copy -= state->write;
+ from = state->window + (state->wsize - copy);
+ }
+ else
+ from = state->window + (state->write - copy);
+ if (copy > state->length) copy = state->length;
+ }
+ else { /* copy from output */
+ from = put - state->offset;
+ copy = state->length;
+ }
+ if (copy > left) copy = left;
+ left -= copy;
+ state->length -= copy;
+ do {
+ *put++ = *from++;
+ } while (--copy);
+ if (state->length == 0) state->mode = LEN;
+ break;
+ case LIT:
+ if (left == 0) goto inf_leave;
+ *put++ = (unsigned char)(state->length);
+ left--;
+ state->mode = LEN;
+ break;
+ case CHECK:
+ if (state->wrap) {
+ NEEDBITS(32);
+ out -= left;
+ strm->total_out += out;
+ state->total += out;
+ if (out)
+ strm->adler = state->check =
+ UPDATE(state->check, put - out, out);
+ out = left;
+ if (REVERSE(hold) != state->check) {
+ strm->msg = (char *)"incorrect data check";
+ state->mode = BAD;
+ break;
+ }
+ INITBITS();
+ }
+ state->mode = DONE;
+ case DONE:
+ ret = Z_STREAM_END;
+ goto inf_leave;
+ case BAD:
+ ret = Z_DATA_ERROR;
+ goto inf_leave;
+ case MEM:
+ return Z_MEM_ERROR;
+ case SYNC:
+ default:
+ return Z_STREAM_ERROR;
+ }
+
+ /*
+ Return from inflate(), updating the total counts and the check value.
+ If there was no progress during the inflate() call, return a buffer
+ error. Call zlib_updatewindow() to create and/or update the window state.
+ */
+ inf_leave:
+ RESTORE();
+ if (state->wsize || (state->mode < CHECK && out != strm->avail_out))
+ zlib_updatewindow(strm, out);
+
+ in -= strm->avail_in;
+ out -= strm->avail_out;
+ strm->total_in += in;
+ strm->total_out += out;
+ state->total += out;
+ if (state->wrap && out)
+ strm->adler = state->check =
+ UPDATE(state->check, strm->next_out - out, out);
+
+ strm->data_type = state->bits + (state->last ? 64 : 0) +
+ (state->mode == TYPE ? 128 : 0);
+
+ if (flush == Z_PACKET_FLUSH && ret == Z_OK &&
+ strm->avail_out != 0 && strm->avail_in == 0)
+ return zlib_inflateSyncPacket(strm);
+
+ if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
+ ret = Z_BUF_ERROR;
+
+ return ret;
+}
+
+int zlib_inflateEnd(z_streamp strm)
+{
+ if (strm == NULL || strm->state == NULL)
+ return Z_STREAM_ERROR;
+ return Z_OK;
+}
+
+#if 0
+int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary,
+ uInt dictLength)
+{
+ struct inflate_state *state;
+ unsigned long id;
+
+ /* check state */
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state *)strm->state;
+ if (state->wrap != 0 && state->mode != DICT)
+ return Z_STREAM_ERROR;
+
+ /* check for correct dictionary id */
+ if (state->mode == DICT) {
+ id = zlib_adler32(0L, NULL, 0);
+ id = zlib_adler32(id, dictionary, dictLength);
+ if (id != state->check)
+ return Z_DATA_ERROR;
+ }
+
+ /* copy dictionary to window */
+ zlib_updatewindow(strm, strm->avail_out);
+
+ if (dictLength > state->wsize) {
+ memcpy(state->window, dictionary + dictLength - state->wsize,
+ state->wsize);
+ state->whave = state->wsize;
+ }
+ else {
+ memcpy(state->window + state->wsize - dictLength, dictionary,
+ dictLength);
+ state->whave = dictLength;
+ }
+ state->havedict = 1;
+ return Z_OK;
+}
+#endif
+
+#if 0
+/*
+ Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
+ or when out of input. When called, *have is the number of pattern bytes
+ found in order so far, in 0..3. On return *have is updated to the new
+ state. If on return *have equals four, then the pattern was found and the
+ return value is how many bytes were read including the last byte of the
+ pattern. If *have is less than four, then the pattern has not been found
+ yet and the return value is len. In the latter case, zlib_syncsearch() can be
+ called again with more data and the *have state. *have is initialized to
+ zero for the first call.
+ */
+static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf,
+ unsigned len)
+{
+ unsigned got;
+ unsigned next;
+
+ got = *have;
+ next = 0;
+ while (next < len && got < 4) {
+ if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
+ got++;
+ else if (buf[next])
+ got = 0;
+ else
+ got = 4 - got;
+ next++;
+ }
+ *have = got;
+ return next;
+}
+#endif
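Although compiled out here, the interface is worth a sketch: *have carries the partial-match count across buffers, so the 00 00 ff ff marker is found even when it straddles a chunk boundary (assuming the function were built in; the values below follow the loop above):

/* Sketch only: zlib_syncsearch() is compiled out (#if 0) in this
 * build. The marker 00 00 ff ff is split across two chunks; *have
 * carries the partial match (3 after part1) between the calls. */
static unsigned char part1[] = { 0x12, 0x00, 0x00, 0xff };
static unsigned char part2[] = { 0xff, 0x34 };

static int find_sync(void)
{
    unsigned have = 0;
    unsigned used;

    used = zlib_syncsearch(&have, part1, sizeof(part1));
    /* have == 3, used == 4: saw 00 00 ff, consumed all of part1 */
    used = zlib_syncsearch(&have, part2, sizeof(part2));
    /* have == 4, used == 1: pattern completed one byte into part2 */
    return have == 4 ? (int)used : -1;
}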
+
+#if 0
+int zlib_inflateSync(z_streamp strm)
+{
+ unsigned len; /* number of bytes to look at or looked at */
+ unsigned long in, out; /* temporary to save total_in and total_out */
+ unsigned char buf[4]; /* to restore bit buffer to byte string */
+ struct inflate_state *state;
+
+ /* check parameters */
+ if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR;
+ state = (struct inflate_state *)strm->state;
+ if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
+
+ /* if first time, start search in bit buffer */
+ if (state->mode != SYNC) {
+ state->mode = SYNC;
+ state->hold <<= state->bits & 7;
+ state->bits -= state->bits & 7;
+ len = 0;
+ while (state->bits >= 8) {
+ buf[len++] = (unsigned char)(state->hold);
+ state->hold >>= 8;
+ state->bits -= 8;
+ }
+ state->have = 0;
+ zlib_syncsearch(&(state->have), buf, len);
+ }
+
+ /* search available input */
+ len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in);
+ strm->avail_in -= len;
+ strm->next_in += len;
+ strm->total_in += len;
+
+ /* return no joy or set up to restart inflate() on a new block */
+ if (state->have != 4) return Z_DATA_ERROR;
+ in = strm->total_in; out = strm->total_out;
+ zlib_inflateReset(strm);
+ strm->total_in = in; strm->total_out = out;
+ state->mode = TYPE;
+ return Z_OK;
+}
+#endif
+
+/*
+ * This subroutine adds the data at next_in/avail_in to the output history
+ * without performing any output. The output buffer must be "caught up",
+ * i.e. there must be no pending output (which should always be the case).
+ * The state must be waiting on the start of a block (i.e. mode == TYPE or
+ * HEAD). On exit,
+ * the output will also be caught up, and the checksum will have been updated
+ * if need be.
+ */
+int zlib_inflateIncomp(z_stream *z)
+{
+ struct inflate_state *state = (struct inflate_state *)z->state;
+ Byte *saved_no = z->next_out;
+ uInt saved_ao = z->avail_out;
+
+ if (state->mode != TYPE && state->mode != HEAD)
+ return Z_DATA_ERROR;
+
+ /* Set up some variables to allow misuse of zlib_updatewindow() */
+ z->avail_out = 0;
+ z->next_out = (unsigned char*)z->next_in + z->avail_in;
+
+ zlib_updatewindow(z, z->avail_in);
+
+ /* Restore saved variables */
+ z->avail_out = saved_ao;
+ z->next_out = saved_no;
+
+ z->adler = state->check =
+ UPDATE(state->check, z->next_in, z->avail_in);
+
+ z->total_out += z->avail_in;
+ z->total_in += z->avail_in;
+ z->next_in += z->avail_in;
+ state->total += z->avail_in;
+ z->avail_in = 0;
+
+ return Z_OK;
+}
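A hedged caller sketch (PPP-style, hypothetical wrapper): when a packet arrives uncompressed, its bytes must still enter the decompressor's history so that back-references in later compressed packets resolve against them:

#include <linux/zlib.h>

/* Hypothetical caller: feed an uncompressed packet into the 32K
 * history without producing output. */
static int history_add(z_stream *z, unsigned char *pkt, unsigned len)
{
    z->next_in = pkt;
    z->avail_in = len;
    return zlib_inflateIncomp(z);   /* Z_OK, or Z_DATA_ERROR mid-block */
}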
diff --git a/lib/zlib_inflate/inflate.h b/lib/zlib_inflate/inflate.h
new file mode 100644
index 00000000..3d17b3d1
--- /dev/null
+++ b/lib/zlib_inflate/inflate.h
@@ -0,0 +1,111 @@
+#ifndef INFLATE_H
+#define INFLATE_H
+
+/* inflate.h -- internal inflate state definition
+ * Copyright (C) 1995-2004 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* Possible inflate modes between inflate() calls */
+typedef enum {
+ HEAD, /* i: waiting for magic header */
+ FLAGS, /* i: waiting for method and flags (gzip) */
+ TIME, /* i: waiting for modification time (gzip) */
+ OS, /* i: waiting for extra flags and operating system (gzip) */
+ EXLEN, /* i: waiting for extra length (gzip) */
+ EXTRA, /* i: waiting for extra bytes (gzip) */
+ NAME, /* i: waiting for end of file name (gzip) */
+ COMMENT, /* i: waiting for end of comment (gzip) */
+ HCRC, /* i: waiting for header crc (gzip) */
+ DICTID, /* i: waiting for dictionary check value */
+ DICT, /* waiting for inflateSetDictionary() call */
+ TYPE, /* i: waiting for type bits, including last-flag bit */
+ TYPEDO, /* i: same, but skip check to exit inflate on new block */
+ STORED, /* i: waiting for stored size (length and complement) */
+ COPY, /* i/o: waiting for input or output to copy stored block */
+ TABLE, /* i: waiting for dynamic block table lengths */
+ LENLENS, /* i: waiting for code length code lengths */
+ CODELENS, /* i: waiting for length/lit and distance code lengths */
+ LEN, /* i: waiting for length/lit code */
+ LENEXT, /* i: waiting for length extra bits */
+ DIST, /* i: waiting for distance code */
+ DISTEXT, /* i: waiting for distance extra bits */
+ MATCH, /* o: waiting for output space to copy string */
+ LIT, /* o: waiting for output space to write literal */
+ CHECK, /* i: waiting for 32-bit check value */
+ LENGTH, /* i: waiting for 32-bit length (gzip) */
+ DONE, /* finished check, done -- remain here until reset */
+ BAD, /* got a data error -- remain here until reset */
+ MEM, /* got an inflate() memory error -- remain here until reset */
+ SYNC /* looking for synchronization bytes to restart inflate() */
+} inflate_mode;
+
+/*
+ State transitions between above modes -
+
+ (most modes can go to the BAD or MEM mode -- not shown for clarity)
+
+ Process header:
+ HEAD -> (gzip) or (zlib)
+ (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME
+ NAME -> COMMENT -> HCRC -> TYPE
+ (zlib) -> DICTID or TYPE
+ DICTID -> DICT -> TYPE
+ Read deflate blocks:
+ TYPE -> STORED or TABLE or LEN or CHECK
+ STORED -> COPY -> TYPE
+ TABLE -> LENLENS -> CODELENS -> LEN
+ Read deflate codes:
+ LEN -> LENEXT or LIT or TYPE
+ LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
+ LIT -> LEN
+ Process trailer:
+ CHECK -> LENGTH -> DONE
+ */
+
+/* state maintained between inflate() calls. Approximately 7K bytes. */
+struct inflate_state {
+ inflate_mode mode; /* current inflate mode */
+ int last; /* true if processing last block */
+ int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
+ int havedict; /* true if dictionary provided */
+ int flags; /* gzip header method and flags (0 if zlib) */
+ unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */
+ unsigned long check; /* protected copy of check value */
+ unsigned long total; /* protected copy of output count */
+ /* gz_headerp head; */ /* where to save gzip header information */
+ /* sliding window */
+ unsigned wbits; /* log base 2 of requested window size */
+ unsigned wsize; /* window size or zero if not using window */
+ unsigned whave; /* valid bytes in the window */
+ unsigned write; /* window write index */
+ unsigned char *window; /* allocated sliding window, if needed */
+ /* bit accumulator */
+ unsigned long hold; /* input bit accumulator */
+ unsigned bits; /* number of bits in "in" */
+ /* for string and stored block copying */
+ unsigned length; /* literal or length of data to copy */
+ unsigned offset; /* distance back to copy string from */
+ /* for table and code decoding */
+ unsigned extra; /* extra bits needed */
+ /* fixed and dynamic code tables */
+ code const *lencode; /* starting table for length/literal codes */
+ code const *distcode; /* starting table for distance codes */
+ unsigned lenbits; /* index bits for lencode */
+ unsigned distbits; /* index bits for distcode */
+ /* dynamic table building */
+ unsigned ncode; /* number of code length code lengths */
+ unsigned nlen; /* number of length code lengths */
+ unsigned ndist; /* number of distance code lengths */
+ unsigned have; /* number of code lengths in lens[] */
+ code *next; /* next available space in codes[] */
+ unsigned short lens[320]; /* temporary storage for code lengths */
+ unsigned short work[288]; /* work area for code table building */
+ code codes[ENOUGH]; /* space for code tables */
+};
+#endif
diff --git a/lib/zlib_inflate/inflate_syms.c b/lib/zlib_inflate/inflate_syms.c
new file mode 100644
index 00000000..67329fe9
--- /dev/null
+++ b/lib/zlib_inflate/inflate_syms.c
@@ -0,0 +1,20 @@
+/*
+ * linux/lib/zlib_inflate/inflate_syms.c
+ *
+ * Exported symbols for the inflate functionality.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/zlib.h>
+
+EXPORT_SYMBOL(zlib_inflate_workspacesize);
+EXPORT_SYMBOL(zlib_inflate);
+EXPORT_SYMBOL(zlib_inflateInit2);
+EXPORT_SYMBOL(zlib_inflateEnd);
+EXPORT_SYMBOL(zlib_inflateReset);
+EXPORT_SYMBOL(zlib_inflateIncomp);
+EXPORT_SYMBOL(zlib_inflate_blob);
+MODULE_LICENSE("GPL");
diff --git a/lib/zlib_inflate/inftrees.c b/lib/zlib_inflate/inftrees.c
new file mode 100644
index 00000000..3fe6ce5b
--- /dev/null
+++ b/lib/zlib_inflate/inftrees.c
@@ -0,0 +1,315 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include <linux/zutil.h>
+#include "inftrees.h"
+
+#define MAXBITS 15
+
+/*
+ Build a set of tables to decode the provided canonical Huffman code.
+ The code lengths are lens[0..codes-1]. The result starts at *table,
+ whose indices are 0..2^bits-1. work is a writable array of at least
+ lens shorts, which is used as a work area. type is the type of code
+ to be generated, CODES, LENS, or DISTS. On return, zero is success,
+ -1 is an invalid code, and +1 means that ENOUGH isn't enough. table
+ on return points to the next available entry's address. bits is the
+ requested root table index bits, and on return it is the actual root
+ table index bits. It will differ if the request is greater than the
+ longest code or if it is less than the shortest code.
+ */
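A minimal use of that contract, building a toy two-symbol code outside the kernel (sketch only; real callers pass table and work space from struct inflate_state, as inflate.c does in its LENLENS and CODELENS states):

#include "inftrees.h"   /* code, codetype, zlib_inflate_table() */

/* Build a toy two-symbol code: both symbols have length 1, giving
 * codes "0" and "1". */
static code table_space[ENOUGH];

static int build_toy_table(void)
{
    unsigned short lens[2] = { 1, 1 };
    unsigned short work[2];            /* >= codes entries, per above */
    code *next = table_space;          /* advanced past entries used */
    unsigned bits = 1;                 /* requested root index bits */

    /* 0 on success; table_space[0..1] now decode the two codes */
    return zlib_inflate_table(CODES, lens, 2, &next, &bits, work);
}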
+int zlib_inflate_table(codetype type, unsigned short *lens, unsigned codes,
+ code **table, unsigned *bits, unsigned short *work)
+{
+ unsigned len; /* a code's length in bits */
+ unsigned sym; /* index of code symbols */
+ unsigned min, max; /* minimum and maximum code lengths */
+ unsigned root; /* number of index bits for root table */
+ unsigned curr; /* number of index bits for current table */
+ unsigned drop; /* code bits to drop for sub-table */
+ int left; /* number of prefix codes available */
+ unsigned used; /* code entries in table used */
+ unsigned huff; /* Huffman code */
+ unsigned incr; /* for incrementing code, index */
+ unsigned fill; /* index for replicating entries */
+ unsigned low; /* low bits for current root entry */
+ unsigned mask; /* mask for low root bits */
+ code this; /* table entry for duplication */
+ code *next; /* next available space in table */
+ const unsigned short *base; /* base value table to use */
+ const unsigned short *extra; /* extra bits table to use */
+ int end; /* use base and extra for symbol > end */
+ unsigned short count[MAXBITS+1]; /* number of codes of each length */
+ unsigned short offs[MAXBITS+1]; /* offsets in table for each length */
+ static const unsigned short lbase[31] = { /* Length codes 257..285 base */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+ static const unsigned short lext[31] = { /* Length codes 257..285 extra */
+ 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
+ 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196};
+ static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577, 0, 0};
+ static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
+ 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+ 23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
+ 28, 28, 29, 29, 64, 64};
+
+ /*
+ Process a set of code lengths to create a canonical Huffman code. The
+ code lengths are lens[0..codes-1]. Each length corresponds to the
+ symbols 0..codes-1. The Huffman code is generated by first sorting the
+ symbols by length from short to long, and retaining the symbol order
+ for codes with equal lengths. Then the code starts with all zero bits
+ for the first code of the shortest length, and the codes are integer
+ increments for the same length, and zeros are appended as the length
+ increases. For the deflate format, these bits are stored backwards
+ from their more natural integer increment ordering, and so when the
+ decoding tables are built in the large loop below, the integer codes
+ are incremented backwards.
+
+ This routine assumes, but does not check, that all of the entries in
+ lens[] are in the range 0..MAXBITS. The caller must assure this.
+ A length of 1..MAXBITS is interpreted as that code length; zero means that
+ the symbol does not occur in this code.
+
+ The codes are sorted by computing a count of codes for each length,
+ creating from that a table of starting indices for each length in the
+ sorted table, and then entering the symbols in order in the sorted
+ table. The sorted table is work[], with that space being provided by
+ the caller.
+
+ The length counts are used for other purposes as well, i.e. finding
+ the minimum and maximum length codes, determining if there are any
+ codes at all, checking for a valid set of lengths, and looking ahead
+ at length counts to determine sub-table sizes when building the
+ decoding tables.
+ */
+
+ /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
+ for (len = 0; len <= MAXBITS; len++)
+ count[len] = 0;
+ for (sym = 0; sym < codes; sym++)
+ count[lens[sym]]++;
+
+ /* bound code lengths, force root to be within code lengths */
+ root = *bits;
+ for (max = MAXBITS; max >= 1; max--)
+ if (count[max] != 0) break;
+ if (root > max) root = max;
+ if (max == 0) { /* no symbols to code at all */
+ this.op = (unsigned char)64; /* invalid code marker */
+ this.bits = (unsigned char)1;
+ this.val = (unsigned short)0;
+ *(*table)++ = this; /* make a table to force an error */
+ *(*table)++ = this;
+ *bits = 1;
+ return 0; /* no symbols, but wait for decoding to report error */
+ }
+ for (min = 1; min <= MAXBITS; min++)
+ if (count[min] != 0) break;
+ if (root < min) root = min;
+
+ /* check for an over-subscribed or incomplete set of lengths */
+ left = 1;
+ for (len = 1; len <= MAXBITS; len++) {
+ left <<= 1;
+ left -= count[len];
+ if (left < 0) return -1; /* over-subscribed */
+ }
+ if (left > 0 && (type == CODES || max != 1))
+ return -1; /* incomplete set */
+
+ /* generate offsets into symbol table for each length for sorting */
+ offs[1] = 0;
+ for (len = 1; len < MAXBITS; len++)
+ offs[len + 1] = offs[len] + count[len];
+
+ /* sort symbols by length, by symbol order within each length */
+ for (sym = 0; sym < codes; sym++)
+ if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
+
+ /*
+ Create and fill in decoding tables. In this loop, the table being
+ filled is at next and has curr index bits. The code being used is huff
+ with length len. That code is converted to an index by dropping drop
+ bits off of the bottom. For codes where len is less than drop + curr,
+ those top drop + curr - len bits are incremented through all values to
+ fill the table with replicated entries.
+
+ root is the number of index bits for the root table. When len exceeds
+ root, sub-tables are created pointed to by the root entry with an index
+ of the low root bits of huff. This is saved in low to check for when a
+ new sub-table should be started. drop is zero when the root table is
+ being filled, and drop is root when sub-tables are being filled.
+
+ When a new sub-table is needed, it is necessary to look ahead in the
+ code lengths to determine what size sub-table is needed. The length
+ counts are used for this, and so count[] is decremented as codes are
+ entered in the tables.
+
+ used keeps track of how many table entries have been allocated from the
+ provided *table space. It is checked when a LENS table is being made
+ against the space in *table, ENOUGH, minus the maximum space needed by
+ the worst case distance code, MAXD. This should never happen, but the
+ sufficiency of ENOUGH has not been proven exhaustively, hence the check.
+ This assumes that when type == LENS, bits == 9.
+
+ sym increments through all symbols, and the loop terminates when
+ all codes of length max, i.e. all codes, have been processed. This
+ routine permits incomplete codes, so another loop after this one fills
+ in the rest of the decoding tables with invalid code markers.
+ */
+
+ /* set up for code type */
+ switch (type) {
+ case CODES:
+ base = extra = work; /* dummy value--not used */
+ end = 19;
+ break;
+ case LENS:
+ base = lbase;
+ base -= 257;
+ extra = lext;
+ extra -= 257;
+ end = 256;
+ break;
+ default: /* DISTS */
+ base = dbase;
+ extra = dext;
+ end = -1;
+ }
+
+ /* initialize state for loop */
+ huff = 0; /* starting code */
+ sym = 0; /* starting code symbol */
+ len = min; /* starting code length */
+ next = *table; /* current table to fill in */
+ curr = root; /* current table index bits */
+ drop = 0; /* current bits to drop from code for index */
+ low = (unsigned)(-1); /* trigger new sub-table when len > root */
+ used = 1U << root; /* use root table entries */
+ mask = used - 1; /* mask for comparing low */
+
+ /* check available table space */
+ if (type == LENS && used >= ENOUGH - MAXD)
+ return 1;
+
+ /* process all codes and make table entries */
+ for (;;) {
+ /* create table entry */
+ this.bits = (unsigned char)(len - drop);
+ if ((int)(work[sym]) < end) {
+ this.op = (unsigned char)0;
+ this.val = work[sym];
+ }
+ else if ((int)(work[sym]) > end) {
+ this.op = (unsigned char)(extra[work[sym]]);
+ this.val = base[work[sym]];
+ }
+ else {
+ this.op = (unsigned char)(32 + 64); /* end of block */
+ this.val = 0;
+ }
+
+ /* replicate for those indices with low len bits equal to huff */
+ incr = 1U << (len - drop);
+ fill = 1U << curr;
+ min = fill; /* save offset to next table */
+ do {
+ fill -= incr;
+ next[(huff >> drop) + fill] = this;
+ } while (fill != 0);
+
+ /* backwards increment the len-bit code huff */
+ incr = 1U << (len - 1);
+ while (huff & incr)
+ incr >>= 1;
+ if (incr != 0) {
+ huff &= incr - 1;
+ huff += incr;
+ }
+ else
+ huff = 0;
+
+ /* go to next symbol, update count, len */
+ sym++;
+ if (--(count[len]) == 0) {
+ if (len == max) break;
+ len = lens[work[sym]];
+ }
+
+ /* create new sub-table if needed */
+ if (len > root && (huff & mask) != low) {
+ /* if first time, transition to sub-tables */
+ if (drop == 0)
+ drop = root;
+
+ /* increment past last table */
+ next += min; /* here min is 1 << curr */
+
+ /* determine length of next table */
+ curr = len - drop;
+ left = (int)(1 << curr);
+ while (curr + drop < max) {
+ left -= count[curr + drop];
+ if (left <= 0) break;
+ curr++;
+ left <<= 1;
+ }
+
+ /* check for enough space */
+ used += 1U << curr;
+ if (type == LENS && used >= ENOUGH - MAXD)
+ return 1;
+
+ /* point entry in root table to sub-table */
+ low = huff & mask;
+ (*table)[low].op = (unsigned char)curr;
+ (*table)[low].bits = (unsigned char)root;
+ (*table)[low].val = (unsigned short)(next - *table);
+ }
+ }
+
+ /*
+ Fill in rest of table for incomplete codes. This loop is similar to the
+ loop above in incrementing huff for table indices. It is assumed that
+ len is equal to curr + drop, so there is no loop needed to increment
+ through high index bits. When the current sub-table is filled, the loop
+ drops back to the root table to fill in any remaining entries there.
+ */
+ this.op = (unsigned char)64; /* invalid code marker */
+ this.bits = (unsigned char)(len - drop);
+ this.val = (unsigned short)0;
+ while (huff != 0) {
+ /* when done with sub-table, drop back to root table */
+ if (drop != 0 && (huff & mask) != low) {
+ drop = 0;
+ len = root;
+ next = *table;
+ this.bits = (unsigned char)len;
+ }
+
+ /* put invalid code marker in table */
+ next[huff >> drop] = this;
+
+ /* backwards increment the len-bit code huff */
+ incr = 1U << (len - 1);
+ while (huff & incr)
+ incr >>= 1;
+ if (incr != 0) {
+ huff &= incr - 1;
+ huff += incr;
+ }
+ else
+ huff = 0;
+ }
+
+ /* set return parameters */
+ *table += used;
+ *bits = root;
+ return 0;
+}
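The canonical construction described in the long comment inside zlib_inflate_table() can be checked by hand. A small standalone program assigning codes the same way (natural bit order for readability; deflate actually stores the bits reversed):

#include <stdio.h>

/* Assign canonical Huffman codes to lengths {2, 1, 3, 3}: sort by
 * length, increment within a length, append zeros as length grows. */
int main(void)
{
    const unsigned lens[4] = { 2, 1, 3, 3 };
    unsigned count[4] = { 0 }, first[4], sym, len, codeval;

    for (sym = 0; sym < 4; sym++)       /* count codes per length */
        count[lens[sym]]++;
    first[1] = 0;                       /* first code of each length */
    for (len = 1; len < 3; len++)
        first[len + 1] = (first[len] + count[len]) << 1;

    for (sym = 0; sym < 4; sym++) {     /* symbol order breaks ties */
        codeval = first[lens[sym]]++;
        printf("sym %u: len %u code ", sym, lens[sym]);
        for (len = lens[sym]; len--; )
            putchar(codeval & (1u << len) ? '1' : '0');
        putchar('\n');
    }
    return 0;                   /* prints 10, 0, 110, 111 */
}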
diff --git a/lib/zlib_inflate/inftrees.h b/lib/zlib_inflate/inftrees.h
new file mode 100644
index 00000000..b70b4731
--- /dev/null
+++ b/lib/zlib_inflate/inftrees.h
@@ -0,0 +1,59 @@
+#ifndef INFTREES_H
+#define INFTREES_H
+
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-2005 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* Structure for decoding tables. Each entry provides either the
+ information needed to do the operation requested by the code that
+ indexed that table entry, or it provides a pointer to another
+ table that indexes more bits of the code. op indicates whether
+ the entry is a pointer to another table, a literal, a length or
+ distance, an end-of-block, or an invalid code. For a table
+ pointer, the low four bits of op is the number of index bits of
+ that table. For a length or distance, the low four bits of op
+ is the number of extra bits to get after the code. bits is
+ the number of bits in this code or part of the code to drop off
+ of the bit buffer. val is the actual byte to output in the case
+ of a literal, the base length or distance, or the offset from
+ the current table to the next table. Each entry is four bytes. */
+typedef struct {
+ unsigned char op; /* operation, extra bits, table bits */
+ unsigned char bits; /* bits in this part of the code */
+ unsigned short val; /* offset in table or code value */
+} code;
+
+/* op values as set by inflate_table():
+ 00000000 - literal
+ 0000tttt - table link, tttt != 0 is the number of table index bits
+ 0001eeee - length or distance, eeee is the number of extra bits
+ 01100000 - end of block
+ 01000000 - invalid code
+ */
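Given that encoding, one decode step is a table index by the low bits of the accumulator followed by a dispatch on op. A simplified sketch of the dispatch (cf. the dolen path in inffast.c; hold and the output handling are assumed to live in the caller):

#include "inftrees.h"   /* the code type defined above */

/* Dispatch on a decoded table entry following the op encoding above.
 * Returns a tag for illustration: 'L' literal, 'X' length plus extra
 * bits, 'T' sub-table link, 'E' end of block, -1 invalid. */
static int decode_step(const code *lcode, unsigned lbits,
                       unsigned long hold, unsigned *valp)
{
    code this = lcode[hold & ((1U << lbits) - 1)];

    *valp = this.val;
    if (this.op == 0)               /* 00000000: literal byte */
        return 'L';
    if (this.op & 16)               /* 0001eeee: base + extra bits */
        return 'X';
    if ((this.op & 64) == 0)        /* 0000tttt: sub-table link */
        return 'T';
    if (this.op & 32)               /* 01100000: end of block */
        return 'E';
    return -1;                      /* 01000000: invalid code */
}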
+
+/* Maximum size of dynamic tree. The maximum found in a long but non-
+ exhaustive search was 1444 code structures (852 for length/literals
+ and 592 for distances, the latter actually the result of an
+ exhaustive search). The true maximum is not known, but the value
+ below is more than safe. */
+#define ENOUGH 2048
+#define MAXD 592
+
+/* Type of code to build for inftable() */
+typedef enum {
+ CODES,
+ LENS,
+ DISTS
+} codetype;
+
+extern int zlib_inflate_table (codetype type, unsigned short *lens,
+ unsigned codes, code **table,
+ unsigned *bits, unsigned short *work);
+#endif
diff --git a/lib/zlib_inflate/infutil.c b/lib/zlib_inflate/infutil.c
new file mode 100644
index 00000000..4824c2cc
--- /dev/null
+++ b/lib/zlib_inflate/infutil.c
@@ -0,0 +1,49 @@
+#include <linux/zutil.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/* Utility function: initialize zlib, unpack binary blob, clean up zlib,
+ * return len or negative error code.
+ */
+int zlib_inflate_blob(void *gunzip_buf, unsigned int sz,
+ const void *buf, unsigned int len)
+{
+ const u8 *zbuf = buf;
+ struct z_stream_s *strm;
+ int rc;
+
+ rc = -ENOMEM;
+ strm = kmalloc(sizeof(*strm), GFP_KERNEL);
+ if (strm == NULL)
+ goto gunzip_nomem1;
+ strm->workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
+ if (strm->workspace == NULL)
+ goto gunzip_nomem2;
+
+ /* gzip header (1f,8b,08... 10 bytes total + possible asciz filename)
+ * expected to be stripped from input
+ */
+ strm->next_in = zbuf;
+ strm->avail_in = len;
+ strm->next_out = gunzip_buf;
+ strm->avail_out = sz;
+
+ rc = zlib_inflateInit2(strm, -MAX_WBITS);
+ if (rc == Z_OK) {
+ rc = zlib_inflate(strm, Z_FINISH);
+ /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */
+ if (rc == Z_STREAM_END)
+ rc = sz - strm->avail_out;
+ else
+ rc = -EINVAL;
+ zlib_inflateEnd(strm);
+ } else
+ rc = -EINVAL;
+
+ kfree(strm->workspace);
+gunzip_nomem2:
+ kfree(strm);
+gunzip_nomem1:
+ return rc; /* decompressed length on success, else negative errno */
+}
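A hedged caller sketch (hypothetical names): unpack a blob whose 10-byte gzip header has already been stripped, leaving the raw deflate stream this helper expects:

#include <linux/types.h>
#include <linux/zlib.h>

/* Hypothetical wrapper around zlib_inflate_blob(). */
static bool unpack_blob(void *dst, unsigned int dst_sz,
                        const void *src, unsigned int src_len,
                        unsigned int *out_len)
{
    int n = zlib_inflate_blob(dst, dst_sz, src, src_len);

    if (n < 0)
        return false;           /* -ENOMEM or -EINVAL */
    *out_len = n;               /* decompressed byte count in dst */
    return true;
}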
diff --git a/lib/zlib_inflate/infutil.h b/lib/zlib_inflate/infutil.h
new file mode 100644
index 00000000..eb1a9007
--- /dev/null
+++ b/lib/zlib_inflate/infutil.h
@@ -0,0 +1,25 @@
+/* infutil.h -- types and macros common to blocks and codes
+ * Copyright (C) 1995-1998 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+#ifndef _INFUTIL_H
+#define _INFUTIL_H
+
+#include <linux/zlib.h>
+
+/* memory allocation for inflation */
+
+struct inflate_workspace {
+ struct inflate_state inflate_state;
+ unsigned char working_window[1 << MAX_WBITS];
+};
+
+#define WS(z) ((struct inflate_workspace *)(z->workspace))
+
+#endif