diff options
Diffstat (limited to 'lib')
155 files changed, 38803 insertions, 0 deletions
diff --git a/lib/Kconfig b/lib/Kconfig new file mode 100644 index 00000000..fa9bf2c0 --- /dev/null +++ b/lib/Kconfig @@ -0,0 +1,213 @@ +# +# Library configuration +# + +config BINARY_PRINTF + def_bool n + +menu "Library routines" + +config RAID6_PQ + tristate + +config BITREVERSE + tristate + +config RATIONAL + boolean + +config GENERIC_FIND_FIRST_BIT + bool + +config GENERIC_FIND_NEXT_BIT + bool + +config GENERIC_FIND_LAST_BIT + bool + default y + +config CRC_CCITT + tristate "CRC-CCITT functions" + help + This option is provided for the case where no in-kernel-tree + modules require CRC-CCITT functions, but a module built outside + the kernel tree does. Such modules that use library CRC-CCITT + functions require M here. + +config CRC16 + tristate "CRC16 functions" + help + This option is provided for the case where no in-kernel-tree + modules require CRC16 functions, but a module built outside + the kernel tree does. Such modules that use library CRC16 + functions require M here. + +config CRC_T10DIF + tristate "CRC calculation for the T10 Data Integrity Field" + help + This option is only needed if a module that's not in the + kernel tree needs to calculate CRC checks for use with the + SCSI data integrity subsystem. + +config CRC_ITU_T + tristate "CRC ITU-T V.41 functions" + help + This option is provided for the case where no in-kernel-tree + modules require CRC ITU-T V.41 functions, but a module built outside + the kernel tree does. Such modules that use library CRC ITU-T V.41 + functions require M here. + +config CRC32 + tristate "CRC32 functions" + default y + select BITREVERSE + help + This option is provided for the case where no in-kernel-tree + modules require CRC32 functions, but a module built outside the + kernel tree does. Such modules that use library CRC32 functions + require M here. + +config CRC7 + tristate "CRC7 functions" + help + This option is provided for the case where no in-kernel-tree + modules require CRC7 functions, but a module built outside + the kernel tree does. Such modules that use library CRC7 + functions require M here. + +config LIBCRC32C + tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" + select CRYPTO + select CRYPTO_CRC32C + help + This option is provided for the case where no in-kernel-tree + modules require CRC32c functions, but a module built outside the + kernel tree does. Such modules that use library CRC32c functions + require M here. See Castagnoli93. + Module will be libcrc32c. + +config AUDIT_GENERIC + bool + depends on AUDIT && !AUDIT_ARCH + default y + +# +# compression support is select'ed if needed +# +config ZLIB_INFLATE + tristate + +config ZLIB_DEFLATE + tristate + +config LZO_COMPRESS + tristate + +config LZO_DECOMPRESS + tristate + +# +# These all provide a common interface (hence the apparent duplication with +# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) +# +config DECOMPRESS_GZIP + select ZLIB_INFLATE + tristate + +config DECOMPRESS_BZIP2 + tristate + +config DECOMPRESS_LZMA + tristate + +config DECOMPRESS_LZO + select LZO_DECOMPRESS + tristate + +# +# Generic allocator support is selected if needed +# +config GENERIC_ALLOCATOR + boolean + +# +# reed solomon support is select'ed if needed +# +config REED_SOLOMON + tristate + +config REED_SOLOMON_ENC8 + boolean + +config REED_SOLOMON_DEC8 + boolean + +config REED_SOLOMON_ENC16 + boolean + +config REED_SOLOMON_DEC16 + boolean + +# +# Textsearch support is select'ed if needed +# +config TEXTSEARCH + boolean + +config TEXTSEARCH_KMP + tristate + +config TEXTSEARCH_BM + tristate + +config TEXTSEARCH_FSM + tristate + +config BTREE + boolean + +config HAS_IOMEM + boolean + depends on !NO_IOMEM + default y + +config HAS_IOPORT + boolean + depends on HAS_IOMEM && !NO_IOPORT + default y + +config HAS_DMA + boolean + depends on !NO_DMA + default y + +config CHECK_SIGNATURE + bool + +config CPUMASK_OFFSTACK + bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS + help + Use dynamic allocation for cpumask_var_t, instead of putting + them on the stack. This is a bit more expensive, but avoids + stack overflow. + +config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS + bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS + depends on EXPERIMENTAL && BROKEN + +# +# Netlink attribute parsing support is select'ed if needed +# +config NLATTR + bool + +# +# Generic 64-bit atomic support is selected if needed +# +config GENERIC_ATOMIC64 + bool + +config LRU_CACHE + tristate + +endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug new file mode 100644 index 00000000..1b4afd2e --- /dev/null +++ b/lib/Kconfig.debug @@ -0,0 +1,1157 @@ + +config PRINTK_TIME + bool "Show timing information on printks" + depends on PRINTK + help + Selecting this option causes timing information to be + included in printk output. This allows you to measure + the interval between kernel operations, including bootup + operations. This is useful for identifying long delays + in kernel startup. + +config ENABLE_WARN_DEPRECATED + bool "Enable __deprecated logic" + default y + help + Enable the __deprecated logic in the kernel build. + Disable this to suppress the "warning: 'foo' is deprecated + (declared at kernel/power/somefile.c:1234)" messages. + +config ENABLE_MUST_CHECK + bool "Enable __must_check logic" + default y + help + Enable the __must_check logic in the kernel build. Disable this to + suppress the "warning: ignoring return value of 'foo', declared with + attribute warn_unused_result" messages. + +config FRAME_WARN + int "Warn for stack frames larger than (needs gcc 4.4)" + range 0 8192 + default 1024 if !64BIT + default 2048 if 64BIT + help + Tell gcc to warn at build time for stack frames larger than this. + Setting this too low will cause a lot of warnings. + Setting it to 0 disables the warning. + Requires gcc 4.4 + +config MAGIC_SYSRQ + bool "Magic SysRq key" + depends on !UML + help + If you say Y here, you will have some control over the system even + if the system crashes for example during kernel debugging (e.g., you + will be able to flush the buffer cache to disk, reboot the system + immediately or dump some status information). This is accomplished + by pressing various keys while holding SysRq (Alt+PrintScreen). It + also works on a serial console (on PC hardware at least), if you + send a BREAK and then within 5 seconds a command keypress. The + keys are documented in <file:Documentation/sysrq.txt>. Don't say Y + unless you really know what this hack does. + +config STRIP_ASM_SYMS + bool "Strip assembler-generated symbols during link" + default n + help + Strip internal assembler-generated symbols during a link (symbols + that look like '.Lxxx') so they don't pollute the output of + get_wchan() and suchlike. + +config UNUSED_SYMBOLS + bool "Enable unused/obsolete exported symbols" + default y if X86 + help + Unused but exported symbols make the kernel needlessly bigger. For + that reason most of these unused exports will soon be removed. This + option is provided temporarily to provide a transition period in case + some external kernel module needs one of these symbols anyway. If you + encounter such a case in your module, consider if you are actually + using the right API. (rationale: since nobody in the kernel is using + this in a module, there is a pretty good chance it's actually the + wrong interface to use). If you really need the symbol, please send a + mail to the linux kernel mailing list mentioning the symbol and why + you really need it, and what the merge plan to the mainline kernel for + your module is. + +config DEBUG_FS + bool "Debug Filesystem" + help + debugfs is a virtual file system that kernel developers use to put + debugging files into. Enable this option to be able to read and + write to these files. + + For detailed documentation on the debugfs API, see + Documentation/DocBook/filesystems. + + If unsure, say N. + +config HEADERS_CHECK + bool "Run 'make headers_check' when building vmlinux" + depends on !UML + help + This option will extract the user-visible kernel headers whenever + building the kernel, and will run basic sanity checks on them to + ensure that exported files do not attempt to include files which + were not exported, etc. + + If you're making modifications to header files which are + relevant for userspace, say 'Y', and check the headers + exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in + your build tree), to make sure they're suitable. + +config DEBUG_SECTION_MISMATCH + bool "Enable full Section mismatch analysis" + depends on UNDEFINED || (BLACKFIN) + default y + # This option is on purpose disabled for now. + # It will be enabled when we are down to a reasonable number + # of section mismatch warnings (< 10 for an allyesconfig build) + help + The section mismatch analysis checks if there are illegal + references from one section to another section. + Linux will during link or during runtime drop some sections + and any use of code/data previously in these sections will + most likely result in an oops. + In the code functions and variables are annotated with + __init, __devinit etc. (see full list in include/linux/init.h) + which results in the code/data being placed in specific sections. + The section mismatch analysis is always done after a full + kernel build but enabling this option will in addition + do the following: + - Add the option -fno-inline-functions-called-once to gcc + When inlining a function annotated __init in a non-init + function we would lose the section information and thus + the analysis would not catch the illegal reference. + This option tells gcc to inline less but will also + result in a larger kernel. + - Run the section mismatch analysis for each module/built-in.o + When we run the section mismatch analysis on vmlinux.o we + lose valueble information about where the mismatch was + introduced. + Running the analysis for each module/built-in.o file + will tell where the mismatch happens much closer to the + source. The drawback is that we will report the same + mismatch at least twice. + - Enable verbose reporting from modpost to help solving + the section mismatches reported. + +config DEBUG_KERNEL + bool "Kernel debugging" + help + Say Y here if you are developing drivers or trying to debug and + identify kernel problems. + +config DEBUG_SHIRQ + bool "Debug shared IRQ handlers" + depends on DEBUG_KERNEL && GENERIC_HARDIRQS + help + Enable this to generate a spurious interrupt as soon as a shared + interrupt handler is registered, and just before one is deregistered. + Drivers ought to be able to handle interrupts coming in at those + points; some don't and need to be caught. + +config LOCKUP_DETECTOR + bool "Detect Hard and Soft Lockups" + depends on DEBUG_KERNEL && !S390 + help + Say Y here to enable the kernel to act as a watchdog to detect + hard and soft lockups. + + Softlockups are bugs that cause the kernel to loop in kernel + mode for more than 60 seconds, without giving other tasks a + chance to run. The current stack trace is displayed upon + detection and the system will stay locked up. + + Hardlockups are bugs that cause the CPU to loop in kernel mode + for more than 60 seconds, without letting other interrupts have a + chance to run. The current stack trace is displayed upon detection + and the system will stay locked up. + + The overhead should be minimal. A periodic hrtimer runs to + generate interrupts and kick the watchdog task every 10-12 seconds. + An NMI is generated every 60 seconds or so to check for hardlockups. + +config HARDLOCKUP_DETECTOR + def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI + +config BOOTPARAM_SOFTLOCKUP_PANIC + bool "Panic (Reboot) On Soft Lockups" + depends on LOCKUP_DETECTOR + help + Say Y here to enable the kernel to panic on "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 60 seconds, without giving other tasks a + chance to run. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + lockup has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE + int + depends on LOCKUP_DETECTOR + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC + +config DETECT_HUNG_TASK + bool "Detect Hung Tasks" + depends on DEBUG_KERNEL + default DETECT_SOFTLOCKUP + help + Say Y here to enable the kernel to detect "hung tasks", + which are bugs that cause the task to be stuck in + uninterruptible "D" state indefinitiley. + + When a hung task is detected, the kernel will print the + current stack trace (which you should report), but the + task will stay in uninterruptible state. If lockdep is + enabled then all held locks will also be reported. This + feature has negligible overhead. + +config BOOTPARAM_HUNG_TASK_PANIC + bool "Panic (Reboot) On Hung Tasks" + depends on DETECT_HUNG_TASK + help + Say Y here to enable the kernel to panic on "hung tasks", + which are bugs that cause the kernel to leave a task stuck + in uninterruptible "D" state. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + hung task has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a hung tasks must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_HUNG_TASK_PANIC_VALUE + int + depends on DETECT_HUNG_TASK + range 0 1 + default 0 if !BOOTPARAM_HUNG_TASK_PANIC + default 1 if BOOTPARAM_HUNG_TASK_PANIC + +config SCHED_DEBUG + bool "Collect scheduler debugging info" + depends on DEBUG_KERNEL && PROC_FS + default y + help + If you say Y here, the /proc/sched_debug file will be provided + that can help debug the scheduler. The runtime overhead of this + option is minimal. + +config SCHEDSTATS + bool "Collect scheduler statistics" + depends on DEBUG_KERNEL && PROC_FS + help + If you say Y here, additional code will be inserted into the + scheduler and related routines to collect statistics about + scheduler behavior and provide them in /proc/schedstat. These + stats may be useful for both tuning and debugging the scheduler + If you aren't debugging the scheduler or trying to tune a specific + application, you can say N to avoid the very slight overhead + this adds. + +config TIMER_STATS + bool "Collect kernel timers statistics" + depends on DEBUG_KERNEL && PROC_FS + help + If you say Y here, additional code will be inserted into the + timer routines to collect statistics about kernel timers being + reprogrammed. The statistics can be read from /proc/timer_stats. + The statistics collection is started by writing 1 to /proc/timer_stats, + writing 0 stops it. This feature is useful to collect information + about timer usage patterns in kernel and userspace. This feature + is lightweight if enabled in the kernel config but not activated + (it defaults to deactivated on bootup and will only be activated + if some application like powertop activates it explicitly). + +config DEBUG_OBJECTS + bool "Debug object operations" + depends on DEBUG_KERNEL + help + If you say Y here, additional code will be inserted into the + kernel to track the life time of various objects and validate + the operations on those objects. + +config DEBUG_OBJECTS_SELFTEST + bool "Debug objects selftest" + depends on DEBUG_OBJECTS + help + This enables the selftest of the object debug code. + +config DEBUG_OBJECTS_FREE + bool "Debug objects in freed memory" + depends on DEBUG_OBJECTS + help + This enables checks whether a k/v free operation frees an area + which contains an object which has not been deactivated + properly. This can make kmalloc/kfree-intensive workloads + much slower. + +config DEBUG_OBJECTS_TIMERS + bool "Debug timer objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + timer routines to track the life time of timer objects and + validate the timer operations. + +config DEBUG_OBJECTS_WORK + bool "Debug work objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + work queue routines to track the life time of work objects and + validate the work operations. + +config DEBUG_OBJECTS_RCU_HEAD + bool "Debug RCU callbacks objects" + depends on DEBUG_OBJECTS && PREEMPT + help + Enable this to turn on debugging of RCU list heads (call_rcu() usage). + +config DEBUG_OBJECTS_ENABLE_DEFAULT + int "debug_objects bootup default value (0-1)" + range 0 1 + default "1" + depends on DEBUG_OBJECTS + help + Debug objects boot parameter default value + +config DEBUG_SLAB + bool "Debug slab memory allocations" + depends on DEBUG_KERNEL && SLAB && !KMEMCHECK + help + Say Y here to have the kernel do limited verification on memory + allocation as well as poisoning memory on free to catch use of freed + memory. This can make kmalloc/kfree-intensive workloads much slower. + +config DEBUG_SLAB_LEAK + bool "Memory leak debugging" + depends on DEBUG_SLAB + +config SLUB_DEBUG_ON + bool "SLUB debugging on by default" + depends on SLUB && SLUB_DEBUG && !KMEMCHECK + default n + help + Boot with debugging on by default. SLUB boots by default with + the runtime debug capabilities switched off. Enabling this is + equivalent to specifying the "slub_debug" parameter on boot. + There is no support for more fine grained debug control like + possible with slub_debug=xxx. SLUB debugging may be switched + off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying + "slub_debug=-". + +config SLUB_STATS + default n + bool "Enable SLUB performance statistics" + depends on SLUB && SLUB_DEBUG && SYSFS + help + SLUB statistics are useful to debug SLUBs allocation behavior in + order find ways to optimize the allocator. This should never be + enabled for production use since keeping statistics slows down + the allocator by a few percentage points. The slabinfo command + supports the determination of the most active slabs to figure + out which slabs are relevant to a particular load. + Try running: slabinfo -DA + +config DEBUG_KMEMLEAK + bool "Kernel memory leak detector" + depends on DEBUG_KERNEL && EXPERIMENTAL && !MEMORY_HOTPLUG && \ + (X86 || ARM || PPC || S390 || SPARC64 || SUPERH || MICROBLAZE) + + select DEBUG_FS if SYSFS + select STACKTRACE if STACKTRACE_SUPPORT + select KALLSYMS + select CRC32 + help + Say Y here if you want to enable the memory leak + detector. The memory allocation/freeing is traced in a way + similar to the Boehm's conservative garbage collector, the + difference being that the orphan objects are not freed but + only shown in /sys/kernel/debug/kmemleak. Enabling this + feature will introduce an overhead to memory + allocations. See Documentation/kmemleak.txt for more + details. + + Enabling DEBUG_SLAB or SLUB_DEBUG may increase the chances + of finding leaks due to the slab objects poisoning. + + In order to access the kmemleak file, debugfs needs to be + mounted (usually at /sys/kernel/debug). + +config DEBUG_KMEMLEAK_EARLY_LOG_SIZE + int "Maximum kmemleak early log entries" + depends on DEBUG_KMEMLEAK + range 200 40000 + default 400 + help + Kmemleak must track all the memory allocations to avoid + reporting false positives. Since memory may be allocated or + freed before kmemleak is initialised, an early log buffer is + used to store these actions. If kmemleak reports "early log + buffer exceeded", please increase this value. + +config DEBUG_KMEMLEAK_TEST + tristate "Simple test for the kernel memory leak detector" + depends on DEBUG_KMEMLEAK + help + Say Y or M here to build a test for the kernel memory leak + detector. This option enables a module that explicitly leaks + memory. + + If unsure, say N. + +config DEBUG_KMEMLEAK_DEFAULT_OFF + bool "Default kmemleak to off" + depends on DEBUG_KMEMLEAK + help + Say Y here to disable kmemleak by default. It can then be enabled + on the command line via kmemleak=on. + +config DEBUG_PREEMPT + bool "Debug preemptible kernel" + depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT + default y + help + If you say Y here then the kernel will use a debug variant of the + commonly used smp_processor_id() function and will print warnings + if kernel code uses it in a preemption-unsafe way. Also, the kernel + will detect preemption count underflows. + +config DEBUG_RT_MUTEXES + bool "RT Mutex debugging, deadlock detection" + depends on DEBUG_KERNEL && RT_MUTEXES + help + This allows rt mutex semantics violations and rt mutex related + deadlocks (lockups) to be detected and reported automatically. + +config DEBUG_PI_LIST + bool + default y + depends on DEBUG_RT_MUTEXES + +config RT_MUTEX_TESTER + bool "Built-in scriptable tester for rt-mutexes" + depends on DEBUG_KERNEL && RT_MUTEXES + help + This option enables a rt-mutex tester. + +config DEBUG_SPINLOCK + bool "Spinlock and rw-lock debugging: basic checks" + depends on DEBUG_KERNEL + help + Say Y here and build SMP to catch missing spinlock initialization + and certain other kinds of spinlock errors commonly made. This is + best used in conjunction with the NMI watchdog so that spinlock + deadlocks are also debuggable. + +config DEBUG_MUTEXES + bool "Mutex debugging: basic checks" + depends on DEBUG_KERNEL + help + This feature allows mutex semantics violations to be detected and + reported. + +config DEBUG_LOCK_ALLOC + bool "Lock debugging: detect incorrect freeing of live locks" + depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select LOCKDEP + help + This feature will check whether any held lock (spinlock, rwlock, + mutex or rwsem) is incorrectly freed by the kernel, via any of the + memory-freeing routines (kfree(), kmem_cache_free(), free_pages(), + vfree(), etc.), whether a live lock is incorrectly reinitialized via + spin_lock_init()/mutex_init()/etc., or whether there is any lock + held during task exit. + +config PROVE_LOCKING + bool "Lock debugging: prove locking correctness" + depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select LOCKDEP + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_LOCK_ALLOC + default n + help + This feature enables the kernel to prove that all locking + that occurs in the kernel runtime is mathematically + correct: that under no circumstance could an arbitrary (and + not yet triggered) combination of observed locking + sequences (on an arbitrary number of CPUs, running an + arbitrary number of tasks and interrupt contexts) cause a + deadlock. + + In short, this feature enables the kernel to report locking + related deadlocks before they actually occur. + + The proof does not depend on how hard and complex a + deadlock scenario would be to trigger: how many + participant CPUs, tasks and irq-contexts would be needed + for it to trigger. The proof also does not depend on + timing: if a race and a resulting deadlock is possible + theoretically (no matter how unlikely the race scenario + is), it will be proven so and will immediately be + reported by the kernel (once the event is observed that + makes the deadlock theoretically possible). + + If a deadlock is impossible (i.e. the locking rules, as + observed by the kernel, are mathematically correct), the + kernel reports nothing. + + NOTE: this feature can also be enabled for rwlocks, mutexes + and rwsems - in which case all dependencies between these + different locking variants are observed and mapped too, and + the proof of observed correctness is also maintained for an + arbitrary combination of these separate locking variants. + + For more details, see Documentation/lockdep-design.txt. + +config PROVE_RCU + bool "RCU debugging: prove RCU correctness" + depends on PROVE_LOCKING + default n + help + This feature enables lockdep extensions that check for correct + use of RCU APIs. This is currently under development. Say Y + if you want to debug RCU usage or help work on the PROVE_RCU + feature. + + Say N if you are unsure. + +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. + + Say N if you are unsure. + +config LOCKDEP + bool + depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select STACKTRACE + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE + select KALLSYMS + select KALLSYMS_ALL + +config LOCK_STAT + bool "Lock usage statistics" + depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT + select LOCKDEP + select DEBUG_SPINLOCK + select DEBUG_MUTEXES + select DEBUG_LOCK_ALLOC + default n + help + This feature enables tracking lock contention points + + For more details, see Documentation/lockstat.txt + + This also enables lock events required by "perf lock", + subcommand of perf. + If you want to use "perf lock", you also need to turn on + CONFIG_EVENT_TRACING. + + CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. + (CONFIG_LOCKDEP defines "acquire" and "release" events.) + +config DEBUG_LOCKDEP + bool "Lock dependency engine debugging" + depends on DEBUG_KERNEL && LOCKDEP + help + If you say Y here, the lock dependency engine will do + additional runtime checks to debug itself, at the price + of more runtime overhead. + +config TRACE_IRQFLAGS + depends on DEBUG_KERNEL + bool + default y + depends on TRACE_IRQFLAGS_SUPPORT + depends on PROVE_LOCKING + +config DEBUG_SPINLOCK_SLEEP + bool "Spinlock debugging: sleep-inside-spinlock checking" + depends on DEBUG_KERNEL + help + If you say Y here, various routines which may sleep will become very + noisy if they are called with a spinlock held. + +config DEBUG_LOCKING_API_SELFTESTS + bool "Locking API boot-time self-tests" + depends on DEBUG_KERNEL + help + Say Y here if you want the kernel to run a short self-test during + bootup. The self-test checks whether common types of locking bugs + are detected by debugging mechanisms or not. (if you disable + lock debugging then those bugs wont be detected of course.) + The following locking APIs are covered: spinlocks, rwlocks, + mutexes and rwsems. + +config STACKTRACE + bool + depends on STACKTRACE_SUPPORT + +config DEBUG_KOBJECT + bool "kobject debugging" + depends on DEBUG_KERNEL + help + If you say Y here, some extra kobject debugging messages will be sent + to the syslog. + +config DEBUG_HIGHMEM + bool "Highmem debugging" + depends on DEBUG_KERNEL && HIGHMEM + help + This options enables addition error checking for high memory systems. + Disable for production systems. + +config DEBUG_BUGVERBOSE + bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED + depends on BUG + depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ + FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 + default y + help + Say Y here to make BUG() panics output the file name and line number + of the BUG call as well as the EIP and oops trace. This aids + debugging but costs about 70-100K of memory. + +config DEBUG_INFO + bool "Compile the kernel with debug info" + depends on DEBUG_KERNEL + help + If you say Y here the resulting kernel image will include + debugging info resulting in a larger kernel image. + This adds debug symbols to the kernel and modules (gcc -g), and + is needed if you intend to use kernel crashdump or binary object + tools like crash, kgdb, LKCD, gdb, etc on the kernel. + Say Y here only if you plan to debug the kernel. + + If unsure, say N. + +config DEBUG_INFO_REDUCED + bool "Reduce debugging information" + depends on DEBUG_INFO + help + If you say Y here gcc is instructed to generate less debugging + information for structure types. This means that tools that + need full debugging information (like kgdb or systemtap) won't + be happy. But if you merely need debugging information to + resolve line numbers there is no loss. Advantage is that + build directory object sizes shrink dramatically over a full + DEBUG_INFO build and compile times are reduced too. + Only works with newer gcc versions. + +config DEBUG_VM + bool "Debug VM" + depends on DEBUG_KERNEL + help + Enable this to turn on extended checks in the virtual-memory system + that may impact performance. + + If unsure, say N. + +config DEBUG_VIRTUAL + bool "Debug VM translations" + depends on DEBUG_KERNEL && X86 + help + Enable some costly sanity checks in virtual to page code. This can + catch mistakes with virt_to_page() and friends. + + If unsure, say N. + +config DEBUG_NOMMU_REGIONS + bool "Debug the global anon/private NOMMU mapping region tree" + depends on DEBUG_KERNEL && !MMU + help + This option causes the global tree of anonymous and private mapping + regions to be regularly checked for invalid topology. + +config DEBUG_WRITECOUNT + bool "Debug filesystem writers count" + depends on DEBUG_KERNEL + help + Enable this to catch wrong use of the writers count in struct + vfsmount. This will increase the size of each file struct by + 32 bits. + + If unsure, say N. + +config DEBUG_MEMORY_INIT + bool "Debug memory initialisation" if EMBEDDED + default !EMBEDDED + help + Enable this for additional checks during memory initialisation. + The sanity checks verify aspects of the VM such as the memory model + and other information provided by the architecture. Verbose + information will be printed at KERN_DEBUG loglevel depending + on the mminit_loglevel= command-line option. + + If unsure, say Y + +config DEBUG_LIST + bool "Debug linked list manipulation" + depends on DEBUG_KERNEL + help + Enable this to turn on extended checks in the linked-list + walking routines. + + If unsure, say N. + +config DEBUG_SG + bool "Debug SG table operations" + depends on DEBUG_KERNEL + help + Enable this to turn on checks on scatter-gather tables. This can + help find problems with drivers that do not properly initialize + their sg tables. + + If unsure, say N. + +config DEBUG_NOTIFIERS + bool "Debug notifier call chains" + depends on DEBUG_KERNEL + help + Enable this to turn on sanity checking for notifier call chains. + This is most useful for kernel developers to make sure that + modules properly unregister themselves from notifier chains. + This is a relatively cheap check but if you care about maximum + performance, say N. + +config DEBUG_CREDENTIALS + bool "Debug credential management" + depends on DEBUG_KERNEL + help + Enable this to turn on some debug checking for credential + management. The additional code keeps track of the number of + pointers from task_structs to any given cred struct, and checks to + see that this number never exceeds the usage count of the cred + struct. + + Furthermore, if SELinux is enabled, this also checks that the + security pointer in the cred struct is never seen to be invalid. + + If unsure, say N. + +# +# Select this config option from the architecture Kconfig, if it +# it is preferred to always offer frame pointers as a config +# option on the architecture (regardless of KERNEL_DEBUG): +# +config ARCH_WANT_FRAME_POINTERS + bool + help + +config FRAME_POINTER + bool "Compile the kernel with frame pointers" + depends on DEBUG_KERNEL && \ + (CRIS || M68K || M68KNOMMU || FRV || UML || \ + AVR32 || SUPERH || BLACKFIN || MN10300) || \ + ARCH_WANT_FRAME_POINTERS + default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS + help + If you say Y here the resulting kernel image will be slightly + larger and slower, but it gives very useful debugging information + in case of kernel bugs. (precise oopses/stacktraces/warnings) + +config BOOT_PRINTK_DELAY + bool "Delay each boot printk message by N milliseconds" + depends on DEBUG_KERNEL && PRINTK && GENERIC_CALIBRATE_DELAY + help + This build option allows you to read kernel boot messages + by inserting a short delay after each one. The delay is + specified in milliseconds on the kernel command line, + using "boot_delay=N". + + It is likely that you would also need to use "lpj=M" to preset + the "loops per jiffie" value. + See a previous boot log for the "lpj" value to use for your + system, and then set "lpj=M" before setting "boot_delay=N". + NOTE: Using this option may adversely affect SMP systems. + I.e., processors other than the first one may not boot up. + BOOT_PRINTK_DELAY also may cause DETECT_SOFTLOCKUP to detect + what it believes to be lockup conditions. + +config RCU_TORTURE_TEST + tristate "torture tests for RCU" + depends on DEBUG_KERNEL + default n + help + This option provides a kernel module that runs torture tests + on the RCU infrastructure. The kernel module may be built + after the fact on the running kernel to be tested, if desired. + + Say Y here if you want RCU torture tests to be built into + the kernel. + Say M if you want the RCU torture tests to build as a module. + Say N if you are unsure. + +config RCU_TORTURE_TEST_RUNNABLE + bool "torture tests for RCU runnable by default" + depends on RCU_TORTURE_TEST = y + default n + help + This option provides a way to build the RCU torture tests + directly into the kernel without them starting up at boot + time. You can use /proc/sys/kernel/rcutorture_runnable + to manually override this setting. This /proc file is + available only when the RCU torture tests have been built + into the kernel. + + Say Y here if you want the RCU torture tests to start during + boot (you probably don't). + Say N here if you want the RCU torture tests to start only + after being manually enabled via /proc. + +config RCU_CPU_STALL_DETECTOR + bool "Check for stalled CPUs delaying RCU grace periods" + depends on TREE_RCU || TREE_PREEMPT_RCU + default y + help + This option causes RCU to printk information on which + CPUs are delaying the current grace period, but only when + the grace period extends for excessive time periods. + + Say N if you want to disable such checks. + + Say Y if you are unsure. + +config RCU_CPU_STALL_VERBOSE + bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" + depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU + default y + help + This option causes RCU to printk detailed per-task information + for any tasks that are stalling the current RCU grace period. + + Say N if you are unsure. + + Say Y if you want to enable such checks. + +config KPROBES_SANITY_TEST + bool "Kprobes sanity tests" + depends on DEBUG_KERNEL + depends on KPROBES + default n + help + This option provides for testing basic kprobes functionality on + boot. A sample kprobe, jprobe and kretprobe are inserted and + verified for functionality. + + Say N if you are unsure. + +config BACKTRACE_SELF_TEST + tristate "Self test for the backtrace code" + depends on DEBUG_KERNEL + default n + help + This option provides a kernel module that can be used to test + the kernel stack backtrace code. This option is not useful + for distributions or general kernels, but only for kernel + developers working on architecture code. + + Note that if you want to also test saved backtraces, you will + have to enable STACKTRACE as well. + + Say N if you are unsure. + +config DEBUG_BLOCK_EXT_DEVT + bool "Force extended block device numbers and spread them" + depends on DEBUG_KERNEL + depends on BLOCK + default n + help + BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON + SOME DISTRIBUTIONS. DO NOT ENABLE THIS UNLESS YOU KNOW WHAT + YOU ARE DOING. Distros, please enable this and fix whatever + is broken. + + Conventionally, block device numbers are allocated from + predetermined contiguous area. However, extended block area + may introduce non-contiguous block device numbers. This + option forces most block device numbers to be allocated from + the extended space and spreads them to discover kernel or + userland code paths which assume predetermined contiguous + device number allocation. + + Note that turning on this debug option shuffles all the + device numbers for all IDE and SCSI devices including libata + ones, so root partition specified using device number + directly (via rdev or root=MAJ:MIN) won't work anymore. + Textual device names (root=/dev/sdXn) will continue to work. + + Say N if you are unsure. + +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + +config LKDTM + tristate "Linux Kernel Dump Test Tool Module" + depends on DEBUG_FS + depends on BLOCK + default n + help + This module enables testing of the different dumping mechanisms by + inducing system failures at predefined crash points. + If you don't need it: say N + Choose M here to compile this code as a module. The module will be + called lkdtm. + + Documentation on how to use the module can be found in + Documentation/fault-injection/provoke-crashes.txt + +config CPU_NOTIFIER_ERROR_INJECT + tristate "CPU notifier error injection module" + depends on HOTPLUG_CPU && DEBUG_KERNEL + help + This option provides a kernel module that can be used to test + the error handling of the cpu notifiers + + To compile this code as a module, choose M here: the module will + be called cpu-notifier-error-inject. + + If unsure, say N. + +config FAULT_INJECTION + bool "Fault-injection framework" + depends on DEBUG_KERNEL + help + Provide fault-injection framework. + For more details, see Documentation/fault-injection/. + +config FAILSLAB + bool "Fault-injection capability for kmalloc" + depends on FAULT_INJECTION + depends on SLAB || SLUB + help + Provide fault-injection capability for kmalloc. + +config FAIL_PAGE_ALLOC + bool "Fault-injection capabilitiy for alloc_pages()" + depends on FAULT_INJECTION + help + Provide fault-injection capability for alloc_pages(). + +config FAIL_MAKE_REQUEST + bool "Fault-injection capability for disk IO" + depends on FAULT_INJECTION && BLOCK + help + Provide fault-injection capability for disk IO. + +config FAIL_IO_TIMEOUT + bool "Fault-injection capability for faking disk interrupts" + depends on FAULT_INJECTION && BLOCK + help + Provide fault-injection capability on end IO handling. This + will make the block layer "forget" an interrupt as configured, + thus exercising the error handling. + + Only works with drivers that use the generic timeout handling, + for others it wont do anything. + +config FAULT_INJECTION_DEBUG_FS + bool "Debugfs entries for fault-injection capabilities" + depends on FAULT_INJECTION && SYSFS && DEBUG_FS + help + Enable configuration of fault-injection capabilities via debugfs. + +config FAULT_INJECTION_STACKTRACE_FILTER + bool "stacktrace filter for fault-injection capabilities" + depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT + depends on !X86_64 + select STACKTRACE + select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE + help + Provide stacktrace filter for fault-injection capabilities + +config LATENCYTOP + bool "Latency measuring infrastructure" + depends on HAVE_LATENCYTOP_SUPPORT + depends on DEBUG_KERNEL + depends on STACKTRACE_SUPPORT + depends on PROC_FS + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE + select KALLSYMS + select KALLSYMS_ALL + select STACKTRACE + select SCHEDSTATS + select SCHED_DEBUG + help + Enable this option if you want to use the LatencyTOP tool + to find out which userspace is blocking on what kernel operations. + +config SYSCTL_SYSCALL_CHECK + bool "Sysctl checks" + depends on SYSCTL + ---help--- + sys_sysctl uses binary paths that have been found challenging + to properly maintain and use. This enables checks that help + you to keep things correct. + +source mm/Kconfig.debug +source kernel/trace/Kconfig + +config PROVIDE_OHCI1394_DMA_INIT + bool "Remote debugging over FireWire early on boot" + depends on PCI && X86 + help + If you want to debug problems which hang or crash the kernel early + on boot and the crashing machine has a FireWire port, you can use + this feature to remotely access the memory of the crashed machine + over FireWire. This employs remote DMA as part of the OHCI1394 + specification which is now the standard for FireWire controllers. + + With remote DMA, you can monitor the printk buffer remotely using + firescope and access all memory below 4GB using fireproxy from gdb. + Even controlling a kernel debugger is possible using remote DMA. + + Usage: + + If ohci1394_dma=early is used as boot parameter, it will initialize + all OHCI1394 controllers which are found in the PCI config space. + + As all changes to the FireWire bus such as enabling and disabling + devices cause a bus reset and thereby disable remote DMA for all + devices, be sure to have the cable plugged and FireWire enabled on + the debugging host before booting the debug target for debugging. + + This code (~1k) is freed after boot. By then, the firewire stack + in charge of the OHCI-1394 controllers should be used instead. + + See Documentation/debugging-via-ohci1394.txt for more information. + +config FIREWIRE_OHCI_REMOTE_DMA + bool "Remote debugging over FireWire with firewire-ohci" + depends on FIREWIRE_OHCI + help + This option lets you use the FireWire bus for remote debugging + with help of the firewire-ohci driver. It enables unfiltered + remote DMA in firewire-ohci. + See Documentation/debugging-via-ohci1394.txt for more information. + + If unsure, say N. + +config BUILD_DOCSRC + bool "Build targets in Documentation/ tree" + depends on HEADERS_CHECK + help + This option attempts to build objects from the source files in the + kernel Documentation/ tree. + + Say N if you are unsure. + +config DYNAMIC_DEBUG + bool "Enable dynamic printk() support" + default n + depends on PRINTK + depends on DEBUG_FS + help + + Compiles debug level messages into the kernel, which would not + otherwise be available at runtime. These messages can then be + enabled/disabled based on various levels of scope - per source file, + function, module, format string, and line number. This mechanism + implicitly enables all pr_debug() and dev_dbg() calls. The impact of + this compile option is a larger kernel text size of about 2%. + + Usage: + + Dynamic debugging is controlled via the 'dynamic_debug/control' file, + which is contained in the 'debugfs' filesystem. Thus, the debugfs + filesystem must first be mounted before making use of this feature. + We refer the control file as: <debugfs>/dynamic_debug/control. This + file contains a list of the debug statements that can be enabled. The + format for each line of the file is: + + filename:lineno [module]function flags format + + filename : source file of the debug statement + lineno : line number of the debug statement + module : module that contains the debug statement + function : function that contains the debug statement + flags : 'p' means the line is turned 'on' for printing + format : the format used for the debug statement + + From a live system: + + nullarbor:~ # cat <debugfs>/dynamic_debug/control + # filename:lineno [module]function flags format + fs/aio.c:222 [aio]__put_ioctx - "__put_ioctx:\040freeing\040%p\012" + fs/aio.c:248 [aio]ioctx_alloc - "ENOMEM:\040nr_events\040too\040high\012" + fs/aio.c:1770 [aio]sys_io_cancel - "calling\040cancel\012" + + Example usage: + + // enable the message at line 1603 of file svcsock.c + nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' > + <debugfs>/dynamic_debug/control + + // enable all the messages in file svcsock.c + nullarbor:~ # echo -n 'file svcsock.c +p' > + <debugfs>/dynamic_debug/control + + // enable all the messages in the NFS server module + nullarbor:~ # echo -n 'module nfsd +p' > + <debugfs>/dynamic_debug/control + + // enable all 12 messages in the function svc_process() + nullarbor:~ # echo -n 'func svc_process +p' > + <debugfs>/dynamic_debug/control + + // disable all 12 messages in the function svc_process() + nullarbor:~ # echo -n 'func svc_process -p' > + <debugfs>/dynamic_debug/control + + See Documentation/dynamic-debug-howto.txt for additional information. + +config DMA_API_DEBUG + bool "Enable debugging of DMA-API usage" + depends on HAVE_DMA_API_DEBUG + help + Enable this option to debug the use of the DMA API by device drivers. + With this option you will be able to detect common bugs in device + drivers like double-freeing of DMA mappings or freeing mappings that + were never allocated. + This option causes a performance degredation. Use only if you want + to debug device drivers. If unsure, say N. + +config ATOMIC64_SELFTEST + bool "Perform an atomic64_t self-test at boot" + help + Enable this option to test the atomic64_t functions at boot. + + If unsure, say N. + +source "samples/Kconfig" + +source "lib/Kconfig.kgdb" + +source "lib/Kconfig.kmemcheck" diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb new file mode 100644 index 00000000..43cb93fa --- /dev/null +++ b/lib/Kconfig.kgdb @@ -0,0 +1,82 @@ + +config HAVE_ARCH_KGDB + bool + +menuconfig KGDB + bool "KGDB: kernel debugger" + depends on HAVE_ARCH_KGDB + depends on DEBUG_KERNEL && EXPERIMENTAL + help + If you say Y here, it will be possible to remotely debug the + kernel using gdb. It is recommended but not required, that + you also turn on the kernel config option + CONFIG_FRAME_POINTER to aid in producing more reliable stack + backtraces in the external debugger. Documentation of + kernel debugger is available at http://kgdb.sourceforge.net + as well as in DocBook form in Documentation/DocBook/. If + unsure, say N. + +if KGDB + +config KGDB_SERIAL_CONSOLE + tristate "KGDB: use kgdb over the serial console" + select CONSOLE_POLL + select MAGIC_SYSRQ + default y + help + Share a serial console with kgdb. Sysrq-g must be used + to break in initially. + +config KGDB_TESTS + bool "KGDB: internal test suite" + default n + help + This is a kgdb I/O module specifically designed to test + kgdb's internal functions. This kgdb I/O module is + intended to for the development of new kgdb stubs + as well as regression testing the kgdb internals. + See the drivers/misc/kgdbts.c for the details about + the tests. The most basic of this I/O module is to boot + a kernel boot arguments "kgdbwait kgdbts=V1F100" + +config KGDB_TESTS_ON_BOOT + bool "KGDB: Run tests on boot" + depends on KGDB_TESTS + default n + help + Run the kgdb tests on boot up automatically without the need + to pass in a kernel parameter + +config KGDB_TESTS_BOOT_STRING + string "KGDB: which internal kgdb tests to run" + depends on KGDB_TESTS_ON_BOOT + default "V1F100" + help + This is the command string to send the kgdb test suite on + boot. See the drivers/misc/kgdbts.c for detailed + information about other strings you could use beyond the + default of V1F100. + +config KGDB_LOW_LEVEL_TRAP + bool "KGDB: Allow debugging with traps in notifiers" + depends on X86 || MIPS + default n + help + This will add an extra call back to kgdb for the breakpoint + exception handler on which will will allow kgdb to step + through a notify handler. + +config KGDB_KDB + bool "KGDB_KDB: include kdb frontend for kgdb" + default n + help + KDB frontend for kernel + +config KDB_KEYBOARD + bool "KGDB_KDB: keyboard as input device" + depends on VT && KGDB_KDB + default n + help + KDB can use a PS/2 type keyboard for an input device + +endif # KGDB diff --git a/lib/Kconfig.kmemcheck b/lib/Kconfig.kmemcheck new file mode 100644 index 00000000..846e039a --- /dev/null +++ b/lib/Kconfig.kmemcheck @@ -0,0 +1,94 @@ +config HAVE_ARCH_KMEMCHECK + bool + +if HAVE_ARCH_KMEMCHECK + +menuconfig KMEMCHECK + bool "kmemcheck: trap use of uninitialized memory" + depends on DEBUG_KERNEL + depends on !X86_USE_3DNOW + depends on SLUB || SLAB + depends on !CC_OPTIMIZE_FOR_SIZE + depends on !FUNCTION_TRACER + select FRAME_POINTER + select STACKTRACE + default n + help + This option enables tracing of dynamically allocated kernel memory + to see if memory is used before it has been given an initial value. + Be aware that this requires half of your memory for bookkeeping and + will insert extra code at *every* read and write to tracked memory + thus slow down the kernel code (but user code is unaffected). + + The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable + or enable kmemcheck at boot-time. If the kernel is started with + kmemcheck=0, the large memory and CPU overhead is not incurred. + +choice + prompt "kmemcheck: default mode at boot" + depends on KMEMCHECK + default KMEMCHECK_ONESHOT_BY_DEFAULT + help + This option controls the default behaviour of kmemcheck when the + kernel boots and no kmemcheck= parameter is given. + +config KMEMCHECK_DISABLED_BY_DEFAULT + bool "disabled" + depends on KMEMCHECK + +config KMEMCHECK_ENABLED_BY_DEFAULT + bool "enabled" + depends on KMEMCHECK + +config KMEMCHECK_ONESHOT_BY_DEFAULT + bool "one-shot" + depends on KMEMCHECK + help + In one-shot mode, only the first error detected is reported before + kmemcheck is disabled. + +endchoice + +config KMEMCHECK_QUEUE_SIZE + int "kmemcheck: error queue size" + depends on KMEMCHECK + default 64 + help + Select the maximum number of errors to store in the queue. Since + errors can occur virtually anywhere and in any context, we need a + temporary storage area which is guarantueed not to generate any + other faults. The queue will be emptied as soon as a tasklet may + be scheduled. If the queue is full, new error reports will be + lost. + +config KMEMCHECK_SHADOW_COPY_SHIFT + int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)" + depends on KMEMCHECK + range 2 8 + default 5 + help + Select the number of shadow bytes to save along with each entry of + the queue. These bytes indicate what parts of an allocation are + initialized, uninitialized, etc. and will be displayed when an + error is detected to help the debugging of a particular problem. + +config KMEMCHECK_PARTIAL_OK + bool "kmemcheck: allow partially uninitialized memory" + depends on KMEMCHECK + default y + help + This option works around certain GCC optimizations that produce + 32-bit reads from 16-bit variables where the upper 16 bits are + thrown away afterwards. This may of course also hide some real + bugs. + +config KMEMCHECK_BITOPS_OK + bool "kmemcheck: allow bit-field manipulation" + depends on KMEMCHECK + default n + help + This option silences warnings that would be generated for bit-field + accesses where not all the bits are initialized at the same time. + This may also hide some real bugs. + +endif diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 00000000..e6a3763b --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,118 @@ +# +# Makefile for some libs needed in the kernel. +# + +ifdef CONFIG_FUNCTION_TRACER +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +endif + +lib-y := ctype.o string.o vsprintf.o cmdline.o \ + rbtree.o radix-tree.o dump_stack.o \ + idr.o int_sqrt.o extable.o prio_tree.o \ + sha1.o irq_regs.o reciprocal_div.o argv_split.o \ + proportions.o prio_heap.o ratelimit.o show_mem.o \ + is_single_threaded.o plist.o decompress.o flex_array.o + +lib-$(CONFIG_MMU) += ioremap.o +lib-$(CONFIG_SMP) += cpumask.o + +lib-y += kobject.o kref.o klist.o + +obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ + bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ + string_helpers.o gcd.o lcm.o list_sort.o uuid.o + +ifeq ($(CONFIG_DEBUG_KOBJECT),y) +CFLAGS_kobject.o += -DDEBUG +CFLAGS_kobject_uevent.o += -DDEBUG +endif + +lib-$(CONFIG_HOTPLUG) += kobject_uevent.o +obj-$(CONFIG_GENERIC_IOMAP) += iomap.o +obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o +obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o +obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o +lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o +obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o + +CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) +obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + +obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +obj-$(CONFIG_BTREE) += btree.o +obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o +obj-$(CONFIG_DEBUG_LIST) += list_debug.o +obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o + +ifneq ($(CONFIG_HAVE_DEC_LOCK),y) + lib-y += dec_and_lock.o +endif + +obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_RATIONAL) += rational.o +obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o +obj-$(CONFIG_CRC16) += crc16.o +obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o +obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o +obj-$(CONFIG_CRC32) += crc32.o +obj-$(CONFIG_CRC7) += crc7.o +obj-$(CONFIG_LIBCRC32C) += libcrc32c.o +obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o + +obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ +obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ +obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ +obj-$(CONFIG_LZO_COMPRESS) += lzo/ +obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ +obj-$(CONFIG_RAID6_PQ) += raid6/ + +lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o +lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o +lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o +lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o + +obj-$(CONFIG_TEXTSEARCH) += textsearch.o +obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o +obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o +obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o +obj-$(CONFIG_SMP) += percpu_counter.o +obj-$(CONFIG_AUDIT_GENERIC) += audit.o + +obj-$(CONFIG_SWIOTLB) += swiotlb.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o +obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o + +lib-$(CONFIG_GENERIC_BUG) += bug.o + +obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += syscall.o + +obj-$(CONFIG_DYNAMIC_DEBUG) += dynamic_debug.o + +obj-$(CONFIG_NLATTR) += nlattr.o + +obj-$(CONFIG_LRU_CACHE) += lru_cache.o + +obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o + +obj-$(CONFIG_GENERIC_CSUM) += checksum.o + +obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o + +obj-$(CONFIG_ATOMIC64_SELFTEST) += atomic64_test.o + +hostprogs-y := gen_crc32table +clean-files := crc32table.h + +$(obj)/crc32.o: $(obj)/crc32table.h + +quiet_cmd_crc32 = GEN $@ + cmd_crc32 = $< > $@ + +$(obj)/crc32table.h: $(obj)/gen_crc32table + $(call cmd,crc32) diff --git a/lib/argv_split.c b/lib/argv_split.c new file mode 100644 index 00000000..4b1b083f --- /dev/null +++ b/lib/argv_split.c @@ -0,0 +1,101 @@ +/* + * Helper function for splitting a string into an argv-like array. + */ + +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/module.h> + +static const char *skip_arg(const char *cp) +{ + while (*cp && !isspace(*cp)) + cp++; + + return cp; +} + +static int count_argc(const char *str) +{ + int count = 0; + + while (*str) { + str = skip_spaces(str); + if (*str) { + count++; + str = skip_arg(str); + } + } + + return count; +} + +/** + * argv_free - free an argv + * @argv - the argument vector to be freed + * + * Frees an argv and the strings it points to. + */ +void argv_free(char **argv) +{ + char **p; + for (p = argv; *p; p++) + kfree(*p); + + kfree(argv); +} +EXPORT_SYMBOL(argv_free); + +/** + * argv_split - split a string at whitespace, returning an argv + * @gfp: the GFP mask used to allocate memory + * @str: the string to be split + * @argcp: returned argument count + * + * Returns an array of pointers to strings which are split out from + * @str. This is performed by strictly splitting on white-space; no + * quote processing is performed. Multiple whitespace characters are + * considered to be a single argument separator. The returned array + * is always NULL-terminated. Returns NULL on memory allocation + * failure. + */ +char **argv_split(gfp_t gfp, const char *str, int *argcp) +{ + int argc = count_argc(str); + char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp); + char **argvp; + + if (argv == NULL) + goto out; + + if (argcp) + *argcp = argc; + + argvp = argv; + + while (*str) { + str = skip_spaces(str); + + if (*str) { + const char *p = str; + char *t; + + str = skip_arg(str); + + t = kstrndup(p, str-p, gfp); + if (t == NULL) + goto fail; + *argvp++ = t; + } + } + *argvp = NULL; + + out: + return argv; + + fail: + argv_free(argv); + return NULL; +} +EXPORT_SYMBOL(argv_split); diff --git a/lib/atomic64.c b/lib/atomic64.c new file mode 100644 index 00000000..a21c12bc --- /dev/null +++ b/lib/atomic64.c @@ -0,0 +1,186 @@ +/* + * Generic implementation of 64-bit atomics using spinlocks, + * useful on processors that don't have 64-bit atomic instructions. + * + * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/types.h> +#include <linux/cache.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/module.h> +#include <asm/atomic.h> + +/* + * We use a hashed array of spinlocks to provide exclusive access + * to each atomic64_t variable. Since this is expected to used on + * systems with small numbers of CPUs (<= 4 or so), we use a + * relatively small array of 16 spinlocks to avoid wasting too much + * memory on the spinlock array. + */ +#define NR_LOCKS 16 + +/* + * Ensure each lock is in a separate cacheline. + */ +static union { + spinlock_t lock; + char pad[L1_CACHE_BYTES]; +} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp; + +static inline spinlock_t *lock_addr(const atomic64_t *v) +{ + unsigned long addr = (unsigned long) v; + + addr >>= L1_CACHE_SHIFT; + addr ^= (addr >> 8) ^ (addr >> 16); + return &atomic64_lock[addr & (NR_LOCKS - 1)].lock; +} + +long long atomic64_read(const atomic64_t *v) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + long long val; + + spin_lock_irqsave(lock, flags); + val = v->counter; + spin_unlock_irqrestore(lock, flags); + return val; +} +EXPORT_SYMBOL(atomic64_read); + +void atomic64_set(atomic64_t *v, long long i) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + + spin_lock_irqsave(lock, flags); + v->counter = i; + spin_unlock_irqrestore(lock, flags); +} +EXPORT_SYMBOL(atomic64_set); + +void atomic64_add(long long a, atomic64_t *v) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + + spin_lock_irqsave(lock, flags); + v->counter += a; + spin_unlock_irqrestore(lock, flags); +} +EXPORT_SYMBOL(atomic64_add); + +long long atomic64_add_return(long long a, atomic64_t *v) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + long long val; + + spin_lock_irqsave(lock, flags); + val = v->counter += a; + spin_unlock_irqrestore(lock, flags); + return val; +} +EXPORT_SYMBOL(atomic64_add_return); + +void atomic64_sub(long long a, atomic64_t *v) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + + spin_lock_irqsave(lock, flags); + v->counter -= a; + spin_unlock_irqrestore(lock, flags); +} +EXPORT_SYMBOL(atomic64_sub); + +long long atomic64_sub_return(long long a, atomic64_t *v) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + long long val; + + spin_lock_irqsave(lock, flags); + val = v->counter -= a; + spin_unlock_irqrestore(lock, flags); + return val; +} +EXPORT_SYMBOL(atomic64_sub_return); + +long long atomic64_dec_if_positive(atomic64_t *v) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + long long val; + + spin_lock_irqsave(lock, flags); + val = v->counter - 1; + if (val >= 0) + v->counter = val; + spin_unlock_irqrestore(lock, flags); + return val; +} +EXPORT_SYMBOL(atomic64_dec_if_positive); + +long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + long long val; + + spin_lock_irqsave(lock, flags); + val = v->counter; + if (val == o) + v->counter = n; + spin_unlock_irqrestore(lock, flags); + return val; +} +EXPORT_SYMBOL(atomic64_cmpxchg); + +long long atomic64_xchg(atomic64_t *v, long long new) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + long long val; + + spin_lock_irqsave(lock, flags); + val = v->counter; + v->counter = new; + spin_unlock_irqrestore(lock, flags); + return val; +} +EXPORT_SYMBOL(atomic64_xchg); + +int atomic64_add_unless(atomic64_t *v, long long a, long long u) +{ + unsigned long flags; + spinlock_t *lock = lock_addr(v); + int ret = 0; + + spin_lock_irqsave(lock, flags); + if (v->counter != u) { + v->counter += a; + ret = 1; + } + spin_unlock_irqrestore(lock, flags); + return ret; +} +EXPORT_SYMBOL(atomic64_add_unless); + +static int init_atomic64_lock(void) +{ + int i; + + for (i = 0; i < NR_LOCKS; ++i) + spin_lock_init(&atomic64_lock[i].lock); + return 0; +} + +pure_initcall(init_atomic64_lock); diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c new file mode 100644 index 00000000..44524cc8 --- /dev/null +++ b/lib/atomic64_test.c @@ -0,0 +1,166 @@ +/* + * Testsuite for atomic64_t functions + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <asm/atomic.h> + +#define INIT(c) do { atomic64_set(&v, c); r = c; } while (0) +static __init int test_atomic64(void) +{ + long long v0 = 0xaaa31337c001d00dLL; + long long v1 = 0xdeadbeefdeafcafeLL; + long long v2 = 0xfaceabadf00df001LL; + long long onestwos = 0x1111111122222222LL; + long long one = 1LL; + + atomic64_t v = ATOMIC64_INIT(v0); + long long r = v0; + BUG_ON(v.counter != r); + + atomic64_set(&v, v1); + r = v1; + BUG_ON(v.counter != r); + BUG_ON(atomic64_read(&v) != r); + + INIT(v0); + atomic64_add(onestwos, &v); + r += onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_add(-one, &v); + r += -one; + BUG_ON(v.counter != r); + + INIT(v0); + r += onestwos; + BUG_ON(atomic64_add_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r += -one; + BUG_ON(atomic64_add_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(onestwos, &v); + r -= onestwos; + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_sub(-one, &v); + r -= -one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= onestwos; + BUG_ON(atomic64_sub_return(onestwos, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + r -= -one; + BUG_ON(atomic64_sub_return(-one, &v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_inc(&v); + r += one; + BUG_ON(v.counter != r); + + INIT(v0); + r += one; + BUG_ON(atomic64_inc_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + atomic64_dec(&v); + r -= one; + BUG_ON(v.counter != r); + + INIT(v0); + r -= one; + BUG_ON(atomic64_dec_return(&v) != r); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_xchg(&v, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v0, v1) != v0); + r = v1; + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_cmpxchg(&v, v2, v1) != v0); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(atomic64_add_unless(&v, one, v0)); + BUG_ON(v.counter != r); + + INIT(v0); + BUG_ON(!atomic64_add_unless(&v, one, v1)); + r += one; + BUG_ON(v.counter != r); + +#if defined(CONFIG_X86) || defined(CONFIG_MIPS) || defined(CONFIG_PPC) || \ + defined(CONFIG_S390) || defined(_ASM_GENERIC_ATOMIC64_H) || defined(CONFIG_ARM) + INIT(onestwos); + BUG_ON(atomic64_dec_if_positive(&v) != (onestwos - 1)); + r -= one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_dec_if_positive(&v) != -one); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(atomic64_dec_if_positive(&v) != (-one - one)); + BUG_ON(v.counter != r); +#else +#warning Please implement atomic64_dec_if_positive for your architecture, and add it to the IF above +#endif + + INIT(onestwos); + BUG_ON(!atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + + INIT(0); + BUG_ON(atomic64_inc_not_zero(&v)); + BUG_ON(v.counter != r); + + INIT(-one); + BUG_ON(!atomic64_inc_not_zero(&v)); + r += one; + BUG_ON(v.counter != r); + +#ifdef CONFIG_X86 + printk(KERN_INFO "atomic64 test passed for %s platform %s CX8 and %s SSE\n", +#ifdef CONFIG_X86_64 + "x86-64", +#elif defined(CONFIG_X86_CMPXCHG64) + "i586+", +#else + "i386+", +#endif + boot_cpu_has(X86_FEATURE_CX8) ? "with" : "without", + boot_cpu_has(X86_FEATURE_XMM) ? "with" : "without"); +#else + printk(KERN_INFO "atomic64 test passed\n"); +#endif + + return 0; +} + +core_initcall(test_atomic64); diff --git a/lib/audit.c b/lib/audit.c new file mode 100644 index 00000000..8e7dc1c6 --- /dev/null +++ b/lib/audit.c @@ -0,0 +1,66 @@ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> + +static unsigned dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return 2; +#ifdef __NR_openat + case __NR_openat: + return 3; +#endif +#ifdef __NR_socketcall + case __NR_socketcall: + return 4; +#endif + case __NR_execve: + return 5; + default: + return 0; + } +} + +static int __init audit_classes_init(void) +{ + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/lib/bcd.c b/lib/bcd.c new file mode 100644 index 00000000..d74257fd --- /dev/null +++ b/lib/bcd.c @@ -0,0 +1,14 @@ +#include <linux/bcd.h> +#include <linux/module.h> + +unsigned bcd2bin(unsigned char val) +{ + return (val & 0x0f) + (val >> 4) * 10; +} +EXPORT_SYMBOL(bcd2bin); + +unsigned char bin2bcd(unsigned val) +{ + return ((val / 10) << 4) + val % 10; +} +EXPORT_SYMBOL(bin2bcd); diff --git a/lib/bitmap.c b/lib/bitmap.c new file mode 100644 index 00000000..a02625a2 --- /dev/null +++ b/lib/bitmap.c @@ -0,0 +1,1102 @@ +/* + * lib/bitmap.c + * Helper functions for bitmap.h. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/errno.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <asm/uaccess.h> + +/* + * bitmaps provide an array of bits, implemented using an an + * array of unsigned longs. The number of valid bits in a + * given bitmap does _not_ need to be an exact multiple of + * BITS_PER_LONG. + * + * The possible unused bits in the last, partially used word + * of a bitmap are 'don't care'. The implementation makes + * no particular effort to keep them zero. It ensures that + * their value will not affect the results of any operation. + * The bitmap operations that return Boolean (bitmap_empty, + * for example) or scalar (bitmap_weight, for example) results + * carefully filter out these unused bits from impacting their + * results. + * + * These operations actually hold to a slightly stronger rule: + * if you don't input any bitmaps to these ops that have some + * unused bits set, then they won't output any set unused bits + * in output bitmaps. + * + * The byte ordering of bitmaps is more natural on little + * endian architectures. See the big-endian headers + * include/asm-ppc64/bitops.h and include/asm-s390/bitops.h + * for the best explanations of this ordering. + */ + +int __bitmap_empty(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} +EXPORT_SYMBOL(__bitmap_empty); + +int __bitmap_full(const unsigned long *bitmap, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (~bitmap[k]) + return 0; + + if (bits % BITS_PER_LONG) + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} +EXPORT_SYMBOL(__bitmap_full); + +int __bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] != bitmap2[k]) + return 0; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 0; + + return 1; +} +EXPORT_SYMBOL(__bitmap_equal); + +void __bitmap_complement(unsigned long *dst, const unsigned long *src, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + dst[k] = ~src[k]; + + if (bits % BITS_PER_LONG) + dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); +} +EXPORT_SYMBOL(__bitmap_complement); + +/** + * __bitmap_shift_right - logical right shift of the bits in a bitmap + * @dst : destination bitmap + * @src : source bitmap + * @shift : shift by this many bits + * @bits : bitmap size, in bits + * + * Shifting right (dividing) means moving bits in the MS -> LS bit + * direction. Zeros are fed into the vacated MS positions and the + * LS bits shifted off the bottom are lost. + */ +void __bitmap_shift_right(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned long mask = (1UL << left) - 1; + for (k = 0; off + k < lim; ++k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take lower rem bits of + * word above and make them the top rem bits of result. + */ + if (!rem || off + k + 1 >= lim) + upper = 0; + else { + upper = src[off + k + 1]; + if (off + k + 1 == lim - 1 && left) + upper &= mask; + } + lower = src[off + k]; + if (left && off + k == lim - 1) + lower &= mask; + dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem; + if (left && k == lim - 1) + dst[k] &= mask; + } + if (off) + memset(&dst[lim - off], 0, off*sizeof(unsigned long)); +} +EXPORT_SYMBOL(__bitmap_shift_right); + + +/** + * __bitmap_shift_left - logical left shift of the bits in a bitmap + * @dst : destination bitmap + * @src : source bitmap + * @shift : shift by this many bits + * @bits : bitmap size, in bits + * + * Shifting left (multiplying) means moving bits in the LS -> MS + * direction. Zeros are fed into the vacated LS bit positions + * and those MS bits shifted off the top are lost. + */ + +void __bitmap_shift_left(unsigned long *dst, + const unsigned long *src, int shift, int bits) +{ + int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; + int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + for (k = lim - off - 1; k >= 0; --k) { + unsigned long upper, lower; + + /* + * If shift is not word aligned, take upper rem bits of + * word below and make them the bottom rem bits of result. + */ + if (rem && k > 0) + lower = src[k - 1]; + else + lower = 0; + upper = src[k]; + if (left && k == lim - 1) + upper &= (1UL << left) - 1; + dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem; + if (left && k + off == lim - 1) + dst[k + off] &= (1UL << left) - 1; + } + if (off) + memset(dst, 0, off*sizeof(unsigned long)); +} +EXPORT_SYMBOL(__bitmap_shift_left); + +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; + + for (k = 0; k < nr; k++) + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + return result != 0; +} +EXPORT_SYMBOL(__bitmap_and); + +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] | bitmap2[k]; +} +EXPORT_SYMBOL(__bitmap_or); + +void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) + dst[k] = bitmap1[k] ^ bitmap2[k]; +} +EXPORT_SYMBOL(__bitmap_xor); + +int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k; + int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; + + for (k = 0; k < nr; k++) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + return result != 0; +} +EXPORT_SYMBOL(__bitmap_andnot); + +int __bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & bitmap2[k]) + return 1; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 1; + return 0; +} +EXPORT_SYMBOL(__bitmap_intersects); + +int __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits) +{ + int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & ~bitmap2[k]) + return 0; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return 0; + return 1; +} +EXPORT_SYMBOL(__bitmap_subset); + +int __bitmap_weight(const unsigned long *bitmap, int bits) +{ + int k, w = 0, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; k++) + w += hweight_long(bitmap[k]); + + if (bits % BITS_PER_LONG) + w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits)); + + return w; +} +EXPORT_SYMBOL(__bitmap_weight); + +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) + +void bitmap_set(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + *p |= mask_to_set; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + *p |= mask_to_set; + } +} +EXPORT_SYMBOL(bitmap_set); + +void bitmap_clear(unsigned long *map, int start, int nr) +{ + unsigned long *p = map + BIT_WORD(start); + const int size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} +EXPORT_SYMBOL(bitmap_clear); + +/** + * bitmap_find_next_zero_area - find a contiguous aligned zero area + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @align_mask: Alignment mask for zero area + * @align_offset: Alignment offset for zero area. + * + * The @align_mask should be one less than a power of 2; the effect is that + * the bit offset of all zero areas this function finds plus @align_offset + * is multiple of that power of 2. + */ +unsigned long bitmap_find_next_zero_area_off(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned int nr, + unsigned long align_mask, + unsigned long align_offset) +{ + unsigned long index, end, i; +again: + index = find_next_zero_bit(map, size, start); + + /* Align allocation */ + index = __ALIGN_MASK(index + align_offset, align_mask) - align_offset; + + end = index + nr; + if (end > size) + return end; + i = find_next_bit(map, end, index); + if (i < end) { + start = i + 1; + goto again; + } + return index; +} +EXPORT_SYMBOL(bitmap_find_next_zero_area_off); + +/* + * Bitmap printing & parsing functions: first version by Bill Irwin, + * second version by Paul Jackson, third by Joe Korty. + */ + +#define CHUNKSZ 32 +#define nbits_to_hold_value(val) fls(val) +#define unhex(c) (isdigit(c) ? (c - '0') : (toupper(c) - 'A' + 10)) +#define BASEDEC 10 /* fancier cpuset lists input in decimal */ + +/** + * bitmap_scnprintf - convert bitmap to an ASCII hex string. + * @buf: byte buffer into which string is placed + * @buflen: reserved size of @buf, in bytes + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * + * Exactly @nmaskbits bits are displayed. Hex digits are grouped into + * comma-separated sets of eight digits per set. + */ +int bitmap_scnprintf(char *buf, unsigned int buflen, + const unsigned long *maskp, int nmaskbits) +{ + int i, word, bit, len = 0; + unsigned long val; + const char *sep = ""; + int chunksz; + u32 chunkmask; + + chunksz = nmaskbits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + chunkmask = ((1ULL << chunksz) - 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (maskp[word] >> bit) & chunkmask; + len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep, + (chunksz+3)/4, val); + chunksz = CHUNKSZ; + sep = ","; + } + return len; +} +EXPORT_SYMBOL(bitmap_scnprintf); + +/** + * __bitmap_parse - convert an ASCII hex string into a bitmap. + * @buf: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Commas group hex digits into chunks. Each chunk defines exactly 32 + * bits of the resultant bitmask. No chunk may specify a value larger + * than 32 bits (%-EOVERFLOW), and if a chunk specifies a smaller value + * then leading 0-bits are prepended. %-EINVAL is returned for illegal + * characters and for grouping errors such as "1,,5", ",44", "," and "". + * Leading and trailing whitespace accepted, but not embedded whitespace. + */ +int __bitmap_parse(const char *buf, unsigned int buflen, + int is_user, unsigned long *maskp, + int nmaskbits) +{ + int c, old_c, totaldigits, ndigits, nchunks, nbits; + u32 chunk; + const char __user *ubuf = buf; + + bitmap_zero(maskp, nmaskbits); + + nchunks = nbits = totaldigits = c = 0; + do { + chunk = ndigits = 0; + + /* Get the next chunk of the bitmap */ + while (buflen) { + old_c = c; + if (is_user) { + if (__get_user(c, ubuf++)) + return -EFAULT; + } + else + c = *buf++; + buflen--; + if (isspace(c)) + continue; + + /* + * If the last character was a space and the current + * character isn't '\0', we've got embedded whitespace. + * This is a no-no, so throw an error. + */ + if (totaldigits && c && isspace(old_c)) + return -EINVAL; + + /* A '\0' or a ',' signal the end of the chunk */ + if (c == '\0' || c == ',') + break; + + if (!isxdigit(c)) + return -EINVAL; + + /* + * Make sure there are at least 4 free bits in 'chunk'. + * If not, this hexdigit will overflow 'chunk', so + * throw an error. + */ + if (chunk & ~((1UL << (CHUNKSZ - 4)) - 1)) + return -EOVERFLOW; + + chunk = (chunk << 4) | unhex(c); + ndigits++; totaldigits++; + } + if (ndigits == 0) + return -EINVAL; + if (nchunks == 0 && chunk == 0) + continue; + + __bitmap_shift_left(maskp, maskp, CHUNKSZ, nmaskbits); + *maskp |= chunk; + nchunks++; + nbits += (nchunks == 1) ? nbits_to_hold_value(chunk) : CHUNKSZ; + if (nbits > nmaskbits) + return -EOVERFLOW; + } while (buflen && c == ','); + + return 0; +} +EXPORT_SYMBOL(__bitmap_parse); + +/** + * bitmap_parse_user - convert an ASCII hex string in a user buffer into a bitmap + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for __bitmap_parse(), providing it with user buffer. + * + * We cannot have this as an inline function in bitmap.h because it needs + * linux/uaccess.h to get the access_ok() declaration and this causes + * cyclic dependencies. + */ +int bitmap_parse_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + if (!access_ok(VERIFY_READ, ubuf, ulen)) + return -EFAULT; + return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits); +} +EXPORT_SYMBOL(bitmap_parse_user); + +/* + * bscnl_emit(buf, buflen, rbot, rtop, bp) + * + * Helper routine for bitmap_scnlistprintf(). Write decimal number + * or range to buf, suppressing output past buf+buflen, with optional + * comma-prefix. Return len of what would be written to buf, if it + * all fit. + */ +static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) +{ + if (len > 0) + len += scnprintf(buf + len, buflen - len, ","); + if (rbot == rtop) + len += scnprintf(buf + len, buflen - len, "%d", rbot); + else + len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop); + return len; +} + +/** + * bitmap_scnlistprintf - convert bitmap to list format ASCII string + * @buf: byte buffer into which string is placed + * @buflen: reserved size of @buf, in bytes + * @maskp: pointer to bitmap to convert + * @nmaskbits: size of bitmap, in bits + * + * Output format is a comma-separated list of decimal numbers and + * ranges. Consecutively set bits are shown as two hyphen-separated + * decimal numbers, the smallest and largest bit numbers set in + * the range. Output format is compatible with the format + * accepted as input by bitmap_parselist(). + * + * The return value is the number of characters which would be + * generated for the given input, excluding the trailing '\0', as + * per ISO C99. + */ +int bitmap_scnlistprintf(char *buf, unsigned int buflen, + const unsigned long *maskp, int nmaskbits) +{ + int len = 0; + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + int cur, rbot, rtop; + + if (buflen == 0) + return 0; + buf[0] = 0; + + rbot = cur = find_first_bit(maskp, nmaskbits); + while (cur < nmaskbits) { + rtop = cur; + cur = find_next_bit(maskp, nmaskbits, cur+1); + if (cur >= nmaskbits || cur > rtop + 1) { + len = bscnl_emit(buf, buflen, rbot, rtop, len); + rbot = cur; + } + } + return len; +} +EXPORT_SYMBOL(bitmap_scnlistprintf); + +/** + * bitmap_parselist - convert list format ASCII string to bitmap + * @bp: read nul-terminated user string from this buffer + * @maskp: write resulting mask here + * @nmaskbits: number of bits in mask to be written + * + * Input format is a comma-separated list of decimal numbers and + * ranges. Consecutively set bits are shown as two hyphen-separated + * decimal numbers, the smallest and largest bit numbers set in + * the range. + * + * Returns 0 on success, -errno on invalid input strings. + * Error values: + * %-EINVAL: second number in range smaller than first + * %-EINVAL: invalid character in string + * %-ERANGE: bit number specified too large for mask + */ +int bitmap_parselist(const char *bp, unsigned long *maskp, int nmaskbits) +{ + unsigned a, b; + + bitmap_zero(maskp, nmaskbits); + do { + if (!isdigit(*bp)) + return -EINVAL; + b = a = simple_strtoul(bp, (char **)&bp, BASEDEC); + if (*bp == '-') { + bp++; + if (!isdigit(*bp)) + return -EINVAL; + b = simple_strtoul(bp, (char **)&bp, BASEDEC); + } + if (!(a <= b)) + return -EINVAL; + if (b >= nmaskbits) + return -ERANGE; + while (a <= b) { + set_bit(a, maskp); + a++; + } + if (*bp == ',') + bp++; + } while (*bp != '\0' && *bp != '\n'); + return 0; +} +EXPORT_SYMBOL(bitmap_parselist); + +/** + * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap + * @buf: pointer to a bitmap + * @pos: a bit position in @buf (0 <= @pos < @bits) + * @bits: number of valid bit positions in @buf + * + * Map the bit at position @pos in @buf (of length @bits) to the + * ordinal of which set bit it is. If it is not set or if @pos + * is not a valid bit position, map to -1. + * + * If for example, just bits 4 through 7 are set in @buf, then @pos + * values 4 through 7 will get mapped to 0 through 3, respectively, + * and other @pos values will get mapped to 0. When @pos value 7 + * gets mapped to (returns) @ord value 3 in this example, that means + * that bit 7 is the 3rd (starting with 0th) set bit in @buf. + * + * The bit positions 0 through @bits are valid positions in @buf. + */ +static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) +{ + int i, ord; + + if (pos < 0 || pos >= bits || !test_bit(pos, buf)) + return -1; + + i = find_first_bit(buf, bits); + ord = 0; + while (i < pos) { + i = find_next_bit(buf, bits, i + 1); + ord++; + } + BUG_ON(i != pos); + + return ord; +} + +/** + * bitmap_ord_to_pos - find position of n-th set bit in bitmap + * @buf: pointer to bitmap + * @ord: ordinal bit position (n-th set bit, n >= 0) + * @bits: number of valid bit positions in @buf + * + * Map the ordinal offset of bit @ord in @buf to its position in @buf. + * Value of @ord should be in range 0 <= @ord < weight(buf), else + * results are undefined. + * + * If for example, just bits 4 through 7 are set in @buf, then @ord + * values 0 through 3 will get mapped to 4 through 7, respectively, + * and all other @ord values return undefined values. When @ord value 3 + * gets mapped to (returns) @pos value 7 in this example, that means + * that the 3rd set bit (starting with 0th) is at position 7 in @buf. + * + * The bit positions 0 through @bits are valid positions in @buf. + */ +static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +{ + int pos = 0; + + if (ord >= 0 && ord < bits) { + int i; + + for (i = find_first_bit(buf, bits); + i < bits && ord > 0; + i = find_next_bit(buf, bits, i + 1)) + ord--; + if (i < bits && ord == 0) + pos = i; + } + + return pos; +} + +/** + * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap + * @dst: remapped result + * @src: subset to be remapped + * @old: defines domain of map + * @new: defines range of map + * @bits: number of bits in each of these bitmaps + * + * Let @old and @new define a mapping of bit positions, such that + * whatever position is held by the n-th set bit in @old is mapped + * to the n-th set bit in @new. In the more general case, allowing + * for the possibility that the weight 'w' of @new is less than the + * weight of @old, map the position of the n-th set bit in @old to + * the position of the m-th set bit in @new, where m == n % w. + * + * If either of the @old and @new bitmaps are empty, or if @src and + * @dst point to the same location, then this routine copies @src + * to @dst. + * + * The positions of unset bits in @old are mapped to themselves + * (the identify map). + * + * Apply the above specified mapping to @src, placing the result in + * @dst, clearing any bits previously set in @dst. + * + * For example, lets say that @old has bits 4 through 7 set, and + * @new has bits 12 through 15 set. This defines the mapping of bit + * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other + * bit positions unchanged. So if say @src comes into this routine + * with bits 1, 5 and 7 set, then @dst should leave with bits 1, + * 13 and 15 set. + */ +void bitmap_remap(unsigned long *dst, const unsigned long *src, + const unsigned long *old, const unsigned long *new, + int bits) +{ + int oldbit, w; + + if (dst == src) /* following doesn't handle inplace remaps */ + return; + bitmap_zero(dst, bits); + + w = bitmap_weight(new, bits); + for_each_set_bit(oldbit, src, bits) { + int n = bitmap_pos_to_ord(old, oldbit, bits); + + if (n < 0 || w == 0) + set_bit(oldbit, dst); /* identity map */ + else + set_bit(bitmap_ord_to_pos(new, n % w, bits), dst); + } +} +EXPORT_SYMBOL(bitmap_remap); + +/** + * bitmap_bitremap - Apply map defined by a pair of bitmaps to a single bit + * @oldbit: bit position to be mapped + * @old: defines domain of map + * @new: defines range of map + * @bits: number of bits in each of these bitmaps + * + * Let @old and @new define a mapping of bit positions, such that + * whatever position is held by the n-th set bit in @old is mapped + * to the n-th set bit in @new. In the more general case, allowing + * for the possibility that the weight 'w' of @new is less than the + * weight of @old, map the position of the n-th set bit in @old to + * the position of the m-th set bit in @new, where m == n % w. + * + * The positions of unset bits in @old are mapped to themselves + * (the identify map). + * + * Apply the above specified mapping to bit position @oldbit, returning + * the new bit position. + * + * For example, lets say that @old has bits 4 through 7 set, and + * @new has bits 12 through 15 set. This defines the mapping of bit + * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other + * bit positions unchanged. So if say @oldbit is 5, then this routine + * returns 13. + */ +int bitmap_bitremap(int oldbit, const unsigned long *old, + const unsigned long *new, int bits) +{ + int w = bitmap_weight(new, bits); + int n = bitmap_pos_to_ord(old, oldbit, bits); + if (n < 0 || w == 0) + return oldbit; + else + return bitmap_ord_to_pos(new, n % w, bits); +} +EXPORT_SYMBOL(bitmap_bitremap); + +/** + * bitmap_onto - translate one bitmap relative to another + * @dst: resulting translated bitmap + * @orig: original untranslated bitmap + * @relmap: bitmap relative to which translated + * @bits: number of bits in each of these bitmaps + * + * Set the n-th bit of @dst iff there exists some m such that the + * n-th bit of @relmap is set, the m-th bit of @orig is set, and + * the n-th bit of @relmap is also the m-th _set_ bit of @relmap. + * (If you understood the previous sentence the first time your + * read it, you're overqualified for your current job.) + * + * In other words, @orig is mapped onto (surjectively) @dst, + * using the the map { <n, m> | the n-th bit of @relmap is the + * m-th set bit of @relmap }. + * + * Any set bits in @orig above bit number W, where W is the + * weight of (number of set bits in) @relmap are mapped nowhere. + * In particular, if for all bits m set in @orig, m >= W, then + * @dst will end up empty. In situations where the possibility + * of such an empty result is not desired, one way to avoid it is + * to use the bitmap_fold() operator, below, to first fold the + * @orig bitmap over itself so that all its set bits x are in the + * range 0 <= x < W. The bitmap_fold() operator does this by + * setting the bit (m % W) in @dst, for each bit (m) set in @orig. + * + * Example [1] for bitmap_onto(): + * Let's say @relmap has bits 30-39 set, and @orig has bits + * 1, 3, 5, 7, 9 and 11 set. Then on return from this routine, + * @dst will have bits 31, 33, 35, 37 and 39 set. + * + * When bit 0 is set in @orig, it means turn on the bit in + * @dst corresponding to whatever is the first bit (if any) + * that is turned on in @relmap. Since bit 0 was off in the + * above example, we leave off that bit (bit 30) in @dst. + * + * When bit 1 is set in @orig (as in the above example), it + * means turn on the bit in @dst corresponding to whatever + * is the second bit that is turned on in @relmap. The second + * bit in @relmap that was turned on in the above example was + * bit 31, so we turned on bit 31 in @dst. + * + * Similarly, we turned on bits 33, 35, 37 and 39 in @dst, + * because they were the 4th, 6th, 8th and 10th set bits + * set in @relmap, and the 4th, 6th, 8th and 10th bits of + * @orig (i.e. bits 3, 5, 7 and 9) were also set. + * + * When bit 11 is set in @orig, it means turn on the bit in + * @dst corresponding to whatever is the twelth bit that is + * turned on in @relmap. In the above example, there were + * only ten bits turned on in @relmap (30..39), so that bit + * 11 was set in @orig had no affect on @dst. + * + * Example [2] for bitmap_fold() + bitmap_onto(): + * Let's say @relmap has these ten bits set: + * 40 41 42 43 45 48 53 61 74 95 + * (for the curious, that's 40 plus the first ten terms of the + * Fibonacci sequence.) + * + * Further lets say we use the following code, invoking + * bitmap_fold() then bitmap_onto, as suggested above to + * avoid the possitility of an empty @dst result: + * + * unsigned long *tmp; // a temporary bitmap's bits + * + * bitmap_fold(tmp, orig, bitmap_weight(relmap, bits), bits); + * bitmap_onto(dst, tmp, relmap, bits); + * + * Then this table shows what various values of @dst would be, for + * various @orig's. I list the zero-based positions of each set bit. + * The tmp column shows the intermediate result, as computed by + * using bitmap_fold() to fold the @orig bitmap modulo ten + * (the weight of @relmap). + * + * @orig tmp @dst + * 0 0 40 + * 1 1 41 + * 9 9 95 + * 10 0 40 (*) + * 1 3 5 7 1 3 5 7 41 43 48 61 + * 0 1 2 3 4 0 1 2 3 4 40 41 42 43 45 + * 0 9 18 27 0 9 8 7 40 61 74 95 + * 0 10 20 30 0 40 + * 0 11 22 33 0 1 2 3 40 41 42 43 + * 0 12 24 36 0 2 4 6 40 42 45 53 + * 78 102 211 1 2 8 41 42 74 (*) + * + * (*) For these marked lines, if we hadn't first done bitmap_fold() + * into tmp, then the @dst result would have been empty. + * + * If either of @orig or @relmap is empty (no set bits), then @dst + * will be returned empty. + * + * If (as explained above) the only set bits in @orig are in positions + * m where m >= W, (where W is the weight of @relmap) then @dst will + * once again be returned empty. + * + * All bits in @dst not set by the above rule are cleared. + */ +void bitmap_onto(unsigned long *dst, const unsigned long *orig, + const unsigned long *relmap, int bits) +{ + int n, m; /* same meaning as in above comment */ + + if (dst == orig) /* following doesn't handle inplace mappings */ + return; + bitmap_zero(dst, bits); + + /* + * The following code is a more efficient, but less + * obvious, equivalent to the loop: + * for (m = 0; m < bitmap_weight(relmap, bits); m++) { + * n = bitmap_ord_to_pos(orig, m, bits); + * if (test_bit(m, orig)) + * set_bit(n, dst); + * } + */ + + m = 0; + for_each_set_bit(n, relmap, bits) { + /* m == bitmap_pos_to_ord(relmap, n, bits) */ + if (test_bit(m, orig)) + set_bit(n, dst); + m++; + } +} +EXPORT_SYMBOL(bitmap_onto); + +/** + * bitmap_fold - fold larger bitmap into smaller, modulo specified size + * @dst: resulting smaller bitmap + * @orig: original larger bitmap + * @sz: specified size + * @bits: number of bits in each of these bitmaps + * + * For each bit oldbit in @orig, set bit oldbit mod @sz in @dst. + * Clear all other bits in @dst. See further the comment and + * Example [2] for bitmap_onto() for why and how to use this. + */ +void bitmap_fold(unsigned long *dst, const unsigned long *orig, + int sz, int bits) +{ + int oldbit; + + if (dst == orig) /* following doesn't handle inplace mappings */ + return; + bitmap_zero(dst, bits); + + for_each_set_bit(oldbit, orig, bits) + set_bit(oldbit % sz, dst); +} +EXPORT_SYMBOL(bitmap_fold); + +/* + * Common code for bitmap_*_region() routines. + * bitmap: array of unsigned longs corresponding to the bitmap + * pos: the beginning of the region + * order: region size (log base 2 of number of bits) + * reg_op: operation(s) to perform on that region of bitmap + * + * Can set, verify and/or release a region of bits in a bitmap, + * depending on which combination of REG_OP_* flag bits is set. + * + * A region of a bitmap is a sequence of bits in the bitmap, of + * some size '1 << order' (a power of two), aligned to that same + * '1 << order' power of two. + * + * Returns 1 if REG_OP_ISFREE succeeds (region is all zero bits). + * Returns 0 in all other cases and reg_ops. + */ + +enum { + REG_OP_ISFREE, /* true if region is all zero bits */ + REG_OP_ALLOC, /* set all bits in region */ + REG_OP_RELEASE, /* clear all bits in region */ +}; + +static int __reg_op(unsigned long *bitmap, int pos, int order, int reg_op) +{ + int nbits_reg; /* number of bits in region */ + int index; /* index first long of region in bitmap */ + int offset; /* bit offset region in bitmap[index] */ + int nlongs_reg; /* num longs spanned by region in bitmap */ + int nbitsinlong; /* num bits of region in each spanned long */ + unsigned long mask; /* bitmask for one long of region */ + int i; /* scans bitmap by longs */ + int ret = 0; /* return value */ + + /* + * Either nlongs_reg == 1 (for small orders that fit in one long) + * or (offset == 0 && mask == ~0UL) (for larger multiword orders.) + */ + nbits_reg = 1 << order; + index = pos / BITS_PER_LONG; + offset = pos - (index * BITS_PER_LONG); + nlongs_reg = BITS_TO_LONGS(nbits_reg); + nbitsinlong = min(nbits_reg, BITS_PER_LONG); + + /* + * Can't do "mask = (1UL << nbitsinlong) - 1", as that + * overflows if nbitsinlong == BITS_PER_LONG. + */ + mask = (1UL << (nbitsinlong - 1)); + mask += mask - 1; + mask <<= offset; + + switch (reg_op) { + case REG_OP_ISFREE: + for (i = 0; i < nlongs_reg; i++) { + if (bitmap[index + i] & mask) + goto done; + } + ret = 1; /* all bits in region free (zero) */ + break; + + case REG_OP_ALLOC: + for (i = 0; i < nlongs_reg; i++) + bitmap[index + i] |= mask; + break; + + case REG_OP_RELEASE: + for (i = 0; i < nlongs_reg; i++) + bitmap[index + i] &= ~mask; + break; + } +done: + return ret; +} + +/** + * bitmap_find_free_region - find a contiguous aligned mem region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @bits: number of bits in the bitmap + * @order: region size (log base 2 of number of bits) to find + * + * Find a region of free (zero) bits in a @bitmap of @bits bits and + * allocate them (set them to one). Only consider regions of length + * a power (@order) of two, aligned to that power of two, which + * makes the search algorithm much faster. + * + * Return the bit offset in bitmap of the allocated region, + * or -errno on failure. + */ +int bitmap_find_free_region(unsigned long *bitmap, int bits, int order) +{ + int pos, end; /* scans bitmap by regions of size order */ + + for (pos = 0 ; (end = pos + (1 << order)) <= bits; pos = end) { + if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) + continue; + __reg_op(bitmap, pos, order, REG_OP_ALLOC); + return pos; + } + return -ENOMEM; +} +EXPORT_SYMBOL(bitmap_find_free_region); + +/** + * bitmap_release_region - release allocated bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to release + * @order: region size (log base 2 of number of bits) to release + * + * This is the complement to __bitmap_find_free_region() and releases + * the found region (by clearing it in the bitmap). + * + * No return value. + */ +void bitmap_release_region(unsigned long *bitmap, int pos, int order) +{ + __reg_op(bitmap, pos, order, REG_OP_RELEASE); +} +EXPORT_SYMBOL(bitmap_release_region); + +/** + * bitmap_allocate_region - allocate bitmap region + * @bitmap: array of unsigned longs corresponding to the bitmap + * @pos: beginning of bit region to allocate + * @order: region size (log base 2 of number of bits) to allocate + * + * Allocate (set bits in) a specified region of a bitmap. + * + * Return 0 on success, or %-EBUSY if specified region wasn't + * free (not all bits were zero). + */ +int bitmap_allocate_region(unsigned long *bitmap, int pos, int order) +{ + if (!__reg_op(bitmap, pos, order, REG_OP_ISFREE)) + return -EBUSY; + __reg_op(bitmap, pos, order, REG_OP_ALLOC); + return 0; +} +EXPORT_SYMBOL(bitmap_allocate_region); + +/** + * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order. + * @dst: destination buffer + * @src: bitmap to copy + * @nbits: number of bits in the bitmap + * + * Require nbits % BITS_PER_LONG == 0. + */ +void bitmap_copy_le(void *dst, const unsigned long *src, int nbits) +{ + unsigned long *d = dst; + int i; + + for (i = 0; i < nbits/BITS_PER_LONG; i++) { + if (BITS_PER_LONG == 64) + d[i] = cpu_to_le64(src[i]); + else + d[i] = cpu_to_le32(src[i]); + } +} +EXPORT_SYMBOL(bitmap_copy_le); diff --git a/lib/bitrev.c b/lib/bitrev.c new file mode 100644 index 00000000..39562034 --- /dev/null +++ b/lib/bitrev.c @@ -0,0 +1,59 @@ +#include <linux/types.h> +#include <linux/module.h> +#include <linux/bitrev.h> + +MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); +MODULE_DESCRIPTION("Bit ordering reversal functions"); +MODULE_LICENSE("GPL"); + +const u8 byte_rev_table[256] = { + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +}; +EXPORT_SYMBOL_GPL(byte_rev_table); + +u16 bitrev16(u16 x) +{ + return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8); +} +EXPORT_SYMBOL(bitrev16); + +/** + * bitrev32 - reverse the order of bits in a u32 value + * @x: value to be bit-reversed + */ +u32 bitrev32(u32 x) +{ + return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16); +} +EXPORT_SYMBOL(bitrev32); diff --git a/lib/btree.c b/lib/btree.c new file mode 100644 index 00000000..c9c6f035 --- /dev/null +++ b/lib/btree.c @@ -0,0 +1,798 @@ +/* + * lib/btree.c - Simple In-memory B+Tree + * + * As should be obvious for Linux kernel code, license is GPLv2 + * + * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org> + * Bits and pieces stolen from Peter Zijlstra's code, which is + * Copyright 2007, Red Hat Inc. Peter Zijlstra <pzijlstr@redhat.com> + * GPLv2 + * + * see http://programming.kicks-ass.net/kernel-patches/vma_lookup/btree.patch + * + * A relatively simple B+Tree implementation. I have written it as a learning + * excercise to understand how B+Trees work. Turned out to be useful as well. + * + * B+Trees can be used similar to Linux radix trees (which don't have anything + * in common with textbook radix trees, beware). Prerequisite for them working + * well is that access to a random tree node is much faster than a large number + * of operations within each node. + * + * Disks have fulfilled the prerequisite for a long time. More recently DRAM + * has gained similar properties, as memory access times, when measured in cpu + * cycles, have increased. Cacheline sizes have increased as well, which also + * helps B+Trees. + * + * Compared to radix trees, B+Trees are more efficient when dealing with a + * sparsely populated address space. Between 25% and 50% of the memory is + * occupied with valid pointers. When densely populated, radix trees contain + * ~98% pointers - hard to beat. Very sparse radix trees contain only ~2% + * pointers. + * + * This particular implementation stores pointers identified by a long value. + * Storing NULL pointers is illegal, lookup will return NULL when no entry + * was found. + * + * A tricks was used that is not commonly found in textbooks. The lowest + * values are to the right, not to the left. All used slots within a node + * are on the left, all unused slots contain NUL values. Most operations + * simply loop once over all slots and terminate on the first NUL. + */ + +#include <linux/btree.h> +#include <linux/cache.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/module.h> + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define NODESIZE MAX(L1_CACHE_BYTES, 128) + +struct btree_geo { + int keylen; + int no_pairs; + int no_longs; +}; + +struct btree_geo btree_geo32 = { + .keylen = 1, + .no_pairs = NODESIZE / sizeof(long) / 2, + .no_longs = NODESIZE / sizeof(long) / 2, +}; +EXPORT_SYMBOL_GPL(btree_geo32); + +#define LONG_PER_U64 (64 / BITS_PER_LONG) +struct btree_geo btree_geo64 = { + .keylen = LONG_PER_U64, + .no_pairs = NODESIZE / sizeof(long) / (1 + LONG_PER_U64), + .no_longs = LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + LONG_PER_U64)), +}; +EXPORT_SYMBOL_GPL(btree_geo64); + +struct btree_geo btree_geo128 = { + .keylen = 2 * LONG_PER_U64, + .no_pairs = NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64), + .no_longs = 2 * LONG_PER_U64 * (NODESIZE / sizeof(long) / (1 + 2 * LONG_PER_U64)), +}; +EXPORT_SYMBOL_GPL(btree_geo128); + +static struct kmem_cache *btree_cachep; + +void *btree_alloc(gfp_t gfp_mask, void *pool_data) +{ + return kmem_cache_alloc(btree_cachep, gfp_mask); +} +EXPORT_SYMBOL_GPL(btree_alloc); + +void btree_free(void *element, void *pool_data) +{ + kmem_cache_free(btree_cachep, element); +} +EXPORT_SYMBOL_GPL(btree_free); + +static unsigned long *btree_node_alloc(struct btree_head *head, gfp_t gfp) +{ + unsigned long *node; + + node = mempool_alloc(head->mempool, gfp); + if (likely(node)) + memset(node, 0, NODESIZE); + return node; +} + +static int longcmp(const unsigned long *l1, const unsigned long *l2, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + if (l1[i] < l2[i]) + return -1; + if (l1[i] > l2[i]) + return 1; + } + return 0; +} + +static unsigned long *longcpy(unsigned long *dest, const unsigned long *src, + size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + dest[i] = src[i]; + return dest; +} + +static unsigned long *longset(unsigned long *s, unsigned long c, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + s[i] = c; + return s; +} + +static void dec_key(struct btree_geo *geo, unsigned long *key) +{ + unsigned long val; + int i; + + for (i = geo->keylen - 1; i >= 0; i--) { + val = key[i]; + key[i] = val - 1; + if (val) + break; + } +} + +static unsigned long *bkey(struct btree_geo *geo, unsigned long *node, int n) +{ + return &node[n * geo->keylen]; +} + +static void *bval(struct btree_geo *geo, unsigned long *node, int n) +{ + return (void *)node[geo->no_longs + n]; +} + +static void setkey(struct btree_geo *geo, unsigned long *node, int n, + unsigned long *key) +{ + longcpy(bkey(geo, node, n), key, geo->keylen); +} + +static void setval(struct btree_geo *geo, unsigned long *node, int n, + void *val) +{ + node[geo->no_longs + n] = (unsigned long) val; +} + +static void clearpair(struct btree_geo *geo, unsigned long *node, int n) +{ + longset(bkey(geo, node, n), 0, geo->keylen); + node[geo->no_longs + n] = 0; +} + +static inline void __btree_init(struct btree_head *head) +{ + head->node = NULL; + head->height = 0; +} + +void btree_init_mempool(struct btree_head *head, mempool_t *mempool) +{ + __btree_init(head); + head->mempool = mempool; +} +EXPORT_SYMBOL_GPL(btree_init_mempool); + +int btree_init(struct btree_head *head) +{ + __btree_init(head); + head->mempool = mempool_create(0, btree_alloc, btree_free, NULL); + if (!head->mempool) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL_GPL(btree_init); + +void btree_destroy(struct btree_head *head) +{ + mempool_destroy(head->mempool); + head->mempool = NULL; +} +EXPORT_SYMBOL_GPL(btree_destroy); + +void *btree_last(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return NULL; + + for ( ; height > 1; height--) + node = bval(geo, node, 0); + + longcpy(key, bkey(geo, node, 0), geo->keylen); + return bval(geo, node, 0); +} +EXPORT_SYMBOL_GPL(btree_last); + +static int keycmp(struct btree_geo *geo, unsigned long *node, int pos, + unsigned long *key) +{ + return longcmp(bkey(geo, node, pos), key, geo->keylen); +} + +static int keyzero(struct btree_geo *geo, unsigned long *key) +{ + int i; + + for (i = 0; i < geo->keylen; i++) + if (key[i]) + return 0; + + return 1; +} + +void *btree_lookup(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + int i, height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return NULL; + + for ( ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + return NULL; + node = bval(geo, node, i); + if (!node) + return NULL; + } + + if (!node) + return NULL; + + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) == 0) + return bval(geo, node, i); + return NULL; +} +EXPORT_SYMBOL_GPL(btree_lookup); + +int btree_update(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val) +{ + int i, height = head->height; + unsigned long *node = head->node; + + if (height == 0) + return -ENOENT; + + for ( ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + return -ENOENT; + node = bval(geo, node, i); + if (!node) + return -ENOENT; + } + + if (!node) + return -ENOENT; + + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) == 0) { + setval(geo, node, i, val); + return 0; + } + return -ENOENT; +} +EXPORT_SYMBOL_GPL(btree_update); + +/* + * Usually this function is quite similar to normal lookup. But the key of + * a parent node may be smaller than the smallest key of all its siblings. + * In such a case we cannot just return NULL, as we have only proven that no + * key smaller than __key, but larger than this parent key exists. + * So we set __key to the parent key and retry. We have to use the smallest + * such parent key, which is the last parent key we encountered. + */ +void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, + unsigned long *__key) +{ + int i, height; + unsigned long *node, *oldnode; + unsigned long *retry_key = NULL, key[geo->keylen]; + + if (keyzero(geo, __key)) + return NULL; + + if (head->height == 0) + return NULL; +retry: + longcpy(key, __key, geo->keylen); + dec_key(geo, key); + + node = head->node; + for (height = head->height ; height > 1; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + if (i == geo->no_pairs) + goto miss; + oldnode = node; + node = bval(geo, node, i); + if (!node) + goto miss; + retry_key = bkey(geo, oldnode, i); + } + + if (!node) + goto miss; + + for (i = 0; i < geo->no_pairs; i++) { + if (keycmp(geo, node, i, key) <= 0) { + if (bval(geo, node, i)) { + longcpy(__key, bkey(geo, node, i), geo->keylen); + return bval(geo, node, i); + } else + goto miss; + } + } +miss: + if (retry_key) { + __key = retry_key; + retry_key = NULL; + goto retry; + } + return NULL; +} + +static int getpos(struct btree_geo *geo, unsigned long *node, + unsigned long *key) +{ + int i; + + for (i = 0; i < geo->no_pairs; i++) { + if (keycmp(geo, node, i, key) <= 0) + break; + } + return i; +} + +static int getfill(struct btree_geo *geo, unsigned long *node, int start) +{ + int i; + + for (i = start; i < geo->no_pairs; i++) + if (!bval(geo, node, i)) + break; + return i; +} + +/* + * locate the correct leaf node in the btree + */ +static unsigned long *find_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level) +{ + unsigned long *node = head->node; + int i, height; + + for (height = head->height; height > level; height--) { + for (i = 0; i < geo->no_pairs; i++) + if (keycmp(geo, node, i, key) <= 0) + break; + + if ((i == geo->no_pairs) || !bval(geo, node, i)) { + /* right-most key is too large, update it */ + /* FIXME: If the right-most key on higher levels is + * always zero, this wouldn't be necessary. */ + i--; + setkey(geo, node, i, key); + } + BUG_ON(i < 0); + node = bval(geo, node, i); + } + BUG_ON(!node); + return node; +} + +static int btree_grow(struct btree_head *head, struct btree_geo *geo, + gfp_t gfp) +{ + unsigned long *node; + int fill; + + node = btree_node_alloc(head, gfp); + if (!node) + return -ENOMEM; + if (head->node) { + fill = getfill(geo, head->node, 0); + setkey(geo, node, 0, bkey(geo, head->node, fill - 1)); + setval(geo, node, 0, head->node); + } + head->node = node; + head->height++; + return 0; +} + +static void btree_shrink(struct btree_head *head, struct btree_geo *geo) +{ + unsigned long *node; + int fill; + + if (head->height <= 1) + return; + + node = head->node; + fill = getfill(geo, node, 0); + BUG_ON(fill > 1); + head->node = bval(geo, node, 0); + head->height--; + mempool_free(node, head->mempool); +} + +static int btree_insert_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, int level, + gfp_t gfp) +{ + unsigned long *node; + int i, pos, fill, err; + + BUG_ON(!val); + if (head->height < level) { + err = btree_grow(head, geo, gfp); + if (err) + return err; + } + +retry: + node = find_level(head, geo, key, level); + pos = getpos(geo, node, key); + fill = getfill(geo, node, pos); + /* two identical keys are not allowed */ + BUG_ON(pos < fill && keycmp(geo, node, pos, key) == 0); + + if (fill == geo->no_pairs) { + /* need to split node */ + unsigned long *new; + + new = btree_node_alloc(head, gfp); + if (!new) + return -ENOMEM; + err = btree_insert_level(head, geo, + bkey(geo, node, fill / 2 - 1), + new, level + 1, gfp); + if (err) { + mempool_free(new, head->mempool); + return err; + } + for (i = 0; i < fill / 2; i++) { + setkey(geo, new, i, bkey(geo, node, i)); + setval(geo, new, i, bval(geo, node, i)); + setkey(geo, node, i, bkey(geo, node, i + fill / 2)); + setval(geo, node, i, bval(geo, node, i + fill / 2)); + clearpair(geo, node, i + fill / 2); + } + if (fill & 1) { + setkey(geo, node, i, bkey(geo, node, fill - 1)); + setval(geo, node, i, bval(geo, node, fill - 1)); + clearpair(geo, node, fill - 1); + } + goto retry; + } + BUG_ON(fill >= geo->no_pairs); + + /* shift and insert */ + for (i = fill; i > pos; i--) { + setkey(geo, node, i, bkey(geo, node, i - 1)); + setval(geo, node, i, bval(geo, node, i - 1)); + } + setkey(geo, node, pos, key); + setval(geo, node, pos, val); + + return 0; +} + +int btree_insert(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, void *val, gfp_t gfp) +{ + return btree_insert_level(head, geo, key, val, 1, gfp); +} +EXPORT_SYMBOL_GPL(btree_insert); + +static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level); +static void merge(struct btree_head *head, struct btree_geo *geo, int level, + unsigned long *left, int lfill, + unsigned long *right, int rfill, + unsigned long *parent, int lpos) +{ + int i; + + for (i = 0; i < rfill; i++) { + /* Move all keys to the left */ + setkey(geo, left, lfill + i, bkey(geo, right, i)); + setval(geo, left, lfill + i, bval(geo, right, i)); + } + /* Exchange left and right child in parent */ + setval(geo, parent, lpos, right); + setval(geo, parent, lpos + 1, left); + /* Remove left (formerly right) child from parent */ + btree_remove_level(head, geo, bkey(geo, parent, lpos), level + 1); + mempool_free(right, head->mempool); +} + +static void rebalance(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level, unsigned long *child, int fill) +{ + unsigned long *parent, *left = NULL, *right = NULL; + int i, no_left, no_right; + + if (fill == 0) { + /* Because we don't steal entries from a neigbour, this case + * can happen. Parent node contains a single child, this + * node, so merging with a sibling never happens. + */ + btree_remove_level(head, geo, key, level + 1); + mempool_free(child, head->mempool); + return; + } + + parent = find_level(head, geo, key, level + 1); + i = getpos(geo, parent, key); + BUG_ON(bval(geo, parent, i) != child); + + if (i > 0) { + left = bval(geo, parent, i - 1); + no_left = getfill(geo, left, 0); + if (fill + no_left <= geo->no_pairs) { + merge(head, geo, level, + left, no_left, + child, fill, + parent, i - 1); + return; + } + } + if (i + 1 < getfill(geo, parent, i)) { + right = bval(geo, parent, i + 1); + no_right = getfill(geo, right, 0); + if (fill + no_right <= geo->no_pairs) { + merge(head, geo, level, + child, fill, + right, no_right, + parent, i); + return; + } + } + /* + * We could also try to steal one entry from the left or right + * neighbor. By not doing so we changed the invariant from + * "all nodes are at least half full" to "no two neighboring + * nodes can be merged". Which means that the average fill of + * all nodes is still half or better. + */ +} + +static void *btree_remove_level(struct btree_head *head, struct btree_geo *geo, + unsigned long *key, int level) +{ + unsigned long *node; + int i, pos, fill; + void *ret; + + if (level > head->height) { + /* we recursed all the way up */ + head->height = 0; + head->node = NULL; + return NULL; + } + + node = find_level(head, geo, key, level); + pos = getpos(geo, node, key); + fill = getfill(geo, node, pos); + if ((level == 1) && (keycmp(geo, node, pos, key) != 0)) + return NULL; + ret = bval(geo, node, pos); + + /* remove and shift */ + for (i = pos; i < fill - 1; i++) { + setkey(geo, node, i, bkey(geo, node, i + 1)); + setval(geo, node, i, bval(geo, node, i + 1)); + } + clearpair(geo, node, fill - 1); + + if (fill - 1 < geo->no_pairs / 2) { + if (level < head->height) + rebalance(head, geo, key, level, node, fill - 1); + else if (fill - 1 == 1) + btree_shrink(head, geo); + } + + return ret; +} + +void *btree_remove(struct btree_head *head, struct btree_geo *geo, + unsigned long *key) +{ + if (head->height == 0) + return NULL; + + return btree_remove_level(head, geo, key, 1); +} +EXPORT_SYMBOL_GPL(btree_remove); + +int btree_merge(struct btree_head *target, struct btree_head *victim, + struct btree_geo *geo, gfp_t gfp) +{ + unsigned long key[geo->keylen]; + unsigned long dup[geo->keylen]; + void *val; + int err; + + BUG_ON(target == victim); + + if (!(target->node)) { + /* target is empty, just copy fields over */ + target->node = victim->node; + target->height = victim->height; + __btree_init(victim); + return 0; + } + + /* TODO: This needs some optimizations. Currently we do three tree + * walks to remove a single object from the victim. + */ + for (;;) { + if (!btree_last(victim, geo, key)) + break; + val = btree_lookup(victim, geo, key); + err = btree_insert(target, geo, key, val, gfp); + if (err) + return err; + /* We must make a copy of the key, as the original will get + * mangled inside btree_remove. */ + longcpy(dup, key, geo->keylen); + btree_remove(victim, geo, dup); + } + return 0; +} +EXPORT_SYMBOL_GPL(btree_merge); + +static size_t __btree_for_each(struct btree_head *head, struct btree_geo *geo, + unsigned long *node, unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, size_t index, + void *func2), + void *func2, int reap, int height, size_t count) +{ + int i; + unsigned long *child; + + for (i = 0; i < geo->no_pairs; i++) { + child = bval(geo, node, i); + if (!child) + break; + if (height > 1) + count = __btree_for_each(head, geo, child, opaque, + func, func2, reap, height - 1, count); + else + func(child, opaque, bkey(geo, node, i), count++, + func2); + } + if (reap) + mempool_free(node, head->mempool); + return count; +} + +static void empty(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *func2) +{ +} + +void visitorl(void *elem, unsigned long opaque, unsigned long *key, + size_t index, void *__func) +{ + visitorl_t func = __func; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitorl); + +void visitor32(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor32_t func = __func; + u32 *key = (void *)__key; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitor32); + +void visitor64(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor64_t func = __func; + u64 *key = (void *)__key; + + func(elem, opaque, *key, index); +} +EXPORT_SYMBOL_GPL(visitor64); + +void visitor128(void *elem, unsigned long opaque, unsigned long *__key, + size_t index, void *__func) +{ + visitor128_t func = __func; + u64 *key = (void *)__key; + + func(elem, opaque, key[0], key[1], index); +} +EXPORT_SYMBOL_GPL(visitor128); + +size_t btree_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2) +{ + size_t count = 0; + + if (!func2) + func = empty; + if (head->node) + count = __btree_for_each(head, geo, head->node, opaque, func, + func2, 0, head->height, 0); + return count; +} +EXPORT_SYMBOL_GPL(btree_visitor); + +size_t btree_grim_visitor(struct btree_head *head, struct btree_geo *geo, + unsigned long opaque, + void (*func)(void *elem, unsigned long opaque, + unsigned long *key, + size_t index, void *func2), + void *func2) +{ + size_t count = 0; + + if (!func2) + func = empty; + if (head->node) + count = __btree_for_each(head, geo, head->node, opaque, func, + func2, 1, head->height, 0); + __btree_init(head); + return count; +} +EXPORT_SYMBOL_GPL(btree_grim_visitor); + +static int __init btree_module_init(void) +{ + btree_cachep = kmem_cache_create("btree_node", NODESIZE, 0, + SLAB_HWCACHE_ALIGN, NULL); + return 0; +} + +static void __exit btree_module_exit(void) +{ + kmem_cache_destroy(btree_cachep); +} + +/* If core code starts using btree, initialization should happen even earlier */ +module_init(btree_module_init); +module_exit(btree_module_exit); + +MODULE_AUTHOR("Joern Engel <joern@logfs.org>"); +MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); +MODULE_LICENSE("GPL"); diff --git a/lib/bug.c b/lib/bug.c new file mode 100644 index 00000000..19552096 --- /dev/null +++ b/lib/bug.c @@ -0,0 +1,183 @@ +/* + Generic support for BUG() + + This respects the following config options: + + CONFIG_BUG - emit BUG traps. Nothing happens without this. + CONFIG_GENERIC_BUG - enable this code. + CONFIG_GENERIC_BUG_RELATIVE_POINTERS - use 32-bit pointers relative to + the containing struct bug_entry for bug_addr and file. + CONFIG_DEBUG_BUGVERBOSE - emit full file+line information for each BUG + + CONFIG_BUG and CONFIG_DEBUG_BUGVERBOSE are potentially user-settable + (though they're generally always on). + + CONFIG_GENERIC_BUG is set by each architecture using this code. + + To use this, your architecture must: + + 1. Set up the config options: + - Enable CONFIG_GENERIC_BUG if CONFIG_BUG + + 2. Implement BUG (and optionally BUG_ON, WARN, WARN_ON) + - Define HAVE_ARCH_BUG + - Implement BUG() to generate a faulting instruction + - NOTE: struct bug_entry does not have "file" or "line" entries + when CONFIG_DEBUG_BUGVERBOSE is not enabled, so you must generate + the values accordingly. + + 3. Implement the trap + - In the illegal instruction trap handler (typically), verify + that the fault was in kernel mode, and call report_bug() + - report_bug() will return whether it was a false alarm, a warning, + or an actual bug. + - You must implement the is_valid_bugaddr(bugaddr) callback which + returns true if the eip is a real kernel address, and it points + to the expected BUG trap instruction. + + Jeremy Fitzhardinge <jeremy@goop.org> 2006 + */ +#include <linux/list.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/bug.h> +#include <linux/sched.h> + +extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; + +static inline unsigned long bug_addr(const struct bug_entry *bug) +{ +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + return bug->bug_addr; +#else + return (unsigned long)bug + bug->bug_addr_disp; +#endif +} + +#ifdef CONFIG_MODULES +static LIST_HEAD(module_bug_list); + +static const struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + struct module *mod; + + list_for_each_entry(mod, &module_bug_list, bug_list) { + const struct bug_entry *bug = mod->bug_table; + unsigned i; + + for (i = 0; i < mod->num_bugs; ++i, ++bug) + if (bugaddr == bug_addr(bug)) + return bug; + } + return NULL; +} + +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) +{ + char *secstrings; + unsigned int i; + + mod->bug_table = NULL; + mod->num_bugs = 0; + + /* Find the __bug_table section, if present */ + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings+sechdrs[i].sh_name, "__bug_table")) + continue; + mod->bug_table = (void *) sechdrs[i].sh_addr; + mod->num_bugs = sechdrs[i].sh_size / sizeof(struct bug_entry); + break; + } + + /* + * Strictly speaking this should have a spinlock to protect against + * traversals, but since we only traverse on BUG()s, a spinlock + * could potentially lead to deadlock and thus be counter-productive. + */ + list_add(&mod->bug_list, &module_bug_list); +} + +void module_bug_cleanup(struct module *mod) +{ + list_del(&mod->bug_list); +} + +#else + +static inline const struct bug_entry *module_find_bug(unsigned long bugaddr) +{ + return NULL; +} +#endif + +const struct bug_entry *find_bug(unsigned long bugaddr) +{ + const struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug_addr(bug)) + return bug; + + return module_find_bug(bugaddr); +} + +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) +{ + const struct bug_entry *bug; + const char *file; + unsigned line, warning; + + if (!is_valid_bugaddr(bugaddr)) + return BUG_TRAP_TYPE_NONE; + + bug = find_bug(bugaddr); + + file = NULL; + line = 0; + warning = 0; + + if (bug) { +#ifdef CONFIG_DEBUG_BUGVERBOSE +#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS + file = bug->file; +#else + file = (const char *)bug + bug->file_disp; +#endif + line = bug->line; +#endif + warning = (bug->flags & BUGFLAG_WARNING) != 0; + } + + if (warning) { + /* this is a WARN_ON rather than BUG/BUG_ON */ + printk(KERN_WARNING "------------[ cut here ]------------\n"); + + if (file) + printk(KERN_WARNING "WARNING: at %s:%u\n", + file, line); + else + printk(KERN_WARNING "WARNING: at %p " + "[verbose debug info unavailable]\n", + (void *)bugaddr); + + print_modules(); + show_regs(regs); + print_oops_end_marker(); + add_taint(BUG_GET_TAINT(bug)); + return BUG_TRAP_TYPE_WARN; + } + + printk(KERN_EMERG "------------[ cut here ]------------\n"); + + if (file) + printk(KERN_CRIT "kernel BUG at %s:%u!\n", + file, line); + else + printk(KERN_CRIT "Kernel BUG at %p " + "[verbose debug info unavailable]\n", + (void *)bugaddr); + + return BUG_TRAP_TYPE_BUG; +} diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c new file mode 100644 index 00000000..9681d54b --- /dev/null +++ b/lib/bust_spinlocks.c @@ -0,0 +1,32 @@ +/* + * lib/bust_spinlocks.c + * + * Provides a minimal bust_spinlocks for architectures which don't have one of their own. + * + * bust_spinlocks() clears any spinlocks which would prevent oops, die(), BUG() + * and panic() information from reaching the user. + */ + +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/tty.h> +#include <linux/wait.h> +#include <linux/vt_kern.h> +#include <linux/console.h> + + +void __attribute__((weak)) bust_spinlocks(int yes) +{ + if (yes) { + ++oops_in_progress; + } else { +#ifdef CONFIG_VT + unblank_screen(); +#endif + console_unblank(); + if (--oops_in_progress == 0) + wake_up_klogd(); + } +} + + diff --git a/lib/check_signature.c b/lib/check_signature.c new file mode 100644 index 00000000..fd6af199 --- /dev/null +++ b/lib/check_signature.c @@ -0,0 +1,26 @@ +#include <linux/io.h> +#include <linux/module.h> + +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +int check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + while (length--) { + if (readb(io_addr) != *signature) + return 0; + io_addr++; + signature++; + } + return 1; +} +EXPORT_SYMBOL(check_signature); diff --git a/lib/checksum.c b/lib/checksum.c new file mode 100644 index 00000000..09750873 --- /dev/null +++ b/lib/checksum.c @@ -0,0 +1,203 @@ +/* + * + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * IP/TCP/UDP checksumming routines + * + * Authors: Jorge Cwik, <jorge@laser.satlink.net> + * Arnt Gulbrandsen, <agulbra@nvg.unit.no> + * Tom May, <ftom@netcom.com> + * Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de> + * Lots of code moved from tcp.c and ip.c; see those files + * for more names. + * + * 03/02/96 Jes Sorensen, Andreas Schwab, Roman Hodek: + * Fixed some nasty bugs, causing some horrible crashes. + * A: At some points, the sum (%0) was used as + * length-counter instead of the length counter + * (%1). Thanks to Roman Hodek for pointing this out. + * B: GCC seems to mess up if one uses too many + * data-registers to hold input values and one tries to + * specify d0 and d1 as scratch registers. Letting gcc + * choose these registers itself solves the problem. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access + kills, so most of the assembly has to go. */ + +#include <linux/module.h> +#include <net/checksum.h> + +#include <asm/byteorder.h> + +#ifndef do_csum +static inline unsigned short from32to16(unsigned int x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static unsigned int do_csum(const unsigned char *buff, int len) +{ + int odd, count; + unsigned int result = 0; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { +#ifdef __LITTLE_ENDIAN + result += (*buff << 8); +#else + result = *buff; +#endif + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + unsigned int carry = 0; + do { + unsigned int w = *(unsigned int *) buff; + count--; + buff += 4; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) +#ifdef __LITTLE_ENDIAN + result += *buff; +#else + result += (*buff << 8); +#endif + result = from32to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} +#endif + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return (__force __sum16)~do_csum(iph, ihl*4); +} +EXPORT_SYMBOL(ip_fast_csum); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum wsum) +{ + unsigned int sum = (__force unsigned int)wsum; + unsigned int result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += sum; + if (sum > result) + result += 1; + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum(const void *buff, int len) +{ + return (__force __sum16)~do_csum(buff, len); +} +EXPORT_SYMBOL(ip_compute_csum); + +/* + * copy from fs while checksumming, otherwise like csum_partial + */ +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, int len, + __wsum sum, int *csum_err) +{ + int missing; + + missing = __copy_from_user(dst, src, len); + if (missing) { + memset(dst + len - missing, 0, missing); + *csum_err = -EFAULT; + } else + *csum_err = 0; + + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy_from_user); + +/* + * copy from ds while checksumming, otherwise like csum_partial + */ +__wsum +csum_partial_copy(const void *src, void *dst, int len, __wsum sum) +{ + memcpy(dst, src, len); + return csum_partial(dst, len, sum); +} +EXPORT_SYMBOL(csum_partial_copy); + +#ifndef csum_tcpudp_nofold +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN + s += proto + len; +#else + s += (proto + len) << 8; +#endif + s += (s >> 32); + return (__force __wsum)s; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); +#endif diff --git a/lib/cmdline.c b/lib/cmdline.c new file mode 100644 index 00000000..f5f3ad8b --- /dev/null +++ b/lib/cmdline.c @@ -0,0 +1,159 @@ +/* + * linux/lib/cmdline.c + * Helper functions generally used for parsing kernel command line + * and module options. + * + * Code and copyrights come from init/main.c and arch/i386/kernel/setup.c. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + * + * GNU Indent formatting options for this file: -kr -i8 -npsl -pcs + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/string.h> + +/* + * If a hyphen was found in get_option, this will handle the + * range of numbers, M-N. This will expand the range and insert + * the values[M, M+1, ..., N] into the ints array in get_options. + */ + +static int get_range(char **str, int *pint) +{ + int x, inc_counter, upper_range; + + (*str)++; + upper_range = simple_strtol((*str), NULL, 0); + inc_counter = upper_range - *pint; + for (x = *pint; x < upper_range; x++) + *pint++ = x; + return inc_counter; +} + +/** + * get_option - Parse integer from an option string + * @str: option string + * @pint: (output) integer value parsed from @str + * + * Read an int from an option string; if available accept a subsequent + * comma as well. + * + * Return values: + * 0 - no int in string + * 1 - int found, no subsequent comma + * 2 - int found including a subsequent comma + * 3 - hyphen found to denote a range + */ + +int get_option (char **str, int *pint) +{ + char *cur = *str; + + if (!cur || !(*cur)) + return 0; + *pint = simple_strtol (cur, str, 0); + if (cur == *str) + return 0; + if (**str == ',') { + (*str)++; + return 2; + } + if (**str == '-') + return 3; + + return 1; +} + +/** + * get_options - Parse a string into a list of integers + * @str: String to be parsed + * @nints: size of integer array + * @ints: integer array + * + * This function parses a string containing a comma-separated + * list of integers, a hyphen-separated range of _positive_ integers, + * or a combination of both. The parse halts when the array is + * full, or when no more numbers can be retrieved from the + * string. + * + * Return value is the character in the string which caused + * the parse to end (typically a null terminator, if @str is + * completely parseable). + */ + +char *get_options(const char *str, int nints, int *ints) +{ + int res, i = 1; + + while (i < nints) { + res = get_option ((char **)&str, ints + i); + if (res == 0) + break; + if (res == 3) { + int range_nums; + range_nums = get_range((char **)&str, ints + i); + if (range_nums < 0) + break; + /* + * Decrement the result by one to leave out the + * last number in the range. The next iteration + * will handle the upper number in the range + */ + i += (range_nums - 1); + } + i++; + if (res == 1) + break; + } + ints[0] = i - 1; + return (char *)str; +} + +/** + * memparse - parse a string with mem suffixes into a number + * @ptr: Where parse begins + * @retptr: (output) Optional pointer to next char after parse completes + * + * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with %K (for kilobytes, or 1024 bytes), + * %M (for megabytes, or 1048576 bytes), or %G (for gigabytes, or + * 1073741824). If the number is suffixed with K, M, or G, then + * the return value is the number multiplied by one kilobyte, one + * megabyte, or one gigabyte, respectively. + */ + +unsigned long long memparse(const char *ptr, char **retptr) +{ + char *endptr; /* local pointer to end of parsed string */ + + unsigned long long ret = simple_strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + + +EXPORT_SYMBOL(memparse); +EXPORT_SYMBOL(get_option); +EXPORT_SYMBOL(get_options); diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c new file mode 100644 index 00000000..4dc20321 --- /dev/null +++ b/lib/cpu-notifier-error-inject.c @@ -0,0 +1,63 @@ +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/module.h> +#include <linux/notifier.h> + +static int priority; +static int cpu_up_prepare_error; +static int cpu_down_prepare_error; + +module_param(priority, int, 0); +MODULE_PARM_DESC(priority, "specify cpu notifier priority"); + +module_param(cpu_up_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_up_prepare_error, + "specify error code to inject CPU_UP_PREPARE action"); + +module_param(cpu_down_prepare_error, int, 0644); +MODULE_PARM_DESC(cpu_down_prepare_error, + "specify error code to inject CPU_DOWN_PREPARE action"); + +static int err_inject_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + int err = 0; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = cpu_up_prepare_error; + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + err = cpu_down_prepare_error; + break; + } + if (err) + printk(KERN_INFO "Injecting error (%d) at cpu notifier\n", err); + + return notifier_from_errno(err); +} + +static struct notifier_block err_inject_cpu_notifier = { + .notifier_call = err_inject_cpu_callback, +}; + +static int err_inject_init(void) +{ + err_inject_cpu_notifier.priority = priority; + + return register_hotcpu_notifier(&err_inject_cpu_notifier); +} + +static void err_inject_exit(void) +{ + unregister_hotcpu_notifier(&err_inject_cpu_notifier); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_DESCRIPTION("CPU notifier error injection module"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Akinobu Mita <akinobu.mita@gmail.com>"); diff --git a/lib/cpumask.c b/lib/cpumask.c new file mode 100644 index 00000000..05d6aca7 --- /dev/null +++ b/lib/cpumask.c @@ -0,0 +1,178 @@ +#include <linux/slab.h> +#include <linux/kernel.h> +#include <linux/bitops.h> +#include <linux/cpumask.h> +#include <linux/module.h> +#include <linux/bootmem.h> + +int __first_cpu(const cpumask_t *srcp) +{ + return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS)); +} +EXPORT_SYMBOL(__first_cpu); + +int __next_cpu(int n, const cpumask_t *srcp) +{ + return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1)); +} +EXPORT_SYMBOL(__next_cpu); + +#if NR_CPUS > 64 +int __next_cpu_nr(int n, const cpumask_t *srcp) +{ + return min_t(int, nr_cpu_ids, + find_next_bit(srcp->bits, nr_cpu_ids, n+1)); +} +EXPORT_SYMBOL(__next_cpu_nr); +#endif + +int __any_online_cpu(const cpumask_t *mask) +{ + int cpu; + + for_each_cpu_mask(cpu, *mask) { + if (cpu_online(cpu)) + break; + } + return cpu; +} +EXPORT_SYMBOL(__any_online_cpu); + +/** + * cpumask_next_and - get the next cpu in *src1p & *src2p + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set in both. + */ +int cpumask_next_and(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) + if (cpumask_test_cpu(n, src2p)) + break; + return n; +} +EXPORT_SYMBOL(cpumask_next_and); + +/** + * cpumask_any_but - return a "random" in a cpumask, but not this one. + * @mask: the cpumask to search + * @cpu: the cpu to ignore. + * + * Often used to find any cpu but smp_processor_id() in a mask. + * Returns >= nr_cpu_ids if no cpus set. + */ +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +{ + unsigned int i; + + cpumask_check(cpu); + for_each_cpu(i, mask) + if (i != cpu) + break; + return i; +} + +/* These are not inline because of header tangles. */ +#ifdef CONFIG_CPUMASK_OFFSTACK +/** + * alloc_cpumask_var_node - allocate a struct cpumask on a given node + * @mask: pointer to cpumask_var_t where the cpumask is returned + * @flags: GFP_ flags + * + * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is + * a nop returning a constant 1 (in <linux/cpumask.h>) + * Returns TRUE if memory allocation succeeded, FALSE otherwise. + * + * In addition, mask will be NULL if this fails. Note that gcc is + * usually smart enough to know that mask can never be NULL if + * CONFIG_CPUMASK_OFFSTACK=n, so does code elimination in that case + * too. + */ +bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) +{ + *mask = kmalloc_node(cpumask_size(), flags, node); + +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (!*mask) { + printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); + dump_stack(); + } +#endif + /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */ + if (*mask) { + unsigned char *ptr = (unsigned char *)cpumask_bits(*mask); + unsigned int tail; + tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long); + memset(ptr + cpumask_size() - tail, 0, tail); + } + + return *mask != NULL; +} +EXPORT_SYMBOL(alloc_cpumask_var_node); + +bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) +{ + return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node); +} +EXPORT_SYMBOL(zalloc_cpumask_var_node); + +/** + * alloc_cpumask_var - allocate a struct cpumask + * @mask: pointer to cpumask_var_t where the cpumask is returned + * @flags: GFP_ flags + * + * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is + * a nop returning a constant 1 (in <linux/cpumask.h>). + * + * See alloc_cpumask_var_node. + */ +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var_node(mask, flags, numa_node_id()); +} +EXPORT_SYMBOL(alloc_cpumask_var); + +bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return alloc_cpumask_var(mask, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(zalloc_cpumask_var); + +/** + * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena. + * @mask: pointer to cpumask_var_t where the cpumask is returned + * + * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is + * a nop (in <linux/cpumask.h>). + * Either returns an allocated (zero-filled) cpumask, or causes the + * system to panic. + */ +void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ + *mask = alloc_bootmem(cpumask_size()); +} + +/** + * free_cpumask_var - frees memory allocated for a struct cpumask. + * @mask: cpumask to free + * + * This is safe on a NULL mask. + */ +void free_cpumask_var(cpumask_var_t mask) +{ + kfree(mask); +} +EXPORT_SYMBOL(free_cpumask_var); + +/** + * free_bootmem_cpumask_var - frees result of alloc_bootmem_cpumask_var + * @mask: cpumask to free + */ +void __init free_bootmem_cpumask_var(cpumask_var_t mask) +{ + free_bootmem((unsigned long)mask, cpumask_size()); +} +#endif diff --git a/lib/crc-ccitt.c b/lib/crc-ccitt.c new file mode 100644 index 00000000..7f6dd68d --- /dev/null +++ b/lib/crc-ccitt.c @@ -0,0 +1,69 @@ +/* + * linux/lib/crc-ccitt.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-ccitt.h> + +/* + * This mysterious table is just the CRC of each possible byte. It can be + * computed using the standard bit-at-a-time methods. The polynomial can + * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12. + * Add the implicit x^16, and you have the standard CRC-CCITT. + */ +u16 const crc_ccitt_table[256] = { + 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, + 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, + 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, + 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, + 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, + 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, + 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, + 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, + 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, + 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, + 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, + 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, + 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, + 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, + 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, + 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, + 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, + 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, + 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, + 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, + 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, + 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, + 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, + 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, + 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, + 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, + 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, + 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, + 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, + 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, + 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, + 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 +}; +EXPORT_SYMBOL(crc_ccitt_table); + +/** + * crc_ccitt - recompute the CRC for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + */ +u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc_ccitt_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_ccitt); + +MODULE_DESCRIPTION("CRC-CCITT calculations"); +MODULE_LICENSE("GPL"); diff --git a/lib/crc-itu-t.c b/lib/crc-itu-t.c new file mode 100644 index 00000000..a63472b8 --- /dev/null +++ b/lib/crc-itu-t.c @@ -0,0 +1,69 @@ +/* + * crc-itu-t.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-itu-t.h> + +/** CRC table for the CRC ITU-T V.41 0x0x1021 (x^16 + x^12 + x^15 + 1) */ +const u16 crc_itu_t_table[256] = { + 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, + 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef, + 0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6, + 0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de, + 0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485, + 0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d, + 0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4, + 0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc, + 0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823, + 0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b, + 0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12, + 0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a, + 0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41, + 0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49, + 0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70, + 0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78, + 0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f, + 0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067, + 0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e, + 0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256, + 0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d, + 0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405, + 0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c, + 0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634, + 0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab, + 0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3, + 0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a, + 0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92, + 0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9, + 0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1, + 0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8, + 0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0 +}; + +EXPORT_SYMBOL(crc_itu_t_table); + +/** + * crc_itu_t - Compute the CRC-ITU-T for the data buffer + * + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + * + * Returns the updated CRC value + */ +u16 crc_itu_t(u16 crc, const u8 *buffer, size_t len) +{ + while (len--) + crc = crc_itu_t_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc_itu_t); + +MODULE_DESCRIPTION("CRC ITU-T V.41 calculations"); +MODULE_LICENSE("GPL"); + diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c new file mode 100644 index 00000000..fbbd66ed --- /dev/null +++ b/lib/crc-t10dif.c @@ -0,0 +1,67 @@ +/* + * T10 Data Integrity Field CRC16 calculation + * + * Copyright (c) 2007 Oracle Corporation. All rights reserved. + * Written by Martin K. Petersen <martin.petersen@oracle.com> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc-t10dif.h> + +/* Table generated using the following polynomium: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const __u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 +}; + +__u16 crc_t10dif(const unsigned char *buffer, size_t len) +{ + __u16 crc = 0; + unsigned int i; + + for (i = 0 ; i < len ; i++) + crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + + return crc; +} +EXPORT_SYMBOL(crc_t10dif); + +MODULE_DESCRIPTION("T10 DIF CRC calculation"); +MODULE_LICENSE("GPL"); diff --git a/lib/crc16.c b/lib/crc16.c new file mode 100644 index 00000000..8737b084 --- /dev/null +++ b/lib/crc16.c @@ -0,0 +1,67 @@ +/* + * crc16.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc16.h> + +/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ +u16 const crc16_table[256] = { + 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, + 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, + 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, + 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, + 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, + 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, + 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, + 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, + 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, + 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, + 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, + 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, + 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, + 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, + 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, + 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, + 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, + 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, + 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, + 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, + 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, + 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, + 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, + 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, + 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, + 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, + 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, + 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, + 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, + 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, + 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, + 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; +EXPORT_SYMBOL(crc16_table); + +/** + * crc16 - compute the CRC-16 for the data buffer + * @crc: previous CRC value + * @buffer: data pointer + * @len: number of bytes in the buffer + * + * Returns the updated CRC value. + */ +u16 crc16(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc16_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc16); + +MODULE_DESCRIPTION("CRC16 calculations"); +MODULE_LICENSE("GPL"); + diff --git a/lib/crc32.c b/lib/crc32.c new file mode 100644 index 00000000..4855995f --- /dev/null +++ b/lib/crc32.c @@ -0,0 +1,471 @@ +/* + * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> + * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! + * Code was from the public domain, copyright abandoned. Code was + * subsequently included in the kernel, thus was re-licensed under the + * GNU GPL v2. + * + * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> + * Same crc32 function was used in 5 other places in the kernel. + * I made one version, and deleted the others. + * There are various incantations of crc32(). Some use a seed of 0 or ~0. + * Some xor at the end with ~0. The generic crc32() function takes + * seed as an argument, and doesn't xor at the end. Then individual + * users can do whatever they need. + * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. + * fs/jffs2 uses seed 0, doesn't xor with ~0. + * fs/partitions/efi.c uses seed ~0, xor's with ~0. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/crc32.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/types.h> +#include <linux/init.h> +#include <asm/atomic.h> +#include "crc32defs.h" +#if CRC_LE_BITS == 8 +# define tole(x) __constant_cpu_to_le32(x) +#else +# define tole(x) (x) +#endif + +#if CRC_BE_BITS == 8 +# define tobe(x) __constant_cpu_to_be32(x) +#else +# define tobe(x) (x) +#endif +#include "crc32table.h" + +MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>"); +MODULE_DESCRIPTION("Ethernet CRC32 calculations"); +MODULE_LICENSE("GPL"); + +#if CRC_LE_BITS == 8 || CRC_BE_BITS == 8 + +static inline u32 +crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) +{ +# ifdef __LITTLE_ENDIAN +# define DO_CRC(x) crc = tab[0][(crc ^ (x)) & 255] ^ (crc >> 8) +# define DO_CRC4 crc = tab[3][(crc) & 255] ^ \ + tab[2][(crc >> 8) & 255] ^ \ + tab[1][(crc >> 16) & 255] ^ \ + tab[0][(crc >> 24) & 255] +# else +# define DO_CRC(x) crc = tab[0][((crc >> 24) ^ (x)) & 255] ^ (crc << 8) +# define DO_CRC4 crc = tab[0][(crc) & 255] ^ \ + tab[1][(crc >> 8) & 255] ^ \ + tab[2][(crc >> 16) & 255] ^ \ + tab[3][(crc >> 24) & 255] +# endif + const u32 *b; + size_t rem_len; + + /* Align it */ + if (unlikely((long)buf & 3 && len)) { + do { + DO_CRC(*buf++); + } while ((--len) && ((long)buf)&3); + } + rem_len = len & 3; + /* load data 32 bits wide, xor data 32 bits wide. */ + len = len >> 2; + b = (const u32 *)buf; + for (--b; len; --len) { + crc ^= *++b; /* use pre increment for speed */ + DO_CRC4; + } + len = rem_len; + /* And the last few bytes */ + if (len) { + u8 *p = (u8 *)(b + 1) - 1; + do { + DO_CRC(*++p); /* use pre increment for speed */ + } while (--len); + } + return crc; +#undef DO_CRC +#undef DO_CRC4 +} +#endif +/** + * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 + * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p: pointer to buffer over which CRC is run + * @len: length of buffer @p + */ +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); + +#if CRC_LE_BITS == 1 +/* + * In fact, the table-based code will work in this case, but it can be + * simplified by inlining the table in ?: form. + */ + +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + } + return crc; +} +#else /* Table-based approach */ + +u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +{ +# if CRC_LE_BITS == 8 + const u32 (*tab)[] = crc32table_le; + + crc = __cpu_to_le32(crc); + crc = crc32_body(crc, p, len, tab); + return __le32_to_cpu(crc); +# elif CRC_LE_BITS == 4 + while (len--) { + crc ^= *p++; + crc = (crc >> 4) ^ crc32table_le[crc & 15]; + crc = (crc >> 4) ^ crc32table_le[crc & 15]; + } + return crc; +# elif CRC_LE_BITS == 2 + while (len--) { + crc ^= *p++; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + crc = (crc >> 2) ^ crc32table_le[crc & 3]; + } + return crc; +# endif +} +#endif + +/** + * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 + * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for + * other uses, or the previous crc32 value if computing incrementally. + * @p: pointer to buffer over which CRC is run + * @len: length of buffer @p + */ +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); + +#if CRC_BE_BITS == 1 +/* + * In fact, the table-based code will work in this case, but it can be + * simplified by inlining the table in ?: form. + */ + +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ + int i; + while (len--) { + crc ^= *p++ << 24; + for (i = 0; i < 8; i++) + crc = + (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : + 0); + } + return crc; +} + +#else /* Table-based approach */ +u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +{ +# if CRC_BE_BITS == 8 + const u32 (*tab)[] = crc32table_be; + + crc = __cpu_to_be32(crc); + crc = crc32_body(crc, p, len, tab); + return __be32_to_cpu(crc); +# elif CRC_BE_BITS == 4 + while (len--) { + crc ^= *p++ << 24; + crc = (crc << 4) ^ crc32table_be[crc >> 28]; + crc = (crc << 4) ^ crc32table_be[crc >> 28]; + } + return crc; +# elif CRC_BE_BITS == 2 + while (len--) { + crc ^= *p++ << 24; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + crc = (crc << 2) ^ crc32table_be[crc >> 30]; + } + return crc; +# endif +} +#endif + +EXPORT_SYMBOL(crc32_le); +EXPORT_SYMBOL(crc32_be); + +/* + * A brief CRC tutorial. + * + * A CRC is a long-division remainder. You add the CRC to the message, + * and the whole thing (message+CRC) is a multiple of the given + * CRC polynomial. To check the CRC, you can either check that the + * CRC matches the recomputed value, *or* you can check that the + * remainder computed on the message+CRC is 0. This latter approach + * is used by a lot of hardware implementations, and is why so many + * protocols put the end-of-frame flag after the CRC. + * + * It's actually the same long division you learned in school, except that + * - We're working in binary, so the digits are only 0 and 1, and + * - When dividing polynomials, there are no carries. Rather than add and + * subtract, we just xor. Thus, we tend to get a bit sloppy about + * the difference between adding and subtracting. + * + * A 32-bit CRC polynomial is actually 33 bits long. But since it's + * 33 bits long, bit 32 is always going to be set, so usually the CRC + * is written in hex with the most significant bit omitted. (If you're + * familiar with the IEEE 754 floating-point format, it's the same idea.) + * + * Note that a CRC is computed over a string of *bits*, so you have + * to decide on the endianness of the bits within each byte. To get + * the best error-detecting properties, this should correspond to the + * order they're actually sent. For example, standard RS-232 serial is + * little-endian; the most significant bit (sometimes used for parity) + * is sent last. And when appending a CRC word to a message, you should + * do it in the right order, matching the endianness. + * + * Just like with ordinary division, the remainder is always smaller than + * the divisor (the CRC polynomial) you're dividing by. Each step of the + * division, you take one more digit (bit) of the dividend and append it + * to the current remainder. Then you figure out the appropriate multiple + * of the divisor to subtract to being the remainder back into range. + * In binary, it's easy - it has to be either 0 or 1, and to make the + * XOR cancel, it's just a copy of bit 32 of the remainder. + * + * When computing a CRC, we don't care about the quotient, so we can + * throw the quotient bit away, but subtract the appropriate multiple of + * the polynomial from the remainder and we're back to where we started, + * ready to process the next bit. + * + * A big-endian CRC written this way would be coded like: + * for (i = 0; i < input_bits; i++) { + * multiple = remainder & 0x80000000 ? CRCPOLY : 0; + * remainder = (remainder << 1 | next_input_bit()) ^ multiple; + * } + * Notice how, to get at bit 32 of the shifted remainder, we look + * at bit 31 of the remainder *before* shifting it. + * + * But also notice how the next_input_bit() bits we're shifting into + * the remainder don't actually affect any decision-making until + * 32 bits later. Thus, the first 32 cycles of this are pretty boring. + * Also, to add the CRC to a message, we need a 32-bit-long hole for it at + * the end, so we have to add 32 extra cycles shifting in zeros at the + * end of every message, + * + * So the standard trick is to rearrage merging in the next_input_bit() + * until the moment it's needed. Then the first 32 cycles can be precomputed, + * and merging in the final 32 zero bits to make room for the CRC can be + * skipped entirely. + * This changes the code to: + * for (i = 0; i < input_bits; i++) { + * remainder ^= next_input_bit() << 31; + * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * With this optimization, the little-endian code is simpler: + * for (i = 0; i < input_bits; i++) { + * remainder ^= next_input_bit(); + * multiple = (remainder & 1) ? CRCPOLY : 0; + * remainder = (remainder >> 1) ^ multiple; + * } + * + * Note that the other details of endianness have been hidden in CRCPOLY + * (which must be bit-reversed) and next_input_bit(). + * + * However, as long as next_input_bit is returning the bits in a sensible + * order, we can actually do the merging 8 or more bits at a time rather + * than one bit at a time: + * for (i = 0; i < input_bytes; i++) { + * remainder ^= next_input_byte() << 24; + * for (j = 0; j < 8; j++) { + * multiple = (remainder & 0x80000000) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * } + * Or in little-endian: + * for (i = 0; i < input_bytes; i++) { + * remainder ^= next_input_byte(); + * for (j = 0; j < 8; j++) { + * multiple = (remainder & 1) ? CRCPOLY : 0; + * remainder = (remainder << 1) ^ multiple; + * } + * } + * If the input is a multiple of 32 bits, you can even XOR in a 32-bit + * word at a time and increase the inner loop count to 32. + * + * You can also mix and match the two loop styles, for example doing the + * bulk of a message byte-at-a-time and adding bit-at-a-time processing + * for any fractional bytes at the end. + * + * The only remaining optimization is to the byte-at-a-time table method. + * Here, rather than just shifting one bit of the remainder to decide + * in the correct multiple to subtract, we can shift a byte at a time. + * This produces a 40-bit (rather than a 33-bit) intermediate remainder, + * but again the multiple of the polynomial to subtract depends only on + * the high bits, the high 8 bits in this case. + * + * The multiple we need in that case is the low 32 bits of a 40-bit + * value whose high 8 bits are given, and which is a multiple of the + * generator polynomial. This is simply the CRC-32 of the given + * one-byte message. + * + * Two more details: normally, appending zero bits to a message which + * is already a multiple of a polynomial produces a larger multiple of that + * polynomial. To enable a CRC to detect this condition, it's common to + * invert the CRC before appending it. This makes the remainder of the + * message+crc come out not as zero, but some fixed non-zero value. + * + * The same problem applies to zero bits prepended to the message, and + * a similar solution is used. Instead of starting with a remainder of + * 0, an initial remainder of all ones is used. As long as you start + * the same way on decoding, it doesn't make a difference. + */ + +#ifdef UNITTEST + +#include <stdlib.h> +#include <stdio.h> + +#if 0 /*Not used at present */ +static void +buf_dump(char const *prefix, unsigned char const *buf, size_t len) +{ + fputs(prefix, stdout); + while (len--) + printf(" %02x", *buf++); + putchar('\n'); + +} +#endif + +static void bytereverse(unsigned char *buf, size_t len) +{ + while (len--) { + unsigned char x = bitrev8(*buf); + *buf++ = x; + } +} + +static void random_garbage(unsigned char *buf, size_t len) +{ + while (len--) + *buf++ = (unsigned char) random(); +} + +#if 0 /* Not used at present */ +static void store_le(u32 x, unsigned char *buf) +{ + buf[0] = (unsigned char) x; + buf[1] = (unsigned char) (x >> 8); + buf[2] = (unsigned char) (x >> 16); + buf[3] = (unsigned char) (x >> 24); +} +#endif + +static void store_be(u32 x, unsigned char *buf) +{ + buf[0] = (unsigned char) (x >> 24); + buf[1] = (unsigned char) (x >> 16); + buf[2] = (unsigned char) (x >> 8); + buf[3] = (unsigned char) x; +} + +/* + * This checks that CRC(buf + CRC(buf)) = 0, and that + * CRC commutes with bit-reversal. This has the side effect + * of bytewise bit-reversing the input buffer, and returns + * the CRC of the reversed buffer. + */ +static u32 test_step(u32 init, unsigned char *buf, size_t len) +{ + u32 crc1, crc2; + size_t i; + + crc1 = crc32_be(init, buf, len); + store_be(crc1, buf + len); + crc2 = crc32_be(init, buf, len + 4); + if (crc2) + printf("\nCRC cancellation fail: 0x%08x should be 0\n", + crc2); + + for (i = 0; i <= len + 4; i++) { + crc2 = crc32_be(init, buf, i); + crc2 = crc32_be(crc2, buf + i, len + 4 - i); + if (crc2) + printf("\nCRC split fail: 0x%08x\n", crc2); + } + + /* Now swap it around for the other test */ + + bytereverse(buf, len + 4); + init = bitrev32(init); + crc2 = bitrev32(crc1); + if (crc1 != bitrev32(crc2)) + printf("\nBit reversal fail: 0x%08x -> 0x%08x -> 0x%08x\n", + crc1, crc2, bitrev32(crc2)); + crc1 = crc32_le(init, buf, len); + if (crc1 != crc2) + printf("\nCRC endianness fail: 0x%08x != 0x%08x\n", crc1, + crc2); + crc2 = crc32_le(init, buf, len + 4); + if (crc2) + printf("\nCRC cancellation fail: 0x%08x should be 0\n", + crc2); + + for (i = 0; i <= len + 4; i++) { + crc2 = crc32_le(init, buf, i); + crc2 = crc32_le(crc2, buf + i, len + 4 - i); + if (crc2) + printf("\nCRC split fail: 0x%08x\n", crc2); + } + + return crc1; +} + +#define SIZE 64 +#define INIT1 0 +#define INIT2 0 + +int main(void) +{ + unsigned char buf1[SIZE + 4]; + unsigned char buf2[SIZE + 4]; + unsigned char buf3[SIZE + 4]; + int i, j; + u32 crc1, crc2, crc3; + + for (i = 0; i <= SIZE; i++) { + printf("\rTesting length %d...", i); + fflush(stdout); + random_garbage(buf1, i); + random_garbage(buf2, i); + for (j = 0; j < i; j++) + buf3[j] = buf1[j] ^ buf2[j]; + + crc1 = test_step(INIT1, buf1, i); + crc2 = test_step(INIT2, buf2, i); + /* Now check that CRC(buf1 ^ buf2) = CRC(buf1) ^ CRC(buf2) */ + crc3 = test_step(INIT1 ^ INIT2, buf3, i); + if (crc3 != (crc1 ^ crc2)) + printf("CRC XOR fail: 0x%08x != 0x%08x ^ 0x%08x\n", + crc3, crc1, crc2); + } + printf("\nAll test complete. No failures expected.\n"); + return 0; +} + +#endif /* UNITTEST */ diff --git a/lib/crc32defs.h b/lib/crc32defs.h new file mode 100644 index 00000000..9b6773d7 --- /dev/null +++ b/lib/crc32defs.h @@ -0,0 +1,32 @@ +/* + * There are multiple 16-bit CRC polynomials in common use, but this is + * *the* standard CRC-32 polynomial, first popularized by Ethernet. + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 + */ +#define CRCPOLY_LE 0xedb88320 +#define CRCPOLY_BE 0x04c11db7 + +/* How many bits at a time to use. Requires a table of 4<<CRC_xx_BITS bytes. */ +/* For less performance-sensitive, use 4 */ +#ifndef CRC_LE_BITS +# define CRC_LE_BITS 8 +#endif +#ifndef CRC_BE_BITS +# define CRC_BE_BITS 8 +#endif + +/* + * Little-endian CRC computation. Used with serial bit streams sent + * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. + */ +#if CRC_LE_BITS > 8 || CRC_LE_BITS < 1 || CRC_LE_BITS & CRC_LE_BITS-1 +# error CRC_LE_BITS must be a power of 2 between 1 and 8 +#endif + +/* + * Big-endian CRC computation. Used with serial bit streams sent + * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. + */ +#if CRC_BE_BITS > 8 || CRC_BE_BITS < 1 || CRC_BE_BITS & CRC_BE_BITS-1 +# error CRC_BE_BITS must be a power of 2 between 1 and 8 +#endif diff --git a/lib/crc7.c b/lib/crc7.c new file mode 100644 index 00000000..f1c3a144 --- /dev/null +++ b/lib/crc7.c @@ -0,0 +1,68 @@ +/* + * crc7.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc7.h> + + +/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */ +const u8 crc7_syndrome_table[256] = { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, + 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, + 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e, + 0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, + 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45, + 0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, + 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c, + 0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, + 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13, + 0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, + 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a, + 0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, + 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21, + 0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, + 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38, + 0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, + 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36, + 0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, + 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f, + 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, + 0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, + 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d, + 0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, + 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52, + 0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, + 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b, + 0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, + 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60, + 0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, + 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79 +}; +EXPORT_SYMBOL(crc7_syndrome_table); + +/** + * crc7 - update the CRC7 for the data buffer + * @crc: previous CRC7 value + * @buffer: data pointer + * @len: number of bytes in the buffer + * Context: any + * + * Returns the updated CRC7 value. + */ +u8 crc7(u8 crc, const u8 *buffer, size_t len) +{ + while (len--) + crc = crc7_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc7); + +MODULE_DESCRIPTION("CRC7 calculations"); +MODULE_LICENSE("GPL"); diff --git a/lib/ctype.c b/lib/ctype.c new file mode 100644 index 00000000..26baa620 --- /dev/null +++ b/lib/ctype.c @@ -0,0 +1,36 @@ +/* + * linux/lib/ctype.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <linux/ctype.h> +#include <linux/module.h> + +const unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + +EXPORT_SYMBOL(_ctype); diff --git a/lib/debug_locks.c b/lib/debug_locks.c new file mode 100644 index 00000000..5bf0020b --- /dev/null +++ b/lib/debug_locks.c @@ -0,0 +1,48 @@ +/* + * lib/debug_locks.c + * + * Generic place for common debugging facilities for various locks: + * spinlocks, rwlocks, mutexes and rwsems. + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/kernel.h> +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/debug_locks.h> + +/* + * We want to turn all lock-debugging facilities on/off at once, + * via a global flag. The reason is that once a single bug has been + * detected and reported, there might be cascade of followup bugs + * that would just muddy the log. So we report the first one and + * shut up after that. + */ +int debug_locks = 1; +EXPORT_SYMBOL_GPL(debug_locks); + +/* + * The locking-testsuite uses <debug_locks_silent> to get a + * 'silent failure': nothing is printed to the console when + * a locking bug is detected. + */ +int debug_locks_silent; + +/* + * Generic 'turn off all lock debugging' function: + */ +int debug_locks_off(void) +{ + if (__debug_locks_off()) { + if (!debug_locks_silent) { + oops_in_progress = 1; + console_verbose(); + return 1; + } + } + return 0; +} diff --git a/lib/debugobjects.c b/lib/debugobjects.c new file mode 100644 index 00000000..deebcc57 --- /dev/null +++ b/lib/debugobjects.c @@ -0,0 +1,1049 @@ +/* + * Generic infrastructure for lifetime debugging of objects. + * + * Started by Thomas Gleixner + * + * Copyright (C) 2008, Thomas Gleixner <tglx@linutronix.de> + * + * For licencing details see kernel-base/COPYING + */ +#include <linux/debugobjects.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/slab.h> +#include <linux/hash.h> + +#define ODEBUG_HASH_BITS 14 +#define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) + +#define ODEBUG_POOL_SIZE 512 +#define ODEBUG_POOL_MIN_LEVEL 256 + +#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT +#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) +#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) + +struct debug_bucket { + struct hlist_head list; + raw_spinlock_t lock; +}; + +static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; + +static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; + +static DEFINE_RAW_SPINLOCK(pool_lock); + +static HLIST_HEAD(obj_pool); + +static int obj_pool_min_free = ODEBUG_POOL_SIZE; +static int obj_pool_free = ODEBUG_POOL_SIZE; +static int obj_pool_used; +static int obj_pool_max_used; +static struct kmem_cache *obj_cache; + +static int debug_objects_maxchain __read_mostly; +static int debug_objects_fixups __read_mostly; +static int debug_objects_warnings __read_mostly; +static int debug_objects_enabled __read_mostly + = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; + +static struct debug_obj_descr *descr_test __read_mostly; + +static void free_obj_work(struct work_struct *work); +static DECLARE_WORK(debug_obj_work, free_obj_work); + +static int __init enable_object_debug(char *str) +{ + debug_objects_enabled = 1; + return 0; +} + +static int __init disable_object_debug(char *str) +{ + debug_objects_enabled = 0; + return 0; +} + +early_param("debug_objects", enable_object_debug); +early_param("no_debug_objects", disable_object_debug); + +static const char *obj_states[ODEBUG_STATE_MAX] = { + [ODEBUG_STATE_NONE] = "none", + [ODEBUG_STATE_INIT] = "initialized", + [ODEBUG_STATE_INACTIVE] = "inactive", + [ODEBUG_STATE_ACTIVE] = "active", + [ODEBUG_STATE_DESTROYED] = "destroyed", + [ODEBUG_STATE_NOTAVAILABLE] = "not available", +}; + +static int fill_pool(void) +{ + gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; + struct debug_obj *new; + unsigned long flags; + + if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) + return obj_pool_free; + + if (unlikely(!obj_cache)) + return obj_pool_free; + + while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) { + + new = kmem_cache_zalloc(obj_cache, gfp); + if (!new) + return obj_pool_free; + + raw_spin_lock_irqsave(&pool_lock, flags); + hlist_add_head(&new->node, &obj_pool); + obj_pool_free++; + raw_spin_unlock_irqrestore(&pool_lock, flags); + } + return obj_pool_free; +} + +/* + * Lookup an object in the hash bucket. + */ +static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) +{ + struct hlist_node *node; + struct debug_obj *obj; + int cnt = 0; + + hlist_for_each_entry(obj, node, &b->list, node) { + cnt++; + if (obj->object == addr) + return obj; + } + if (cnt > debug_objects_maxchain) + debug_objects_maxchain = cnt; + + return NULL; +} + +/* + * Allocate a new object. If the pool is empty, switch off the debugger. + * Must be called with interrupts disabled. + */ +static struct debug_obj * +alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) +{ + struct debug_obj *obj = NULL; + + raw_spin_lock(&pool_lock); + if (obj_pool.first) { + obj = hlist_entry(obj_pool.first, typeof(*obj), node); + + obj->object = addr; + obj->descr = descr; + obj->state = ODEBUG_STATE_NONE; + obj->astate = 0; + hlist_del(&obj->node); + + hlist_add_head(&obj->node, &b->list); + + obj_pool_used++; + if (obj_pool_used > obj_pool_max_used) + obj_pool_max_used = obj_pool_used; + + obj_pool_free--; + if (obj_pool_free < obj_pool_min_free) + obj_pool_min_free = obj_pool_free; + } + raw_spin_unlock(&pool_lock); + + return obj; +} + +/* + * workqueue function to free objects. + */ +static void free_obj_work(struct work_struct *work) +{ + struct debug_obj *obj; + unsigned long flags; + + raw_spin_lock_irqsave(&pool_lock, flags); + while (obj_pool_free > ODEBUG_POOL_SIZE) { + obj = hlist_entry(obj_pool.first, typeof(*obj), node); + hlist_del(&obj->node); + obj_pool_free--; + /* + * We release pool_lock across kmem_cache_free() to + * avoid contention on pool_lock. + */ + raw_spin_unlock_irqrestore(&pool_lock, flags); + kmem_cache_free(obj_cache, obj); + raw_spin_lock_irqsave(&pool_lock, flags); + } + raw_spin_unlock_irqrestore(&pool_lock, flags); +} + +/* + * Put the object back into the pool and schedule work to free objects + * if necessary. + */ +static void free_object(struct debug_obj *obj) +{ + unsigned long flags; + int sched = 0; + + raw_spin_lock_irqsave(&pool_lock, flags); + /* + * schedule work when the pool is filled and the cache is + * initialized: + */ + if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) + sched = !work_pending(&debug_obj_work); + hlist_add_head(&obj->node, &obj_pool); + obj_pool_free++; + obj_pool_used--; + raw_spin_unlock_irqrestore(&pool_lock, flags); + if (sched) + schedule_work(&debug_obj_work); +} + +/* + * We run out of memory. That means we probably have tons of objects + * allocated. + */ +static void debug_objects_oom(void) +{ + struct debug_bucket *db = obj_hash; + struct hlist_node *node, *tmp; + HLIST_HEAD(freelist); + struct debug_obj *obj; + unsigned long flags; + int i; + + printk(KERN_WARNING "ODEBUG: Out of memory. ODEBUG disabled\n"); + + for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + raw_spin_lock_irqsave(&db->lock, flags); + hlist_move_list(&db->list, &freelist); + raw_spin_unlock_irqrestore(&db->lock, flags); + + /* Now free them */ + hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_del(&obj->node); + free_object(obj); + } + } +} + +/* + * We use the pfn of the address for the hash. That way we can check + * for freed objects simply by checking the affected bucket. + */ +static struct debug_bucket *get_bucket(unsigned long addr) +{ + unsigned long hash; + + hash = hash_long((addr >> ODEBUG_CHUNK_SHIFT), ODEBUG_HASH_BITS); + return &obj_hash[hash]; +} + +static void debug_print_object(struct debug_obj *obj, char *msg) +{ + static int limit; + + if (limit < 5 && obj->descr != descr_test) { + limit++; + WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " + "object type: %s\n", + msg, obj_states[obj->state], obj->astate, + obj->descr->name); + } + debug_objects_warnings++; +} + +/* + * Try to repair the damage, so we have a better chance to get useful + * debug output. + */ +static void +debug_object_fixup(int (*fixup)(void *addr, enum debug_obj_state state), + void * addr, enum debug_obj_state state) +{ + if (fixup) + debug_objects_fixups += fixup(addr, state); +} + +static void debug_object_is_on_stack(void *addr, int onstack) +{ + int is_on_stack; + static int limit; + + if (limit > 4) + return; + + is_on_stack = object_is_on_stack(addr); + if (is_on_stack == onstack) + return; + + limit++; + if (is_on_stack) + printk(KERN_WARNING + "ODEBUG: object is on stack, but not annotated\n"); + else + printk(KERN_WARNING + "ODEBUG: object is not on stack, but annotated\n"); + WARN_ON(1); +} + +static void +__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + fill_pool(); + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj) { + obj = alloc_object(addr, db, descr); + if (!obj) { + debug_objects_enabled = 0; + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_objects_oom(); + return; + } + debug_object_is_on_stack(addr, onstack); + } + + switch (obj->state) { + case ODEBUG_STATE_NONE: + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_INIT; + break; + + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "init"); + state = obj->state; + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_init, addr, state); + return; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "init"); + break; + default: + break; + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_init - debug checks when an object is initialized + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_init(void *addr, struct debug_obj_descr *descr) +{ + if (!debug_objects_enabled) + return; + + __debug_object_init(addr, descr, 0); +} + +/** + * debug_object_init_on_stack - debug checks when an object on stack is + * initialized + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) +{ + if (!debug_objects_enabled) + return; + + __debug_object_init(addr, descr, 1); +} + +/** + * debug_object_activate - debug checks when an object is activated + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_activate(void *addr, struct debug_obj_descr *descr) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_ACTIVE; + break; + + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "activate"); + state = obj->state; + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_activate, addr, state); + return; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "activate"); + break; + default: + break; + } + raw_spin_unlock_irqrestore(&db->lock, flags); + return; + } + + raw_spin_unlock_irqrestore(&db->lock, flags); + /* + * This happens when a static object is activated. We + * let the type specific code decide whether this is + * true or not. + */ + debug_object_fixup(descr->fixup_activate, addr, + ODEBUG_STATE_NOTAVAILABLE); +} + +/** + * debug_object_deactivate - debug checks when an object is deactivated + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + case ODEBUG_STATE_ACTIVE: + if (!obj->astate) + obj->state = ODEBUG_STATE_INACTIVE; + else + debug_print_object(obj, "deactivate"); + break; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "deactivate"); + break; + default: + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "deactivate"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_destroy - debug checks when an object is destroyed + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_destroy(void *addr, struct debug_obj_descr *descr) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj) + goto out_unlock; + + switch (obj->state) { + case ODEBUG_STATE_NONE: + case ODEBUG_STATE_INIT: + case ODEBUG_STATE_INACTIVE: + obj->state = ODEBUG_STATE_DESTROYED; + break; + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "destroy"); + state = obj->state; + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_destroy, addr, state); + return; + + case ODEBUG_STATE_DESTROYED: + debug_print_object(obj, "destroy"); + break; + default: + break; + } +out_unlock: + raw_spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_free - debug checks when an object is freed + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + */ +void debug_object_free(void *addr, struct debug_obj_descr *descr) +{ + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj) + goto out_unlock; + + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "free"); + state = obj->state; + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_free, addr, state); + return; + default: + hlist_del(&obj->node); + raw_spin_unlock_irqrestore(&db->lock, flags); + free_object(obj); + return; + } +out_unlock: + raw_spin_unlock_irqrestore(&db->lock, flags); +} + +/** + * debug_object_active_state - debug checks object usage state machine + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + * @expect: expected state + * @next: state to move to if expected state is found + */ +void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + if (obj->astate == expect) + obj->astate = next; + else + debug_print_object(obj, "active_state"); + break; + + default: + debug_print_object(obj, "active_state"); + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "active_state"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + +#ifdef CONFIG_DEBUG_OBJECTS_FREE +static void __debug_check_no_obj_freed(const void *address, unsigned long size) +{ + unsigned long flags, oaddr, saddr, eaddr, paddr, chunks; + struct hlist_node *node, *tmp; + HLIST_HEAD(freelist); + struct debug_obj_descr *descr; + enum debug_obj_state state; + struct debug_bucket *db; + struct debug_obj *obj; + int cnt; + + saddr = (unsigned long) address; + eaddr = saddr + size; + paddr = saddr & ODEBUG_CHUNK_MASK; + chunks = ((eaddr - paddr) + (ODEBUG_CHUNK_SIZE - 1)); + chunks >>= ODEBUG_CHUNK_SHIFT; + + for (;chunks > 0; chunks--, paddr += ODEBUG_CHUNK_SIZE) { + db = get_bucket(paddr); + +repeat: + cnt = 0; + raw_spin_lock_irqsave(&db->lock, flags); + hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) { + cnt++; + oaddr = (unsigned long) obj->object; + if (oaddr < saddr || oaddr >= eaddr) + continue; + + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + debug_print_object(obj, "free"); + descr = obj->descr; + state = obj->state; + raw_spin_unlock_irqrestore(&db->lock, flags); + debug_object_fixup(descr->fixup_free, + (void *) oaddr, state); + goto repeat; + default: + hlist_del(&obj->node); + hlist_add_head(&obj->node, &freelist); + break; + } + } + raw_spin_unlock_irqrestore(&db->lock, flags); + + /* Now free them */ + hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) { + hlist_del(&obj->node); + free_object(obj); + } + + if (cnt > debug_objects_maxchain) + debug_objects_maxchain = cnt; + } +} + +void debug_check_no_obj_freed(const void *address, unsigned long size) +{ + if (debug_objects_enabled) + __debug_check_no_obj_freed(address, size); +} +#endif + +#ifdef CONFIG_DEBUG_FS + +static int debug_stats_show(struct seq_file *m, void *v) +{ + seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); + seq_printf(m, "warnings :%d\n", debug_objects_warnings); + seq_printf(m, "fixups :%d\n", debug_objects_fixups); + seq_printf(m, "pool_free :%d\n", obj_pool_free); + seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); + seq_printf(m, "pool_used :%d\n", obj_pool_used); + seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); + return 0; +} + +static int debug_stats_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, debug_stats_show, NULL); +} + +static const struct file_operations debug_stats_fops = { + .open = debug_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init debug_objects_init_debugfs(void) +{ + struct dentry *dbgdir, *dbgstats; + + if (!debug_objects_enabled) + return 0; + + dbgdir = debugfs_create_dir("debug_objects", NULL); + if (!dbgdir) + return -ENOMEM; + + dbgstats = debugfs_create_file("stats", 0444, dbgdir, NULL, + &debug_stats_fops); + if (!dbgstats) + goto err; + + return 0; + +err: + debugfs_remove(dbgdir); + + return -ENOMEM; +} +__initcall(debug_objects_init_debugfs); + +#else +static inline void debug_objects_init_debugfs(void) { } +#endif + +#ifdef CONFIG_DEBUG_OBJECTS_SELFTEST + +/* Random data structure for the self test */ +struct self_test { + unsigned long dummy1[6]; + int static_init; + unsigned long dummy2[3]; +}; + +static __initdata struct debug_obj_descr descr_type_test; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int __init fixup_init(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_init(obj, &descr_type_test); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int __init fixup_activate(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_NOTAVAILABLE: + if (obj->static_init == 1) { + debug_object_init(obj, &descr_type_test); + debug_object_activate(obj, &descr_type_test); + /* + * Real code should return 0 here ! This is + * not a fixup of some bad behaviour. We + * merily call the debug_init function to keep + * track of the object. + */ + return 1; + } else { + /* Real code needs to emit a warning here */ + } + return 0; + + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_activate(obj, &descr_type_test); + return 1; + + default: + return 0; + } +} + +/* + * fixup_destroy is called when: + * - an active object is destroyed + */ +static int __init fixup_destroy(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_destroy(obj, &descr_type_test); + return 1; + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int __init fixup_free(void *addr, enum debug_obj_state state) +{ + struct self_test *obj = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + debug_object_deactivate(obj, &descr_type_test); + debug_object_free(obj, &descr_type_test); + return 1; + default: + return 0; + } +} + +static int __init +check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + int res = -EINVAL; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (!obj && state != ODEBUG_STATE_NONE) { + WARN(1, KERN_ERR "ODEBUG: selftest object not found\n"); + goto out; + } + if (obj && obj->state != state) { + WARN(1, KERN_ERR "ODEBUG: selftest wrong state: %d != %d\n", + obj->state, state); + goto out; + } + if (fixups != debug_objects_fixups) { + WARN(1, KERN_ERR "ODEBUG: selftest fixups failed %d != %d\n", + fixups, debug_objects_fixups); + goto out; + } + if (warnings != debug_objects_warnings) { + WARN(1, KERN_ERR "ODEBUG: selftest warnings failed %d != %d\n", + warnings, debug_objects_warnings); + goto out; + } + res = 0; +out: + raw_spin_unlock_irqrestore(&db->lock, flags); + if (res) + debug_objects_enabled = 0; + return res; +} + +static __initdata struct debug_obj_descr descr_type_test = { + .name = "selftest", + .fixup_init = fixup_init, + .fixup_activate = fixup_activate, + .fixup_destroy = fixup_destroy, + .fixup_free = fixup_free, +}; + +static __initdata struct self_test obj = { .static_init = 0 }; + +static void __init debug_objects_selftest(void) +{ + int fixups, oldfixups, warnings, oldwarnings; + unsigned long flags; + + local_irq_save(flags); + + fixups = oldfixups = debug_objects_fixups; + warnings = oldwarnings = debug_objects_warnings; + descr_test = &descr_type_test; + + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, ++warnings)) + goto out; + debug_object_deactivate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INACTIVE, fixups, warnings)) + goto out; + debug_object_destroy(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, warnings)) + goto out; + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings)) + goto out; + debug_object_deactivate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_DESTROYED, fixups, ++warnings)) + goto out; + debug_object_free(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings)) + goto out; + + obj.static_init = 1; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, ++fixups, warnings)) + goto out; + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INIT, ++fixups, ++warnings)) + goto out; + debug_object_free(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_NONE, fixups, warnings)) + goto out; + +#ifdef CONFIG_DEBUG_OBJECTS_FREE + debug_object_init(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_INIT, fixups, warnings)) + goto out; + debug_object_activate(&obj, &descr_type_test); + if (check_results(&obj, ODEBUG_STATE_ACTIVE, fixups, warnings)) + goto out; + __debug_check_no_obj_freed(&obj, sizeof(obj)); + if (check_results(&obj, ODEBUG_STATE_NONE, ++fixups, ++warnings)) + goto out; +#endif + printk(KERN_INFO "ODEBUG: selftest passed\n"); + +out: + debug_objects_fixups = oldfixups; + debug_objects_warnings = oldwarnings; + descr_test = NULL; + + local_irq_restore(flags); +} +#else +static inline void debug_objects_selftest(void) { } +#endif + +/* + * Called during early boot to initialize the hash buckets and link + * the static object pool objects into the poll list. After this call + * the object tracker is fully operational. + */ +void __init debug_objects_early_init(void) +{ + int i; + + for (i = 0; i < ODEBUG_HASH_SIZE; i++) + raw_spin_lock_init(&obj_hash[i].lock); + + for (i = 0; i < ODEBUG_POOL_SIZE; i++) + hlist_add_head(&obj_static_pool[i].node, &obj_pool); +} + +/* + * Convert the statically allocated objects to dynamic ones: + */ +static int __init debug_objects_replace_static_objects(void) +{ + struct debug_bucket *db = obj_hash; + struct hlist_node *node, *tmp; + struct debug_obj *obj, *new; + HLIST_HEAD(objects); + int i, cnt = 0; + + for (i = 0; i < ODEBUG_POOL_SIZE; i++) { + obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); + if (!obj) + goto free; + hlist_add_head(&obj->node, &objects); + } + + /* + * When debug_objects_mem_init() is called we know that only + * one CPU is up, so disabling interrupts is enough + * protection. This avoids the lockdep hell of lock ordering. + */ + local_irq_disable(); + + /* Remove the statically allocated objects from the pool */ + hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node) + hlist_del(&obj->node); + /* Move the allocated objects to the pool */ + hlist_move_list(&objects, &obj_pool); + + /* Replace the active object references */ + for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + hlist_move_list(&db->list, &objects); + + hlist_for_each_entry(obj, node, &objects, node) { + new = hlist_entry(obj_pool.first, typeof(*obj), node); + hlist_del(&new->node); + /* copy object data */ + *new = *obj; + hlist_add_head(&new->node, &db->list); + cnt++; + } + } + + printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, + obj_pool_used); + local_irq_enable(); + return 0; +free: + hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + } + return -ENOMEM; +} + +/* + * Called after the kmem_caches are functional to setup a dedicated + * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag + * prevents that the debug code is called on kmem_cache_free() for the + * debug tracker objects to avoid recursive calls. + */ +void __init debug_objects_mem_init(void) +{ + if (!debug_objects_enabled) + return; + + obj_cache = kmem_cache_create("debug_objects_cache", + sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS, NULL); + + if (!obj_cache || debug_objects_replace_static_objects()) { + debug_objects_enabled = 0; + if (obj_cache) + kmem_cache_destroy(obj_cache); + printk(KERN_WARNING "ODEBUG: out of memory.\n"); + } else + debug_objects_selftest(); +} diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c new file mode 100644 index 00000000..e73822aa --- /dev/null +++ b/lib/dec_and_lock.c @@ -0,0 +1,34 @@ +#include <linux/module.h> +#include <linux/spinlock.h> +#include <asm/atomic.h> + +/* + * This is an implementation of the notion of "decrement a + * reference count, and return locked if it decremented to zero". + * + * NOTE NOTE NOTE! This is _not_ equivalent to + * + * if (atomic_dec_and_test(&atomic)) { + * spin_lock(&lock); + * return 1; + * } + * return 0; + * + * because the spin-lock and the decrement must be + * "atomic". + */ +int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock(lock); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock(lock); + return 0; +} + +EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/lib/decompress.c b/lib/decompress.c new file mode 100644 index 00000000..a7606815 --- /dev/null +++ b/lib/decompress.c @@ -0,0 +1,59 @@ +/* + * decompress.c + * + * Detect the decompression method based on magic number + */ + +#include <linux/decompress/generic.h> + +#include <linux/decompress/bunzip2.h> +#include <linux/decompress/unlzma.h> +#include <linux/decompress/inflate.h> +#include <linux/decompress/unlzo.h> + +#include <linux/types.h> +#include <linux/string.h> + +#ifndef CONFIG_DECOMPRESS_GZIP +# define gunzip NULL +#endif +#ifndef CONFIG_DECOMPRESS_BZIP2 +# define bunzip2 NULL +#endif +#ifndef CONFIG_DECOMPRESS_LZMA +# define unlzma NULL +#endif +#ifndef CONFIG_DECOMPRESS_LZO +# define unlzo NULL +#endif + +static const struct compress_format { + unsigned char magic[2]; + const char *name; + decompress_fn decompressor; +} compressed_formats[] = { + { {037, 0213}, "gzip", gunzip }, + { {037, 0236}, "gzip", gunzip }, + { {0x42, 0x5a}, "bzip2", bunzip2 }, + { {0x5d, 0x00}, "lzma", unlzma }, + { {0x89, 0x4c}, "lzo", unlzo }, + { {0, 0}, NULL, NULL } +}; + +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name) +{ + const struct compress_format *cf; + + if (len < 2) + return NULL; /* Need at least this much... */ + + for (cf = compressed_formats; cf->name; cf++) { + if (!memcmp(inbuf, cf->magic, 2)) + break; + + } + if (name) + *name = cf->name; + return cf->decompressor; +} diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c new file mode 100644 index 00000000..81c8bb1c --- /dev/null +++ b/lib/decompress_bunzip2.c @@ -0,0 +1,758 @@ +/* vi: set sw = 4 ts = 4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + This code is licensed under the LGPLv2: + LGPL (http://www.gnu.org/copyleft/lgpl.html +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_unzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization in the name of the woman I loved, who + passed away Feb. 12, 2003. + + In memory of Toni W. Hagan + + Hospice of Acadiana, Inc. + 2600 Johnston St., Suite 200 + Lafayette, LA 70503-3240 + + Phone (337) 232-1234 or 1-800-738-2226 + Fax (337) 232-1297 + + http://www.hospiceacadiana.com/ + + Manuel + */ + +/* + Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) +*/ + + +#ifdef STATIC +#define PREBOOT +#else +#include <linux/decompress/bunzip2.h> +#include <linux/slab.h> +#endif /* STATIC */ + +#include <linux/decompress/mm.h> + +#ifndef INT_MAX +#define INT_MAX 0x7fffffff +#endif + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define BZIP2_IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinal value. */ + int limit[MAX_HUFCODE_BITS+1]; + int base[MAX_HUFCODE_BITS]; + int permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + memory that persists between calls to bunzip */ +struct bunzip_data { + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; + /* I/O tracking data (file handles, buffers, positions, etc.) */ + int (*fill)(void*, unsigned int); + int inbufCount, inbufPos /*, outbufPos*/; + unsigned char *inbuf /*,*outbuf*/; + unsigned int inbufBitCount, inbufBits; + /* The CRC values stored in the block header and calculated from the + data */ + unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; + /* Intermediate buffer and its size (in bytes) */ + unsigned int *dbuf, dbufSize; + /* These things are a bit too big to go on the stack */ + unsigned char selectors[32768]; /* nSelectors = 15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ + int io_error; /* non-zero if we have IO error */ + int byteCount[256]; + unsigned char symToByte[256], mtfSymbol[256]; +}; + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ +static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) +{ + unsigned int bits = 0; + + /* If we need to get more data from the byte buffer, do so. + (Loop getting one byte at a time to enforce endianness and avoid + unaligned access.) */ + while (bd->inbufBitCount < bits_wanted) { + /* If we need to read more data from file into byte buffer, do + so */ + if (bd->inbufPos == bd->inbufCount) { + if (bd->io_error) + return 0; + bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); + if (bd->inbufCount <= 0) { + bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; + return 0; + } + bd->inbufPos = 0; + } + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bd->inbufBitCount >= 24) { + bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); + bits_wanted -= bd->inbufBitCount; + bits <<= bits_wanted; + bd->inbufBitCount = 0; + } + /* Grab next 8 bits of input from buffer. */ + bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + } + /* Calculate result */ + bd->inbufBitCount -= bits_wanted; + bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ + +static int INIT get_next_block(struct bunzip_data *bd) +{ + struct group_data *hufGroup = NULL; + int *base = NULL; + int *limit = NULL; + int dbufCount, nextSym, dbufSize, groupCount, selector, + i, j, k, t, runPos, symCount, symTotal, nSelectors, *byteCount; + unsigned char uc, *symToByte, *mtfSymbol, *selectors; + unsigned int *dbuf, origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + byteCount = bd->byteCount; + symToByte = bd->symToByte; + mtfSymbol = bd->mtfSymbol; + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) + return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) + return RETVAL_NOT_BZIP_DATA; + /* We can add support for blockRandomised if anybody complains. + There was some code for this in busybox 1.0.0-pre3, but nobody ever + noticed that it didn't actually work. */ + if (get_bits(bd, 1)) + return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if (origPtr > dbufSize) + return RETVAL_DATA_ERROR; + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the + symbols back to the corresponding bytes. */ + t = get_bits(bd, 16); + symTotal = 0; + for (i = 0; i < 16; i++) { + if (t&(1 << (15-i))) { + k = get_bits(bd, 16); + for (j = 0; j < 16; j++) + if (k&(1 << (15-j))) + symToByte[symTotal++] = (16*i)+j; + } + } + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + /* nSelectors: Every GROUP_SIZE many symbols we select a new + Huffman coding group. Read in the group selector list, + which is stored as MTF encoded bit runs. (MTF = Move To + Front, as each value is used it's moved to the start of the + list.) */ + nSelectors = get_bits(bd, 15); + if (!nSelectors) + return RETVAL_DATA_ERROR; + for (i = 0; i < groupCount; i++) + mtfSymbol[i] = i; + for (i = 0; i < nSelectors; i++) { + /* Get next value */ + for (j = 0; get_bits(bd, 1); j++) + if (j >= groupCount) + return RETVAL_DATA_ERROR; + /* Decode MTF to get the next selector */ + uc = mtfSymbol[j]; + for (; j; j--) + mtfSymbol[j] = mtfSymbol[j-1]; + mtfSymbol[0] = selectors[i] = uc; + } + /* Read the Huffman coding tables for each group, which code + for symTotal literal symbols, plus two run symbols (RUNA, + RUNB) */ + symCount = symTotal+2; + for (j = 0; j < groupCount; j++) { + unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp; + /* Read Huffman code lengths for each symbol. They're + stored in a way similar to mtf; record a starting + value for the first symbol, and an offset from the + previous value for everys symbol after that. + (Subtracting 1 before the loop and then adding it + back at the end is an optimization that makes the + test inside the loop simpler: symbol length 0 + becomes negative, so an unsigned inequality catches + it.) */ + t = get_bits(bd, 5)-1; + for (i = 0; i < symCount; i++) { + for (;;) { + if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. Else + second bit indicates whether to + increment or decrement the value. + Optimization: grab 2 bits and unget + the second if the first was 0. */ + + k = get_bits(bd, 2); + if (k < 2) { + bd->inbufBitCount++; + break; + } + /* Add one if second bit 1, else + * subtract 1. Avoids if/else */ + t += (((k+1)&2)-1); + } + /* Correct for the initial -1, to get the + * final symbol length */ + length[i] = t+1; + } + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) + maxLen = length[i]; + else if (length[i] < minLen) + minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from + * length[]. + * + * permute[] is the lookup table for converting + * Huffman coded symbols into decoded symbols. base[] + * is the amount to subtract from the value of a + * Huffman symbol of a given length when using + * permute[]. + * + * limit[] indicates the largest numerical value a + * symbol with a given number of bits can have. This + * is how the Huffman codes can vary in length: each + * code with a value > limit[length] needs another + * bit. + */ + hufGroup = bd->groups+j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + /* Note that minLen can't be smaller than 1, so we + adjust the base and limit array pointers so we're + not always wasting the first entry. We do this + again when using them (during symbol decoding).*/ + base = hufGroup->base-1; + limit = hufGroup->limit-1; + /* Calculate permute[]. Concurrently, initialize + * temp[] and limit[]. */ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + temp[i] = limit[i] = 0; + for (t = 0; t < symCount; t++) + if (length[t] == i) + hufGroup->permute[pp++] = t; + } + /* Count symbols coded for at each bit length */ + for (i = 0; i < symCount; i++) + temp[length[i]]++; + /* Calculate limit[] (the largest symbol-coding value + *at each bit length, which is (previous limit << + *1)+symbols at this level), and base[] (number of + *symbols to ignore at each bit length, which is limit + *minus the cumulative count of symbols coded for + *already). */ + pp = t = 0; + for (i = minLen; i < maxLen; i++) { + pp += temp[i]; + /* We read the largest possible symbol size + and then unget bits after determining how + many we need, and those extra bits could be + set to anything. (They're noise from + future symbols.) At each level we're + really only interested in the first few + bits, so here we set all the trailing + to-be-ignored bits to 1 so they don't + affect the value > limit[length] + comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + base[i+1] = pp-(t += temp[i]); + } + limit[maxLen+1] = INT_MAX; /* Sentinal value for + * reading next sym. */ + limit[maxLen] = pp+temp[maxLen]-1; + base[minLen] = 0; + } + /* We've finished reading and digesting the block header. Now + read this block's Huffman coded symbols from the file and + undo the Huffman coding and run length encoding, saving the + result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To + * Front table */ + for (i = 0; i < 256; i++) { + byteCount[i] = 0; + mtfSymbol[i] = (unsigned char)i; + } + /* Loop through compressed symbols. */ + runPos = dbufCount = symCount = selector = 0; + for (;;) { + /* Determine which Huffman coding group to use. */ + if (!(symCount--)) { + symCount = GROUP_SIZE-1; + if (selector >= nSelectors) + return RETVAL_DATA_ERROR; + hufGroup = bd->groups+selectors[selector++]; + base = hufGroup->base-1; + limit = hufGroup->limit-1; + } + /* Read next Huffman-coded symbol. */ + /* Note: It is far cheaper to read maxLen bits and + back up than it is to read minLen bits and then an + additional bit at a time, testing as we go. + Because there is a trailing last block (with file + CRC), there is no danger of the overread causing an + unexpected EOF for a valid compressed file. As a + further optimization, we do the read inline + (falling back to a call to get_bits if the buffer + runs dry). The following (up to got_huff_bits:) is + equivalent to j = get_bits(bd, hufGroup->maxLen); + */ + while (bd->inbufBitCount < hufGroup->maxLen) { + if (bd->inbufPos == bd->inbufCount) { + j = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = + (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount -= hufGroup->maxLen; + j = (bd->inbufBits >> bd->inbufBitCount)& + ((1 << hufGroup->maxLen)-1); +got_huff_bits: + /* Figure how how many bits are in next symbol and + * unget extras */ + i = hufGroup->minLen; + while (j > limit[i]) + ++i; + bd->inbufBitCount += (hufGroup->maxLen - i); + /* Huffman decode value to get nextSym (with bounds checking) */ + if ((i > hufGroup->maxLen) + || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) + >= MAX_SYMBOLS)) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[j]; + /* We have now decoded the symbol, which indicates + either a new literal byte, or a repeated run of the + most recent literal byte. First, check if nextSym + indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. */ + if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ + /* If this is the start of a new run, zero out + * counter */ + if (!runPos) { + runPos = 1; + t = 0; + } + /* Neat trick that saves 1 symbol: instead of + or-ing 0 or 1 at each bit position, add 1 + or 2 instead. For example, 1011 is 1 << 0 + + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 + + 1 << 2. You can make any bit pattern + that way using 1 less symbol than the basic + or 0/1 method (except all bits 0, which + would use no symbols, but a run of length 0 + doesn't mean anything in this context). + Thus space is saved. */ + t += (runPos << nextSym); + /* +runPos if RUNA; +2*runPos if RUNB */ + + runPos <<= 1; + continue; + } + /* When we hit the first non-run symbol after a run, + we now know how many times to repeat the last + literal, so append that many copies to our buffer + of decoded symbols (dbuf) now. (The last literal + used is the one at the head of the mtfSymbol + array.) */ + if (runPos) { + runPos = 0; + if (dbufCount+t >= dbufSize) + return RETVAL_DATA_ERROR; + + uc = symToByte[mtfSymbol[0]]; + byteCount[uc] += t; + while (t--) + dbuf[dbufCount++] = uc; + } + /* Is this the terminating symbol? */ + if (nextSym > symTotal) + break; + /* At this point, nextSym indicates a new literal + character. Subtract one to get the position in the + MTF array at which this literal is currently to be + found. (Note that the result can't be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another + instance of the first symbol in the mtf array, + position 0, would have been handled as part of a + run above. Therefore 1 unused mtf position minus 2 + non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) + return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + /* Adjust the MTF array. Since we typically expect to + *move only a small number of symbols, and are bound + *by 256 in any case, using memmove here would + *typically be bigger and slower due to function call + *overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (unsigned int)uc; + } + /* At this point, we've read all the Huffman-coded symbols + (and repeated runs) for this block from the input stream, + and decoded them into the intermediate buffer. There are + dbufCount many decoded bytes in dbuf[]. Now undo the + Burrows-Wheeler transform on dbuf. See + http://dogma.net/markn/articles/bwt/bwt.htm + */ + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + k = j+byteCount[i]; + byteCount[i] = j; + j = k; + } + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uc = (unsigned char)(dbuf[i] & 0xff); + dbuf[byteCount[uc]] |= (i << 8); + byteCount[uc]++; + } + /* Decode first byte by hand to initialize "previous" byte. + Note that it doesn't get output, and if the first three + characters are identical it doesn't qualify as a run (hence + writeRunCountdown = 5). */ + if (dbufCount) { + if (origPtr >= dbufCount) + return RETVAL_DATA_ERROR; + bd->writePos = dbuf[origPtr]; + bd->writeCurrent = (unsigned char)(bd->writePos&0xff); + bd->writePos >>= 8; + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo burrows-wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd =-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. +*/ + +static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) +{ + const unsigned int *dbuf; + int pos, xcurrent, previous, gotcount; + + /* If last read was short due to end of file, return last block now */ + if (bd->writeCount < 0) + return bd->writeCount; + + gotcount = 0; + dbuf = bd->dbuf; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven't + Huffman-decoded a block into the intermediate buffer yet). */ + + if (bd->writeCopies) { + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + /* Loop outputting bytes */ + for (;;) { + /* If the output buffer is full, snapshot + * state and return */ + if (gotcount >= len) { + bd->writePos = pos; + bd->writeCurrent = xcurrent; + bd->writeCopies++; + return len; + } + /* Write next byte into output buffer, updating CRC */ + outbuf[gotcount++] = xcurrent; + bd->writeCRC = (((bd->writeCRC) << 8) + ^bd->crc32Table[((bd->writeCRC) >> 24) + ^xcurrent]); + /* Loop now if we're outputting multiple + * copies of this byte */ + if (bd->writeCopies) { + --bd->writeCopies; + continue; + } +decode_next_byte: + if (!bd->writeCount--) + break; + /* Follow sequence vector to undo + * Burrows-Wheeler transform */ + previous = xcurrent; + pos = dbuf[pos]; + xcurrent = pos&0xff; + pos >>= 8; + /* After 3 consecutive copies of the same + byte, the 4th is a repeat count. We count + down from 4 instead *of counting up because + testing for non-zero is faster */ + if (--bd->writeRunCountdown) { + if (xcurrent != previous) + bd->writeRunCountdown = 4; + } else { + /* We have a repeated run, this byte + * indicates the count */ + bd->writeCopies = xcurrent; + xcurrent = previous; + bd->writeRunCountdown = 5; + /* Sometimes there are just 3 bytes + * (run length 0) */ + if (!bd->writeCopies) + goto decode_next_byte; + /* Subtract the 1 copy we'd output + * anyway to get extras */ + --bd->writeCopies; + } + } + /* Decompression of this block completed successfully */ + bd->writeCRC = ~bd->writeCRC; + bd->totalCRC = ((bd->totalCRC << 1) | + (bd->totalCRC >> 31)) ^ bd->writeCRC; + /* If this block had a CRC error, force file level CRC error. */ + if (bd->writeCRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC+1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next + * block of input */ + /* (previous is just a convenient unused temp variable here) */ + previous = get_next_block(bd); + if (previous) { + bd->writeCount = previous; + return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; + } + bd->writeCRC = 0xffffffffUL; + pos = bd->writePos; + xcurrent = bd->writeCurrent; + goto decode_next_byte; +} + +static int INIT nofill(void *buf, unsigned int len) +{ + return -1; +} + +/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. */ +static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, + int (*fill)(void*, unsigned int)) +{ + struct bunzip_data *bd; + unsigned int i, j, c; + const unsigned int BZh0 = + (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) + +(((unsigned int)'h') << 8)+(unsigned int)'0'; + + /* Figure out how much data to allocate */ + i = sizeof(struct bunzip_data); + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = malloc(i); + if (!bd) + return RETVAL_OUT_OF_MEMORY; + memset(bd, 0, sizeof(struct bunzip_data)); + /* Setup input buffer */ + bd->inbuf = inbuf; + bd->inbufCount = len; + if (fill != NULL) + bd->fill = fill; + else + bd->fill = nofill; + + /* Init the CRC32 table (big endian) */ + for (i = 0; i < 256; i++) { + c = i << 24; + for (j = 8; j; j--) + c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); + bd->crc32Table[i] = c; + } + + /* Ensure that file starts with "BZh['1'-'9']." */ + i = get_bits(bd, 32); + if (((unsigned int)(i-BZh0-1)) >= 9) + return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + bd->dbufSize = 100000*(i-BZh0); + + bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); + if (!bd->dbuf) + return RETVAL_OUT_OF_MEMORY; + return RETVAL_OK; +} + +/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, + not end of file.) */ +STATIC int INIT bunzip2(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *outbuf, + int *pos, + void(*error_fn)(char *x)) +{ + struct bunzip_data *bd; + int i = -1; + unsigned char *inbuf; + + set_error_fn(error_fn); + if (flush) + outbuf = malloc(BZIP2_IOBUF_SIZE); + + if (!outbuf) { + error("Could not allocate output bufer"); + return RETVAL_OUT_OF_MEMORY; + } + if (buf) + inbuf = buf; + else + inbuf = malloc(BZIP2_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + i = RETVAL_OUT_OF_MEMORY; + goto exit_0; + } + i = start_bunzip(&bd, inbuf, len, fill); + if (!i) { + for (;;) { + i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); + if (i <= 0) + break; + if (!flush) + outbuf += i; + else + if (i != flush(outbuf, i)) { + i = RETVAL_UNEXPECTED_OUTPUT_EOF; + break; + } + } + } + /* Check CRC and release memory */ + if (i == RETVAL_LAST_BLOCK) { + if (bd->headerCRC != bd->totalCRC) + error("Data integrity error when decompressing."); + else + i = RETVAL_OK; + } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { + error("Compressed file ends unexpectedly"); + } + if (!bd) + goto exit_1; + if (bd->dbuf) + large_free(bd->dbuf); + if (pos) + *pos = bd->inbufPos; + free(bd); +exit_1: + if (!buf) + free(inbuf); +exit_0: + if (flush) + free(outbuf); + return i; +} + +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *outbuf, + int *pos, + void(*error_fn)(char *x)) +{ + return bunzip2(buf, len - 4, fill, flush, outbuf, pos, error_fn); +} +#endif diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c new file mode 100644 index 00000000..fc686c7a --- /dev/null +++ b/lib/decompress_inflate.c @@ -0,0 +1,176 @@ +#ifdef STATIC +/* Pre-boot environment: included */ + +/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots + * errors about console_printk etc... on ARM */ +#define _LINUX_KERNEL_H + +#include "zlib_inflate/inftrees.c" +#include "zlib_inflate/inffast.c" +#include "zlib_inflate/inflate.c" + +#else /* STATIC */ +/* initramfs et al: linked */ + +#include <linux/zutil.h> + +#include "zlib_inflate/inftrees.h" +#include "zlib_inflate/inffast.h" +#include "zlib_inflate/inflate.h" + +#include "zlib_inflate/infutil.h" +#include <linux/slab.h> + +#endif /* STATIC */ + +#include <linux/decompress/mm.h> + +#define GZIP_IOBUF_SIZE (16*1024) + +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + +/* Included from initramfs et al code */ +STATIC int INIT gunzip(unsigned char *buf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *out_buf, + int *pos, + void(*error_fn)(char *x)) { + u8 *zbuf; + struct z_stream_s *strm; + int rc; + size_t out_len; + + set_error_fn(error_fn); + rc = -1; + if (flush) { + out_len = 0x8000; /* 32 K */ + out_buf = malloc(out_len); + } else { + out_len = 0x7fffffff; /* no limit */ + } + if (!out_buf) { + error("Out of memory while allocating output buffer"); + goto gunzip_nomem1; + } + + if (buf) + zbuf = buf; + else { + zbuf = malloc(GZIP_IOBUF_SIZE); + len = 0; + } + if (!zbuf) { + error("Out of memory while allocating input buffer"); + goto gunzip_nomem2; + } + + strm = malloc(sizeof(*strm)); + if (strm == NULL) { + error("Out of memory while allocating z_stream"); + goto gunzip_nomem3; + } + + strm->workspace = malloc(flush ? zlib_inflate_workspacesize() : + sizeof(struct inflate_state)); + if (strm->workspace == NULL) { + error("Out of memory while allocating workspace"); + goto gunzip_nomem4; + } + + if (!fill) + fill = nofill; + + if (len == 0) + len = fill(zbuf, GZIP_IOBUF_SIZE); + + /* verify the gzip header */ + if (len < 10 || + zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) { + if (pos) + *pos = 0; + error("Not a gzip file"); + goto gunzip_5; + } + + /* skip over gzip header (1f,8b,08... 10 bytes total + + * possible asciz filename) + */ + strm->next_in = zbuf + 10; + /* skip over asciz filename */ + if (zbuf[3] & 0x8) { + while (strm->next_in[0]) + strm->next_in++; + strm->next_in++; + } + strm->avail_in = len - (strm->next_in - zbuf); + + strm->next_out = out_buf; + strm->avail_out = out_len; + + rc = zlib_inflateInit2(strm, -MAX_WBITS); + + if (!flush) { + WS(strm)->inflate_state.wsize = 0; + WS(strm)->inflate_state.window = NULL; + } + + while (rc == Z_OK) { + if (strm->avail_in == 0) { + /* TODO: handle case where both pos and fill are set */ + len = fill(zbuf, GZIP_IOBUF_SIZE); + if (len < 0) { + rc = -1; + error("read error"); + break; + } + strm->next_in = zbuf; + strm->avail_in = len; + } + rc = zlib_inflate(strm, 0); + + /* Write any data generated */ + if (flush && strm->next_out > out_buf) { + int l = strm->next_out - out_buf; + if (l != flush(out_buf, l)) { + rc = -1; + error("write error"); + break; + } + strm->next_out = out_buf; + strm->avail_out = out_len; + } + + /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ + if (rc == Z_STREAM_END) { + rc = 0; + break; + } else if (rc != Z_OK) { + error("uncompression error"); + rc = -1; + } + } + + zlib_inflateEnd(strm); + if (pos) + /* add + 8 to skip over trailer */ + *pos = strm->next_in - zbuf+8; + +gunzip_5: + free(strm->workspace); +gunzip_nomem4: + free(strm); +gunzip_nomem3: + if (!buf) + free(zbuf); +gunzip_nomem2: + if (flush) + free(out_buf); +gunzip_nomem1: + return rc; /* returns Z_OK (0) if successful */ +} + +#define decompress gunzip diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c new file mode 100644 index 00000000..ca82fde8 --- /dev/null +++ b/lib/decompress_unlzma.c @@ -0,0 +1,667 @@ +/* Lzma decompressor for Linux kernel. Shamelessly snarfed + *from busybox 1.1.1 + * + *Linux kernel adaptation + *Copyright (C) 2006 Alain < alain@knaff.lu > + * + *Based on small lzma deflate implementation/Small range coder + *implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Copyright (C) 1999-2005 Igor Pavlov + * + *Copyrights of the parts, see headers below. + * + * + *This program is free software; you can redistribute it and/or + *modify it under the terms of the GNU Lesser General Public + *License as published by the Free Software Foundation; either + *version 2.1 of the License, or (at your option) any later version. + * + *This program is distributed in the hope that it will be useful, + *but WITHOUT ANY WARRANTY; without even the implied warranty of + *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + *Lesser General Public License for more details. + * + *You should have received a copy of the GNU Lesser General Public + *License along with this library; if not, write to the Free Software + *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef STATIC +#define PREBOOT +#else +#include <linux/decompress/unlzma.h> +#include <linux/slab.h> +#endif /* STATIC */ + +#include <linux/decompress/mm.h> + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) + +static long long INIT read_int(unsigned char *ptr, int size) +{ + int i; + long long ret = 0; + + for (i = 0; i < size; i++) + ret = (ret << 8) | ptr[size-i-1]; + return ret; +} + +#define ENDIAN_CONVERT(x) \ + x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) + + +/* Small range coder implementation for lzma. + *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + *Copyright (c) 1999-2005 Igor Pavlov + */ + +#include <linux/compiler.h> + +#define LZMA_IOBUF_SIZE 0x10000 + +struct rc { + int (*fill)(void*, unsigned int); + uint8_t *ptr; + uint8_t *buffer; + uint8_t *buffer_end; + int buffer_size; + uint32_t code; + uint32_t range; + uint32_t bound; +}; + + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +static int nofill(void *buffer, unsigned int len) +{ + return -1; +} + +/* Called twice: once at startup and once in rc_normalize() */ +static void INIT rc_read(struct rc *rc) +{ + rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); + if (rc->buffer_size <= 0) + error("unexpected EOF"); + rc->ptr = rc->buffer; + rc->buffer_end = rc->buffer + rc->buffer_size; +} + +/* Called once */ +static inline void INIT rc_init(struct rc *rc, + int (*fill)(void*, unsigned int), + char *buffer, int buffer_size) +{ + if (fill) + rc->fill = fill; + else + rc->fill = nofill; + rc->buffer = (uint8_t *)buffer; + rc->buffer_size = buffer_size; + rc->buffer_end = rc->buffer + rc->buffer_size; + rc->ptr = rc->buffer; + + rc->code = 0; + rc->range = 0xFFFFFFFF; +} + +static inline void INIT rc_init_code(struct rc *rc) +{ + int i; + + for (i = 0; i < 5; i++) { + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; + } +} + + +/* Called once. TODO: bb_maybe_free() */ +static inline void INIT rc_free(struct rc *rc) +{ + free(rc->buffer); +} + +/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ +static void INIT rc_do_normalize(struct rc *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} +static inline void INIT rc_normalize(struct rc *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) + rc_do_normalize(rc); +} + +/* Called 9 times */ +/* Why rc_is_bit_0_helper exists? + *Because we want to always expose (rc->code < rc->bound) to optimizer + */ +static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + return rc->bound; +} +static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) +{ + uint32_t t = rc_is_bit_0_helper(rc, p); + return rc->code < t; +} + +/* Called ~10 times, but very small, thus inlined */ +static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) +{ + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +} +static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) +{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +} + +/* Called 4 times in unlzma loop */ +static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) +{ + if (rc_is_bit_0(rc, p)) { + rc_update_bit_0(rc, p); + *symbol *= 2; + return 0; + } else { + rc_update_bit_1(rc, p); + *symbol = *symbol * 2 + 1; + return 1; + } +} + +/* Called once */ +static inline int INIT rc_direct_bit(struct rc *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static inline void INIT +rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + */ + + +struct lzma_header { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} __attribute__ ((packed)) ; + + +#define LZMA_BASE_SIZE 1846 +#define LZMA_LIT_SIZE 768 + +#define LZMA_NUM_POS_BITS_MAX 4 + +#define LZMA_LEN_NUM_LOW_BITS 3 +#define LZMA_LEN_NUM_MID_BITS 3 +#define LZMA_LEN_NUM_HIGH_BITS 8 + +#define LZMA_LEN_CHOICE 0 +#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) +#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) +#define LZMA_LEN_MID (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) +#define LZMA_LEN_HIGH (LZMA_LEN_MID \ + +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) +#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) + +#define LZMA_NUM_STATES 12 +#define LZMA_NUM_LIT_STATES 7 + +#define LZMA_START_POS_MODEL_INDEX 4 +#define LZMA_END_POS_MODEL_INDEX 14 +#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) + +#define LZMA_NUM_POS_SLOT_BITS 6 +#define LZMA_NUM_LEN_TO_POS_STATES 4 + +#define LZMA_NUM_ALIGN_BITS 4 + +#define LZMA_MATCH_MIN_LEN 2 + +#define LZMA_IS_MATCH 0 +#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) +#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) +#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) +#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) +#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) +#define LZMA_SPEC_POS (LZMA_POS_SLOT \ + +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) +#define LZMA_ALIGN (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) +#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) +#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) +#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) + + +struct writer { + uint8_t *buffer; + uint8_t previous_byte; + size_t buffer_pos; + int bufsize; + size_t global_pos; + int(*flush)(void*, unsigned int); + struct lzma_header *header; +}; + +struct cstate { + int state; + uint32_t rep0, rep1, rep2, rep3; +}; + +static inline size_t INIT get_pos(struct writer *wr) +{ + return + wr->global_pos + wr->buffer_pos; +} + +static inline uint8_t INIT peek_old_byte(struct writer *wr, + uint32_t offs) +{ + if (!wr->flush) { + int32_t pos; + while (offs > wr->header->dict_size) + offs -= wr->header->dict_size; + pos = wr->buffer_pos - offs; + return wr->buffer[pos]; + } else { + uint32_t pos = wr->buffer_pos - offs; + while (pos >= wr->header->dict_size) + pos += wr->header->dict_size; + return wr->buffer[pos]; + } + +} + +static inline void INIT write_byte(struct writer *wr, uint8_t byte) +{ + wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; + if (wr->flush && wr->buffer_pos == wr->header->dict_size) { + wr->buffer_pos = 0; + wr->global_pos += wr->header->dict_size; + wr->flush((char *)wr->buffer, wr->header->dict_size); + } +} + + +static inline void INIT copy_byte(struct writer *wr, uint32_t offs) +{ + write_byte(wr, peek_old_byte(wr, offs)); +} + +static inline void INIT copy_bytes(struct writer *wr, + uint32_t rep0, int len) +{ + do { + copy_byte(wr, rep0); + len--; + } while (len != 0 && wr->buffer_pos < wr->header->dst_size); +} + +static inline void INIT process_bit0(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob, + int lc, uint32_t literal_pos_mask) { + int mi = 1; + rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE + * (((get_pos(wr) & literal_pos_mask) << lc) + + (wr->previous_byte >> (8 - lc)))) + ); + + if (cst->state >= LZMA_NUM_LIT_STATES) { + int match_byte = peek_old_byte(wr, cst->rep0); + do { + int bit; + uint16_t *prob_lit; + + match_byte <<= 1; + bit = match_byte & 0x100; + prob_lit = prob + 0x100 + bit + mi; + if (rc_get_bit(rc, prob_lit, &mi)) { + if (!bit) + break; + } else { + if (bit) + break; + } + } while (mi < 0x100); + } + while (mi < 0x100) { + uint16_t *prob_lit = prob + mi; + rc_get_bit(rc, prob_lit, &mi); + } + write_byte(wr, mi); + if (cst->state < 4) + cst->state = 0; + else if (cst->state < 10) + cst->state -= 3; + else + cst->state -= 6; +} + +static inline void INIT process_bit1(struct writer *wr, struct rc *rc, + struct cstate *cst, uint16_t *p, + int pos_state, uint16_t *prob) { + int offset; + uint16_t *prob_len; + int num_bits; + int len; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + cst->rep3 = cst->rep2; + cst->rep2 = cst->rep1; + cst->rep1 = cst->rep0; + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G0 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (cst->state << + LZMA_NUM_POS_BITS_MAX) + + pos_state); + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + + cst->state = cst->state < LZMA_NUM_LIT_STATES ? + 9 : 11; + copy_byte(wr, cst->rep0); + return; + } else { + rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G1 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep1; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G2 + cst->state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = cst->rep2; + } else { + rc_update_bit_1(rc, prob); + distance = cst->rep3; + cst->rep3 = cst->rep2; + } + cst->rep2 = cst->rep1; + } + cst->rep1 = cst->rep0; + cst->rep0 = distance; + } + cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob + LZMA_LEN_CHOICE; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_LOW + + (pos_state << + LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_CHOICE_2; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_MID + + (pos_state << + LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; + } + } + + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (cst->state < 4) { + int pos_slot; + + cst->state += LZMA_NUM_LIT_STATES; + prob = + p + LZMA_POS_SLOT + + ((len < + LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob, + LZMA_NUM_POS_SLOT_BITS, + &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i, mi; + num_bits = (pos_slot >> 1) - 1; + cst->rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + cst->rep0 <<= num_bits; + prob = p + LZMA_SPEC_POS + + cst->rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + cst->rep0 = (cst->rep0 << 1) | + rc_direct_bit(rc); + prob = p + LZMA_ALIGN; + cst->rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } + i = 1; + mi = 1; + while (num_bits--) { + if (rc_get_bit(rc, prob + mi, &mi)) + cst->rep0 |= i; + i <<= 1; + } + } else + cst->rep0 = pos_slot; + if (++(cst->rep0) == 0) + return; + } + + len += LZMA_MATCH_MIN_LEN; + + copy_bytes(wr, cst->rep0, len); +} + + + +STATIC inline int INIT unlzma(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error_fn)(char *x) + ) +{ + struct lzma_header header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_probs; + struct rc rc; + int i, mi; + struct writer wr; + struct cstate cst; + unsigned char *inbuf; + int ret = -1; + + set_error_fn(error_fn); + + if (buf) + inbuf = buf; + else + inbuf = malloc(LZMA_IOBUF_SIZE); + if (!inbuf) { + error("Could not allocate input bufer"); + goto exit_0; + } + + cst.state = 0; + cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; + + wr.header = &header; + wr.flush = flush; + wr.global_pos = 0; + wr.previous_byte = 0; + wr.buffer_pos = 0; + + rc_init(&rc, fill, inbuf, in_len); + + for (i = 0; i < sizeof(header); i++) { + if (rc.ptr >= rc.buffer_end) + rc_read(&rc); + ((unsigned char *)&header)[i] = *rc.ptr++; + } + + if (header.pos >= (9 * 5 * 5)) + error("bad header"); + + mi = 0; + lc = header.pos; + while (lc >= 9) { + mi++; + lc -= 9; + } + pb = 0; + lp = mi; + while (lp >= 5) { + pb++; + lp -= 5; + } + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + ENDIAN_CONVERT(header.dict_size); + ENDIAN_CONVERT(header.dst_size); + + if (header.dict_size == 0) + header.dict_size = 1; + + if (output) + wr.buffer = output; + else { + wr.bufsize = MIN(header.dst_size, header.dict_size); + wr.buffer = large_malloc(wr.bufsize); + } + if (wr.buffer == NULL) + goto exit_1; + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); + if (p == 0) + goto exit_2; + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc_init_code(&rc); + + while (get_pos(&wr) < header.dst_size) { + int pos_state = get_pos(&wr) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + + (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; + if (rc_is_bit_0(&rc, prob)) + process_bit0(&wr, &rc, &cst, p, pos_state, prob, + lc, literal_pos_mask); + else { + process_bit1(&wr, &rc, &cst, p, pos_state, prob); + if (cst.rep0 == 0) + break; + } + } + + if (posp) + *posp = rc.ptr-rc.buffer; + if (wr.flush) + wr.flush(wr.buffer, wr.buffer_pos); + ret = 0; + large_free(p); +exit_2: + if (!output) + large_free(wr.buffer); +exit_1: + if (!buf) + free(inbuf); +exit_0: + return ret; +} + +#ifdef PREBOOT +STATIC int INIT decompress(unsigned char *buf, int in_len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error_fn)(char *x) + ) +{ + return unlzma(buf, in_len - 4, fill, flush, output, posp, error_fn); +} +#endif diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c new file mode 100644 index 00000000..bcb3a4bd --- /dev/null +++ b/lib/decompress_unlzo.c @@ -0,0 +1,217 @@ +/* + * LZO decompressor for the Linux kernel. Code borrowed from the lzo + * implementation by Markus Franz Xaver Johannes Oberhumer. + * + * Linux kernel adaptation: + * Copyright (C) 2009 + * Albin Tonnerre, Free Electrons <albin.tonnerre@free-electrons.com> + * + * Original code: + * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + * All Rights Reserved. + * + * lzop and the LZO library are free software; you can redistribute them + * and/or modify them under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. + * If not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Markus F.X.J. Oberhumer + * <markus@oberhumer.com> + * http://www.oberhumer.com/opensource/lzop/ + */ + +#ifdef STATIC +#include "lzo/lzo1x_decompress.c" +#else +#include <linux/slab.h> +#include <linux/decompress/unlzo.h> +#endif + +#include <linux/types.h> +#include <linux/lzo.h> +#include <linux/decompress/mm.h> + +#include <linux/compiler.h> +#include <asm/unaligned.h> + +static const unsigned char lzop_magic[] = { + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; + +#define LZO_BLOCK_SIZE (256*1024l) +#define HEADER_HAS_FILTER 0x00000800L + +STATIC inline int INIT parse_header(u8 *input, u8 *skip) +{ + int l; + u8 *parse = input; + u8 level = 0; + u16 version; + + /* read magic: 9 first bits */ + for (l = 0; l < 9; l++) { + if (*parse++ != lzop_magic[l]) + return 0; + } + /* get version (2bytes), skip library version (2), + * 'need to be extracted' version (2) and + * method (1) */ + version = get_unaligned_be16(parse); + parse += 7; + if (version >= 0x0940) + level = *parse++; + if (get_unaligned_be32(parse) & HEADER_HAS_FILTER) + parse += 8; /* flags + filter info */ + else + parse += 4; /* flags */ + + /* skip mode and mtime_low */ + parse += 8; + if (version >= 0x0940) + parse += 4; /* skip mtime_high */ + + l = *parse++; + /* don't care about the file name, and skip checksum */ + parse += l + 4; + + *skip = parse - input; + return 1; +} + +STATIC inline int INIT unlzo(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error_fn) (char *x)) +{ + u8 skip = 0, r = 0; + u32 src_len, dst_len; + size_t tmp; + u8 *in_buf, *in_buf_save, *out_buf; + int ret = -1; + + set_error_fn(error_fn); + + if (output) { + out_buf = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit; + } else { + out_buf = malloc(LZO_BLOCK_SIZE); + if (!out_buf) { + error("Could not allocate output buffer"); + goto exit; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided, don't know what to do"); + goto exit_1; + } else if (input) { + in_buf = input; + } else if (!fill || !posp) { + error("NULL input pointer and missing position pointer or fill function"); + goto exit_1; + } else { + in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); + if (!in_buf) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + in_buf_save = in_buf; + + if (posp) + *posp = 0; + + if (fill) + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + + if (!parse_header(input, &skip)) { + error("invalid header"); + goto exit_2; + } + in_buf += skip; + + if (posp) + *posp = skip; + + for (;;) { + /* read uncompressed block size */ + dst_len = get_unaligned_be32(in_buf); + in_buf += 4; + + /* exit if last block */ + if (dst_len == 0) { + if (posp) + *posp += 4; + break; + } + + if (dst_len > LZO_BLOCK_SIZE) { + error("dest len longer than block size"); + goto exit_2; + } + + /* read compressed block size, and skip block checksum info */ + src_len = get_unaligned_be32(in_buf); + in_buf += 8; + + if (src_len <= 0 || src_len > dst_len) { + error("file corrupted"); + goto exit_2; + } + + /* decompress */ + tmp = dst_len; + + /* When the input data is not compressed at all, + * lzo1x_decompress_safe will fail, so call memcpy() + * instead */ + if (unlikely(dst_len == src_len)) + memcpy(out_buf, in_buf, src_len); + else { + r = lzo1x_decompress_safe((u8 *) in_buf, src_len, + out_buf, &tmp); + + if (r != LZO_E_OK || dst_len != tmp) { + error("Compressed data violation"); + goto exit_2; + } + } + + if (flush) + flush(out_buf, dst_len); + if (output) + out_buf += dst_len; + if (posp) + *posp += src_len + 12; + if (fill) { + in_buf = in_buf_save; + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + } else + in_buf += src_len; + } + + ret = 0; +exit_2: + if (!input) + free(in_buf); +exit_1: + if (!output) + free(out_buf); +exit: + return ret; +} + +#define decompress unlzo diff --git a/lib/devres.c b/lib/devres.c new file mode 100644 index 00000000..6efddf53 --- /dev/null +++ b/lib/devres.c @@ -0,0 +1,352 @@ +#include <linux/pci.h> +#include <linux/io.h> +#include <linux/gfp.h> +#include <linux/module.h> + +void devm_ioremap_release(struct device *dev, void *res) +{ + iounmap(*(void __iomem **)res); +} + +static int devm_ioremap_match(struct device *dev, void *res, void *match_data) +{ + return *(void **)res == match_data; +} + +/** + * devm_ioremap - Managed ioremap() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * + * Managed ioremap(). Map is automatically unmapped on driver detach. + */ +void __iomem *devm_ioremap(struct device *dev, resource_size_t offset, + unsigned long size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap); + +/** + * devm_ioremap_nocache - Managed ioremap_nocache() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * + * Managed ioremap_nocache(). Map is automatically unmapped on driver + * detach. + */ +void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, + unsigned long size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_nocache(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_nocache); + +/** + * devm_iounmap - Managed iounmap() + * @dev: Generic device to unmap for + * @addr: Address to unmap + * + * Managed iounmap(). @addr must have been mapped using devm_ioremap*(). + */ +void devm_iounmap(struct device *dev, void __iomem *addr) +{ + iounmap(addr); + WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, + (void *)addr)); +} +EXPORT_SYMBOL(devm_iounmap); + +#ifdef CONFIG_HAS_IOPORT +/* + * Generic iomap devres + */ +static void devm_ioport_map_release(struct device *dev, void *res) +{ + ioport_unmap(*(void __iomem **)res); +} + +static int devm_ioport_map_match(struct device *dev, void *res, + void *match_data) +{ + return *(void **)res == match_data; +} + +/** + * devm_ioport_map - Managed ioport_map() + * @dev: Generic device to map ioport for + * @port: Port to map + * @nr: Number of ports to map + * + * Managed ioport_map(). Map is automatically unmapped on driver + * detach. + */ +void __iomem * devm_ioport_map(struct device *dev, unsigned long port, + unsigned int nr) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioport_map_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioport_map(port, nr); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioport_map); + +/** + * devm_ioport_unmap - Managed ioport_unmap() + * @dev: Generic device to unmap for + * @addr: Address to unmap + * + * Managed ioport_unmap(). @addr must have been mapped using + * devm_ioport_map(). + */ +void devm_ioport_unmap(struct device *dev, void __iomem *addr) +{ + ioport_unmap(addr); + WARN_ON(devres_destroy(dev, devm_ioport_map_release, + devm_ioport_map_match, (void *)addr)); +} +EXPORT_SYMBOL(devm_ioport_unmap); + +#ifdef CONFIG_PCI +/* + * PCI iomap devres + */ +#define PCIM_IOMAP_MAX PCI_ROM_RESOURCE + +struct pcim_iomap_devres { + void __iomem *table[PCIM_IOMAP_MAX]; +}; + +static void pcim_iomap_release(struct device *gendev, void *res) +{ + struct pci_dev *dev = container_of(gendev, struct pci_dev, dev); + struct pcim_iomap_devres *this = res; + int i; + + for (i = 0; i < PCIM_IOMAP_MAX; i++) + if (this->table[i]) + pci_iounmap(dev, this->table[i]); +} + +/** + * pcim_iomap_table - access iomap allocation table + * @pdev: PCI device to access iomap table for + * + * Access iomap allocation table for @dev. If iomap table doesn't + * exist and @pdev is managed, it will be allocated. All iomaps + * recorded in the iomap table are automatically unmapped on driver + * detach. + * + * This function might sleep when the table is first allocated but can + * be safely called without context and guaranteed to succed once + * allocated. + */ +void __iomem * const * pcim_iomap_table(struct pci_dev *pdev) +{ + struct pcim_iomap_devres *dr, *new_dr; + + dr = devres_find(&pdev->dev, pcim_iomap_release, NULL, NULL); + if (dr) + return dr->table; + + new_dr = devres_alloc(pcim_iomap_release, sizeof(*new_dr), GFP_KERNEL); + if (!new_dr) + return NULL; + dr = devres_get(&pdev->dev, new_dr, NULL, NULL); + return dr->table; +} +EXPORT_SYMBOL(pcim_iomap_table); + +/** + * pcim_iomap - Managed pcim_iomap() + * @pdev: PCI device to iomap for + * @bar: BAR to iomap + * @maxlen: Maximum length of iomap + * + * Managed pci_iomap(). Map is automatically unmapped on driver + * detach. + */ +void __iomem * pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen) +{ + void __iomem **tbl; + + BUG_ON(bar >= PCIM_IOMAP_MAX); + + tbl = (void __iomem **)pcim_iomap_table(pdev); + if (!tbl || tbl[bar]) /* duplicate mappings not allowed */ + return NULL; + + tbl[bar] = pci_iomap(pdev, bar, maxlen); + return tbl[bar]; +} +EXPORT_SYMBOL(pcim_iomap); + +/** + * pcim_iounmap - Managed pci_iounmap() + * @pdev: PCI device to iounmap for + * @addr: Address to unmap + * + * Managed pci_iounmap(). @addr must have been mapped using pcim_iomap(). + */ +void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr) +{ + void __iomem **tbl; + int i; + + pci_iounmap(pdev, addr); + + tbl = (void __iomem **)pcim_iomap_table(pdev); + BUG_ON(!tbl); + + for (i = 0; i < PCIM_IOMAP_MAX; i++) + if (tbl[i] == addr) { + tbl[i] = NULL; + return; + } + WARN_ON(1); +} +EXPORT_SYMBOL(pcim_iounmap); + +/** + * pcim_iomap_regions - Request and iomap PCI BARs + * @pdev: PCI device to map IO resources for + * @mask: Mask of BARs to request and iomap + * @name: Name used when requesting regions + * + * Request and iomap regions specified by @mask. + */ +int pcim_iomap_regions(struct pci_dev *pdev, u16 mask, const char *name) +{ + void __iomem * const *iomap; + int i, rc; + + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + unsigned long len; + + if (!(mask & (1 << i))) + continue; + + rc = -EINVAL; + len = pci_resource_len(pdev, i); + if (!len) + goto err_inval; + + rc = pci_request_region(pdev, i, name); + if (rc) + goto err_inval; + + rc = -ENOMEM; + if (!pcim_iomap(pdev, i, 0)) + goto err_region; + } + + return 0; + + err_region: + pci_release_region(pdev, i); + err_inval: + while (--i >= 0) { + if (!(mask & (1 << i))) + continue; + pcim_iounmap(pdev, iomap[i]); + pci_release_region(pdev, i); + } + + return rc; +} +EXPORT_SYMBOL(pcim_iomap_regions); + +/** + * pcim_iomap_regions_request_all - Request all BARs and iomap specified ones + * @pdev: PCI device to map IO resources for + * @mask: Mask of BARs to iomap + * @name: Name used when requesting regions + * + * Request all PCI BARs and iomap regions specified by @mask. + */ +int pcim_iomap_regions_request_all(struct pci_dev *pdev, u16 mask, + const char *name) +{ + int request_mask = ((1 << 6) - 1) & ~mask; + int rc; + + rc = pci_request_selected_regions(pdev, request_mask, name); + if (rc) + return rc; + + rc = pcim_iomap_regions(pdev, mask, name); + if (rc) + pci_release_selected_regions(pdev, request_mask); + return rc; +} +EXPORT_SYMBOL(pcim_iomap_regions_request_all); + +/** + * pcim_iounmap_regions - Unmap and release PCI BARs + * @pdev: PCI device to map IO resources for + * @mask: Mask of BARs to unmap and release + * + * Unmap and release regions specified by @mask. + */ +void pcim_iounmap_regions(struct pci_dev *pdev, u16 mask) +{ + void __iomem * const *iomap; + int i; + + iomap = pcim_iomap_table(pdev); + if (!iomap) + return; + + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + if (!(mask & (1 << i))) + continue; + + pcim_iounmap(pdev, iomap[i]); + pci_release_region(pdev, i); + } +} +EXPORT_SYMBOL(pcim_iounmap_regions); +#endif +#endif diff --git a/lib/div64.c b/lib/div64.c new file mode 100644 index 00000000..a111eb8d --- /dev/null +++ b/lib/div64.c @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com> + * + * Based on former do_div() implementation from asm-parisc/div64.h: + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com> + * + * + * Generic C version of 64bit/32bit division and modulo, with + * 64bit result and 32bit remainder. + * + * The fast case for (n>>32 == 0) is handled inline by do_div(). + * + * Code generated for this function might be very inefficient + * for some CPUs. __div64_32() can be overridden by linking arch-specific + * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S. + */ + +#include <linux/module.h> +#include <linux/math64.h> + +/* Not needed on 64bit architectures */ +#if BITS_PER_LONG == 32 + +uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} + +EXPORT_SYMBOL(__div64_32); + +#ifndef div_s64_rem +s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ + u64 quotient; + + if (dividend < 0) { + quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder); + *remainder = -*remainder; + if (divisor > 0) + quotient = -quotient; + } else { + quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder); + if (divisor < 0) + quotient = -quotient; + } + return quotient; +} +EXPORT_SYMBOL(div_s64_rem); +#endif + +/* 64bit divisor, dividend and result. dynamic precision */ +#ifndef div64_u64 +u64 div64_u64(u64 dividend, u64 divisor) +{ + u32 high, d; + + high = divisor >> 32; + if (high) { + unsigned int shift = fls(high); + + d = divisor >> shift; + dividend >>= shift; + } else + d = divisor; + + return div_u64(dividend, d); +} +EXPORT_SYMBOL(div64_u64); +#endif + +#endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + return __iter_div_u64_rem(dividend, divisor, remainder); +} +EXPORT_SYMBOL(iter_div_u64_rem); diff --git a/lib/dma-debug.c b/lib/dma-debug.c new file mode 100644 index 00000000..01e64270 --- /dev/null +++ b/lib/dma-debug.c @@ -0,0 +1,1302 @@ +/* + * Copyright (C) 2008 Advanced Micro Devices, Inc. + * + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/scatterlist.h> +#include <linux/dma-mapping.h> +#include <linux/stacktrace.h> +#include <linux/dma-debug.h> +#include <linux/spinlock.h> +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/device.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/ctype.h> +#include <linux/list.h> +#include <linux/slab.h> + +#include <asm/sections.h> + +#define HASH_SIZE 1024ULL +#define HASH_FN_SHIFT 13 +#define HASH_FN_MASK (HASH_SIZE - 1) + +enum { + dma_debug_single, + dma_debug_page, + dma_debug_sg, + dma_debug_coherent, +}; + +#define DMA_DEBUG_STACKTRACE_ENTRIES 5 + +struct dma_debug_entry { + struct list_head list; + struct device *dev; + int type; + phys_addr_t paddr; + u64 dev_addr; + u64 size; + int direction; + int sg_call_ents; + int sg_mapped_ents; +#ifdef CONFIG_STACKTRACE + struct stack_trace stacktrace; + unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; +#endif +}; + +struct hash_bucket { + struct list_head list; + spinlock_t lock; +} ____cacheline_aligned_in_smp; + +/* Hash list to save the allocated dma addresses */ +static struct hash_bucket dma_entry_hash[HASH_SIZE]; +/* List of pre-allocated dma_debug_entry's */ +static LIST_HEAD(free_entries); +/* Lock for the list above */ +static DEFINE_SPINLOCK(free_entries_lock); + +/* Global disable flag - will be set in case of an error */ +static bool global_disable __read_mostly; + +/* Global error count */ +static u32 error_count; + +/* Global error show enable*/ +static u32 show_all_errors __read_mostly; +/* Number of errors to show */ +static u32 show_num_errors = 1; + +static u32 num_free_entries; +static u32 min_free_entries; +static u32 nr_total_entries; + +/* number of preallocated entries requested by kernel cmdline */ +static u32 req_entries; + +/* debugfs dentry's for the stuff above */ +static struct dentry *dma_debug_dent __read_mostly; +static struct dentry *global_disable_dent __read_mostly; +static struct dentry *error_count_dent __read_mostly; +static struct dentry *show_all_errors_dent __read_mostly; +static struct dentry *show_num_errors_dent __read_mostly; +static struct dentry *num_free_entries_dent __read_mostly; +static struct dentry *min_free_entries_dent __read_mostly; +static struct dentry *filter_dent __read_mostly; + +/* per-driver filter related state */ + +#define NAME_MAX_LEN 64 + +static char current_driver_name[NAME_MAX_LEN] __read_mostly; +static struct device_driver *current_driver __read_mostly; + +static DEFINE_RWLOCK(driver_name_lock); + +static const char *type2name[4] = { "single", "page", + "scather-gather", "coherent" }; + +static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", + "DMA_FROM_DEVICE", "DMA_NONE" }; + +/* little merge helper - remove it after the merge window */ +#ifndef BUS_NOTIFY_UNBOUND_DRIVER +#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 +#endif + +/* + * The access to some variables in this macro is racy. We can't use atomic_t + * here because all these variables are exported to debugfs. Some of them even + * writeable. This is also the reason why a lock won't help much. But anyway, + * the races are no big deal. Here is why: + * + * error_count: the addition is racy, but the worst thing that can happen is + * that we don't count some errors + * show_num_errors: the subtraction is racy. Also no big deal because in + * worst case this will result in one warning more in the + * system log than the user configured. This variable is + * writeable via debugfs. + */ +static inline void dump_entry_trace(struct dma_debug_entry *entry) +{ +#ifdef CONFIG_STACKTRACE + if (entry) { + pr_warning("Mapped at:\n"); + print_stack_trace(&entry->stacktrace, 0); + } +#endif +} + +static bool driver_filter(struct device *dev) +{ + struct device_driver *drv; + unsigned long flags; + bool ret; + + /* driver filter off */ + if (likely(!current_driver_name[0])) + return true; + + /* driver filter on and initialized */ + if (current_driver && dev && dev->driver == current_driver) + return true; + + /* driver filter on, but we can't filter on a NULL device... */ + if (!dev) + return false; + + if (current_driver || !current_driver_name[0]) + return false; + + /* driver filter on but not yet initialized */ + drv = get_driver(dev->driver); + if (!drv) + return false; + + /* lock to protect against change of current_driver_name */ + read_lock_irqsave(&driver_name_lock, flags); + + ret = false; + if (drv->name && + strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { + current_driver = drv; + ret = true; + } + + read_unlock_irqrestore(&driver_name_lock, flags); + put_driver(drv); + + return ret; +} + +#define err_printk(dev, entry, format, arg...) do { \ + error_count += 1; \ + if (driver_filter(dev) && \ + (show_all_errors || show_num_errors > 0)) { \ + WARN(1, "%s %s: " format, \ + dev ? dev_driver_string(dev) : "NULL", \ + dev ? dev_name(dev) : "NULL", ## arg); \ + dump_entry_trace(entry); \ + } \ + if (!show_all_errors && show_num_errors > 0) \ + show_num_errors -= 1; \ + } while (0); + +/* + * Hash related functions + * + * Every DMA-API request is saved into a struct dma_debug_entry. To + * have quick access to these structs they are stored into a hash. + */ +static int hash_fn(struct dma_debug_entry *entry) +{ + /* + * Hash function is based on the dma address. + * We use bits 20-27 here as the index into the hash + */ + return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; +} + +/* + * Request exclusive access to a hash bucket for a given dma_debug_entry. + */ +static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, + unsigned long *flags) +{ + int idx = hash_fn(entry); + unsigned long __flags; + + spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); + *flags = __flags; + return &dma_entry_hash[idx]; +} + +/* + * Give up exclusive access to the hash bucket + */ +static void put_hash_bucket(struct hash_bucket *bucket, + unsigned long *flags) +{ + unsigned long __flags = *flags; + + spin_unlock_irqrestore(&bucket->lock, __flags); +} + +/* + * Search a given entry in the hash bucket list + */ +static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, + struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry, *ret = NULL; + int matches = 0, match_lvl, last_lvl = 0; + + list_for_each_entry(entry, &bucket->list, list) { + if ((entry->dev_addr != ref->dev_addr) || + (entry->dev != ref->dev)) + continue; + + /* + * Some drivers map the same physical address multiple + * times. Without a hardware IOMMU this results in the + * same device addresses being put into the dma-debug + * hash multiple times too. This can result in false + * positives being reported. Therefore we implement a + * best-fit algorithm here which returns the entry from + * the hash which fits best to the reference value + * instead of the first-fit. + */ + matches += 1; + match_lvl = 0; + entry->size == ref->size ? ++match_lvl : 0; + entry->type == ref->type ? ++match_lvl : 0; + entry->direction == ref->direction ? ++match_lvl : 0; + entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0; + + if (match_lvl == 4) { + /* perfect-fit - return the result */ + return entry; + } else if (match_lvl > last_lvl) { + /* + * We found an entry that fits better then the + * previous one + */ + last_lvl = match_lvl; + ret = entry; + } + } + + /* + * If we have multiple matches but no perfect-fit, just return + * NULL. + */ + ret = (matches == 1) ? ret : NULL; + + return ret; +} + +/* + * Add an entry to a hash bucket + */ +static void hash_bucket_add(struct hash_bucket *bucket, + struct dma_debug_entry *entry) +{ + list_add_tail(&entry->list, &bucket->list); +} + +/* + * Remove entry from a hash bucket list + */ +static void hash_bucket_del(struct dma_debug_entry *entry) +{ + list_del(&entry->list); +} + +/* + * Dump mapping entries for debugging purposes + */ +void debug_dma_dump_mappings(struct device *dev) +{ + int idx; + + for (idx = 0; idx < HASH_SIZE; idx++) { + struct hash_bucket *bucket = &dma_entry_hash[idx]; + struct dma_debug_entry *entry; + unsigned long flags; + + spin_lock_irqsave(&bucket->lock, flags); + + list_for_each_entry(entry, &bucket->list, list) { + if (!dev || dev == entry->dev) { + dev_info(entry->dev, + "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", + type2name[entry->type], idx, + (unsigned long long)entry->paddr, + entry->dev_addr, entry->size, + dir2name[entry->direction]); + } + } + + spin_unlock_irqrestore(&bucket->lock, flags); + } +} +EXPORT_SYMBOL(debug_dma_dump_mappings); + +/* + * Wrapper function for adding an entry to the hash. + * This function takes care of locking itself. + */ +static void add_dma_entry(struct dma_debug_entry *entry) +{ + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(entry, &flags); + hash_bucket_add(bucket, entry); + put_hash_bucket(bucket, &flags); +} + +static struct dma_debug_entry *__dma_entry_alloc(void) +{ + struct dma_debug_entry *entry; + + entry = list_entry(free_entries.next, struct dma_debug_entry, list); + list_del(&entry->list); + memset(entry, 0, sizeof(*entry)); + + num_free_entries -= 1; + if (num_free_entries < min_free_entries) + min_free_entries = num_free_entries; + + return entry; +} + +/* struct dma_entry allocator + * + * The next two functions implement the allocator for + * struct dma_debug_entries. + */ +static struct dma_debug_entry *dma_entry_alloc(void) +{ + struct dma_debug_entry *entry = NULL; + unsigned long flags; + + spin_lock_irqsave(&free_entries_lock, flags); + + if (list_empty(&free_entries)) { + pr_err("DMA-API: debugging out of memory - disabling\n"); + global_disable = true; + goto out; + } + + entry = __dma_entry_alloc(); + +#ifdef CONFIG_STACKTRACE + entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; + entry->stacktrace.entries = entry->st_entries; + entry->stacktrace.skip = 2; + save_stack_trace(&entry->stacktrace); +#endif + +out: + spin_unlock_irqrestore(&free_entries_lock, flags); + + return entry; +} + +static void dma_entry_free(struct dma_debug_entry *entry) +{ + unsigned long flags; + + /* + * add to beginning of the list - this way the entries are + * more likely cache hot when they are reallocated. + */ + spin_lock_irqsave(&free_entries_lock, flags); + list_add(&entry->list, &free_entries); + num_free_entries += 1; + spin_unlock_irqrestore(&free_entries_lock, flags); +} + +int dma_debug_resize_entries(u32 num_entries) +{ + int i, delta, ret = 0; + unsigned long flags; + struct dma_debug_entry *entry; + LIST_HEAD(tmp); + + spin_lock_irqsave(&free_entries_lock, flags); + + if (nr_total_entries < num_entries) { + delta = num_entries - nr_total_entries; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + for (i = 0; i < delta; i++) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + break; + + list_add_tail(&entry->list, &tmp); + } + + spin_lock_irqsave(&free_entries_lock, flags); + + list_splice(&tmp, &free_entries); + nr_total_entries += i; + num_free_entries += i; + } else { + delta = nr_total_entries - num_entries; + + for (i = 0; i < delta && !list_empty(&free_entries); i++) { + entry = __dma_entry_alloc(); + kfree(entry); + } + + nr_total_entries -= i; + } + + if (nr_total_entries != num_entries) + ret = 1; + + spin_unlock_irqrestore(&free_entries_lock, flags); + + return ret; +} +EXPORT_SYMBOL(dma_debug_resize_entries); + +/* + * DMA-API debugging init code + * + * The init code does two things: + * 1. Initialize core data structures + * 2. Preallocate a given number of dma_debug_entry structs + */ + +static int prealloc_memory(u32 num_entries) +{ + struct dma_debug_entry *entry, *next_entry; + int i; + + for (i = 0; i < num_entries; ++i) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + goto out_err; + + list_add_tail(&entry->list, &free_entries); + } + + num_free_entries = num_entries; + min_free_entries = num_entries; + + pr_info("DMA-API: preallocated %d debug entries\n", num_entries); + + return 0; + +out_err: + + list_for_each_entry_safe(entry, next_entry, &free_entries, list) { + list_del(&entry->list); + kfree(entry); + } + + return -ENOMEM; +} + +static ssize_t filter_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buf[NAME_MAX_LEN + 1]; + unsigned long flags; + int len; + + if (!current_driver_name[0]) + return 0; + + /* + * We can't copy to userspace directly because current_driver_name can + * only be read under the driver_name_lock with irqs disabled. So + * create a temporary copy first. + */ + read_lock_irqsave(&driver_name_lock, flags); + len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); + read_unlock_irqrestore(&driver_name_lock, flags); + + return simple_read_from_buffer(user_buf, count, ppos, buf, len); +} + +static ssize_t filter_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + char buf[NAME_MAX_LEN]; + unsigned long flags; + size_t len; + int i; + + /* + * We can't copy from userspace directly. Access to + * current_driver_name is protected with a write_lock with irqs + * disabled. Since copy_from_user can fault and may sleep we + * need to copy to temporary buffer first + */ + len = min(count, (size_t)(NAME_MAX_LEN - 1)); + if (copy_from_user(buf, userbuf, len)) + return -EFAULT; + + buf[len] = 0; + + write_lock_irqsave(&driver_name_lock, flags); + + /* + * Now handle the string we got from userspace very carefully. + * The rules are: + * - only use the first token we got + * - token delimiter is everything looking like a space + * character (' ', '\n', '\t' ...) + * + */ + if (!isalnum(buf[0])) { + /* + * If the first character userspace gave us is not + * alphanumerical then assume the filter should be + * switched off. + */ + if (current_driver_name[0]) + pr_info("DMA-API: switching off dma-debug driver filter\n"); + current_driver_name[0] = 0; + current_driver = NULL; + goto out_unlock; + } + + /* + * Now parse out the first token and use it as the name for the + * driver to filter for. + */ + for (i = 0; i < NAME_MAX_LEN - 1; ++i) { + current_driver_name[i] = buf[i]; + if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) + break; + } + current_driver_name[i] = 0; + current_driver = NULL; + + pr_info("DMA-API: enable driver filter for driver [%s]\n", + current_driver_name); + +out_unlock: + write_unlock_irqrestore(&driver_name_lock, flags); + + return count; +} + +static const struct file_operations filter_fops = { + .read = filter_read, + .write = filter_write, +}; + +static int dma_debug_fs_init(void) +{ + dma_debug_dent = debugfs_create_dir("dma-api", NULL); + if (!dma_debug_dent) { + pr_err("DMA-API: can not create debugfs directory\n"); + return -ENOMEM; + } + + global_disable_dent = debugfs_create_bool("disabled", 0444, + dma_debug_dent, + (u32 *)&global_disable); + if (!global_disable_dent) + goto out_err; + + error_count_dent = debugfs_create_u32("error_count", 0444, + dma_debug_dent, &error_count); + if (!error_count_dent) + goto out_err; + + show_all_errors_dent = debugfs_create_u32("all_errors", 0644, + dma_debug_dent, + &show_all_errors); + if (!show_all_errors_dent) + goto out_err; + + show_num_errors_dent = debugfs_create_u32("num_errors", 0644, + dma_debug_dent, + &show_num_errors); + if (!show_num_errors_dent) + goto out_err; + + num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, + dma_debug_dent, + &num_free_entries); + if (!num_free_entries_dent) + goto out_err; + + min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, + dma_debug_dent, + &min_free_entries); + if (!min_free_entries_dent) + goto out_err; + + filter_dent = debugfs_create_file("driver_filter", 0644, + dma_debug_dent, NULL, &filter_fops); + if (!filter_dent) + goto out_err; + + return 0; + +out_err: + debugfs_remove_recursive(dma_debug_dent); + + return -ENOMEM; +} + +static int device_dma_allocations(struct device *dev) +{ + struct dma_debug_entry *entry; + unsigned long flags; + int count = 0, i; + + local_irq_save(flags); + + for (i = 0; i < HASH_SIZE; ++i) { + spin_lock(&dma_entry_hash[i].lock); + list_for_each_entry(entry, &dma_entry_hash[i].list, list) { + if (entry->dev == dev) + count += 1; + } + spin_unlock(&dma_entry_hash[i].lock); + } + + local_irq_restore(flags); + + return count; +} + +static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) +{ + struct device *dev = data; + int count; + + if (global_disable) + return 0; + + switch (action) { + case BUS_NOTIFY_UNBOUND_DRIVER: + count = device_dma_allocations(dev); + if (count == 0) + break; + err_printk(dev, NULL, "DMA-API: device driver has pending " + "DMA allocations while released from device " + "[count=%d]\n", count); + break; + default: + break; + } + + return 0; +} + +void dma_debug_add_bus(struct bus_type *bus) +{ + struct notifier_block *nb; + + if (global_disable) + return; + + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); + if (nb == NULL) { + pr_err("dma_debug_add_bus: out of memory\n"); + return; + } + + nb->notifier_call = dma_debug_device_change; + + bus_register_notifier(bus, nb); +} + +/* + * Let the architectures decide how many entries should be preallocated. + */ +void dma_debug_init(u32 num_entries) +{ + int i; + + if (global_disable) + return; + + for (i = 0; i < HASH_SIZE; ++i) { + INIT_LIST_HEAD(&dma_entry_hash[i].list); + spin_lock_init(&dma_entry_hash[i].lock); + } + + if (dma_debug_fs_init() != 0) { + pr_err("DMA-API: error creating debugfs entries - disabling\n"); + global_disable = true; + + return; + } + + if (req_entries) + num_entries = req_entries; + + if (prealloc_memory(num_entries) != 0) { + pr_err("DMA-API: debugging out of memory error - disabled\n"); + global_disable = true; + + return; + } + + nr_total_entries = num_free_entries; + + pr_info("DMA-API: debugging enabled by kernel config\n"); +} + +static __init int dma_debug_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (strncmp(str, "off", 3) == 0) { + pr_info("DMA-API: debugging disabled on kernel command line\n"); + global_disable = true; + } + + return 0; +} + +static __init int dma_debug_entries_cmdline(char *str) +{ + int res; + + if (!str) + return -EINVAL; + + res = get_option(&str, &req_entries); + + if (!res) + req_entries = 0; + + return 0; +} + +__setup("dma_debug=", dma_debug_cmdline); +__setup("dma_debug_entries=", dma_debug_entries_cmdline); + +static void check_unmap(struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + if (dma_mapping_error(ref->dev, ref->dev_addr)) { + err_printk(ref->dev, NULL, "DMA-API: device driver tries " + "to free an invalid DMA memory address\n"); + return; + } + + bucket = get_hash_bucket(ref, &flags); + entry = hash_bucket_find(bucket, ref); + + if (!entry) { + err_printk(ref->dev, NULL, "DMA-API: device driver tries " + "to free DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + ref->dev_addr, ref->size); + goto out; + } + + if (ref->size != entry->size) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different size " + "[device address=0x%016llx] [map size=%llu bytes] " + "[unmap size=%llu bytes]\n", + ref->dev_addr, entry->size, ref->size); + } + + if (ref->type != entry->type) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with wrong function " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped as %s] [unmapped as %s]\n", + ref->dev_addr, ref->size, + type2name[entry->type], type2name[ref->type]); + } else if ((entry->type == dma_debug_coherent) && + (ref->paddr != entry->paddr)) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different CPU address " + "[device address=0x%016llx] [size=%llu bytes] " + "[cpu alloc address=0x%016llx] " + "[cpu free address=0x%016llx]", + ref->dev_addr, ref->size, + (unsigned long long)entry->paddr, + (unsigned long long)ref->paddr); + } + + if (ref->sg_call_ents && ref->type == dma_debug_sg && + ref->sg_call_ents != entry->sg_call_ents) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA sg list with different entry count " + "[map count=%d] [unmap count=%d]\n", + entry->sg_call_ents, ref->sg_call_ents); + } + + /* + * This may be no bug in reality - but most implementations of the + * DMA API don't handle this properly, so check for it here + */ + if (ref->direction != entry->direction) { + err_printk(ref->dev, entry, "DMA-API: device driver frees " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [unmapped with %s]\n", + ref->dev_addr, ref->size, + dir2name[entry->direction], + dir2name[ref->direction]); + } + + hash_bucket_del(entry); + dma_entry_free(entry); + +out: + put_hash_bucket(bucket, &flags); +} + +static void check_for_stack(struct device *dev, void *addr) +{ + if (object_is_on_stack(addr)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from" + "stack [addr=%p]\n", addr); +} + +static inline bool overlap(void *addr, unsigned long len, void *start, void *end) +{ + unsigned long a1 = (unsigned long)addr; + unsigned long b1 = a1 + len; + unsigned long a2 = (unsigned long)start; + unsigned long b2 = (unsigned long)end; + + return !(b1 <= a2 || a1 >= b2); +} + +static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) +{ + if (overlap(addr, len, _text, _etext) || + overlap(addr, len, __start_rodata, __end_rodata)) + err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); +} + +static void check_sync(struct device *dev, + struct dma_debug_entry *ref, + bool to_cpu) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + + bucket = get_hash_bucket(ref, &flags); + + entry = hash_bucket_find(bucket, ref); + + if (!entry) { + err_printk(dev, NULL, "DMA-API: device driver tries " + "to sync DMA memory it has not allocated " + "[device address=0x%016llx] [size=%llu bytes]\n", + (unsigned long long)ref->dev_addr, ref->size); + goto out; + } + + if (ref->size > entry->size) { + err_printk(dev, entry, "DMA-API: device driver syncs" + " DMA memory outside allocated range " + "[device address=0x%016llx] " + "[allocation size=%llu bytes] " + "[sync offset+size=%llu]\n", + entry->dev_addr, entry->size, + ref->size); + } + + if (entry->direction == DMA_BIDIRECTIONAL) + goto out; + + if (ref->direction != entry->direction) { + err_printk(dev, entry, "DMA-API: device driver syncs " + "DMA memory with different direction " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)ref->dev_addr, entry->size, + dir2name[entry->direction], + dir2name[ref->direction]); + } + + if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && + !(ref->direction == DMA_TO_DEVICE)) + err_printk(dev, entry, "DMA-API: device driver syncs " + "device read-only DMA memory for cpu " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)ref->dev_addr, entry->size, + dir2name[entry->direction], + dir2name[ref->direction]); + + if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && + !(ref->direction == DMA_FROM_DEVICE)) + err_printk(dev, entry, "DMA-API: device driver syncs " + "device write-only DMA memory to device " + "[device address=0x%016llx] [size=%llu bytes] " + "[mapped with %s] [synced with %s]\n", + (unsigned long long)ref->dev_addr, entry->size, + dir2name[entry->direction], + dir2name[ref->direction]); + +out: + put_hash_bucket(bucket, &flags); +} + +void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, + size_t size, int direction, dma_addr_t dma_addr, + bool map_single) +{ + struct dma_debug_entry *entry; + + if (unlikely(global_disable)) + return; + + if (unlikely(dma_mapping_error(dev, dma_addr))) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->dev = dev; + entry->type = dma_debug_page; + entry->paddr = page_to_phys(page) + offset; + entry->dev_addr = dma_addr; + entry->size = size; + entry->direction = direction; + + if (map_single) + entry->type = dma_debug_single; + + if (!PageHighMem(page)) { + void *addr = page_address(page) + offset; + + check_for_stack(dev, addr); + check_for_illegal_area(dev, addr, size); + } + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_map_page); + +void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction, bool map_single) +{ + struct dma_debug_entry ref = { + .type = dma_debug_page, + .dev = dev, + .dev_addr = addr, + .size = size, + .direction = direction, + }; + + if (unlikely(global_disable)) + return; + + if (map_single) + ref.type = dma_debug_single; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_unmap_page); + +void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int mapped_ents, int direction) +{ + struct dma_debug_entry *entry; + struct scatterlist *s; + int i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, mapped_ents, i) { + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_sg; + entry->dev = dev; + entry->paddr = sg_phys(s); + entry->size = sg_dma_len(s); + entry->dev_addr = sg_dma_address(s); + entry->direction = direction; + entry->sg_call_ents = nents; + entry->sg_mapped_ents = mapped_ents; + + if (!PageHighMem(sg_page(s))) { + check_for_stack(dev, sg_virt(s)); + check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); + } + + add_dma_entry(entry); + } +} +EXPORT_SYMBOL(debug_dma_map_sg); + +static int get_nr_mapped_entries(struct device *dev, + struct dma_debug_entry *ref) +{ + struct dma_debug_entry *entry; + struct hash_bucket *bucket; + unsigned long flags; + int mapped_ents; + + bucket = get_hash_bucket(ref, &flags); + entry = hash_bucket_find(bucket, ref); + mapped_ents = 0; + + if (entry) + mapped_ents = entry->sg_mapped_ents; + put_hash_bucket(bucket, &flags); + + return mapped_ents; +} + +void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, + int nelems, int dir) +{ + struct scatterlist *s; + int mapped_ents = 0, i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sglist, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .paddr = sg_phys(s), + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), + .direction = dir, + .sg_call_ents = nelems, + }; + + if (mapped_ents && i >= mapped_ents) + break; + + if (!i) + mapped_ents = get_nr_mapped_entries(dev, &ref); + + check_unmap(&ref); + } +} +EXPORT_SYMBOL(debug_dma_unmap_sg); + +void debug_dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t dma_addr, void *virt) +{ + struct dma_debug_entry *entry; + + if (unlikely(global_disable)) + return; + + if (unlikely(virt == NULL)) + return; + + entry = dma_entry_alloc(); + if (!entry) + return; + + entry->type = dma_debug_coherent; + entry->dev = dev; + entry->paddr = virt_to_phys(virt); + entry->size = size; + entry->dev_addr = dma_addr; + entry->direction = DMA_BIDIRECTIONAL; + + add_dma_entry(entry); +} +EXPORT_SYMBOL(debug_dma_alloc_coherent); + +void debug_dma_free_coherent(struct device *dev, size_t size, + void *virt, dma_addr_t addr) +{ + struct dma_debug_entry ref = { + .type = dma_debug_coherent, + .dev = dev, + .paddr = virt_to_phys(virt), + .dev_addr = addr, + .size = size, + .direction = DMA_BIDIRECTIONAL, + }; + + if (unlikely(global_disable)) + return; + + check_unmap(&ref); +} +EXPORT_SYMBOL(debug_dma_free_coherent); + +void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, + size_t size, int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(global_disable)) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); + +void debug_dma_sync_single_for_device(struct device *dev, + dma_addr_t dma_handle, size_t size, + int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(global_disable)) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_for_device); + +void debug_dma_sync_single_range_for_cpu(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, size_t size, + int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(global_disable)) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = offset + size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, true); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); + +void debug_dma_sync_single_range_for_device(struct device *dev, + dma_addr_t dma_handle, + unsigned long offset, + size_t size, int direction) +{ + struct dma_debug_entry ref; + + if (unlikely(global_disable)) + return; + + ref.type = dma_debug_single; + ref.dev = dev; + ref.dev_addr = dma_handle; + ref.size = offset + size; + ref.direction = direction; + ref.sg_call_ents = 0; + + check_sync(dev, &ref, false); +} +EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); + +void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int mapped_ents = 0, i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .paddr = sg_phys(s), + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), + .direction = direction, + .sg_call_ents = nelems, + }; + + if (!i) + mapped_ents = get_nr_mapped_entries(dev, &ref); + + if (i >= mapped_ents) + break; + + check_sync(dev, &ref, true); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); + +void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nelems, int direction) +{ + struct scatterlist *s; + int mapped_ents = 0, i; + + if (unlikely(global_disable)) + return; + + for_each_sg(sg, s, nelems, i) { + + struct dma_debug_entry ref = { + .type = dma_debug_sg, + .dev = dev, + .paddr = sg_phys(s), + .dev_addr = sg_dma_address(s), + .size = sg_dma_len(s), + .direction = direction, + .sg_call_ents = nelems, + }; + if (!i) + mapped_ents = get_nr_mapped_entries(dev, &ref); + + if (i >= mapped_ents) + break; + + check_sync(dev, &ref, false); + } +} +EXPORT_SYMBOL(debug_dma_sync_sg_for_device); + +static int __init dma_debug_driver_setup(char *str) +{ + int i; + + for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { + current_driver_name[i] = *str; + if (*str == 0) + break; + } + + if (current_driver_name[0]) + pr_info("DMA-API: enable driver filter for driver [%s]\n", + current_driver_name); + + + return 1; +} +__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/lib/dump_stack.c b/lib/dump_stack.c new file mode 100644 index 00000000..53bff4c8 --- /dev/null +++ b/lib/dump_stack.c @@ -0,0 +1,15 @@ +/* + * Provide a default dump_stack() function for architectures + * which don't implement their own. + */ + +#include <linux/kernel.h> +#include <linux/module.h> + +void dump_stack(void) +{ + printk(KERN_NOTICE + "This architecture does not implement dump_stack()\n"); +} + +EXPORT_SYMBOL(dump_stack); diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c new file mode 100644 index 00000000..02afc253 --- /dev/null +++ b/lib/dynamic_debug.c @@ -0,0 +1,770 @@ +/* + * lib/dynamic_debug.c + * + * make pr_debug()/dev_dbg() calls runtime configurable based upon their + * source module. + * + * Copyright (C) 2008 Jason Baron <jbaron@redhat.com> + * By Greg Banks <gnb@melbourne.sgi.com> + * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/kallsyms.h> +#include <linux/version.h> +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/list.h> +#include <linux/sysctl.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/uaccess.h> +#include <linux/dynamic_debug.h> +#include <linux/debugfs.h> +#include <linux/slab.h> + +extern struct _ddebug __start___verbose[]; +extern struct _ddebug __stop___verbose[]; + +/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which + * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They + * use independent hash functions, to reduce the chance of false positives. + */ +long long dynamic_debug_enabled; +EXPORT_SYMBOL_GPL(dynamic_debug_enabled); +long long dynamic_debug_enabled2; +EXPORT_SYMBOL_GPL(dynamic_debug_enabled2); + +struct ddebug_table { + struct list_head link; + char *mod_name; + unsigned int num_ddebugs; + unsigned int num_enabled; + struct _ddebug *ddebugs; +}; + +struct ddebug_query { + const char *filename; + const char *module; + const char *function; + const char *format; + unsigned int first_lineno, last_lineno; +}; + +struct ddebug_iter { + struct ddebug_table *table; + unsigned int idx; +}; + +static DEFINE_MUTEX(ddebug_lock); +static LIST_HEAD(ddebug_tables); +static int verbose = 0; + +/* Return the last part of a pathname */ +static inline const char *basename(const char *path) +{ + const char *tail = strrchr(path, '/'); + return tail ? tail+1 : path; +} + +/* format a string into buf[] which describes the _ddebug's flags */ +static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, + size_t maxlen) +{ + char *p = buf; + + BUG_ON(maxlen < 4); + if (dp->flags & _DPRINTK_FLAGS_PRINT) + *p++ = 'p'; + if (p == buf) + *p++ = '-'; + *p = '\0'; + + return buf; +} + +/* + * must be called with ddebug_lock held + */ + +static int disabled_hash(char hash, bool first_table) +{ + struct ddebug_table *dt; + char table_hash_value; + + list_for_each_entry(dt, &ddebug_tables, link) { + if (first_table) + table_hash_value = dt->ddebugs->primary_hash; + else + table_hash_value = dt->ddebugs->secondary_hash; + if (dt->num_enabled && (hash == table_hash_value)) + return 0; + } + return 1; +} + +/* + * Search the tables for _ddebug's which match the given + * `query' and apply the `flags' and `mask' to them. Tells + * the user which ddebug's were changed, or whether none + * were matched. + */ +static void ddebug_change(const struct ddebug_query *query, + unsigned int flags, unsigned int mask) +{ + int i; + struct ddebug_table *dt; + unsigned int newflags; + unsigned int nfound = 0; + char flagbuf[8]; + + /* search for matching ddebugs */ + mutex_lock(&ddebug_lock); + list_for_each_entry(dt, &ddebug_tables, link) { + + /* match against the module name */ + if (query->module != NULL && + strcmp(query->module, dt->mod_name)) + continue; + + for (i = 0 ; i < dt->num_ddebugs ; i++) { + struct _ddebug *dp = &dt->ddebugs[i]; + + /* match against the source filename */ + if (query->filename != NULL && + strcmp(query->filename, dp->filename) && + strcmp(query->filename, basename(dp->filename))) + continue; + + /* match against the function */ + if (query->function != NULL && + strcmp(query->function, dp->function)) + continue; + + /* match against the format */ + if (query->format != NULL && + strstr(dp->format, query->format) == NULL) + continue; + + /* match against the line number range */ + if (query->first_lineno && + dp->lineno < query->first_lineno) + continue; + if (query->last_lineno && + dp->lineno > query->last_lineno) + continue; + + nfound++; + + newflags = (dp->flags & mask) | flags; + if (newflags == dp->flags) + continue; + + if (!newflags) + dt->num_enabled--; + else if (!dp->flags) + dt->num_enabled++; + dp->flags = newflags; + if (newflags) { + dynamic_debug_enabled |= + (1LL << dp->primary_hash); + dynamic_debug_enabled2 |= + (1LL << dp->secondary_hash); + } else { + if (disabled_hash(dp->primary_hash, true)) + dynamic_debug_enabled &= + ~(1LL << dp->primary_hash); + if (disabled_hash(dp->secondary_hash, false)) + dynamic_debug_enabled2 &= + ~(1LL << dp->secondary_hash); + } + if (verbose) + printk(KERN_INFO + "ddebug: changed %s:%d [%s]%s %s\n", + dp->filename, dp->lineno, + dt->mod_name, dp->function, + ddebug_describe_flags(dp, flagbuf, + sizeof(flagbuf))); + } + } + mutex_unlock(&ddebug_lock); + + if (!nfound && verbose) + printk(KERN_INFO "ddebug: no matches for query\n"); +} + +/* + * Split the buffer `buf' into space-separated words. + * Handles simple " and ' quoting, i.e. without nested, + * embedded or escaped \". Return the number of words + * or <0 on error. + */ +static int ddebug_tokenize(char *buf, char *words[], int maxwords) +{ + int nwords = 0; + + while (*buf) { + char *end; + + /* Skip leading whitespace */ + buf = skip_spaces(buf); + if (!*buf) + break; /* oh, it was trailing whitespace */ + + /* Run `end' over a word, either whitespace separated or quoted */ + if (*buf == '"' || *buf == '\'') { + int quote = *buf++; + for (end = buf ; *end && *end != quote ; end++) + ; + if (!*end) + return -EINVAL; /* unclosed quote */ + } else { + for (end = buf ; *end && !isspace(*end) ; end++) + ; + BUG_ON(end == buf); + } + /* Here `buf' is the start of the word, `end' is one past the end */ + + if (nwords == maxwords) + return -EINVAL; /* ran out of words[] before bytes */ + if (*end) + *end++ = '\0'; /* terminate the word */ + words[nwords++] = buf; + buf = end; + } + + if (verbose) { + int i; + printk(KERN_INFO "%s: split into words:", __func__); + for (i = 0 ; i < nwords ; i++) + printk(" \"%s\"", words[i]); + printk("\n"); + } + + return nwords; +} + +/* + * Parse a single line number. Note that the empty string "" + * is treated as a special case and converted to zero, which + * is later treated as a "don't care" value. + */ +static inline int parse_lineno(const char *str, unsigned int *val) +{ + char *end = NULL; + BUG_ON(str == NULL); + if (*str == '\0') { + *val = 0; + return 0; + } + *val = simple_strtoul(str, &end, 10); + return end == NULL || end == str || *end != '\0' ? -EINVAL : 0; +} + +/* + * Undo octal escaping in a string, inplace. This is useful to + * allow the user to express a query which matches a format + * containing embedded spaces. + */ +#define isodigit(c) ((c) >= '0' && (c) <= '7') +static char *unescape(char *str) +{ + char *in = str; + char *out = str; + + while (*in) { + if (*in == '\\') { + if (in[1] == '\\') { + *out++ = '\\'; + in += 2; + continue; + } else if (in[1] == 't') { + *out++ = '\t'; + in += 2; + continue; + } else if (in[1] == 'n') { + *out++ = '\n'; + in += 2; + continue; + } else if (isodigit(in[1]) && + isodigit(in[2]) && + isodigit(in[3])) { + *out++ = ((in[1] - '0')<<6) | + ((in[2] - '0')<<3) | + (in[3] - '0'); + in += 4; + continue; + } + } + *out++ = *in++; + } + *out = '\0'; + + return str; +} + +/* + * Parse words[] as a ddebug query specification, which is a series + * of (keyword, value) pairs chosen from these possibilities: + * + * func <function-name> + * file <full-pathname> + * file <base-filename> + * module <module-name> + * format <escaped-string-to-find-in-format> + * line <lineno> + * line <first-lineno>-<last-lineno> // where either may be empty + */ +static int ddebug_parse_query(char *words[], int nwords, + struct ddebug_query *query) +{ + unsigned int i; + + /* check we have an even number of words */ + if (nwords % 2 != 0) + return -EINVAL; + memset(query, 0, sizeof(*query)); + + for (i = 0 ; i < nwords ; i += 2) { + if (!strcmp(words[i], "func")) + query->function = words[i+1]; + else if (!strcmp(words[i], "file")) + query->filename = words[i+1]; + else if (!strcmp(words[i], "module")) + query->module = words[i+1]; + else if (!strcmp(words[i], "format")) + query->format = unescape(words[i+1]); + else if (!strcmp(words[i], "line")) { + char *first = words[i+1]; + char *last = strchr(first, '-'); + if (last) + *last++ = '\0'; + if (parse_lineno(first, &query->first_lineno) < 0) + return -EINVAL; + if (last != NULL) { + /* range <first>-<last> */ + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + } else { + query->last_lineno = query->first_lineno; + } + } else { + if (verbose) + printk(KERN_ERR "%s: unknown keyword \"%s\"\n", + __func__, words[i]); + return -EINVAL; + } + } + + if (verbose) + printk(KERN_INFO "%s: q->function=\"%s\" q->filename=\"%s\" " + "q->module=\"%s\" q->format=\"%s\" q->lineno=%u-%u\n", + __func__, query->function, query->filename, + query->module, query->format, query->first_lineno, + query->last_lineno); + + return 0; +} + +/* + * Parse `str' as a flags specification, format [-+=][p]+. + * Sets up *maskp and *flagsp to be used when changing the + * flags fields of matched _ddebug's. Returns 0 on success + * or <0 on error. + */ +static int ddebug_parse_flags(const char *str, unsigned int *flagsp, + unsigned int *maskp) +{ + unsigned flags = 0; + int op = '='; + + switch (*str) { + case '+': + case '-': + case '=': + op = *str++; + break; + default: + return -EINVAL; + } + if (verbose) + printk(KERN_INFO "%s: op='%c'\n", __func__, op); + + for ( ; *str ; ++str) { + switch (*str) { + case 'p': + flags |= _DPRINTK_FLAGS_PRINT; + break; + default: + return -EINVAL; + } + } + if (flags == 0) + return -EINVAL; + if (verbose) + printk(KERN_INFO "%s: flags=0x%x\n", __func__, flags); + + /* calculate final *flagsp, *maskp according to mask and op */ + switch (op) { + case '=': + *maskp = 0; + *flagsp = flags; + break; + case '+': + *maskp = ~0U; + *flagsp = flags; + break; + case '-': + *maskp = ~flags; + *flagsp = 0; + break; + } + if (verbose) + printk(KERN_INFO "%s: *flagsp=0x%x *maskp=0x%x\n", + __func__, *flagsp, *maskp); + return 0; +} + +/* + * File_ops->write method for <debugfs>/dynamic_debug/conrol. Gathers the + * command text from userspace, parses and executes it. + */ +static ssize_t ddebug_proc_write(struct file *file, const char __user *ubuf, + size_t len, loff_t *offp) +{ + unsigned int flags = 0, mask = 0; + struct ddebug_query query; +#define MAXWORDS 9 + int nwords; + char *words[MAXWORDS]; + char tmpbuf[256]; + + if (len == 0) + return 0; + /* we don't check *offp -- multiple writes() are allowed */ + if (len > sizeof(tmpbuf)-1) + return -E2BIG; + if (copy_from_user(tmpbuf, ubuf, len)) + return -EFAULT; + tmpbuf[len] = '\0'; + if (verbose) + printk(KERN_INFO "%s: read %d bytes from userspace\n", + __func__, (int)len); + + nwords = ddebug_tokenize(tmpbuf, words, MAXWORDS); + if (nwords <= 0) + return -EINVAL; + if (ddebug_parse_query(words, nwords-1, &query)) + return -EINVAL; + if (ddebug_parse_flags(words[nwords-1], &flags, &mask)) + return -EINVAL; + + /* actually go and implement the change */ + ddebug_change(&query, flags, mask); + + *offp += len; + return len; +} + +/* + * Set the iterator to point to the first _ddebug object + * and return a pointer to that first object. Returns + * NULL if there are no _ddebugs at all. + */ +static struct _ddebug *ddebug_iter_first(struct ddebug_iter *iter) +{ + if (list_empty(&ddebug_tables)) { + iter->table = NULL; + iter->idx = 0; + return NULL; + } + iter->table = list_entry(ddebug_tables.next, + struct ddebug_table, link); + iter->idx = 0; + return &iter->table->ddebugs[iter->idx]; +} + +/* + * Advance the iterator to point to the next _ddebug + * object from the one the iterator currently points at, + * and returns a pointer to the new _ddebug. Returns + * NULL if the iterator has seen all the _ddebugs. + */ +static struct _ddebug *ddebug_iter_next(struct ddebug_iter *iter) +{ + if (iter->table == NULL) + return NULL; + if (++iter->idx == iter->table->num_ddebugs) { + /* iterate to next table */ + iter->idx = 0; + if (list_is_last(&iter->table->link, &ddebug_tables)) { + iter->table = NULL; + return NULL; + } + iter->table = list_entry(iter->table->link.next, + struct ddebug_table, link); + } + return &iter->table->ddebugs[iter->idx]; +} + +/* + * Seq_ops start method. Called at the start of every + * read() call from userspace. Takes the ddebug_lock and + * seeks the seq_file's iterator to the given position. + */ +static void *ddebug_proc_start(struct seq_file *m, loff_t *pos) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp; + int n = *pos; + + if (verbose) + printk(KERN_INFO "%s: called m=%p *pos=%lld\n", + __func__, m, (unsigned long long)*pos); + + mutex_lock(&ddebug_lock); + + if (!n) + return SEQ_START_TOKEN; + if (n < 0) + return NULL; + dp = ddebug_iter_first(iter); + while (dp != NULL && --n > 0) + dp = ddebug_iter_next(iter); + return dp; +} + +/* + * Seq_ops next method. Called several times within a read() + * call from userspace, with ddebug_lock held. Walks to the + * next _ddebug object with a special case for the header line. + */ +static void *ddebug_proc_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp; + + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p *pos=%lld\n", + __func__, m, p, (unsigned long long)*pos); + + if (p == SEQ_START_TOKEN) + dp = ddebug_iter_first(iter); + else + dp = ddebug_iter_next(iter); + ++*pos; + return dp; +} + +/* + * Seq_ops show method. Called several times within a read() + * call from userspace, with ddebug_lock held. Formats the + * current _ddebug as a single human-readable line, with a + * special case for the header line. + */ +static int ddebug_proc_show(struct seq_file *m, void *p) +{ + struct ddebug_iter *iter = m->private; + struct _ddebug *dp = p; + char flagsbuf[8]; + + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p\n", + __func__, m, p); + + if (p == SEQ_START_TOKEN) { + seq_puts(m, + "# filename:lineno [module]function flags format\n"); + return 0; + } + + seq_printf(m, "%s:%u [%s]%s %s \"", + dp->filename, dp->lineno, + iter->table->mod_name, dp->function, + ddebug_describe_flags(dp, flagsbuf, sizeof(flagsbuf))); + seq_escape(m, dp->format, "\t\r\n\""); + seq_puts(m, "\"\n"); + + return 0; +} + +/* + * Seq_ops stop method. Called at the end of each read() + * call from userspace. Drops ddebug_lock. + */ +static void ddebug_proc_stop(struct seq_file *m, void *p) +{ + if (verbose) + printk(KERN_INFO "%s: called m=%p p=%p\n", + __func__, m, p); + mutex_unlock(&ddebug_lock); +} + +static const struct seq_operations ddebug_proc_seqops = { + .start = ddebug_proc_start, + .next = ddebug_proc_next, + .show = ddebug_proc_show, + .stop = ddebug_proc_stop +}; + +/* + * File_ops->open method for <debugfs>/dynamic_debug/control. Does the seq_file + * setup dance, and also creates an iterator to walk the _ddebugs. + * Note that we create a seq_file always, even for O_WRONLY files + * where it's not needed, as doing so simplifies the ->release method. + */ +static int ddebug_proc_open(struct inode *inode, struct file *file) +{ + struct ddebug_iter *iter; + int err; + + if (verbose) + printk(KERN_INFO "%s: called\n", __func__); + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (iter == NULL) + return -ENOMEM; + + err = seq_open(file, &ddebug_proc_seqops); + if (err) { + kfree(iter); + return err; + } + ((struct seq_file *) file->private_data)->private = iter; + return 0; +} + +static const struct file_operations ddebug_proc_fops = { + .owner = THIS_MODULE, + .open = ddebug_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, + .write = ddebug_proc_write +}; + +/* + * Allocate a new ddebug_table for the given module + * and add it to the global list. + */ +int ddebug_add_module(struct _ddebug *tab, unsigned int n, + const char *name) +{ + struct ddebug_table *dt; + char *new_name; + + dt = kzalloc(sizeof(*dt), GFP_KERNEL); + if (dt == NULL) + return -ENOMEM; + new_name = kstrdup(name, GFP_KERNEL); + if (new_name == NULL) { + kfree(dt); + return -ENOMEM; + } + dt->mod_name = new_name; + dt->num_ddebugs = n; + dt->num_enabled = 0; + dt->ddebugs = tab; + + mutex_lock(&ddebug_lock); + list_add_tail(&dt->link, &ddebug_tables); + mutex_unlock(&ddebug_lock); + + if (verbose) + printk(KERN_INFO "%u debug prints in module %s\n", + n, dt->mod_name); + return 0; +} +EXPORT_SYMBOL_GPL(ddebug_add_module); + +static void ddebug_table_free(struct ddebug_table *dt) +{ + list_del_init(&dt->link); + kfree(dt->mod_name); + kfree(dt); +} + +/* + * Called in response to a module being unloaded. Removes + * any ddebug_table's which point at the module. + */ +int ddebug_remove_module(const char *mod_name) +{ + struct ddebug_table *dt, *nextdt; + int ret = -ENOENT; + + if (verbose) + printk(KERN_INFO "%s: removing module \"%s\"\n", + __func__, mod_name); + + mutex_lock(&ddebug_lock); + list_for_each_entry_safe(dt, nextdt, &ddebug_tables, link) { + if (!strcmp(dt->mod_name, mod_name)) { + ddebug_table_free(dt); + ret = 0; + } + } + mutex_unlock(&ddebug_lock); + return ret; +} +EXPORT_SYMBOL_GPL(ddebug_remove_module); + +static void ddebug_remove_all_tables(void) +{ + mutex_lock(&ddebug_lock); + while (!list_empty(&ddebug_tables)) { + struct ddebug_table *dt = list_entry(ddebug_tables.next, + struct ddebug_table, + link); + ddebug_table_free(dt); + } + mutex_unlock(&ddebug_lock); +} + +static int __init dynamic_debug_init(void) +{ + struct dentry *dir, *file; + struct _ddebug *iter, *iter_start; + const char *modname = NULL; + int ret = 0; + int n = 0; + + dir = debugfs_create_dir("dynamic_debug", NULL); + if (!dir) + return -ENOMEM; + file = debugfs_create_file("control", 0644, dir, NULL, + &ddebug_proc_fops); + if (!file) { + debugfs_remove(dir); + return -ENOMEM; + } + if (__start___verbose != __stop___verbose) { + iter = __start___verbose; + modname = iter->modname; + iter_start = iter; + for (; iter < __stop___verbose; iter++) { + if (strcmp(modname, iter->modname)) { + ret = ddebug_add_module(iter_start, n, modname); + if (ret) + goto out_free; + n = 0; + modname = iter->modname; + iter_start = iter; + } + n++; + } + ret = ddebug_add_module(iter_start, n, modname); + } +out_free: + if (ret) { + ddebug_remove_all_tables(); + debugfs_remove(dir); + debugfs_remove(file); + } + return 0; +} +module_init(dynamic_debug_init); diff --git a/lib/extable.c b/lib/extable.c new file mode 100644 index 00000000..4cac81ec --- /dev/null +++ b/lib/extable.c @@ -0,0 +1,93 @@ +/* + * Derived from arch/ppc/mm/extable.c and arch/i386/mm/extable.c. + * + * Copyright (C) 2004 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/sort.h> +#include <asm/uaccess.h> + +#ifndef ARCH_HAS_SORT_EXTABLE +/* + * The exception table needs to be sorted so that the binary + * search that we use to find entries in it works properly. + * This is used both for the kernel exception table and for + * the exception tables of modules that get loaded. + */ +static int cmp_ex(const void *a, const void *b) +{ + const struct exception_table_entry *x = a, *y = b; + + /* avoid overflow */ + if (x->insn > y->insn) + return 1; + if (x->insn < y->insn) + return -1; + return 0; +} + +void sort_extable(struct exception_table_entry *start, + struct exception_table_entry *finish) +{ + sort(start, finish - start, sizeof(struct exception_table_entry), + cmp_ex, NULL); +} + +#ifdef CONFIG_MODULES +/* + * If the exception table is sorted, any referring to the module init + * will be at the beginning or the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && within_module_init(m->extable[0].insn, m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(m->extable[m->num_exentries-1].insn, m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ +#endif /* !ARCH_HAS_SORT_EXTABLE */ + +#ifndef ARCH_HAS_SEARCH_EXTABLE +/* + * Search one exception table for an entry corresponding to the + * given instruction address, and return the address of the entry, + * or NULL if none is found. + * We use a binary search, and thus we assume that the table is + * already sorted. + */ +const struct exception_table_entry * +search_extable(const struct exception_table_entry *first, + const struct exception_table_entry *last, + unsigned long value) +{ + while (first <= last) { + const struct exception_table_entry *mid; + + mid = ((last - first) >> 1) + first; + /* + * careful, the distance between value and insn + * can be larger than MAX_LONG: + */ + if (mid->insn < value) + first = mid + 1; + else if (mid->insn > value) + last = mid - 1; + else + return mid; + } + return NULL; +} +#endif diff --git a/lib/fault-inject.c b/lib/fault-inject.c new file mode 100644 index 00000000..7e65af70 --- /dev/null +++ b/lib/fault-inject.c @@ -0,0 +1,315 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/random.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/stacktrace.h> +#include <linux/kallsyms.h> +#include <linux/fault-inject.h> + +/* + * setup_fault_attr() is a helper function for various __setup handlers, so it + * returns 0 on error, because that is what __setup handlers do. + */ +int __init setup_fault_attr(struct fault_attr *attr, char *str) +{ + unsigned long probability; + unsigned long interval; + int times; + int space; + + /* "<interval>,<probability>,<space>,<times>" */ + if (sscanf(str, "%lu,%lu,%d,%d", + &interval, &probability, &space, ×) < 4) { + printk(KERN_WARNING + "FAULT_INJECTION: failed to parse arguments\n"); + return 0; + } + + attr->probability = probability; + attr->interval = interval; + atomic_set(&attr->times, times); + atomic_set(&attr->space, space); + + return 1; +} + +static void fail_dump(struct fault_attr *attr) +{ + if (attr->verbose > 0) + printk(KERN_NOTICE "FAULT_INJECTION: forcing a failure\n"); + if (attr->verbose > 1) + dump_stack(); +} + +#define atomic_dec_not_zero(v) atomic_add_unless((v), -1, 0) + +static bool fail_task(struct fault_attr *attr, struct task_struct *task) +{ + return !in_interrupt() && task->make_it_fail; +} + +#define MAX_STACK_TRACE_DEPTH 32 + +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER + +static bool fail_stacktrace(struct fault_attr *attr) +{ + struct stack_trace trace; + int depth = attr->stacktrace_depth; + unsigned long entries[MAX_STACK_TRACE_DEPTH]; + int n; + bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX); + + if (depth == 0) + return found; + + trace.nr_entries = 0; + trace.entries = entries; + trace.max_entries = depth; + trace.skip = 1; + + save_stack_trace(&trace); + for (n = 0; n < trace.nr_entries; n++) { + if (attr->reject_start <= entries[n] && + entries[n] < attr->reject_end) + return false; + if (attr->require_start <= entries[n] && + entries[n] < attr->require_end) + found = true; + } + return found; +} + +#else + +static inline bool fail_stacktrace(struct fault_attr *attr) +{ + return true; +} + +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + +/* + * This code is stolen from failmalloc-1.0 + * http://www.nongnu.org/failmalloc/ + */ + +bool should_fail(struct fault_attr *attr, ssize_t size) +{ + if (attr->task_filter && !fail_task(attr, current)) + return false; + + if (atomic_read(&attr->times) == 0) + return false; + + if (atomic_read(&attr->space) > size) { + atomic_sub(size, &attr->space); + return false; + } + + if (attr->interval > 1) { + attr->count++; + if (attr->count % attr->interval) + return false; + } + + if (attr->probability <= random32() % 100) + return false; + + if (!fail_stacktrace(attr)) + return false; + + fail_dump(attr); + + if (atomic_read(&attr->times) != -1) + atomic_dec_not_zero(&attr->times); + + return true; +} + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static int debugfs_ul_set(void *data, u64 val) +{ + *(unsigned long *)data = val; + return 0; +} + +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER +static int debugfs_ul_set_MAX_STACK_TRACE_DEPTH(void *data, u64 val) +{ + *(unsigned long *)data = + val < MAX_STACK_TRACE_DEPTH ? + val : MAX_STACK_TRACE_DEPTH; + return 0; +} +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + +static int debugfs_ul_get(void *data, u64 *val) +{ + *val = *(unsigned long *)data; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_ul, debugfs_ul_get, debugfs_ul_set, "%llu\n"); + +static struct dentry *debugfs_create_ul(const char *name, mode_t mode, + struct dentry *parent, unsigned long *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_ul); +} + +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER +DEFINE_SIMPLE_ATTRIBUTE(fops_ul_MAX_STACK_TRACE_DEPTH, debugfs_ul_get, + debugfs_ul_set_MAX_STACK_TRACE_DEPTH, "%llu\n"); + +static struct dentry *debugfs_create_ul_MAX_STACK_TRACE_DEPTH( + const char *name, mode_t mode, + struct dentry *parent, unsigned long *value) +{ + return debugfs_create_file(name, mode, parent, value, + &fops_ul_MAX_STACK_TRACE_DEPTH); +} +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + +static int debugfs_atomic_t_set(void *data, u64 val) +{ + atomic_set((atomic_t *)data, val); + return 0; +} + +static int debugfs_atomic_t_get(void *data, u64 *val) +{ + *val = atomic_read((atomic_t *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, + debugfs_atomic_t_set, "%lld\n"); + +static struct dentry *debugfs_create_atomic_t(const char *name, mode_t mode, + struct dentry *parent, atomic_t *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); +} + +void cleanup_fault_attr_dentries(struct fault_attr *attr) +{ + debugfs_remove(attr->dentries.probability_file); + attr->dentries.probability_file = NULL; + + debugfs_remove(attr->dentries.interval_file); + attr->dentries.interval_file = NULL; + + debugfs_remove(attr->dentries.times_file); + attr->dentries.times_file = NULL; + + debugfs_remove(attr->dentries.space_file); + attr->dentries.space_file = NULL; + + debugfs_remove(attr->dentries.verbose_file); + attr->dentries.verbose_file = NULL; + + debugfs_remove(attr->dentries.task_filter_file); + attr->dentries.task_filter_file = NULL; + +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER + + debugfs_remove(attr->dentries.stacktrace_depth_file); + attr->dentries.stacktrace_depth_file = NULL; + + debugfs_remove(attr->dentries.require_start_file); + attr->dentries.require_start_file = NULL; + + debugfs_remove(attr->dentries.require_end_file); + attr->dentries.require_end_file = NULL; + + debugfs_remove(attr->dentries.reject_start_file); + attr->dentries.reject_start_file = NULL; + + debugfs_remove(attr->dentries.reject_end_file); + attr->dentries.reject_end_file = NULL; + +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + + if (attr->dentries.dir) + WARN_ON(!simple_empty(attr->dentries.dir)); + + debugfs_remove(attr->dentries.dir); + attr->dentries.dir = NULL; +} + +int init_fault_attr_dentries(struct fault_attr *attr, const char *name) +{ + mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + struct dentry *dir; + + memset(&attr->dentries, 0, sizeof(attr->dentries)); + + dir = debugfs_create_dir(name, NULL); + if (!dir) + goto fail; + attr->dentries.dir = dir; + + attr->dentries.probability_file = + debugfs_create_ul("probability", mode, dir, &attr->probability); + + attr->dentries.interval_file = + debugfs_create_ul("interval", mode, dir, &attr->interval); + + attr->dentries.times_file = + debugfs_create_atomic_t("times", mode, dir, &attr->times); + + attr->dentries.space_file = + debugfs_create_atomic_t("space", mode, dir, &attr->space); + + attr->dentries.verbose_file = + debugfs_create_ul("verbose", mode, dir, &attr->verbose); + + attr->dentries.task_filter_file = debugfs_create_bool("task-filter", + mode, dir, &attr->task_filter); + + if (!attr->dentries.probability_file || !attr->dentries.interval_file || + !attr->dentries.times_file || !attr->dentries.space_file || + !attr->dentries.verbose_file || !attr->dentries.task_filter_file) + goto fail; + +#ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER + + attr->dentries.stacktrace_depth_file = + debugfs_create_ul_MAX_STACK_TRACE_DEPTH( + "stacktrace-depth", mode, dir, &attr->stacktrace_depth); + + attr->dentries.require_start_file = + debugfs_create_ul("require-start", mode, dir, &attr->require_start); + + attr->dentries.require_end_file = + debugfs_create_ul("require-end", mode, dir, &attr->require_end); + + attr->dentries.reject_start_file = + debugfs_create_ul("reject-start", mode, dir, &attr->reject_start); + + attr->dentries.reject_end_file = + debugfs_create_ul("reject-end", mode, dir, &attr->reject_end); + + if (!attr->dentries.stacktrace_depth_file || + !attr->dentries.require_start_file || + !attr->dentries.require_end_file || + !attr->dentries.reject_start_file || + !attr->dentries.reject_end_file) + goto fail; + +#endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */ + + return 0; +fail: + cleanup_fault_attr_dentries(attr); + return -ENOMEM; +} + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c new file mode 100644 index 00000000..5d202e36 --- /dev/null +++ b/lib/find_last_bit.c @@ -0,0 +1,45 @@ +/* find_last_bit.c: fallback find next bit implementation + * + * Copyright (C) 2008 IBM Corporation + * Written by Rusty Russell <rusty@rustcorp.com.au> + * (Inspired by David Howell's find_next_bit implementation) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/module.h> +#include <asm/types.h> +#include <asm/byteorder.h> + +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long words; + unsigned long tmp; + + /* Start at final word. */ + words = size / BITS_PER_LONG; + + /* Partial final word? */ + if (size & (BITS_PER_LONG-1)) { + tmp = (addr[words] & (~0UL >> (BITS_PER_LONG + - (size & (BITS_PER_LONG-1))))); + if (tmp) + goto found; + } + + while (words) { + tmp = addr[--words]; + if (tmp) { +found: + return words * BITS_PER_LONG + __fls(tmp); + } + } + + /* Not found */ + return size; +} +EXPORT_SYMBOL(find_last_bit); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c new file mode 100644 index 00000000..24c59ded --- /dev/null +++ b/lib/find_next_bit.c @@ -0,0 +1,275 @@ +/* find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/module.h> +#include <asm/types.h> +#include <asm/byteorder.h> + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +#ifdef CONFIG_GENERIC_FIND_NEXT_BIT +/* + * Find the next set bit in a memory region. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); +} +EXPORT_SYMBOL(find_next_bit); + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h. + */ +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found_middle: + return result + ffz(tmp); +} +EXPORT_SYMBOL(find_next_zero_bit); +#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ + +#ifdef CONFIG_GENERIC_FIND_FIRST_BIT +/* + * Find the first set bit in a memory region. + */ +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found: + return result + __ffs(tmp); +} +EXPORT_SYMBOL(find_first_bit); + +/* + * Find the first cleared bit in a memory region. + */ +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + const unsigned long *p = addr; + unsigned long result = 0; + unsigned long tmp; + + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) + goto found; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + + tmp = (*p) | (~0UL << size); + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. */ +found: + return result + ffz(tmp); +} +EXPORT_SYMBOL(find_first_zero_bit); +#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ + +#ifdef __BIG_ENDIAN + +/* include/linux/byteorder does not support "unsigned long" type */ +static inline unsigned long ext2_swabp(const unsigned long * x) +{ +#if BITS_PER_LONG == 64 + return (unsigned long) __swab64p((u64 *) x); +#elif BITS_PER_LONG == 32 + return (unsigned long) __swab32p((u32 *) x); +#else +#error BITS_PER_LONG not defined +#endif +} + +/* include/linux/byteorder doesn't support "unsigned long" type */ +static inline unsigned long ext2_swab(const unsigned long y) +{ +#if BITS_PER_LONG == 64 + return (unsigned long) __swab64((u64) y); +#elif BITS_PER_LONG == 32 + return (unsigned long) __swab32((u32) y); +#else +#error BITS_PER_LONG not defined +#endif +} + +unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= (BITS_PER_LONG - 1UL); + if (offset) { + tmp = ext2_swabp(p++); + tmp |= (~0UL >> (BITS_PER_LONG - offset)); + if (size < BITS_PER_LONG) + goto found_first; + if (~tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + + while (size & ~(BITS_PER_LONG - 1)) { + if (~(tmp = *(p++))) + goto found_middle_swap; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = ext2_swabp(p); +found_first: + tmp |= ~0UL << size; + if (tmp == ~0UL) /* Are any bits zero? */ + return result + size; /* Nope. Skip ffz */ +found_middle: + return result + ffz(tmp); + +found_middle_swap: + return result + ffz(ext2_swab(tmp)); +} + +EXPORT_SYMBOL(generic_find_next_zero_le_bit); + +unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned + long size, unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG - 1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset &= (BITS_PER_LONG - 1UL); + if (offset) { + tmp = ext2_swabp(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + + while (size & ~(BITS_PER_LONG - 1)) { + tmp = *(p++); + if (tmp) + goto found_middle_swap; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = ext2_swabp(p); +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + __ffs(tmp); + +found_middle_swap: + return result + __ffs(ext2_swab(tmp)); +} +EXPORT_SYMBOL(generic_find_next_le_bit); +#endif /* __BIG_ENDIAN */ diff --git a/lib/flex_array.c b/lib/flex_array.c new file mode 100644 index 00000000..77a6fea7 --- /dev/null +++ b/lib/flex_array.c @@ -0,0 +1,350 @@ +/* + * Flexible array managed in PAGE_SIZE parts + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2009 + * + * Author: Dave Hansen <dave@linux.vnet.ibm.com> + */ + +#include <linux/flex_array.h> +#include <linux/slab.h> +#include <linux/stddef.h> + +struct flex_array_part { + char elements[FLEX_ARRAY_PART_SIZE]; +}; + +/* + * If a user requests an allocation which is small + * enough, we may simply use the space in the + * flex_array->parts[] array to store the user + * data. + */ +static inline int elements_fit_in_base(struct flex_array *fa) +{ + int data_size = fa->element_size * fa->total_nr_elements; + if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT) + return 1; + return 0; +} + +/** + * flex_array_alloc - allocate a new flexible array + * @element_size: the size of individual elements in the array + * @total: total number of elements that this should hold + * @flags: page allocation flags to use for base array + * + * Note: all locking must be provided by the caller. + * + * @total is used to size internal structures. If the user ever + * accesses any array indexes >=@total, it will produce errors. + * + * The maximum number of elements is defined as: the number of + * elements that can be stored in a page times the number of + * page pointers that we can fit in the base structure or (using + * integer math): + * + * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) + * + * Here's a table showing example capacities. Note that the maximum + * index that the get/put() functions is just nr_objects-1. This + * basically means that you get 4MB of storage on 32-bit and 2MB on + * 64-bit. + * + * + * Element size | Objects | Objects | + * PAGE_SIZE=4k | 32-bit | 64-bit | + * ---------------------------------| + * 1 bytes | 4186112 | 2093056 | + * 2 bytes | 2093056 | 1046528 | + * 3 bytes | 1395030 | 697515 | + * 4 bytes | 1046528 | 523264 | + * 32 bytes | 130816 | 65408 | + * 33 bytes | 126728 | 63364 | + * 2048 bytes | 2044 | 1022 | + * 2049 bytes | 1022 | 511 | + * void * | 1046528 | 261632 | + * + * Since 64-bit pointers are twice the size, we lose half the + * capacity in the base structure. Also note that no effort is made + * to efficiently pack objects across page boundaries. + */ +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags) +{ + struct flex_array *ret; + int max_size = FLEX_ARRAY_NR_BASE_PTRS * + FLEX_ARRAY_ELEMENTS_PER_PART(element_size); + + /* max_size will end up 0 if element_size > PAGE_SIZE */ + if (total > max_size) + return NULL; + ret = kzalloc(sizeof(struct flex_array), flags); + if (!ret) + return NULL; + ret->element_size = element_size; + ret->total_nr_elements = total; + if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO)) + memset(&ret->parts[0], FLEX_ARRAY_FREE, + FLEX_ARRAY_BASE_BYTES_LEFT); + return ret; +} + +static int fa_element_to_part_nr(struct flex_array *fa, + unsigned int element_nr) +{ + return element_nr / FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); +} + +/** + * flex_array_free_parts - just free the second-level pages + * @fa: the flex array from which to free parts + * + * This is to be used in cases where the base 'struct flex_array' + * has been statically allocated and should not be free. + */ +void flex_array_free_parts(struct flex_array *fa) +{ + int part_nr; + + if (elements_fit_in_base(fa)) + return; + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) + kfree(fa->parts[part_nr]); +} + +void flex_array_free(struct flex_array *fa) +{ + flex_array_free_parts(fa); + kfree(fa); +} + +static unsigned int index_inside_part(struct flex_array *fa, + unsigned int element_nr) +{ + unsigned int part_offset; + + part_offset = element_nr % + FLEX_ARRAY_ELEMENTS_PER_PART(fa->element_size); + return part_offset * fa->element_size; +} + +static struct flex_array_part * +__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) +{ + struct flex_array_part *part = fa->parts[part_nr]; + if (!part) { + part = kmalloc(sizeof(struct flex_array_part), flags); + if (!part) + return NULL; + if (!(flags & __GFP_ZERO)) + memset(part, FLEX_ARRAY_FREE, + sizeof(struct flex_array_part)); + fa->parts[part_nr] = part; + } + return part; +} + +/** + * flex_array_put - copy data into the array at @element_nr + * @fa: the flex array to copy data into + * @element_nr: index of the position in which to insert + * the new element. + * @src: address of data to copy into the array + * @flags: page allocation flags to use for array expansion + * + * + * Note that this *copies* the contents of @src into + * the array. If you are trying to store an array of + * pointers, make sure to pass in &ptr instead of ptr. + * You may instead wish to use the flex_array_put_ptr() + * helper function. + * + * Locking must be provided by the caller. + */ +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, + gfp_t flags) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = __fa_get_part(fa, part_nr, flags); + if (!part) + return -ENOMEM; + } + dst = &part->elements[index_inside_part(fa, element_nr)]; + memcpy(dst, src, fa->element_size); + return 0; +} + +/** + * flex_array_clear - clear element in array at @element_nr + * @fa: the flex array of the element. + * @element_nr: index of the position to clear. + * + * Locking must be provided by the caller. + */ +int flex_array_clear(struct flex_array *fa, unsigned int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = fa->parts[part_nr]; + if (!part) + return -EINVAL; + } + dst = &part->elements[index_inside_part(fa, element_nr)]; + memset(dst, FLEX_ARRAY_FREE, fa->element_size); + return 0; +} + +/** + * flex_array_prealloc - guarantee that array space exists + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @end: index of last (inclusive) element for which space is allocated + * @flags: page allocation flags + * + * This will guarantee that no future calls to flex_array_put() + * will allocate memory. It can be used if you are expecting to + * be holding a lock or in some atomic context while writing + * data into the array. + * + * Locking must be provided by the caller. + */ +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags) +{ + int start_part; + int end_part; + int part_nr; + struct flex_array_part *part; + + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + return 0; + start_part = fa_element_to_part_nr(fa, start); + end_part = fa_element_to_part_nr(fa, end); + for (part_nr = start_part; part_nr <= end_part; part_nr++) { + part = __fa_get_part(fa, part_nr, flags); + if (!part) + return -ENOMEM; + } + return 0; +} + +/** + * flex_array_get - pull data back out of the array + * @fa: the flex array from which to extract data + * @element_nr: index of the element to fetch from the array + * + * Returns a pointer to the data at index @element_nr. Note + * that this is a copy of the data that was passed in. If you + * are using this to store pointers, you'll get back &ptr. You + * may instead wish to use the flex_array_get_ptr helper. + * + * Locking must be provided by the caller. + */ +void *flex_array_get(struct flex_array *fa, unsigned int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + + if (element_nr >= fa->total_nr_elements) + return NULL; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else { + part = fa->parts[part_nr]; + if (!part) + return NULL; + } + return &part->elements[index_inside_part(fa, element_nr)]; +} + +/** + * flex_array_get_ptr - pull a ptr back out of the array + * @fa: the flex array from which to extract data + * @element_nr: index of the element to fetch from the array + * + * Returns the pointer placed in the flex array at element_nr using + * flex_array_put_ptr(). This function should not be called if the + * element in question was not set using the _put_ptr() helper. + */ +void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr) +{ + void **tmp; + + tmp = flex_array_get(fa, element_nr); + if (!tmp) + return NULL; + + return *tmp; +} + +static int part_is_free(struct flex_array_part *part) +{ + int i; + + for (i = 0; i < sizeof(struct flex_array_part); i++) + if (part->elements[i] != FLEX_ARRAY_FREE) + return 0; + return 1; +} + +/** + * flex_array_shrink - free unused second-level pages + * @fa: the flex array to shrink + * + * Frees all second-level pages that consist solely of unused + * elements. Returns the number of pages freed. + * + * Locking must be provided by the caller. + */ +int flex_array_shrink(struct flex_array *fa) +{ + struct flex_array_part *part; + int part_nr; + int ret = 0; + + if (elements_fit_in_base(fa)) + return ret; + for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) { + part = fa->parts[part_nr]; + if (!part) + continue; + if (part_is_free(part)) { + fa->parts[part_nr] = NULL; + kfree(part); + ret++; + } + } + return ret; +} diff --git a/lib/gcd.c b/lib/gcd.c new file mode 100644 index 00000000..f879033d --- /dev/null +++ b/lib/gcd.c @@ -0,0 +1,18 @@ +#include <linux/kernel.h> +#include <linux/gcd.h> +#include <linux/module.h> + +/* Greatest common divisor */ +unsigned long gcd(unsigned long a, unsigned long b) +{ + unsigned long r; + + if (a < b) + swap(a, b); + while ((r = a % b) != 0) { + a = b; + b = r; + } + return b; +} +EXPORT_SYMBOL_GPL(gcd); diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c new file mode 100644 index 00000000..85d0e412 --- /dev/null +++ b/lib/gen_crc32table.c @@ -0,0 +1,99 @@ +#include <stdio.h> +#include "crc32defs.h" +#include <inttypes.h> + +#define ENTRIES_PER_LINE 4 + +#define LE_TABLE_SIZE (1 << CRC_LE_BITS) +#define BE_TABLE_SIZE (1 << CRC_BE_BITS) + +static uint32_t crc32table_le[4][LE_TABLE_SIZE]; +static uint32_t crc32table_be[4][BE_TABLE_SIZE]; + +/** + * crc32init_le() - allocate and initialize LE table data + * + * crc is the crc of the byte i; other entries are filled in based on the + * fact that crctable[i^j] = crctable[i] ^ crctable[j]. + * + */ +static void crc32init_le(void) +{ + unsigned i, j; + uint32_t crc = 1; + + crc32table_le[0][0] = 0; + + for (i = 1 << (CRC_LE_BITS - 1); i; i >>= 1) { + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + for (j = 0; j < LE_TABLE_SIZE; j += 2 * i) + crc32table_le[0][i + j] = crc ^ crc32table_le[0][j]; + } + for (i = 0; i < LE_TABLE_SIZE; i++) { + crc = crc32table_le[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); + crc32table_le[j][i] = crc; + } + } +} + +/** + * crc32init_be() - allocate and initialize BE table data + */ +static void crc32init_be(void) +{ + unsigned i, j; + uint32_t crc = 0x80000000; + + crc32table_be[0][0] = 0; + + for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { + crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); + for (j = 0; j < i; j++) + crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; + } + for (i = 0; i < BE_TABLE_SIZE; i++) { + crc = crc32table_be[0][i]; + for (j = 1; j < 4; j++) { + crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); + crc32table_be[j][i] = crc; + } + } +} + +static void output_table(uint32_t table[4][256], int len, char *trans) +{ + int i, j; + + for (j = 0 ; j < 4; j++) { + printf("{"); + for (i = 0; i < len - 1; i++) { + if (i % ENTRIES_PER_LINE == 0) + printf("\n"); + printf("%s(0x%8.8xL), ", trans, table[j][i]); + } + printf("%s(0x%8.8xL)},\n", trans, table[j][len - 1]); + } +} + +int main(int argc, char** argv) +{ + printf("/* this file is generated - do not edit */\n\n"); + + if (CRC_LE_BITS > 1) { + crc32init_le(); + printf("static const u32 crc32table_le[4][256] = {"); + output_table(crc32table_le, LE_TABLE_SIZE, "tole"); + printf("};\n"); + } + + if (CRC_BE_BITS > 1) { + crc32init_be(); + printf("static const u32 crc32table_be[4][256] = {"); + output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); + printf("};\n"); + } + + return 0; +} diff --git a/lib/genalloc.c b/lib/genalloc.c new file mode 100644 index 00000000..dc6d8331 --- /dev/null +++ b/lib/genalloc.c @@ -0,0 +1,215 @@ +/* + * Basic general purpose allocator for managing special purpose memory + * not managed by the regular kmalloc/kfree interface. + * Uses for this includes on-device special memory, uncached memory + * etc. + * + * Copyright 2005 (C) Jes Sorensen <jes@trained-monkey.org> + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/bitmap.h> +#include <linux/genalloc.h> + +/* General purpose special memory pool descriptor. */ +struct gen_pool { + rwlock_t lock; /* protects chunks list */ + struct list_head chunks; /* list of chunks in this pool */ + unsigned order; /* minimum allocation order */ +}; + +/* General purpose special memory pool chunk descriptor. */ +struct gen_pool_chunk { + spinlock_t lock; /* protects bits */ + struct list_head next_chunk; /* next chunk in pool */ + unsigned long start; /* start of memory chunk */ + unsigned long size; /* number of bits */ + unsigned long bits[0]; /* bitmap for allocating memory chunk */ +}; + +/** + * gen_pool_create() - create a new special memory pool + * @order: Log base 2 of number of bytes each bitmap bit + * represents. + * @nid: Node id of the node the pool structure should be allocated + * on, or -1. + * + * Create a new special memory pool that can be used to manage special purpose + * memory not managed by the regular kmalloc/kfree interface. + */ +struct gen_pool *__must_check gen_pool_create(unsigned order, int nid) +{ + struct gen_pool *pool; + + if (WARN_ON(order >= BITS_PER_LONG)) + return NULL; + + pool = kmalloc_node(sizeof *pool, GFP_KERNEL, nid); + if (pool) { + rwlock_init(&pool->lock); + INIT_LIST_HEAD(&pool->chunks); + pool->order = order; + } + return pool; +} +EXPORT_SYMBOL(gen_pool_create); + +/** + * gen_pool_add() - add a new chunk of special memory to the pool + * @pool: Pool to add new memory chunk to. + * @addr: Starting address of memory chunk to add to pool. + * @size: Size in bytes of the memory chunk to add to pool. + * @nid: Node id of the node the pool structure should be allocated + * on, or -1. + * + * Add a new chunk of special memory to the specified pool. + */ +int __must_check +gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size, int nid) +{ + struct gen_pool_chunk *chunk; + size_t nbytes; + + if (WARN_ON(!addr || addr + size < addr || + (addr & ((1 << pool->order) - 1)))) + return -EINVAL; + + size = size >> pool->order; + if (WARN_ON(!size)) + return -EINVAL; + + nbytes = sizeof *chunk + BITS_TO_LONGS(size) * sizeof *chunk->bits; + chunk = kzalloc_node(nbytes, GFP_KERNEL, nid); + if (!chunk) + return -ENOMEM; + + spin_lock_init(&chunk->lock); + chunk->start = addr >> pool->order; + chunk->size = size; + + write_lock(&pool->lock); + list_add(&chunk->next_chunk, &pool->chunks); + write_unlock(&pool->lock); + + return 0; +} +EXPORT_SYMBOL(gen_pool_add); + +/** + * gen_pool_destroy() - destroy a special memory pool + * @pool: Pool to destroy. + * + * Destroy the specified special memory pool. Verifies that there are no + * outstanding allocations. + */ +void gen_pool_destroy(struct gen_pool *pool) +{ + struct gen_pool_chunk *chunk; + int bit; + + while (!list_empty(&pool->chunks)) { + chunk = list_entry(pool->chunks.next, struct gen_pool_chunk, + next_chunk); + list_del(&chunk->next_chunk); + + bit = find_next_bit(chunk->bits, chunk->size, 0); + BUG_ON(bit < chunk->size); + + kfree(chunk); + } + kfree(pool); +} +EXPORT_SYMBOL(gen_pool_destroy); + +/** + * gen_pool_alloc_aligned() - allocate special memory from the pool + * @pool: Pool to allocate from. + * @size: Number of bytes to allocate from the pool. + * @alignment_order: Order the allocated space should be + * aligned to (eg. 20 means allocated space + * must be aligned to 1MiB). + * + * Allocate the requested number of bytes from the specified pool. + * Uses a first-fit algorithm. + */ +unsigned long __must_check +gen_pool_alloc_aligned(struct gen_pool *pool, size_t size, + unsigned alignment_order) +{ + unsigned long addr, align_mask = 0, flags, start; + struct gen_pool_chunk *chunk; + + if (size == 0) + return 0; + + if (alignment_order > pool->order) + align_mask = (1 << (alignment_order - pool->order)) - 1; + + size = (size + (1UL << pool->order) - 1) >> pool->order; + + read_lock(&pool->lock); + list_for_each_entry(chunk, &pool->chunks, next_chunk) { + if (chunk->size < size) + continue; + + spin_lock_irqsave(&chunk->lock, flags); + start = bitmap_find_next_zero_area_off(chunk->bits, chunk->size, + 0, size, align_mask, + chunk->start); + if (start >= chunk->size) { + spin_unlock_irqrestore(&chunk->lock, flags); + continue; + } + + bitmap_set(chunk->bits, start, size); + spin_unlock_irqrestore(&chunk->lock, flags); + addr = (chunk->start + start) << pool->order; + goto done; + } + + addr = 0; +done: + read_unlock(&pool->lock); + return addr; +} +EXPORT_SYMBOL(gen_pool_alloc_aligned); + +/** + * gen_pool_free() - free allocated special memory back to the pool + * @pool: Pool to free to. + * @addr: Starting address of memory to free back to pool. + * @size: Size in bytes of memory to free. + * + * Free previously allocated special memory back to the specified pool. + */ +void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) +{ + struct gen_pool_chunk *chunk; + unsigned long flags; + + if (!size) + return; + + addr = addr >> pool->order; + size = (size + (1UL << pool->order) - 1) >> pool->order; + + BUG_ON(addr + size < addr); + + read_lock(&pool->lock); + list_for_each_entry(chunk, &pool->chunks, next_chunk) + if (addr >= chunk->start && + addr + size <= chunk->start + chunk->size) { + spin_lock_irqsave(&chunk->lock, flags); + bitmap_clear(chunk->bits, addr - chunk->start, size); + spin_unlock_irqrestore(&chunk->lock, flags); + goto done; + } + BUG_ON(1); +done: + read_unlock(&pool->lock); +} +EXPORT_SYMBOL(gen_pool_free); diff --git a/lib/halfmd4.c b/lib/halfmd4.c new file mode 100644 index 00000000..e11db26f --- /dev/null +++ b/lib/halfmd4.c @@ -0,0 +1,66 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cryptohash.h> + +/* F, G and H are basic MD4 functions: selection, majority, parity */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +/* + * The generic round function. The application is so specific that + * we don't bother protecting all the arguments with parens, as is generally + * good macro practice, in favor of extra legibility. + * Rotation is separate from addition to prevent recomputation + */ +#define ROUND(f, a, b, c, d, x, s) \ + (a += f(b, c, d) + x, a = (a << s) | (a >> (32 - s))) +#define K1 0 +#define K2 013240474631UL +#define K3 015666365641UL + +/* + * Basic cut-down MD4 transform. Returns only 32 bits of result. + */ +__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]) +{ + __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; + + /* Round 1 */ + ROUND(F, a, b, c, d, in[0] + K1, 3); + ROUND(F, d, a, b, c, in[1] + K1, 7); + ROUND(F, c, d, a, b, in[2] + K1, 11); + ROUND(F, b, c, d, a, in[3] + K1, 19); + ROUND(F, a, b, c, d, in[4] + K1, 3); + ROUND(F, d, a, b, c, in[5] + K1, 7); + ROUND(F, c, d, a, b, in[6] + K1, 11); + ROUND(F, b, c, d, a, in[7] + K1, 19); + + /* Round 2 */ + ROUND(G, a, b, c, d, in[1] + K2, 3); + ROUND(G, d, a, b, c, in[3] + K2, 5); + ROUND(G, c, d, a, b, in[5] + K2, 9); + ROUND(G, b, c, d, a, in[7] + K2, 13); + ROUND(G, a, b, c, d, in[0] + K2, 3); + ROUND(G, d, a, b, c, in[2] + K2, 5); + ROUND(G, c, d, a, b, in[4] + K2, 9); + ROUND(G, b, c, d, a, in[6] + K2, 13); + + /* Round 3 */ + ROUND(H, a, b, c, d, in[3] + K3, 3); + ROUND(H, d, a, b, c, in[7] + K3, 9); + ROUND(H, c, d, a, b, in[2] + K3, 11); + ROUND(H, b, c, d, a, in[6] + K3, 15); + ROUND(H, a, b, c, d, in[1] + K3, 3); + ROUND(H, d, a, b, c, in[5] + K3, 9); + ROUND(H, c, d, a, b, in[0] + K3, 11); + ROUND(H, b, c, d, a, in[4] + K3, 15); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; + + return buf[1]; /* "most hashed" word */ +} +EXPORT_SYMBOL(half_md4_transform); diff --git a/lib/hexdump.c b/lib/hexdump.c new file mode 100644 index 00000000..5d7a4802 --- /dev/null +++ b/lib/hexdump.c @@ -0,0 +1,224 @@ +/* + * lib/hexdump.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + */ + +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/kernel.h> +#include <linux/module.h> + +const char hex_asc[] = "0123456789abcdef"; +EXPORT_SYMBOL(hex_asc); + +/** + * hex_to_bin - convert a hex digit to its real value + * @ch: ascii character represents hex digit + * + * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad + * input. + */ +int hex_to_bin(char ch) +{ + if ((ch >= '0') && (ch <= '9')) + return ch - '0'; + ch = tolower(ch); + if ((ch >= 'a') && (ch <= 'f')) + return ch - 'a' + 10; + return -1; +} +EXPORT_SYMBOL(hex_to_bin); + +/** + * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory + * @buf: data blob to dump + * @len: number of bytes in the @buf + * @rowsize: number of bytes to print per line; must be 16 or 32 + * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1) + * @linebuf: where to put the converted data + * @linebuflen: total size of @linebuf, including space for terminating NUL + * @ascii: include ASCII after the hex output + * + * hex_dump_to_buffer() works on one "line" of output at a time, i.e., + * 16 or 32 bytes of input data converted to hex + ASCII output. + * + * Given a buffer of u8 data, hex_dump_to_buffer() converts the input data + * to a hex + ASCII dump at the supplied memory location. + * The converted output is always NUL-terminated. + * + * E.g.: + * hex_dump_to_buffer(frame->data, frame->len, 16, 1, + * linebuf, sizeof(linebuf), true); + * + * example output buffer: + * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO + */ +void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, + int groupsize, char *linebuf, size_t linebuflen, + bool ascii) +{ + const u8 *ptr = buf; + u8 ch; + int j, lx = 0; + int ascii_column; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + if (!len) + goto nil; + if (len > rowsize) /* limit to one line at a time */ + len = rowsize; + if ((len % groupsize) != 0) /* no mixed size output */ + groupsize = 1; + + switch (groupsize) { + case 8: { + const u64 *ptr8 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf(linebuf + lx, linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + ascii_column = 17 * ngroups + 2; + break; + } + + case 4: { + const u32 *ptr4 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf(linebuf + lx, linebuflen - lx, + "%s%8.8x", j ? " " : "", *(ptr4 + j)); + ascii_column = 9 * ngroups + 2; + break; + } + + case 2: { + const u16 *ptr2 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf(linebuf + lx, linebuflen - lx, + "%s%4.4x", j ? " " : "", *(ptr2 + j)); + ascii_column = 5 * ngroups + 2; + break; + } + + default: + for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = hex_asc_hi(ch); + linebuf[lx++] = hex_asc_lo(ch); + linebuf[lx++] = ' '; + } + if (j) + lx--; + + ascii_column = 3 * rowsize + 2; + break; + } + if (!ascii) + goto nil; + + while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + linebuf[lx++] = ' '; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; + } +nil: + linebuf[lx++] = '\0'; +} +EXPORT_SYMBOL(hex_dump_to_buffer); + +/** + * print_hex_dump - print a text hex dump to syslog for a binary blob of data + * @level: kernel log level (e.g. KERN_DEBUG) + * @prefix_str: string to prefix each line with; + * caller supplies trailing spaces for alignment if desired + * @prefix_type: controls whether prefix of an offset, address, or none + * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) + * @rowsize: number of bytes to print per line; must be 16 or 32 + * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1) + * @buf: data blob to dump + * @len: number of bytes in the @buf + * @ascii: include ASCII after the hex output + * + * Given a buffer of u8 data, print_hex_dump() prints a hex + ASCII dump + * to the kernel log at the specified kernel log level, with an optional + * leading prefix. + * + * print_hex_dump() works on one "line" of output at a time, i.e., + * 16 or 32 bytes of input data converted to hex + ASCII output. + * print_hex_dump() iterates over the entire input @buf, breaking it into + * "line size" chunks to format and print. + * + * E.g.: + * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, + * 16, 1, frame->data, frame->len, true); + * + * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: + * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO + * Example output using %DUMP_PREFIX_ADDRESS and 4-byte mode: + * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. + */ +void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ + const u8 *ptr = buf; + int i, linelen, remaining = len; + unsigned char linebuf[32 * 3 + 2 + 32 + 1]; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + for (i = 0; i < len; i += rowsize) { + linelen = min(remaining, rowsize); + remaining -= rowsize; + + hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, + linebuf, sizeof(linebuf), ascii); + + switch (prefix_type) { + case DUMP_PREFIX_ADDRESS: + printk("%s%s%p: %s\n", + level, prefix_str, ptr + i, linebuf); + break; + case DUMP_PREFIX_OFFSET: + printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); + break; + default: + printk("%s%s%s\n", level, prefix_str, linebuf); + break; + } + } +} +EXPORT_SYMBOL(print_hex_dump); + +/** + * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params + * @prefix_str: string to prefix each line with; + * caller supplies trailing spaces for alignment if desired + * @prefix_type: controls whether prefix of an offset, address, or none + * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) + * @buf: data blob to dump + * @len: number of bytes in the @buf + * + * Calls print_hex_dump(), with log level of KERN_DEBUG, + * rowsize of 16, groupsize of 1, and ASCII output included. + */ +void print_hex_dump_bytes(const char *prefix_str, int prefix_type, + const void *buf, size_t len) +{ + print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, + buf, len, true); +} +EXPORT_SYMBOL(print_hex_dump_bytes); diff --git a/lib/hweight.c b/lib/hweight.c new file mode 100644 index 00000000..3c79d508 --- /dev/null +++ b/lib/hweight.c @@ -0,0 +1,67 @@ +#include <linux/module.h> +#include <linux/bitops.h> +#include <asm/types.h> + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. + */ + +unsigned int __sw_hweight32(unsigned int w) +{ +#ifdef ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0f0f0f0f; + return (w * 0x01010101) >> 24; +#else + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +#endif +} +EXPORT_SYMBOL(__sw_hweight32); + +unsigned int __sw_hweight16(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x5555); + res = (res & 0x3333) + ((res >> 2) & 0x3333); + res = (res + (res >> 4)) & 0x0F0F; + return (res + (res >> 8)) & 0x00FF; +} +EXPORT_SYMBOL(__sw_hweight16); + +unsigned int __sw_hweight8(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55); + res = (res & 0x33) + ((res >> 2) & 0x33); + return (res + (res >> 4)) & 0x0F; +} +EXPORT_SYMBOL(__sw_hweight8); + +unsigned long __sw_hweight64(__u64 w) +{ +#if BITS_PER_LONG == 32 + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); +#elif BITS_PER_LONG == 64 +#ifdef ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x5555555555555555ul; + w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; + return (w * 0x0101010101010101ul) >> 56; +#else + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; + res = res + (res >> 8); + res = res + (res >> 16); + return (res + (res >> 32)) & 0x00000000000000FFul; +#endif +#endif +} +EXPORT_SYMBOL(__sw_hweight64); diff --git a/lib/idr.c b/lib/idr.c new file mode 100644 index 00000000..7f1a4f0a --- /dev/null +++ b/lib/idr.c @@ -0,0 +1,937 @@ +/* + * 2002-10-18 written by Jim Houston jim.houston@ccur.com + * Copyright (C) 2002 by Concurrent Computer Corporation + * Distributed under the GNU GPL license version 2. + * + * Modified by George Anzinger to reuse immediately and to use + * find bit instructions. Also removed _irq on spinlocks. + * + * Modified by Nadia Derbey to make it RCU safe. + * + * Small id to pointer translation service. + * + * It uses a radix tree like structure as a sparse array indexed + * by the id to obtain the pointer. The bitmap makes allocating + * a new id quick. + * + * You call it to allocate an id (an int) an associate with that id a + * pointer or what ever, we treat it as a (void *). You can pass this + * id to a user for him to pass back at a later time. You then pass + * that id to this code and it returns your pointer. + + * You can release ids at any time. When all ids are released, most of + * the memory is returned (we keep IDR_FREE_MAX) in a local pool so we + * don't need to go to the memory "store" during an id allocate, just + * so you don't need to be too concerned about locking and conflicts + * with the slab allocator. + */ + +#ifndef TEST // to test in user space... +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/module.h> +#endif +#include <linux/err.h> +#include <linux/string.h> +#include <linux/idr.h> + +static struct kmem_cache *idr_layer_cache; + +static struct idr_layer *get_from_free_list(struct idr *idp) +{ + struct idr_layer *p; + unsigned long flags; + + spin_lock_irqsave(&idp->lock, flags); + if ((p = idp->id_free)) { + idp->id_free = p->ary[0]; + idp->id_free_cnt--; + p->ary[0] = NULL; + } + spin_unlock_irqrestore(&idp->lock, flags); + return(p); +} + +static void idr_layer_rcu_free(struct rcu_head *head) +{ + struct idr_layer *layer; + + layer = container_of(head, struct idr_layer, rcu_head); + kmem_cache_free(idr_layer_cache, layer); +} + +static inline void free_layer(struct idr_layer *p) +{ + call_rcu(&p->rcu_head, idr_layer_rcu_free); +} + +/* only called when idp->lock is held */ +static void __move_to_free_list(struct idr *idp, struct idr_layer *p) +{ + p->ary[0] = idp->id_free; + idp->id_free = p; + idp->id_free_cnt++; +} + +static void move_to_free_list(struct idr *idp, struct idr_layer *p) +{ + unsigned long flags; + + /* + * Depends on the return element being zeroed. + */ + spin_lock_irqsave(&idp->lock, flags); + __move_to_free_list(idp, p); + spin_unlock_irqrestore(&idp->lock, flags); +} + +static void idr_mark_full(struct idr_layer **pa, int id) +{ + struct idr_layer *p = pa[0]; + int l = 0; + + __set_bit(id & IDR_MASK, &p->bitmap); + /* + * If this layer is full mark the bit in the layer above to + * show that this part of the radix tree is full. This may + * complete the layer above and require walking up the radix + * tree. + */ + while (p->bitmap == IDR_FULL) { + if (!(p = pa[++l])) + break; + id = id >> IDR_BITS; + __set_bit((id & IDR_MASK), &p->bitmap); + } +} + +/** + * idr_pre_get - reserver resources for idr allocation + * @idp: idr handle + * @gfp_mask: memory allocation flags + * + * This function should be called prior to locking and calling the + * idr_get_new* functions. It preallocates enough memory to satisfy + * the worst possible allocation. + * + * If the system is REALLY out of memory this function returns 0, + * otherwise 1. + */ +int idr_pre_get(struct idr *idp, gfp_t gfp_mask) +{ + while (idp->id_free_cnt < IDR_FREE_MAX) { + struct idr_layer *new; + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); + if (new == NULL) + return (0); + move_to_free_list(idp, new); + } + return 1; +} +EXPORT_SYMBOL(idr_pre_get); + +static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa) +{ + int n, m, sh; + struct idr_layer *p, *new; + int l, id, oid; + unsigned long bm; + + id = *starting_id; + restart: + p = idp->top; + l = idp->layers; + pa[l--] = NULL; + while (1) { + /* + * We run around this while until we reach the leaf node... + */ + n = (id >> (IDR_BITS*l)) & IDR_MASK; + bm = ~p->bitmap; + m = find_next_bit(&bm, IDR_SIZE, n); + if (m == IDR_SIZE) { + /* no space available go back to previous layer. */ + l++; + oid = id; + id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1; + + /* if already at the top layer, we need to grow */ + if (id >= 1 << (idp->layers * IDR_BITS)) { + *starting_id = id; + return IDR_NEED_TO_GROW; + } + p = pa[l]; + BUG_ON(!p); + + /* If we need to go up one layer, continue the + * loop; otherwise, restart from the top. + */ + sh = IDR_BITS * (l + 1); + if (oid >> sh == id >> sh) + continue; + else + goto restart; + } + if (m != n) { + sh = IDR_BITS*l; + id = ((id >> sh) ^ n ^ m) << sh; + } + if ((id >= MAX_ID_BIT) || (id < 0)) + return IDR_NOMORE_SPACE; + if (l == 0) + break; + /* + * Create the layer below if it is missing. + */ + if (!p->ary[m]) { + new = get_from_free_list(idp); + if (!new) + return -1; + new->layer = l-1; + rcu_assign_pointer(p->ary[m], new); + p->count++; + } + pa[l--] = p; + p = p->ary[m]; + } + + pa[l] = p; + return id; +} + +static int idr_get_empty_slot(struct idr *idp, int starting_id, + struct idr_layer **pa) +{ + struct idr_layer *p, *new; + int layers, v, id; + unsigned long flags; + + id = starting_id; +build_up: + p = idp->top; + layers = idp->layers; + if (unlikely(!p)) { + if (!(p = get_from_free_list(idp))) + return -1; + p->layer = 0; + layers = 1; + } + /* + * Add a new layer to the top of the tree if the requested + * id is larger than the currently allocated space. + */ + while ((layers < (MAX_LEVEL - 1)) && (id >= (1 << (layers*IDR_BITS)))) { + layers++; + if (!p->count) { + /* special case: if the tree is currently empty, + * then we grow the tree by moving the top node + * upwards. + */ + p->layer++; + continue; + } + if (!(new = get_from_free_list(idp))) { + /* + * The allocation failed. If we built part of + * the structure tear it down. + */ + spin_lock_irqsave(&idp->lock, flags); + for (new = p; p && p != idp->top; new = p) { + p = p->ary[0]; + new->ary[0] = NULL; + new->bitmap = new->count = 0; + __move_to_free_list(idp, new); + } + spin_unlock_irqrestore(&idp->lock, flags); + return -1; + } + new->ary[0] = p; + new->count = 1; + new->layer = layers-1; + if (p->bitmap == IDR_FULL) + __set_bit(0, &new->bitmap); + p = new; + } + rcu_assign_pointer(idp->top, p); + idp->layers = layers; + v = sub_alloc(idp, &id, pa); + if (v == IDR_NEED_TO_GROW) + goto build_up; + return(v); +} + +static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) +{ + struct idr_layer *pa[MAX_LEVEL]; + int id; + + id = idr_get_empty_slot(idp, starting_id, pa); + if (id >= 0) { + /* + * Successfully found an empty slot. Install the user + * pointer and mark the slot full. + */ + rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], + (struct idr_layer *)ptr); + pa[0]->count++; + idr_mark_full(pa, id); + } + + return id; +} + +/** + * idr_get_new_above - allocate new idr entry above or equal to a start id + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @start_id: id to start search at + * @id: pointer to the allocated handle + * + * This is the allocate id function. It should be called with any + * required locks. + * + * If memory is required, it will return -EAGAIN, you should unlock + * and go back to the idr_pre_get() call. If the idr is full, it will + * return -ENOSPC. + * + * @id returns a value in the range @starting_id ... 0x7fffffff + */ +int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) +{ + int rv; + + rv = idr_get_new_above_int(idp, ptr, starting_id); + /* + * This is a cheap hack until the IDR code can be fixed to + * return proper error values. + */ + if (rv < 0) + return _idr_rc_to_errno(rv); + *id = rv; + return 0; +} +EXPORT_SYMBOL(idr_get_new_above); + +/** + * idr_get_new - allocate new idr entry + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: pointer to the allocated handle + * + * This is the allocate id function. It should be called with any + * required locks. + * + * If memory is required, it will return -EAGAIN, you should unlock + * and go back to the idr_pre_get() call. If the idr is full, it will + * return -ENOSPC. + * + * @id returns a value in the range 0 ... 0x7fffffff + */ +int idr_get_new(struct idr *idp, void *ptr, int *id) +{ + int rv; + + rv = idr_get_new_above_int(idp, ptr, 0); + /* + * This is a cheap hack until the IDR code can be fixed to + * return proper error values. + */ + if (rv < 0) + return _idr_rc_to_errno(rv); + *id = rv; + return 0; +} +EXPORT_SYMBOL(idr_get_new); + +static void idr_remove_warning(int id) +{ + printk(KERN_WARNING + "idr_remove called for id=%d which is not allocated.\n", id); + dump_stack(); +} + +static void sub_remove(struct idr *idp, int shift, int id) +{ + struct idr_layer *p = idp->top; + struct idr_layer **pa[MAX_LEVEL]; + struct idr_layer ***paa = &pa[0]; + struct idr_layer *to_free; + int n; + + *paa = NULL; + *++paa = &idp->top; + + while ((shift > 0) && p) { + n = (id >> shift) & IDR_MASK; + __clear_bit(n, &p->bitmap); + *++paa = &p->ary[n]; + p = p->ary[n]; + shift -= IDR_BITS; + } + n = id & IDR_MASK; + if (likely(p != NULL && test_bit(n, &p->bitmap))){ + __clear_bit(n, &p->bitmap); + rcu_assign_pointer(p->ary[n], NULL); + to_free = NULL; + while(*paa && ! --((**paa)->count)){ + if (to_free) + free_layer(to_free); + to_free = **paa; + **paa-- = NULL; + } + if (!*paa) + idp->layers = 0; + if (to_free) + free_layer(to_free); + } else + idr_remove_warning(id); +} + +/** + * idr_remove - remove the given id and free it's slot + * @idp: idr handle + * @id: unique key + */ +void idr_remove(struct idr *idp, int id) +{ + struct idr_layer *p; + struct idr_layer *to_free; + + /* Mask off upper bits we don't use for the search. */ + id &= MAX_ID_MASK; + + sub_remove(idp, (idp->layers - 1) * IDR_BITS, id); + if (idp->top && idp->top->count == 1 && (idp->layers > 1) && + idp->top->ary[0]) { + /* + * Single child at leftmost slot: we can shrink the tree. + * This level is not needed anymore since when layers are + * inserted, they are inserted at the top of the existing + * tree. + */ + to_free = idp->top; + p = idp->top->ary[0]; + rcu_assign_pointer(idp->top, p); + --idp->layers; + to_free->bitmap = to_free->count = 0; + free_layer(to_free); + } + while (idp->id_free_cnt >= IDR_FREE_MAX) { + p = get_from_free_list(idp); + /* + * Note: we don't call the rcu callback here, since the only + * layers that fall into the freelist are those that have been + * preallocated. + */ + kmem_cache_free(idr_layer_cache, p); + } + return; +} +EXPORT_SYMBOL(idr_remove); + +/** + * idr_remove_all - remove all ids from the given idr tree + * @idp: idr handle + * + * idr_destroy() only frees up unused, cached idp_layers, but this + * function will remove all id mappings and leave all idp_layers + * unused. + * + * A typical clean-up sequence for objects stored in an idr tree, will + * use idr_for_each() to free all objects, if necessay, then + * idr_remove_all() to remove all ids, and idr_destroy() to free + * up the cached idr_layers. + */ +void idr_remove_all(struct idr *idp) +{ + int n, id, max; + int bt_mask; + struct idr_layer *p; + struct idr_layer *pa[MAX_LEVEL]; + struct idr_layer **paa = &pa[0]; + + n = idp->layers * IDR_BITS; + p = idp->top; + rcu_assign_pointer(idp->top, NULL); + max = 1 << n; + + id = 0; + while (id < max) { + while (n > IDR_BITS && p) { + n -= IDR_BITS; + *paa++ = p; + p = p->ary[(id >> n) & IDR_MASK]; + } + + bt_mask = id; + id += 1 << n; + /* Get the highest bit that the above add changed from 0->1. */ + while (n < fls(id ^ bt_mask)) { + if (p) + free_layer(p); + n += IDR_BITS; + p = *--paa; + } + } + idp->layers = 0; +} +EXPORT_SYMBOL(idr_remove_all); + +/** + * idr_destroy - release all cached layers within an idr tree + * idp: idr handle + */ +void idr_destroy(struct idr *idp) +{ + while (idp->id_free_cnt) { + struct idr_layer *p = get_from_free_list(idp); + kmem_cache_free(idr_layer_cache, p); + } +} +EXPORT_SYMBOL(idr_destroy); + +/** + * idr_find - return pointer for given id + * @idp: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. + */ +void *idr_find(struct idr *idp, int id) +{ + int n; + struct idr_layer *p; + + p = rcu_dereference_raw(idp->top); + if (!p) + return NULL; + n = (p->layer+1) * IDR_BITS; + + /* Mask off upper bits we don't use for the search. */ + id &= MAX_ID_MASK; + + if (id >= (1 << n)) + return NULL; + BUG_ON(n == 0); + + while (n > 0 && p) { + n -= IDR_BITS; + BUG_ON(n != p->layer*IDR_BITS); + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); + } + return((void *)p); +} +EXPORT_SYMBOL(idr_find); + +/** + * idr_for_each - iterate through all stored pointers + * @idp: idr handle + * @fn: function to be called for each pointer + * @data: data passed back to callback function + * + * Iterate over the pointers registered with the given idr. The + * callback function will be called for each pointer currently + * registered, passing the id, the pointer and the data pointer passed + * to this function. It is not safe to modify the idr tree while in + * the callback, so functions such as idr_get_new and idr_remove are + * not allowed. + * + * We check the return of @fn each time. If it returns anything other + * than 0, we break out and return that value. + * + * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove(). + */ +int idr_for_each(struct idr *idp, + int (*fn)(int id, void *p, void *data), void *data) +{ + int n, id, max, error = 0; + struct idr_layer *p; + struct idr_layer *pa[MAX_LEVEL]; + struct idr_layer **paa = &pa[0]; + + n = idp->layers * IDR_BITS; + p = rcu_dereference_raw(idp->top); + max = 1 << n; + + id = 0; + while (id < max) { + while (n > 0 && p) { + n -= IDR_BITS; + *paa++ = p; + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); + } + + if (p) { + error = fn(id, (void *)p, data); + if (error) + break; + } + + id += 1 << n; + while (n < fls(id)) { + n += IDR_BITS; + p = *--paa; + } + } + + return error; +} +EXPORT_SYMBOL(idr_for_each); + +/** + * idr_get_next - lookup next object of id to given id. + * @idp: idr handle + * @id: pointer to lookup key + * + * Returns pointer to registered object with id, which is next number to + * given id. + */ + +void *idr_get_next(struct idr *idp, int *nextidp) +{ + struct idr_layer *p, *pa[MAX_LEVEL]; + struct idr_layer **paa = &pa[0]; + int id = *nextidp; + int n, max; + + /* find first ent */ + n = idp->layers * IDR_BITS; + max = 1 << n; + p = rcu_dereference_raw(idp->top); + if (!p) + return NULL; + + while (id < max) { + while (n > 0 && p) { + n -= IDR_BITS; + *paa++ = p; + p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]); + } + + if (p) { + *nextidp = id; + return p; + } + + id += 1 << n; + while (n < fls(id)) { + n += IDR_BITS; + p = *--paa; + } + } + return NULL; +} +EXPORT_SYMBOL(idr_get_next); + + +/** + * idr_replace - replace pointer for given id + * @idp: idr handle + * @ptr: pointer you want associated with the id + * @id: lookup key + * + * Replace the pointer registered with an id and return the old value. + * A -ENOENT return indicates that @id was not found. + * A -EINVAL return indicates that @id was not within valid constraints. + * + * The caller must serialize with writers. + */ +void *idr_replace(struct idr *idp, void *ptr, int id) +{ + int n; + struct idr_layer *p, *old_p; + + p = idp->top; + if (!p) + return ERR_PTR(-EINVAL); + + n = (p->layer+1) * IDR_BITS; + + id &= MAX_ID_MASK; + + if (id >= (1 << n)) + return ERR_PTR(-EINVAL); + + n -= IDR_BITS; + while ((n > 0) && p) { + p = p->ary[(id >> n) & IDR_MASK]; + n -= IDR_BITS; + } + + n = id & IDR_MASK; + if (unlikely(p == NULL || !test_bit(n, &p->bitmap))) + return ERR_PTR(-ENOENT); + + old_p = p->ary[n]; + rcu_assign_pointer(p->ary[n], ptr); + + return old_p; +} +EXPORT_SYMBOL(idr_replace); + +void __init idr_init_cache(void) +{ + idr_layer_cache = kmem_cache_create("idr_layer_cache", + sizeof(struct idr_layer), 0, SLAB_PANIC, NULL); +} + +/** + * idr_init - initialize idr handle + * @idp: idr handle + * + * This function is use to set up the handle (@idp) that you will pass + * to the rest of the functions. + */ +void idr_init(struct idr *idp) +{ + memset(idp, 0, sizeof(struct idr)); + spin_lock_init(&idp->lock); +} +EXPORT_SYMBOL(idr_init); + + +/* + * IDA - IDR based ID allocator + * + * this is id allocator without id -> pointer translation. Memory + * usage is much lower than full blown idr because each id only + * occupies a bit. ida uses a custom leaf node which contains + * IDA_BITMAP_BITS slots. + * + * 2007-04-25 written by Tejun Heo <htejun@gmail.com> + */ + +static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap) +{ + unsigned long flags; + + if (!ida->free_bitmap) { + spin_lock_irqsave(&ida->idr.lock, flags); + if (!ida->free_bitmap) { + ida->free_bitmap = bitmap; + bitmap = NULL; + } + spin_unlock_irqrestore(&ida->idr.lock, flags); + } + + kfree(bitmap); +} + +/** + * ida_pre_get - reserve resources for ida allocation + * @ida: ida handle + * @gfp_mask: memory allocation flag + * + * This function should be called prior to locking and calling the + * following function. It preallocates enough memory to satisfy the + * worst possible allocation. + * + * If the system is REALLY out of memory this function returns 0, + * otherwise 1. + */ +int ida_pre_get(struct ida *ida, gfp_t gfp_mask) +{ + /* allocate idr_layers */ + if (!idr_pre_get(&ida->idr, gfp_mask)) + return 0; + + /* allocate free_bitmap */ + if (!ida->free_bitmap) { + struct ida_bitmap *bitmap; + + bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask); + if (!bitmap) + return 0; + + free_bitmap(ida, bitmap); + } + + return 1; +} +EXPORT_SYMBOL(ida_pre_get); + +/** + * ida_get_new_above - allocate new ID above or equal to a start id + * @ida: ida handle + * @staring_id: id to start search at + * @p_id: pointer to the allocated handle + * + * Allocate new ID above or equal to @ida. It should be called with + * any required locks. + * + * If memory is required, it will return -EAGAIN, you should unlock + * and go back to the ida_pre_get() call. If the ida is full, it will + * return -ENOSPC. + * + * @p_id returns a value in the range @starting_id ... 0x7fffffff. + */ +int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) +{ + struct idr_layer *pa[MAX_LEVEL]; + struct ida_bitmap *bitmap; + unsigned long flags; + int idr_id = starting_id / IDA_BITMAP_BITS; + int offset = starting_id % IDA_BITMAP_BITS; + int t, id; + + restart: + /* get vacant slot */ + t = idr_get_empty_slot(&ida->idr, idr_id, pa); + if (t < 0) + return _idr_rc_to_errno(t); + + if (t * IDA_BITMAP_BITS >= MAX_ID_BIT) + return -ENOSPC; + + if (t != idr_id) + offset = 0; + idr_id = t; + + /* if bitmap isn't there, create a new one */ + bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK]; + if (!bitmap) { + spin_lock_irqsave(&ida->idr.lock, flags); + bitmap = ida->free_bitmap; + ida->free_bitmap = NULL; + spin_unlock_irqrestore(&ida->idr.lock, flags); + + if (!bitmap) + return -EAGAIN; + + memset(bitmap, 0, sizeof(struct ida_bitmap)); + rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK], + (void *)bitmap); + pa[0]->count++; + } + + /* lookup for empty slot */ + t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset); + if (t == IDA_BITMAP_BITS) { + /* no empty slot after offset, continue to the next chunk */ + idr_id++; + offset = 0; + goto restart; + } + + id = idr_id * IDA_BITMAP_BITS + t; + if (id >= MAX_ID_BIT) + return -ENOSPC; + + __set_bit(t, bitmap->bitmap); + if (++bitmap->nr_busy == IDA_BITMAP_BITS) + idr_mark_full(pa, idr_id); + + *p_id = id; + + /* Each leaf node can handle nearly a thousand slots and the + * whole idea of ida is to have small memory foot print. + * Throw away extra resources one by one after each successful + * allocation. + */ + if (ida->idr.id_free_cnt || ida->free_bitmap) { + struct idr_layer *p = get_from_free_list(&ida->idr); + if (p) + kmem_cache_free(idr_layer_cache, p); + } + + return 0; +} +EXPORT_SYMBOL(ida_get_new_above); + +/** + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Allocate new ID. It should be called with any required locks. + * + * If memory is required, it will return -EAGAIN, you should unlock + * and go back to the idr_pre_get() call. If the idr is full, it will + * return -ENOSPC. + * + * @id returns a value in the range 0 ... 0x7fffffff. + */ +int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} +EXPORT_SYMBOL(ida_get_new); + +/** + * ida_remove - remove the given ID + * @ida: ida handle + * @id: ID to free + */ +void ida_remove(struct ida *ida, int id) +{ + struct idr_layer *p = ida->idr.top; + int shift = (ida->idr.layers - 1) * IDR_BITS; + int idr_id = id / IDA_BITMAP_BITS; + int offset = id % IDA_BITMAP_BITS; + int n; + struct ida_bitmap *bitmap; + + /* clear full bits while looking up the leaf idr_layer */ + while ((shift > 0) && p) { + n = (idr_id >> shift) & IDR_MASK; + __clear_bit(n, &p->bitmap); + p = p->ary[n]; + shift -= IDR_BITS; + } + + if (p == NULL) + goto err; + + n = idr_id & IDR_MASK; + __clear_bit(n, &p->bitmap); + + bitmap = (void *)p->ary[n]; + if (!test_bit(offset, bitmap->bitmap)) + goto err; + + /* update bitmap and remove it if empty */ + __clear_bit(offset, bitmap->bitmap); + if (--bitmap->nr_busy == 0) { + __set_bit(n, &p->bitmap); /* to please idr_remove() */ + idr_remove(&ida->idr, idr_id); + free_bitmap(ida, bitmap); + } + + return; + + err: + printk(KERN_WARNING + "ida_remove called for id=%d which is not allocated.\n", id); +} +EXPORT_SYMBOL(ida_remove); + +/** + * ida_destroy - release all cached layers within an ida tree + * ida: ida handle + */ +void ida_destroy(struct ida *ida) +{ + idr_destroy(&ida->idr); + kfree(ida->free_bitmap); +} +EXPORT_SYMBOL(ida_destroy); + +/** + * ida_init - initialize ida handle + * @ida: ida handle + * + * This function is use to set up the handle (@ida) that you will pass + * to the rest of the functions. + */ +void ida_init(struct ida *ida) +{ + memset(ida, 0, sizeof(struct ida)); + idr_init(&ida->idr); + +} +EXPORT_SYMBOL(ida_init); diff --git a/lib/inflate.c b/lib/inflate.c new file mode 100644 index 00000000..013a7619 --- /dev/null +++ b/lib/inflate.c @@ -0,0 +1,1309 @@ +#define DEBG(x) +#define DEBG1(x) +/* inflate.c -- Not copyrighted 1992 by Mark Adler + version c10p1, 10 January 1993 */ + +/* + * Adapted for booting Linux by Hannu Savolainen 1993 + * based on gzip-1.0.3 + * + * Nicolas Pitre <nico@fluxnic.net>, 1999/04/14 : + * Little mods for all variable to reside either into rodata or bss segments + * by marking constant variables with 'const' and initializing all the others + * at run-time only. This allows for the kernel uncompressor to run + * directly from Flash or ROM memory on embedded systems. + */ + +/* + Inflate deflated (PKZIP's method 8 compressed) data. The compression + method searches for as much of the current string of bytes (up to a + length of 258) in the previous 32 K bytes. If it doesn't find any + matches (of at least length 3), it codes the next byte. Otherwise, it + codes the length of the matched string and its distance backwards from + the current position. There is a single Huffman code that codes both + single bytes (called "literals") and match lengths. A second Huffman + code codes the distance information, which follows a length code. Each + length or distance code actually represents a base value and a number + of "extra" (sometimes zero) bits to get to add to the base value. At + the end of each deflated block is a special end-of-block (EOB) literal/ + length code. The decoding process is basically: get a literal/length + code; if EOB then done; if a literal, emit the decoded byte; if a + length then get the distance and emit the referred-to bytes from the + sliding window of previously emitted data. + + There are (currently) three kinds of inflate blocks: stored, fixed, and + dynamic. The compressor deals with some chunk of data at a time, and + decides which method to use on a chunk-by-chunk basis. A chunk might + typically be 32 K or 64 K. If the chunk is incompressible, then the + "stored" method is used. In this case, the bytes are simply stored as + is, eight bits per byte, with none of the above coding. The bytes are + preceded by a count, since there is no longer an EOB code. + + If the data is compressible, then either the fixed or dynamic methods + are used. In the dynamic method, the compressed data is preceded by + an encoding of the literal/length and distance Huffman codes that are + to be used to decode this block. The representation is itself Huffman + coded, and so is preceded by a description of that code. These code + descriptions take up a little space, and so for small blocks, there is + a predefined set of codes, called the fixed codes. The fixed method is + used if the block codes up smaller that way (usually for quite small + chunks), otherwise the dynamic method is used. In the latter case, the + codes are customized to the probabilities in the current block, and so + can code it much better than the pre-determined fixed codes. + + The Huffman codes themselves are decoded using a multi-level table + lookup, in order to maximize the speed of decoding plus the speed of + building the decoding tables. See the comments below that precede the + lbits and dbits tuning parameters. + */ + + +/* + Notes beyond the 1.93a appnote.txt: + + 1. Distance pointers never point before the beginning of the output + stream. + 2. Distance pointers can point back across blocks, up to 32k away. + 3. There is an implied maximum of 7 bits for the bit length table and + 15 bits for the actual data. + 4. If only one code exists, then it is encoded using one bit. (Zero + would be more efficient, but perhaps a little confusing.) If two + codes exist, they are coded using one bit each (0 and 1). + 5. There is no way of sending zero distance codes--a dummy must be + sent if there are none. (History: a pre 2.0 version of PKZIP would + store blocks with no distance codes, but this was discovered to be + too harsh a criterion.) Valid only for 1.93a. 2.04c does allow + zero distance codes, which is sent as one code of zero bits in + length. + 6. There are up to 286 literal/length codes. Code 256 represents the + end-of-block. Note however that the static length tree defines + 288 codes just to fill out the Huffman codes. Codes 286 and 287 + cannot be used though, since there is no length base or extra bits + defined for them. Similarly, there are up to 30 distance codes. + However, static trees define 32 codes (all 5 bits) to fill out the + Huffman codes, but the last two had better not show up in the data. + 7. Unzip can check dynamic Huffman blocks for complete code sets. + The exception is that a single code would not be complete (see #4). + 8. The five bits following the block type is really the number of + literal codes sent minus 257. + 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits + (1+6+6). Therefore, to output three times the length, you output + three codes (1+1+1), whereas to output four times the same length, + you only need two codes (1+3). Hmm. + 10. In the tree reconstruction algorithm, Code = Code + Increment + only if BitLength(i) is not zero. (Pretty obvious.) + 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19) + 12. Note: length code 284 can represent 227-258, but length code 285 + really is 258. The last length deserves its own, short code + since it gets used a lot in very redundant files. The length + 258 is special since 258 - 3 (the min match length) is 255. + 13. The literal/length and distance code bit lengths are read as a + single stream of lengths. It is possible (and advantageous) for + a repeat code (16, 17, or 18) to go across the boundary between + the two sets of lengths. + */ +#include <linux/compiler.h> +#ifdef NO_INFLATE_MALLOC +#include <linux/slab.h> +#endif + +#ifdef RCSID +static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #"; +#endif + +#ifndef STATIC + +#if defined(STDC_HEADERS) || defined(HAVE_STDLIB_H) +# include <sys/types.h> +# include <stdlib.h> +#endif + +#include "gzip.h" +#define STATIC +#endif /* !STATIC */ + +#ifndef INIT +#define INIT +#endif + +#define slide window + +/* Huffman code lookup table entry--this entry is four bytes for machines + that have 16-bit pointers (e.g. PC's in the small or medium model). + Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16 + means that v is a literal, 16 < e < 32 means that v is a pointer to + the next table, which codes e - 16 bits, and lastly e == 99 indicates + an unused code. If a code with e == 99 is looked up, this implies an + error in the data. */ +struct huft { + uch e; /* number of extra bits or operation */ + uch b; /* number of bits in this code or subcode */ + union { + ush n; /* literal, length base, or distance base */ + struct huft *t; /* pointer to next level of table */ + } v; +}; + + +/* Function prototypes */ +STATIC int INIT huft_build OF((unsigned *, unsigned, unsigned, + const ush *, const ush *, struct huft **, int *)); +STATIC int INIT huft_free OF((struct huft *)); +STATIC int INIT inflate_codes OF((struct huft *, struct huft *, int, int)); +STATIC int INIT inflate_stored OF((void)); +STATIC int INIT inflate_fixed OF((void)); +STATIC int INIT inflate_dynamic OF((void)); +STATIC int INIT inflate_block OF((int *)); +STATIC int INIT inflate OF((void)); + + +/* The inflate algorithm uses a sliding 32 K byte window on the uncompressed + stream to find repeated byte strings. This is implemented here as a + circular buffer. The index is updated simply by incrementing and then + ANDing with 0x7fff (32K-1). */ +/* It is left to other modules to supply the 32 K area. It is assumed + to be usable as if it were declared "uch slide[32768];" or as just + "uch *slide;" and then malloc'ed in the latter case. The definition + must be in unzip.h, included above. */ +/* unsigned wp; current position in slide */ +#define wp outcnt +#define flush_output(w) (wp=(w),flush_window()) + +/* Tables for deflate from PKZIP's appnote.txt. */ +static const unsigned border[] = { /* Order of the bit length code lengths */ + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; +static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + /* note: see note #13 above about the 258 in this list. */ +static const ush cplext[] = { /* Extra bits for literal codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */ +static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; +static const ush cpdext[] = { /* Extra bits for distance codes */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + + + +/* Macros for inflate() bit peeking and grabbing. + The usage is: + + NEEDBITS(j) + x = b & mask_bits[j]; + DUMPBITS(j) + + where NEEDBITS makes sure that b has at least j bits in it, and + DUMPBITS removes the bits from b. The macros use the variable k + for the number of bits in b. Normally, b and k are register + variables for speed, and are initialized at the beginning of a + routine that uses these macros from a global bit buffer and count. + + If we assume that EOB will be the longest code, then we will never + ask for bits with NEEDBITS that are beyond the end of the stream. + So, NEEDBITS should not read any more bytes than are needed to + meet the request. Then no bytes need to be "returned" to the buffer + at the end of the last block. + + However, this assumption is not true for fixed blocks--the EOB code + is 7 bits, but the other literal/length codes can be 8 or 9 bits. + (The EOB code is shorter than other codes because fixed blocks are + generally short. So, while a block always has an EOB, many other + literal/length codes have a significantly lower probability of + showing up at all.) However, by making the first table have a + lookup of seven bits, the EOB code will be found in that first + lookup, and so will not require that too many bits be pulled from + the stream. + */ + +STATIC ulg bb; /* bit buffer */ +STATIC unsigned bk; /* bits in bit buffer */ + +STATIC const ush mask_bits[] = { + 0x0000, + 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +#define NEXTBYTE() ({ int v = get_byte(); if (v < 0) goto underrun; (uch)v; }) +#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}} +#define DUMPBITS(n) {b>>=(n);k-=(n);} + +#ifndef NO_INFLATE_MALLOC +/* A trivial malloc implementation, adapted from + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + */ + +static unsigned long malloc_ptr; +static int malloc_count; + +static void *malloc(int size) +{ + void *p; + + if (size < 0) + error("Malloc error"); + if (!malloc_ptr) + malloc_ptr = free_mem_ptr; + + malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + + p = (void *)malloc_ptr; + malloc_ptr += size; + + if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) + error("Out of memory"); + + malloc_count++; + return p; +} + +static void free(void *where) +{ + malloc_count--; + if (!malloc_count) + malloc_ptr = free_mem_ptr; +} +#else +#define malloc(a) kmalloc(a, GFP_KERNEL) +#define free(a) kfree(a) +#endif + +/* + Huffman code decoding is performed using a multi-level table lookup. + The fastest way to decode is to simply build a lookup table whose + size is determined by the longest code. However, the time it takes + to build this table can also be a factor if the data being decoded + is not very long. The most common codes are necessarily the + shortest codes, so those codes dominate the decoding time, and hence + the speed. The idea is you can have a shorter table that decodes the + shorter, more probable codes, and then point to subsidiary tables for + the longer codes. The time it costs to decode the longer codes is + then traded against the time it takes to make longer tables. + + This results of this trade are in the variables lbits and dbits + below. lbits is the number of bits the first level table for literal/ + length codes can decode in one step, and dbits is the same thing for + the distance codes. Subsequent tables are also less than or equal to + those sizes. These values may be adjusted either when all of the + codes are shorter than that, in which case the longest code length in + bits is used, or when the shortest code is *longer* than the requested + table size, in which case the length of the shortest code in bits is + used. + + There are two different values for the two tables, since they code a + different number of possibilities each. The literal/length table + codes 286 possible values, or in a flat code, a little over eight + bits. The distance table codes 30 possible values, or a little less + than five bits, flat. The optimum values for speed end up being + about one bit more than those, so lbits is 8+1 and dbits is 5+1. + The optimum values may differ though from machine to machine, and + possibly even between compilers. Your mileage may vary. + */ + + +STATIC const int lbits = 9; /* bits in base literal/length lookup table */ +STATIC const int dbits = 6; /* bits in base distance lookup table */ + + +/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ +#define BMAX 16 /* maximum bit length of any code (16 for explode) */ +#define N_MAX 288 /* maximum number of codes in any set */ + + +STATIC unsigned hufts; /* track memory usage */ + + +STATIC int INIT huft_build( + unsigned *b, /* code lengths in bits (all assumed <= BMAX) */ + unsigned n, /* number of codes (assumed <= N_MAX) */ + unsigned s, /* number of simple-valued codes (0..s-1) */ + const ush *d, /* list of base values for non-simple codes */ + const ush *e, /* list of extra bits for non-simple codes */ + struct huft **t, /* result: starting table */ + int *m /* maximum lookup bits, returns actual */ + ) +/* Given a list of code lengths and a maximum table size, make a set of + tables to decode that set of codes. Return zero on success, one if + the given code set is incomplete (the tables are still built in this + case), two if the input is invalid (all zero length codes or an + oversubscribed set of lengths), and three if not enough memory. */ +{ + unsigned a; /* counter for codes of length k */ + unsigned f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int h; /* table level */ + register unsigned i; /* counter, current code */ + register unsigned j; /* counter */ + register int k; /* number of bits in current code */ + int l; /* bits per table (returned in m) */ + register unsigned *p; /* pointer into c[], b[], or v[] */ + register struct huft *q; /* points to current table */ + struct huft r; /* table entry for structure assignment */ + register int w; /* bits before this table == (l * h) */ + unsigned *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + unsigned z; /* number of entries in current table */ + struct { + unsigned c[BMAX+1]; /* bit length count table */ + struct huft *u[BMAX]; /* table stack */ + unsigned v[N_MAX]; /* values in order of bit length */ + unsigned x[BMAX+1]; /* bit offsets, then code stack */ + } *stk; + unsigned *c, *v, *x; + struct huft **u; + int ret; + +DEBG("huft1 "); + + stk = malloc(sizeof(*stk)); + if (stk == NULL) + return 3; /* out of memory */ + + c = stk->c; + v = stk->v; + x = stk->x; + u = stk->u; + + /* Generate counts for each bit length */ + memzero(stk->c, sizeof(stk->c)); + p = b; i = n; + do { + Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"), + n-i, *p)); + c[*p]++; /* assume all entries <= BMAX */ + p++; /* Can't combine with above line (Solaris bug) */ + } while (--i); + if (c[0] == n) /* null input--all zero length codes */ + { + *t = (struct huft *)NULL; + *m = 0; + ret = 2; + goto out; + } + +DEBG("huft2 "); + + /* Find minimum and maximum length, bound *m by those */ + l = *m; + for (j = 1; j <= BMAX; j++) + if (c[j]) + break; + k = j; /* minimum code length */ + if ((unsigned)l < j) + l = j; + for (i = BMAX; i; i--) + if (c[i]) + break; + g = i; /* maximum code length */ + if ((unsigned)l > i) + l = i; + *m = l; + +DEBG("huft3 "); + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) + if ((y -= c[j]) < 0) { + ret = 2; /* bad input: more codes than bits */ + goto out; + } + if ((y -= c[i]) < 0) { + ret = 2; + goto out; + } + c[i] += y; + +DEBG("huft4 "); + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; xp = x + 2; + while (--i) { /* note that i == g from above */ + *xp++ = (j += *p++); + } + +DEBG("huft5 "); + + /* Make a table of values in order of bit lengths */ + p = b; i = 0; + do { + if ((j = *p++) != 0) + v[x[j]++] = i; + } while (++i < n); + n = x[g]; /* set n to length of v */ + +DEBG("h6 "); + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + h = -1; /* no tables yet--level -1 */ + w = -l; /* bits decoded == (l * h) */ + u[0] = (struct huft *)NULL; /* just to keep compilers happy */ + q = (struct huft *)NULL; /* ditto */ + z = 0; /* ditto */ +DEBG("h6a "); + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) + { +DEBG("h6b "); + a = c[k]; + while (a--) + { +DEBG("h6b1 "); + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > w + l) + { +DEBG1("1 "); + h++; + w += l; /* previous table always l bits */ + + /* compute minimum size table less than or equal to l bits */ + z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */ + if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */ + { /* too few codes for k-w bit table */ +DEBG1("2 "); + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + if (j < z) + while (++j < z) /* try smaller tables up to z bits */ + { + if ((f <<= 1) <= *++xp) + break; /* enough codes to use up j bits */ + f -= *xp; /* else deduct codes from patterns */ + } + } +DEBG1("3 "); + z = 1 << j; /* table entries for j-bit table */ + + /* allocate and link in new table */ + if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) == + (struct huft *)NULL) + { + if (h) + huft_free(u[0]); + ret = 3; /* not enough memory */ + goto out; + } +DEBG1("4 "); + hufts += z + 1; /* track memory usage */ + *t = q + 1; /* link to list for huft_free() */ + *(t = &(q->v.t)) = (struct huft *)NULL; + u[h] = ++q; /* table starts after link */ + +DEBG1("5 "); + /* connect to last table, if there is one */ + if (h) + { + x[h] = i; /* save pattern for backing up */ + r.b = (uch)l; /* bits to dump before this table */ + r.e = (uch)(16 + j); /* bits in this table */ + r.v.t = q; /* pointer to this table */ + j = i >> (w - l); /* (get around Turbo C bug) */ + u[h-1][j] = r; /* connect to last table */ + } +DEBG1("6 "); + } +DEBG("h6c "); + + /* set up table entry in r */ + r.b = (uch)(k - w); + if (p >= v + n) + r.e = 99; /* out of values--invalid code */ + else if (*p < s) + { + r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */ + r.v.n = (ush)(*p); /* simple code is just the value */ + p++; /* one compiler does not like *p++ */ + } + else + { + r.e = (uch)e[*p - s]; /* non-simple--look up in lists */ + r.v.n = d[*p++ - s]; + } +DEBG("h6d "); + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) + q[j] = r; + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) + i ^= j; + i ^= j; + + /* backup over finished tables */ + while ((i & ((1 << w) - 1)) != x[h]) + { + h--; /* don't need to update q */ + w -= l; + } +DEBG("h6e "); + } +DEBG("h6f "); + } + +DEBG("huft7 "); + + /* Return true (1) if we were given an incomplete table */ + ret = y != 0 && g != 1; + + out: + free(stk); + return ret; +} + + + +STATIC int INIT huft_free( + struct huft *t /* table to free */ + ) +/* Free the malloc'ed tables built by huft_build(), which makes a linked + list of the tables it made, with the links in a dummy first entry of + each table. */ +{ + register struct huft *p, *q; + + + /* Go through linked list, freeing from the malloced (t[-1]) address. */ + p = t; + while (p != (struct huft *)NULL) + { + q = (--p)->v.t; + free((char*)p); + p = q; + } + return 0; +} + + +STATIC int INIT inflate_codes( + struct huft *tl, /* literal/length decoder tables */ + struct huft *td, /* distance decoder tables */ + int bl, /* number of bits decoded by tl[] */ + int bd /* number of bits decoded by td[] */ + ) +/* inflate (decompress) the codes in a deflated (compressed) block. + Return an error code or zero if it all goes ok. */ +{ + register unsigned e; /* table entry flag/number of extra bits */ + unsigned n, d; /* length and index for copy */ + unsigned w; /* current window position */ + struct huft *t; /* pointer to table entry */ + unsigned ml, md; /* masks for bl and bd bits */ + register ulg b; /* bit buffer */ + register unsigned k; /* number of bits in bit buffer */ + + + /* make local copies of globals */ + b = bb; /* initialize bit buffer */ + k = bk; + w = wp; /* initialize window position */ + + /* inflate the coded data */ + ml = mask_bits[bl]; /* precompute masks for speed */ + md = mask_bits[bd]; + for (;;) /* do until end of block */ + { + NEEDBITS((unsigned)bl) + if ((e = (t = tl + ((unsigned)b & ml))->e) > 16) + do { + if (e == 99) + return 1; + DUMPBITS(t->b) + e -= 16; + NEEDBITS(e) + } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); + DUMPBITS(t->b) + if (e == 16) /* then it's a literal */ + { + slide[w++] = (uch)t->v.n; + Tracevv((stderr, "%c", slide[w-1])); + if (w == WSIZE) + { + flush_output(w); + w = 0; + } + } + else /* it's an EOB or a length */ + { + /* exit if end of block */ + if (e == 15) + break; + + /* get length of block to copy */ + NEEDBITS(e) + n = t->v.n + ((unsigned)b & mask_bits[e]); + DUMPBITS(e); + + /* decode distance of block to copy */ + NEEDBITS((unsigned)bd) + if ((e = (t = td + ((unsigned)b & md))->e) > 16) + do { + if (e == 99) + return 1; + DUMPBITS(t->b) + e -= 16; + NEEDBITS(e) + } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16); + DUMPBITS(t->b) + NEEDBITS(e) + d = w - t->v.n - ((unsigned)b & mask_bits[e]); + DUMPBITS(e) + Tracevv((stderr,"\\[%d,%d]", w-d, n)); + + /* do the copy */ + do { + n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e); +#if !defined(NOMEMCPY) && !defined(DEBUG) + if (w - d >= e) /* (this test assumes unsigned comparison) */ + { + memcpy(slide + w, slide + d, e); + w += e; + d += e; + } + else /* do it slow to avoid memcpy() overlap */ +#endif /* !NOMEMCPY */ + do { + slide[w++] = slide[d++]; + Tracevv((stderr, "%c", slide[w-1])); + } while (--e); + if (w == WSIZE) + { + flush_output(w); + w = 0; + } + } while (n); + } + } + + + /* restore the globals from the locals */ + wp = w; /* restore global window pointer */ + bb = b; /* restore global bit buffer */ + bk = k; + + /* done */ + return 0; + + underrun: + return 4; /* Input underrun */ +} + + + +STATIC int INIT inflate_stored(void) +/* "decompress" an inflated type 0 (stored) block. */ +{ + unsigned n; /* number of bytes in block */ + unsigned w; /* current window position */ + register ulg b; /* bit buffer */ + register unsigned k; /* number of bits in bit buffer */ + +DEBG("<stor"); + + /* make local copies of globals */ + b = bb; /* initialize bit buffer */ + k = bk; + w = wp; /* initialize window position */ + + + /* go to byte boundary */ + n = k & 7; + DUMPBITS(n); + + + /* get the length and its complement */ + NEEDBITS(16) + n = ((unsigned)b & 0xffff); + DUMPBITS(16) + NEEDBITS(16) + if (n != (unsigned)((~b) & 0xffff)) + return 1; /* error in compressed data */ + DUMPBITS(16) + + + /* read and output the compressed data */ + while (n--) + { + NEEDBITS(8) + slide[w++] = (uch)b; + if (w == WSIZE) + { + flush_output(w); + w = 0; + } + DUMPBITS(8) + } + + + /* restore the globals from the locals */ + wp = w; /* restore global window pointer */ + bb = b; /* restore global bit buffer */ + bk = k; + + DEBG(">"); + return 0; + + underrun: + return 4; /* Input underrun */ +} + + +/* + * We use `noinline' here to prevent gcc-3.5 from using too much stack space + */ +STATIC int noinline INIT inflate_fixed(void) +/* decompress an inflated type 1 (fixed Huffman codes) block. We should + either replace this with a custom decoder, or at least precompute the + Huffman tables. */ +{ + int i; /* temporary variable */ + struct huft *tl; /* literal/length code table */ + struct huft *td; /* distance code table */ + int bl; /* lookup bits for tl */ + int bd; /* lookup bits for td */ + unsigned *l; /* length list for huft_build */ + +DEBG("<fix"); + + l = malloc(sizeof(*l) * 288); + if (l == NULL) + return 3; /* out of memory */ + + /* set up literal table */ + for (i = 0; i < 144; i++) + l[i] = 8; + for (; i < 256; i++) + l[i] = 9; + for (; i < 280; i++) + l[i] = 7; + for (; i < 288; i++) /* make a complete, but wrong code set */ + l[i] = 8; + bl = 7; + if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) { + free(l); + return i; + } + + /* set up distance table */ + for (i = 0; i < 30; i++) /* make an incomplete code set */ + l[i] = 5; + bd = 5; + if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1) + { + huft_free(tl); + free(l); + + DEBG(">"); + return i; + } + + + /* decompress until an end-of-block code */ + if (inflate_codes(tl, td, bl, bd)) { + free(l); + return 1; + } + + /* free the decoding tables, return */ + free(l); + huft_free(tl); + huft_free(td); + return 0; +} + + +/* + * We use `noinline' here to prevent gcc-3.5 from using too much stack space + */ +STATIC int noinline INIT inflate_dynamic(void) +/* decompress an inflated type 2 (dynamic Huffman codes) block. */ +{ + int i; /* temporary variables */ + unsigned j; + unsigned l; /* last length */ + unsigned m; /* mask for bit lengths table */ + unsigned n; /* number of lengths to get */ + struct huft *tl; /* literal/length code table */ + struct huft *td; /* distance code table */ + int bl; /* lookup bits for tl */ + int bd; /* lookup bits for td */ + unsigned nb; /* number of bit length codes */ + unsigned nl; /* number of literal/length codes */ + unsigned nd; /* number of distance codes */ + unsigned *ll; /* literal/length and distance code lengths */ + register ulg b; /* bit buffer */ + register unsigned k; /* number of bits in bit buffer */ + int ret; + +DEBG("<dyn"); + +#ifdef PKZIP_BUG_WORKAROUND + ll = malloc(sizeof(*ll) * (288+32)); /* literal/length and distance code lengths */ +#else + ll = malloc(sizeof(*ll) * (286+30)); /* literal/length and distance code lengths */ +#endif + + if (ll == NULL) + return 1; + + /* make local bit buffer */ + b = bb; + k = bk; + + + /* read in table lengths */ + NEEDBITS(5) + nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */ + DUMPBITS(5) + NEEDBITS(5) + nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */ + DUMPBITS(5) + NEEDBITS(4) + nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */ + DUMPBITS(4) +#ifdef PKZIP_BUG_WORKAROUND + if (nl > 288 || nd > 32) +#else + if (nl > 286 || nd > 30) +#endif + { + ret = 1; /* bad lengths */ + goto out; + } + +DEBG("dyn1 "); + + /* read in bit-length-code lengths */ + for (j = 0; j < nb; j++) + { + NEEDBITS(3) + ll[border[j]] = (unsigned)b & 7; + DUMPBITS(3) + } + for (; j < 19; j++) + ll[border[j]] = 0; + +DEBG("dyn2 "); + + /* build decoding table for trees--single level, 7 bit lookup */ + bl = 7; + if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0) + { + if (i == 1) + huft_free(tl); + ret = i; /* incomplete code set */ + goto out; + } + +DEBG("dyn3 "); + + /* read in literal and distance code lengths */ + n = nl + nd; + m = mask_bits[bl]; + i = l = 0; + while ((unsigned)i < n) + { + NEEDBITS((unsigned)bl) + j = (td = tl + ((unsigned)b & m))->b; + DUMPBITS(j) + j = td->v.n; + if (j < 16) /* length of code in bits (0..15) */ + ll[i++] = l = j; /* save last length in l */ + else if (j == 16) /* repeat last length 3 to 6 times */ + { + NEEDBITS(2) + j = 3 + ((unsigned)b & 3); + DUMPBITS(2) + if ((unsigned)i + j > n) { + ret = 1; + goto out; + } + while (j--) + ll[i++] = l; + } + else if (j == 17) /* 3 to 10 zero length codes */ + { + NEEDBITS(3) + j = 3 + ((unsigned)b & 7); + DUMPBITS(3) + if ((unsigned)i + j > n) { + ret = 1; + goto out; + } + while (j--) + ll[i++] = 0; + l = 0; + } + else /* j == 18: 11 to 138 zero length codes */ + { + NEEDBITS(7) + j = 11 + ((unsigned)b & 0x7f); + DUMPBITS(7) + if ((unsigned)i + j > n) { + ret = 1; + goto out; + } + while (j--) + ll[i++] = 0; + l = 0; + } + } + +DEBG("dyn4 "); + + /* free decoding table for trees */ + huft_free(tl); + +DEBG("dyn5 "); + + /* restore the global bit buffer */ + bb = b; + bk = k; + +DEBG("dyn5a "); + + /* build the decoding tables for literal/length and distance codes */ + bl = lbits; + if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0) + { +DEBG("dyn5b "); + if (i == 1) { + error("incomplete literal tree"); + huft_free(tl); + } + ret = i; /* incomplete code set */ + goto out; + } +DEBG("dyn5c "); + bd = dbits; + if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0) + { +DEBG("dyn5d "); + if (i == 1) { + error("incomplete distance tree"); +#ifdef PKZIP_BUG_WORKAROUND + i = 0; + } +#else + huft_free(td); + } + huft_free(tl); + ret = i; /* incomplete code set */ + goto out; +#endif + } + +DEBG("dyn6 "); + + /* decompress until an end-of-block code */ + if (inflate_codes(tl, td, bl, bd)) { + ret = 1; + goto out; + } + +DEBG("dyn7 "); + + /* free the decoding tables, return */ + huft_free(tl); + huft_free(td); + + DEBG(">"); + ret = 0; +out: + free(ll); + return ret; + +underrun: + ret = 4; /* Input underrun */ + goto out; +} + + + +STATIC int INIT inflate_block( + int *e /* last block flag */ + ) +/* decompress an inflated block */ +{ + unsigned t; /* block type */ + register ulg b; /* bit buffer */ + register unsigned k; /* number of bits in bit buffer */ + + DEBG("<blk"); + + /* make local bit buffer */ + b = bb; + k = bk; + + + /* read in last block bit */ + NEEDBITS(1) + *e = (int)b & 1; + DUMPBITS(1) + + + /* read in block type */ + NEEDBITS(2) + t = (unsigned)b & 3; + DUMPBITS(2) + + + /* restore the global bit buffer */ + bb = b; + bk = k; + + /* inflate that block type */ + if (t == 2) + return inflate_dynamic(); + if (t == 0) + return inflate_stored(); + if (t == 1) + return inflate_fixed(); + + DEBG(">"); + + /* bad block type */ + return 2; + + underrun: + return 4; /* Input underrun */ +} + + + +STATIC int INIT inflate(void) +/* decompress an inflated entry */ +{ + int e; /* last block flag */ + int r; /* result code */ + unsigned h; /* maximum struct huft's malloc'ed */ + + /* initialize window, bit buffer */ + wp = 0; + bk = 0; + bb = 0; + + + /* decompress until the last block */ + h = 0; + do { + hufts = 0; +#ifdef ARCH_HAS_DECOMP_WDOG + arch_decomp_wdog(); +#endif + r = inflate_block(&e); + if (r) + return r; + if (hufts > h) + h = hufts; + } while (!e); + + /* Undo too much lookahead. The next read will be byte aligned so we + * can discard unused bits in the last meaningful byte. + */ + while (bk >= 8) { + bk -= 8; + inptr--; + } + + /* flush out slide */ + flush_output(wp); + + + /* return success */ +#ifdef DEBUG + fprintf(stderr, "<%u> ", h); +#endif /* DEBUG */ + return 0; +} + +/********************************************************************** + * + * The following are support routines for inflate.c + * + **********************************************************************/ + +static ulg crc_32_tab[256]; +static ulg crc; /* initialized in makecrc() so it'll reside in bss */ +#define CRC_VALUE (crc ^ 0xffffffffUL) + +/* + * Code to compute the CRC-32 table. Borrowed from + * gzip-1.0.3/makecrc.c. + */ + +static void INIT +makecrc(void) +{ +/* Not copyrighted 1990 Mark Adler */ + + unsigned long c; /* crc shift register */ + unsigned long e; /* polynomial exclusive-or pattern */ + int i; /* counter for all possible eight bit values */ + int k; /* byte being shifted into crc apparatus */ + + /* terms of polynomial defining this crc (except x^32): */ + static const int p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; + + /* Make exclusive-or pattern from polynomial */ + e = 0; + for (i = 0; i < sizeof(p)/sizeof(int); i++) + e |= 1L << (31 - p[i]); + + crc_32_tab[0] = 0; + + for (i = 1; i < 256; i++) + { + c = 0; + for (k = i | 256; k != 1; k >>= 1) + { + c = c & 1 ? (c >> 1) ^ e : c >> 1; + if (k & 1) + c ^= e; + } + crc_32_tab[i] = c; + } + + /* this is initialized here so this code could reside in ROM */ + crc = (ulg)0xffffffffUL; /* shift register contents */ +} + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +/* + * Do the uncompression! + */ +static int INIT gunzip(void) +{ + uch flags; + unsigned char magic[2]; /* magic header */ + char method; + ulg orig_crc = 0; /* original crc */ + ulg orig_len = 0; /* original uncompressed length */ + int res; + + magic[0] = NEXTBYTE(); + magic[1] = NEXTBYTE(); + method = NEXTBYTE(); + + if (magic[0] != 037 || + ((magic[1] != 0213) && (magic[1] != 0236))) { + error("bad gzip magic numbers"); + return -1; + } + + /* We only support method #8, DEFLATED */ + if (method != 8) { + error("internal error, invalid method"); + return -1; + } + + flags = (uch)get_byte(); + if ((flags & ENCRYPTED) != 0) { + error("Input is encrypted"); + return -1; + } + if ((flags & CONTINUATION) != 0) { + error("Multi part input"); + return -1; + } + if ((flags & RESERVED) != 0) { + error("Input has invalid flags"); + return -1; + } + NEXTBYTE(); /* Get timestamp */ + NEXTBYTE(); + NEXTBYTE(); + NEXTBYTE(); + + (void)NEXTBYTE(); /* Ignore extra flags for the moment */ + (void)NEXTBYTE(); /* Ignore OS type for the moment */ + + if ((flags & EXTRA_FIELD) != 0) { + unsigned len = (unsigned)NEXTBYTE(); + len |= ((unsigned)NEXTBYTE())<<8; + while (len--) (void)NEXTBYTE(); + } + + /* Get original file name if it was truncated */ + if ((flags & ORIG_NAME) != 0) { + /* Discard the old name */ + while (NEXTBYTE() != 0) /* null */ ; + } + + /* Discard file comment if any */ + if ((flags & COMMENT) != 0) { + while (NEXTBYTE() != 0) /* null */ ; + } + + /* Decompress */ + if ((res = inflate())) { + switch (res) { + case 0: + break; + case 1: + error("invalid compressed format (err=1)"); + break; + case 2: + error("invalid compressed format (err=2)"); + break; + case 3: + error("out of memory"); + break; + case 4: + error("out of input data"); + break; + default: + error("invalid compressed format (other)"); + } + return -1; + } + + /* Get the crc and original length */ + /* crc32 (see algorithm.doc) + * uncompressed input size modulo 2^32 + */ + orig_crc = (ulg) NEXTBYTE(); + orig_crc |= (ulg) NEXTBYTE() << 8; + orig_crc |= (ulg) NEXTBYTE() << 16; + orig_crc |= (ulg) NEXTBYTE() << 24; + + orig_len = (ulg) NEXTBYTE(); + orig_len |= (ulg) NEXTBYTE() << 8; + orig_len |= (ulg) NEXTBYTE() << 16; + orig_len |= (ulg) NEXTBYTE() << 24; + + /* Validate decompression */ + if (orig_crc != CRC_VALUE) { + error("crc error"); + return -1; + } + if (orig_len != bytes_out) { + error("length error"); + return -1; + } + return 0; + + underrun: /* NEXTBYTE() goto's here if needed */ + error("out of input data"); + return -1; +} + + diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c new file mode 100644 index 00000000..fd355a99 --- /dev/null +++ b/lib/int_sqrt.c @@ -0,0 +1,32 @@ + +#include <linux/kernel.h> +#include <linux/module.h> + +/** + * int_sqrt - rough approximation to sqrt + * @x: integer of which to calculate the sqrt + * + * A very rough approximation to the sqrt() function. + */ +unsigned long int_sqrt(unsigned long x) +{ + unsigned long op, res, one; + + op = x; + res = 0; + + one = 1UL << (BITS_PER_LONG - 2); + while (one > op) + one >>= 2; + + while (one != 0) { + if (op >= res + one) { + op = op - (res + one); + res = res + 2 * one; + } + res /= 2; + one /= 4; + } + return res; +} +EXPORT_SYMBOL(int_sqrt); diff --git a/lib/iomap.c b/lib/iomap.c new file mode 100644 index 00000000..d3222938 --- /dev/null +++ b/lib/iomap.c @@ -0,0 +1,282 @@ +/* + * Implement the default iomap interfaces + * + * (C) Copyright 2004 Linus Torvalds + */ +#include <linux/pci.h> +#include <linux/io.h> + +#include <linux/module.h> + +/* + * Read/write from/to an (offsettable) iomem cookie. It might be a PIO + * access or a MMIO access, these functions don't care. The info is + * encoded in the hardware mapping set up by the mapping functions + * (or the cookie itself, depending on implementation and hw). + * + * The generic routines don't assume any hardware mappings, and just + * encode the PIO/MMIO as part of the cookie. They coldly assume that + * the MMIO IO mappings are not in the low address range. + * + * Architectures for which this is not true can't use this generic + * implementation and should do their own copy. + */ + +#ifndef HAVE_ARCH_PIO_SIZE +/* + * We encode the physical PIO addresses (0-0xffff) into the + * pointer by offsetting them with a constant (0x10000) and + * assuming that all the low addresses are always PIO. That means + * we can do some sanity checks on the low bits, and don't + * need to just take things for granted. + */ +#define PIO_OFFSET 0x10000UL +#define PIO_MASK 0x0ffffUL +#define PIO_RESERVED 0x40000UL +#endif + +static void bad_io_access(unsigned long port, const char *access) +{ + static int count = 10; + if (count) { + count--; + WARN(1, KERN_ERR "Bad IO access at port %#lx (%s)\n", port, access); + } +} + +/* + * Ugly macros are a way of life. + */ +#define IO_COND(addr, is_pio, is_mmio) do { \ + unsigned long port = (unsigned long __force)addr; \ + if (port >= PIO_RESERVED) { \ + is_mmio; \ + } else if (port > PIO_OFFSET) { \ + port &= PIO_MASK; \ + is_pio; \ + } else \ + bad_io_access(port, #is_pio ); \ +} while (0) + +#ifndef pio_read16be +#define pio_read16be(port) swab16(inw(port)) +#define pio_read32be(port) swab32(inl(port)) +#endif + +#ifndef mmio_read16be +#define mmio_read16be(addr) be16_to_cpu(__raw_readw(addr)) +#define mmio_read32be(addr) be32_to_cpu(__raw_readl(addr)) +#endif + +unsigned int ioread8(void __iomem *addr) +{ + IO_COND(addr, return inb(port), return readb(addr)); + return 0xff; +} +unsigned int ioread16(void __iomem *addr) +{ + IO_COND(addr, return inw(port), return readw(addr)); + return 0xffff; +} +unsigned int ioread16be(void __iomem *addr) +{ + IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); + return 0xffff; +} +unsigned int ioread32(void __iomem *addr) +{ + IO_COND(addr, return inl(port), return readl(addr)); + return 0xffffffff; +} +unsigned int ioread32be(void __iomem *addr) +{ + IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); + return 0xffffffff; +} +EXPORT_SYMBOL(ioread8); +EXPORT_SYMBOL(ioread16); +EXPORT_SYMBOL(ioread16be); +EXPORT_SYMBOL(ioread32); +EXPORT_SYMBOL(ioread32be); + +#ifndef pio_write16be +#define pio_write16be(val,port) outw(swab16(val),port) +#define pio_write32be(val,port) outl(swab32(val),port) +#endif + +#ifndef mmio_write16be +#define mmio_write16be(val,port) __raw_writew(be16_to_cpu(val),port) +#define mmio_write32be(val,port) __raw_writel(be32_to_cpu(val),port) +#endif + +void iowrite8(u8 val, void __iomem *addr) +{ + IO_COND(addr, outb(val,port), writeb(val, addr)); +} +void iowrite16(u16 val, void __iomem *addr) +{ + IO_COND(addr, outw(val,port), writew(val, addr)); +} +void iowrite16be(u16 val, void __iomem *addr) +{ + IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr)); +} +void iowrite32(u32 val, void __iomem *addr) +{ + IO_COND(addr, outl(val,port), writel(val, addr)); +} +void iowrite32be(u32 val, void __iomem *addr) +{ + IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr)); +} +EXPORT_SYMBOL(iowrite8); +EXPORT_SYMBOL(iowrite16); +EXPORT_SYMBOL(iowrite16be); +EXPORT_SYMBOL(iowrite32); +EXPORT_SYMBOL(iowrite32be); + +/* + * These are the "repeat MMIO read/write" functions. + * Note the "__raw" accesses, since we don't want to + * convert to CPU byte order. We write in "IO byte + * order" (we also don't have IO barriers). + */ +#ifndef mmio_insb +static inline void mmio_insb(void __iomem *addr, u8 *dst, int count) +{ + while (--count >= 0) { + u8 data = __raw_readb(addr); + *dst = data; + dst++; + } +} +static inline void mmio_insw(void __iomem *addr, u16 *dst, int count) +{ + while (--count >= 0) { + u16 data = __raw_readw(addr); + *dst = data; + dst++; + } +} +static inline void mmio_insl(void __iomem *addr, u32 *dst, int count) +{ + while (--count >= 0) { + u32 data = __raw_readl(addr); + *dst = data; + dst++; + } +} +#endif + +#ifndef mmio_outsb +static inline void mmio_outsb(void __iomem *addr, const u8 *src, int count) +{ + while (--count >= 0) { + __raw_writeb(*src, addr); + src++; + } +} +static inline void mmio_outsw(void __iomem *addr, const u16 *src, int count) +{ + while (--count >= 0) { + __raw_writew(*src, addr); + src++; + } +} +static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count) +{ + while (--count >= 0) { + __raw_writel(*src, addr); + src++; + } +} +#endif + +void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) +{ + IO_COND(addr, insb(port,dst,count), mmio_insb(addr, dst, count)); +} +void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) +{ + IO_COND(addr, insw(port,dst,count), mmio_insw(addr, dst, count)); +} +void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) +{ + IO_COND(addr, insl(port,dst,count), mmio_insl(addr, dst, count)); +} +EXPORT_SYMBOL(ioread8_rep); +EXPORT_SYMBOL(ioread16_rep); +EXPORT_SYMBOL(ioread32_rep); + +void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) +{ + IO_COND(addr, outsb(port, src, count), mmio_outsb(addr, src, count)); +} +void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) +{ + IO_COND(addr, outsw(port, src, count), mmio_outsw(addr, src, count)); +} +void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) +{ + IO_COND(addr, outsl(port, src,count), mmio_outsl(addr, src, count)); +} +EXPORT_SYMBOL(iowrite8_rep); +EXPORT_SYMBOL(iowrite16_rep); +EXPORT_SYMBOL(iowrite32_rep); + +/* Create a virtual mapping cookie for an IO port range */ +void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + if (port > PIO_MASK) + return NULL; + return (void __iomem *) (unsigned long) (port + PIO_OFFSET); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +/** + * pci_iomap - create a virtual mapping cookie for a PCI BAR + * @dev: PCI device that owns the BAR + * @bar: BAR number + * @maxlen: length of the memory to map + * + * Using this function you will get a __iomem address to your device BAR. + * You can access it using ioread*() and iowrite*(). These functions hide + * the details if this is a MMIO or PIO address space and will just do what + * you expect from them in the correct way. + * + * @maxlen specifies the maximum length to map. If you want to get access to + * the complete BAR without checking for its length first, pass %0 here. + * */ +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + resource_size_t start = pci_resource_start(dev, bar); + resource_size_t len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len || !start) + return NULL; + if (maxlen && len > maxlen) + len = maxlen; + if (flags & IORESOURCE_IO) + return ioport_map(start, len); + if (flags & IORESOURCE_MEM) { + if (flags & IORESOURCE_CACHEABLE) + return ioremap(start, len); + return ioremap_nocache(start, len); + } + /* What? */ + return NULL; +} + +void pci_iounmap(struct pci_dev *dev, void __iomem * addr) +{ + IO_COND(addr, /* nothing */, iounmap(addr)); +} +EXPORT_SYMBOL(pci_iomap); +EXPORT_SYMBOL(pci_iounmap); diff --git a/lib/iomap_copy.c b/lib/iomap_copy.c new file mode 100644 index 00000000..864fc5ea --- /dev/null +++ b/lib/iomap_copy.c @@ -0,0 +1,70 @@ +/* + * Copyright 2006 PathScale, Inc. All Rights Reserved. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include <linux/module.h> +#include <linux/io.h> + +/** + * __iowrite32_copy - copy data to MMIO space, in 32-bit units + * @to: destination, in MMIO space (must be 32-bit aligned) + * @from: source (must be 32-bit aligned) + * @count: number of 32-bit quantities to copy + * + * Copy data from kernel space to MMIO space, in units of 32 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void __attribute__((weak)) __iowrite32_copy(void __iomem *to, + const void *from, + size_t count) +{ + u32 __iomem *dst = to; + const u32 *src = from; + const u32 *end = src + count; + + while (src < end) + __raw_writel(*src++, dst++); +} +EXPORT_SYMBOL_GPL(__iowrite32_copy); + +/** + * __iowrite64_copy - copy data to MMIO space, in 64-bit or 32-bit units + * @to: destination, in MMIO space (must be 64-bit aligned) + * @from: source (must be 64-bit aligned) + * @count: number of 64-bit quantities to copy + * + * Copy data from kernel space to MMIO space, in units of 32 or 64 bits at a + * time. Order of access is not guaranteed, nor is a memory barrier + * performed afterwards. + */ +void __attribute__((weak)) __iowrite64_copy(void __iomem *to, + const void *from, + size_t count) +{ +#ifdef CONFIG_64BIT + u64 __iomem *dst = to; + const u64 *src = from; + const u64 *end = src + count; + + while (src < end) + __raw_writeq(*src++, dst++); +#else + __iowrite32_copy(to, from, count * 2); +#endif +} + +EXPORT_SYMBOL_GPL(__iowrite64_copy); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c new file mode 100644 index 00000000..da053313 --- /dev/null +++ b/lib/iommu-helper.c @@ -0,0 +1,40 @@ +/* + * IOMMU helper functions for the free area management + */ + +#include <linux/module.h> +#include <linux/bitmap.h> + +int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size) +{ + BUG_ON(!is_power_of_2(boundary_size)); + + shift = (shift + index) & (boundary_size - 1); + return shift + nr > boundary_size; +} + +unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, + unsigned long start, unsigned int nr, + unsigned long shift, unsigned long boundary_size, + unsigned long align_mask) +{ + unsigned long index; + + /* We don't want the last of the limit */ + size -= 1; +again: + index = bitmap_find_next_zero_area(map, size, start, nr, align_mask); + if (index < size) { + if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { + /* we could do more effectively */ + start = index + 1; + goto again; + } + bitmap_set(map, index, nr); + return index; + } + return -1; +} +EXPORT_SYMBOL(iommu_area_alloc); diff --git a/lib/ioremap.c b/lib/ioremap.c new file mode 100644 index 00000000..5730ecd3 --- /dev/null +++ b/lib/ioremap.c @@ -0,0 +1,92 @@ +/* + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + * + * (C) Copyright 1995 1996 Linus Torvalds + */ +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/io.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> + +static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + pte_t *pte; + u64 pfn; + + pfn = phys_addr >> PAGE_SHIFT; + pte = pte_alloc_kernel(pmd, addr); + if (!pte) + return -ENOMEM; + do { + BUG_ON(!pte_none(*pte)); + set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); + return 0; +} + +static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + pmd_t *pmd; + unsigned long next; + + phys_addr -= addr; + pmd = pmd_alloc(&init_mm, pud, addr); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); + if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (pmd++, addr = next, addr != end); + return 0; +} + +static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + pud_t *pud; + unsigned long next; + + phys_addr -= addr; + pud = pud_alloc(&init_mm, pgd, addr); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); + if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) + return -ENOMEM; + } while (pud++, addr = next, addr != end); + return 0; +} + +int ioremap_page_range(unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + pgd_t *pgd; + unsigned long start; + unsigned long next; + int err; + + BUG_ON(addr >= end); + + start = addr; + phys_addr -= addr; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot); + if (err) + break; + } while (pgd++, addr = next, addr != end); + + flush_cache_vmap(start, end); + + return err; +} diff --git a/lib/irq_regs.c b/lib/irq_regs.c new file mode 100644 index 00000000..753880a5 --- /dev/null +++ b/lib/irq_regs.c @@ -0,0 +1,17 @@ +/* saved per-CPU IRQ register pointer + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> +#include <asm/irq_regs.h> + +#ifndef ARCH_HAS_OWN_IRQ_REGS +DEFINE_PER_CPU(struct pt_regs *, __irq_regs); +EXPORT_PER_CPU_SYMBOL(__irq_regs); +#endif diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c new file mode 100644 index 00000000..bd2bea96 --- /dev/null +++ b/lib/is_single_threaded.c @@ -0,0 +1,58 @@ +/* Function to determine if a thread group is single threaded or not + * + * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * - Derived from security/selinux/hooks.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#include <linux/sched.h> + +/* + * Returns true if the task does not share ->mm with another thread/process. + */ +bool current_is_single_threaded(void) +{ + struct task_struct *task = current; + struct mm_struct *mm = task->mm; + struct task_struct *p, *t; + bool ret; + + if (atomic_read(&task->signal->live) != 1) + return false; + + if (atomic_read(&mm->mm_users) == 1) + return true; + + ret = false; + rcu_read_lock(); + for_each_process(p) { + if (unlikely(p->flags & PF_KTHREAD)) + continue; + if (unlikely(p == task->group_leader)) + continue; + + t = p; + do { + if (unlikely(t->mm == mm)) + goto found; + if (likely(t->mm)) + break; + /* + * t->mm == NULL. Make sure next_thread/next_task + * will see other CLONE_VM tasks which might be + * forked before exiting. + */ + smp_rmb(); + } while_each_thread(p, t); + } + ret = true; +found: + rcu_read_unlock(); + + return ret; +} diff --git a/lib/kasprintf.c b/lib/kasprintf.c new file mode 100644 index 00000000..9c4233b2 --- /dev/null +++ b/lib/kasprintf.c @@ -0,0 +1,45 @@ +/* + * linux/lib/kasprintf.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include <stdarg.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/string.h> + +/* Simplified asprintf. */ +char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap) +{ + unsigned int len; + char *p; + va_list aq; + + va_copy(aq, ap); + len = vsnprintf(NULL, 0, fmt, aq); + va_end(aq); + + p = kmalloc(len+1, gfp); + if (!p) + return NULL; + + vsnprintf(p, len+1, fmt, ap); + + return p; +} +EXPORT_SYMBOL(kvasprintf); + +char *kasprintf(gfp_t gfp, const char *fmt, ...) +{ + va_list ap; + char *p; + + va_start(ap, fmt); + p = kvasprintf(gfp, fmt, ap); + va_end(ap); + + return p; +} +EXPORT_SYMBOL(kasprintf); diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c new file mode 100644 index 00000000..b135d04a --- /dev/null +++ b/lib/kernel_lock.c @@ -0,0 +1,143 @@ +/* + * lib/kernel_lock.c + * + * This is the traditional BKL - big kernel lock. Largely + * relegated to obsolescence, but used by various less + * important (or lazy) subsystems. + */ +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/semaphore.h> +#include <linux/smp_lock.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/bkl.h> + +/* + * The 'big kernel lock' + * + * This spinlock is taken and released recursively by lock_kernel() + * and unlock_kernel(). It is transparently dropped and reacquired + * over schedule(). It is used to protect legacy code that hasn't + * been migrated to a proper locking design yet. + * + * Don't use in new code. + */ +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag); + + +/* + * Acquire/release the underlying lock from the scheduler. + * + * This is called with preemption disabled, and should + * return an error value if it cannot get the lock and + * TIF_NEED_RESCHED gets set. + * + * If it successfully gets the lock, it should increment + * the preemption count like any spinlock does. + * + * (This works on UP too - do_raw_spin_trylock will never + * return false in that case) + */ +int __lockfunc __reacquire_kernel_lock(void) +{ + while (!do_raw_spin_trylock(&kernel_flag)) { + if (need_resched()) + return -EAGAIN; + cpu_relax(); + } + preempt_disable(); + return 0; +} + +void __lockfunc __release_kernel_lock(void) +{ + do_raw_spin_unlock(&kernel_flag); + preempt_enable_no_resched(); +} + +/* + * These are the BKL spinlocks - we try to be polite about preemption. + * If SMP is not on (ie UP preemption), this all goes away because the + * do_raw_spin_trylock() will always succeed. + */ +#ifdef CONFIG_PREEMPT +static inline void __lock_kernel(void) +{ + preempt_disable(); + if (unlikely(!do_raw_spin_trylock(&kernel_flag))) { + /* + * If preemption was disabled even before this + * was called, there's nothing we can be polite + * about - just spin. + */ + if (preempt_count() > 1) { + do_raw_spin_lock(&kernel_flag); + return; + } + + /* + * Otherwise, let's wait for the kernel lock + * with preemption enabled.. + */ + do { + preempt_enable(); + while (raw_spin_is_locked(&kernel_flag)) + cpu_relax(); + preempt_disable(); + } while (!do_raw_spin_trylock(&kernel_flag)); + } +} + +#else + +/* + * Non-preemption case - just get the spinlock + */ +static inline void __lock_kernel(void) +{ + do_raw_spin_lock(&kernel_flag); +} +#endif + +static inline void __unlock_kernel(void) +{ + /* + * the BKL is not covered by lockdep, so we open-code the + * unlocking sequence (and thus avoid the dep-chain ops): + */ + do_raw_spin_unlock(&kernel_flag); + preempt_enable(); +} + +/* + * Getting the big kernel lock. + * + * This cannot happen asynchronously, so we only need to + * worry about other CPU's. + */ +void __lockfunc _lock_kernel(const char *func, const char *file, int line) +{ + int depth = current->lock_depth + 1; + + trace_lock_kernel(func, file, line); + + if (likely(!depth)) { + might_sleep(); + __lock_kernel(); + } + current->lock_depth = depth; +} + +void __lockfunc _unlock_kernel(const char *func, const char *file, int line) +{ + BUG_ON(current->lock_depth < 0); + if (likely(--current->lock_depth < 0)) + __unlock_kernel(); + + trace_unlock_kernel(func, file, line); +} + +EXPORT_SYMBOL(_lock_kernel); +EXPORT_SYMBOL(_unlock_kernel); + diff --git a/lib/klist.c b/lib/klist.c new file mode 100644 index 00000000..573d6068 --- /dev/null +++ b/lib/klist.c @@ -0,0 +1,365 @@ +/* + * klist.c - Routines for manipulating klists. + * + * Copyright (C) 2005 Patrick Mochel + * + * This file is released under the GPL v2. + * + * This klist interface provides a couple of structures that wrap around + * struct list_head to provide explicit list "head" (struct klist) and list + * "node" (struct klist_node) objects. For struct klist, a spinlock is + * included that protects access to the actual list itself. struct + * klist_node provides a pointer to the klist that owns it and a kref + * reference count that indicates the number of current users of that node + * in the list. + * + * The entire point is to provide an interface for iterating over a list + * that is safe and allows for modification of the list during the + * iteration (e.g. insertion and removal), including modification of the + * current node on the list. + * + * It works using a 3rd object type - struct klist_iter - that is declared + * and initialized before an iteration. klist_next() is used to acquire the + * next element in the list. It returns NULL if there are no more items. + * Internally, that routine takes the klist's lock, decrements the + * reference count of the previous klist_node and increments the count of + * the next klist_node. It then drops the lock and returns. + * + * There are primitives for adding and removing nodes to/from a klist. + * When deleting, klist_del() will simply decrement the reference count. + * Only when the count goes to 0 is the node removed from the list. + * klist_remove() will try to delete the node from the list and block until + * it is actually removed. This is useful for objects (like devices) that + * have been removed from the system and must be freed (but must wait until + * all accessors have finished). + */ + +#include <linux/klist.h> +#include <linux/module.h> +#include <linux/sched.h> + +/* + * Use the lowest bit of n_klist to mark deleted nodes and exclude + * dead ones from iteration. + */ +#define KNODE_DEAD 1LU +#define KNODE_KLIST_MASK ~KNODE_DEAD + +static struct klist *knode_klist(struct klist_node *knode) +{ + return (struct klist *) + ((unsigned long)knode->n_klist & KNODE_KLIST_MASK); +} + +static bool knode_dead(struct klist_node *knode) +{ + return (unsigned long)knode->n_klist & KNODE_DEAD; +} + +static void knode_set_klist(struct klist_node *knode, struct klist *klist) +{ + knode->n_klist = klist; + /* no knode deserves to start its life dead */ + WARN_ON(knode_dead(knode)); +} + +static void knode_kill(struct klist_node *knode) +{ + /* and no knode should die twice ever either, see we're very humane */ + WARN_ON(knode_dead(knode)); + *(unsigned long *)&knode->n_klist |= KNODE_DEAD; +} + +/** + * klist_init - Initialize a klist structure. + * @k: The klist we're initializing. + * @get: The get function for the embedding object (NULL if none) + * @put: The put function for the embedding object (NULL if none) + * + * Initialises the klist structure. If the klist_node structures are + * going to be embedded in refcounted objects (necessary for safe + * deletion) then the get/put arguments are used to initialise + * functions that take and release references on the embedding + * objects. + */ +void klist_init(struct klist *k, void (*get)(struct klist_node *), + void (*put)(struct klist_node *)) +{ + INIT_LIST_HEAD(&k->k_list); + spin_lock_init(&k->k_lock); + k->get = get; + k->put = put; +} +EXPORT_SYMBOL_GPL(klist_init); + +static void add_head(struct klist *k, struct klist_node *n) +{ + spin_lock(&k->k_lock); + list_add(&n->n_node, &k->k_list); + spin_unlock(&k->k_lock); +} + +static void add_tail(struct klist *k, struct klist_node *n) +{ + spin_lock(&k->k_lock); + list_add_tail(&n->n_node, &k->k_list); + spin_unlock(&k->k_lock); +} + +static void klist_node_init(struct klist *k, struct klist_node *n) +{ + INIT_LIST_HEAD(&n->n_node); + kref_init(&n->n_ref); + knode_set_klist(n, k); + if (k->get) + k->get(n); +} + +/** + * klist_add_head - Initialize a klist_node and add it to front. + * @n: node we're adding. + * @k: klist it's going on. + */ +void klist_add_head(struct klist_node *n, struct klist *k) +{ + klist_node_init(k, n); + add_head(k, n); +} +EXPORT_SYMBOL_GPL(klist_add_head); + +/** + * klist_add_tail - Initialize a klist_node and add it to back. + * @n: node we're adding. + * @k: klist it's going on. + */ +void klist_add_tail(struct klist_node *n, struct klist *k) +{ + klist_node_init(k, n); + add_tail(k, n); +} +EXPORT_SYMBOL_GPL(klist_add_tail); + +/** + * klist_add_after - Init a klist_node and add it after an existing node + * @n: node we're adding. + * @pos: node to put @n after + */ +void klist_add_after(struct klist_node *n, struct klist_node *pos) +{ + struct klist *k = knode_klist(pos); + + klist_node_init(k, n); + spin_lock(&k->k_lock); + list_add(&n->n_node, &pos->n_node); + spin_unlock(&k->k_lock); +} +EXPORT_SYMBOL_GPL(klist_add_after); + +/** + * klist_add_before - Init a klist_node and add it before an existing node + * @n: node we're adding. + * @pos: node to put @n after + */ +void klist_add_before(struct klist_node *n, struct klist_node *pos) +{ + struct klist *k = knode_klist(pos); + + klist_node_init(k, n); + spin_lock(&k->k_lock); + list_add_tail(&n->n_node, &pos->n_node); + spin_unlock(&k->k_lock); +} +EXPORT_SYMBOL_GPL(klist_add_before); + +struct klist_waiter { + struct list_head list; + struct klist_node *node; + struct task_struct *process; + int woken; +}; + +static DEFINE_SPINLOCK(klist_remove_lock); +static LIST_HEAD(klist_remove_waiters); + +static void klist_release(struct kref *kref) +{ + struct klist_waiter *waiter, *tmp; + struct klist_node *n = container_of(kref, struct klist_node, n_ref); + + WARN_ON(!knode_dead(n)); + list_del(&n->n_node); + spin_lock(&klist_remove_lock); + list_for_each_entry_safe(waiter, tmp, &klist_remove_waiters, list) { + if (waiter->node != n) + continue; + + waiter->woken = 1; + mb(); + wake_up_process(waiter->process); + list_del(&waiter->list); + } + spin_unlock(&klist_remove_lock); + knode_set_klist(n, NULL); +} + +static int klist_dec_and_del(struct klist_node *n) +{ + return kref_put(&n->n_ref, klist_release); +} + +static void klist_put(struct klist_node *n, bool kill) +{ + struct klist *k = knode_klist(n); + void (*put)(struct klist_node *) = k->put; + + spin_lock(&k->k_lock); + if (kill) + knode_kill(n); + if (!klist_dec_and_del(n)) + put = NULL; + spin_unlock(&k->k_lock); + if (put) + put(n); +} + +/** + * klist_del - Decrement the reference count of node and try to remove. + * @n: node we're deleting. + */ +void klist_del(struct klist_node *n) +{ + klist_put(n, true); +} +EXPORT_SYMBOL_GPL(klist_del); + +/** + * klist_remove - Decrement the refcount of node and wait for it to go away. + * @n: node we're removing. + */ +void klist_remove(struct klist_node *n) +{ + struct klist_waiter waiter; + + waiter.node = n; + waiter.process = current; + waiter.woken = 0; + spin_lock(&klist_remove_lock); + list_add(&waiter.list, &klist_remove_waiters); + spin_unlock(&klist_remove_lock); + + klist_del(n); + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (waiter.woken) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); +} +EXPORT_SYMBOL_GPL(klist_remove); + +/** + * klist_node_attached - Say whether a node is bound to a list or not. + * @n: Node that we're testing. + */ +int klist_node_attached(struct klist_node *n) +{ + return (n->n_klist != NULL); +} +EXPORT_SYMBOL_GPL(klist_node_attached); + +/** + * klist_iter_init_node - Initialize a klist_iter structure. + * @k: klist we're iterating. + * @i: klist_iter we're filling. + * @n: node to start with. + * + * Similar to klist_iter_init(), but starts the action off with @n, + * instead of with the list head. + */ +void klist_iter_init_node(struct klist *k, struct klist_iter *i, + struct klist_node *n) +{ + i->i_klist = k; + i->i_cur = n; + if (n) + kref_get(&n->n_ref); +} +EXPORT_SYMBOL_GPL(klist_iter_init_node); + +/** + * klist_iter_init - Iniitalize a klist_iter structure. + * @k: klist we're iterating. + * @i: klist_iter structure we're filling. + * + * Similar to klist_iter_init_node(), but start with the list head. + */ +void klist_iter_init(struct klist *k, struct klist_iter *i) +{ + klist_iter_init_node(k, i, NULL); +} +EXPORT_SYMBOL_GPL(klist_iter_init); + +/** + * klist_iter_exit - Finish a list iteration. + * @i: Iterator structure. + * + * Must be called when done iterating over list, as it decrements the + * refcount of the current node. Necessary in case iteration exited before + * the end of the list was reached, and always good form. + */ +void klist_iter_exit(struct klist_iter *i) +{ + if (i->i_cur) { + klist_put(i->i_cur, false); + i->i_cur = NULL; + } +} +EXPORT_SYMBOL_GPL(klist_iter_exit); + +static struct klist_node *to_klist_node(struct list_head *n) +{ + return container_of(n, struct klist_node, n_node); +} + +/** + * klist_next - Ante up next node in list. + * @i: Iterator structure. + * + * First grab list lock. Decrement the reference count of the previous + * node, if there was one. Grab the next node, increment its reference + * count, drop the lock, and return that next node. + */ +struct klist_node *klist_next(struct klist_iter *i) +{ + void (*put)(struct klist_node *) = i->i_klist->put; + struct klist_node *last = i->i_cur; + struct klist_node *next; + + spin_lock(&i->i_klist->k_lock); + + if (last) { + next = to_klist_node(last->n_node.next); + if (!klist_dec_and_del(last)) + put = NULL; + } else + next = to_klist_node(i->i_klist->k_list.next); + + i->i_cur = NULL; + while (next != to_klist_node(&i->i_klist->k_list)) { + if (likely(!knode_dead(next))) { + kref_get(&next->n_ref); + i->i_cur = next; + break; + } + next = to_klist_node(next->n_node.next); + } + + spin_unlock(&i->i_klist->k_lock); + + if (put && last) + put(last); + return i->i_cur; +} +EXPORT_SYMBOL_GPL(klist_next); diff --git a/lib/kobject.c b/lib/kobject.c new file mode 100644 index 00000000..f07c5725 --- /dev/null +++ b/lib/kobject.c @@ -0,0 +1,973 @@ +/* + * kobject.c - library routines for handling generic kernel objects + * + * Copyright (c) 2002-2003 Patrick Mochel <mochel@osdl.org> + * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (c) 2006-2007 Novell Inc. + * + * This file is released under the GPLv2. + * + * + * Please see the file Documentation/kobject.txt for critical information + * about using the kobject interface. + */ + +#include <linux/kobject.h> +#include <linux/string.h> +#include <linux/module.h> +#include <linux/stat.h> +#include <linux/slab.h> + +/* + * populate_dir - populate directory with attributes. + * @kobj: object we're working on. + * + * Most subsystems have a set of default attributes that are associated + * with an object that registers with them. This is a helper called during + * object registration that loops through the default attributes of the + * subsystem and creates attributes files for them in sysfs. + */ +static int populate_dir(struct kobject *kobj) +{ + struct kobj_type *t = get_ktype(kobj); + struct attribute *attr; + int error = 0; + int i; + + if (t && t->default_attrs) { + for (i = 0; (attr = t->default_attrs[i]) != NULL; i++) { + error = sysfs_create_file(kobj, attr); + if (error) + break; + } + } + return error; +} + +static int create_dir(struct kobject *kobj) +{ + int error = 0; + if (kobject_name(kobj)) { + error = sysfs_create_dir(kobj); + if (!error) { + error = populate_dir(kobj); + if (error) + sysfs_remove_dir(kobj); + } + } + return error; +} + +static int get_kobj_path_length(struct kobject *kobj) +{ + int length = 1; + struct kobject *parent = kobj; + + /* walk up the ancestors until we hit the one pointing to the + * root. + * Add 1 to strlen for leading '/' of each level. + */ + do { + if (kobject_name(parent) == NULL) + return 0; + length += strlen(kobject_name(parent)) + 1; + parent = parent->parent; + } while (parent); + return length; +} + +static void fill_kobj_path(struct kobject *kobj, char *path, int length) +{ + struct kobject *parent; + + --length; + for (parent = kobj; parent; parent = parent->parent) { + int cur = strlen(kobject_name(parent)); + /* back up enough to print this name with '/' */ + length -= cur; + strncpy(path + length, kobject_name(parent), cur); + *(path + --length) = '/'; + } + + pr_debug("kobject: '%s' (%p): %s: path = '%s'\n", kobject_name(kobj), + kobj, __func__, path); +} + +/** + * kobject_get_path - generate and return the path associated with a given kobj and kset pair. + * + * @kobj: kobject in question, with which to build the path + * @gfp_mask: the allocation type used to allocate the path + * + * The result must be freed by the caller with kfree(). + */ +char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask) +{ + char *path; + int len; + + len = get_kobj_path_length(kobj); + if (len == 0) + return NULL; + path = kzalloc(len, gfp_mask); + if (!path) + return NULL; + fill_kobj_path(kobj, path, len); + + return path; +} +EXPORT_SYMBOL_GPL(kobject_get_path); + +/* add the kobject to its kset's list */ +static void kobj_kset_join(struct kobject *kobj) +{ + if (!kobj->kset) + return; + + kset_get(kobj->kset); + spin_lock(&kobj->kset->list_lock); + list_add_tail(&kobj->entry, &kobj->kset->list); + spin_unlock(&kobj->kset->list_lock); +} + +/* remove the kobject from its kset's list */ +static void kobj_kset_leave(struct kobject *kobj) +{ + if (!kobj->kset) + return; + + spin_lock(&kobj->kset->list_lock); + list_del_init(&kobj->entry); + spin_unlock(&kobj->kset->list_lock); + kset_put(kobj->kset); +} + +static void kobject_init_internal(struct kobject *kobj) +{ + if (!kobj) + return; + kref_init(&kobj->kref); + INIT_LIST_HEAD(&kobj->entry); + kobj->state_in_sysfs = 0; + kobj->state_add_uevent_sent = 0; + kobj->state_remove_uevent_sent = 0; + kobj->state_initialized = 1; +} + + +static int kobject_add_internal(struct kobject *kobj) +{ + int error = 0; + struct kobject *parent; + + if (!kobj) + return -ENOENT; + + if (!kobj->name || !kobj->name[0]) { + WARN(1, "kobject: (%p): attempted to be registered with empty " + "name!\n", kobj); + return -EINVAL; + } + + parent = kobject_get(kobj->parent); + + /* join kset if set, use it as parent if we do not already have one */ + if (kobj->kset) { + if (!parent) + parent = kobject_get(&kobj->kset->kobj); + kobj_kset_join(kobj); + kobj->parent = parent; + } + + pr_debug("kobject: '%s' (%p): %s: parent: '%s', set: '%s'\n", + kobject_name(kobj), kobj, __func__, + parent ? kobject_name(parent) : "<NULL>", + kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>"); + + error = create_dir(kobj); + if (error) { + kobj_kset_leave(kobj); + kobject_put(parent); + kobj->parent = NULL; + + /* be noisy on error issues */ + if (error == -EEXIST) + printk(KERN_ERR "%s failed for %s with " + "-EEXIST, don't try to register things with " + "the same name in the same directory.\n", + __func__, kobject_name(kobj)); + else + printk(KERN_ERR "%s failed for %s (%d)\n", + __func__, kobject_name(kobj), error); + dump_stack(); + } else + kobj->state_in_sysfs = 1; + + return error; +} + +/** + * kobject_set_name_vargs - Set the name of an kobject + * @kobj: struct kobject to set the name of + * @fmt: format string used to build the name + * @vargs: vargs to format the string. + */ +int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, + va_list vargs) +{ + const char *old_name = kobj->name; + char *s; + + if (kobj->name && !fmt) + return 0; + + kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); + if (!kobj->name) + return -ENOMEM; + + /* ewww... some of these buggers have '/' in the name ... */ + while ((s = strchr(kobj->name, '/'))) + s[0] = '!'; + + kfree(old_name); + return 0; +} + +/** + * kobject_set_name - Set the name of a kobject + * @kobj: struct kobject to set the name of + * @fmt: format string used to build the name + * + * This sets the name of the kobject. If you have already added the + * kobject to the system, you must call kobject_rename() in order to + * change the name of the kobject. + */ +int kobject_set_name(struct kobject *kobj, const char *fmt, ...) +{ + va_list vargs; + int retval; + + va_start(vargs, fmt); + retval = kobject_set_name_vargs(kobj, fmt, vargs); + va_end(vargs); + + return retval; +} +EXPORT_SYMBOL(kobject_set_name); + +/** + * kobject_init - initialize a kobject structure + * @kobj: pointer to the kobject to initialize + * @ktype: pointer to the ktype for this kobject. + * + * This function will properly initialize a kobject such that it can then + * be passed to the kobject_add() call. + * + * After this function is called, the kobject MUST be cleaned up by a call + * to kobject_put(), not by a call to kfree directly to ensure that all of + * the memory is cleaned up properly. + */ +void kobject_init(struct kobject *kobj, struct kobj_type *ktype) +{ + char *err_str; + + if (!kobj) { + err_str = "invalid kobject pointer!"; + goto error; + } + if (!ktype) { + err_str = "must have a ktype to be initialized properly!\n"; + goto error; + } + if (kobj->state_initialized) { + /* do not error out as sometimes we can recover */ + printk(KERN_ERR "kobject (%p): tried to init an initialized " + "object, something is seriously wrong.\n", kobj); + dump_stack(); + } + + kobject_init_internal(kobj); + kobj->ktype = ktype; + return; + +error: + printk(KERN_ERR "kobject (%p): %s\n", kobj, err_str); + dump_stack(); +} +EXPORT_SYMBOL(kobject_init); + +static int kobject_add_varg(struct kobject *kobj, struct kobject *parent, + const char *fmt, va_list vargs) +{ + int retval; + + retval = kobject_set_name_vargs(kobj, fmt, vargs); + if (retval) { + printk(KERN_ERR "kobject: can not set name properly!\n"); + return retval; + } + kobj->parent = parent; + return kobject_add_internal(kobj); +} + +/** + * kobject_add - the main kobject add function + * @kobj: the kobject to add + * @parent: pointer to the parent of the kobject. + * @fmt: format to name the kobject with. + * + * The kobject name is set and added to the kobject hierarchy in this + * function. + * + * If @parent is set, then the parent of the @kobj will be set to it. + * If @parent is NULL, then the parent of the @kobj will be set to the + * kobject associted with the kset assigned to this kobject. If no kset + * is assigned to the kobject, then the kobject will be located in the + * root of the sysfs tree. + * + * If this function returns an error, kobject_put() must be called to + * properly clean up the memory associated with the object. + * Under no instance should the kobject that is passed to this function + * be directly freed with a call to kfree(), that can leak memory. + * + * Note, no "add" uevent will be created with this call, the caller should set + * up all of the necessary sysfs files for the object and then call + * kobject_uevent() with the UEVENT_ADD parameter to ensure that + * userspace is properly notified of this kobject's creation. + */ +int kobject_add(struct kobject *kobj, struct kobject *parent, + const char *fmt, ...) +{ + va_list args; + int retval; + + if (!kobj) + return -EINVAL; + + if (!kobj->state_initialized) { + printk(KERN_ERR "kobject '%s' (%p): tried to add an " + "uninitialized object, something is seriously wrong.\n", + kobject_name(kobj), kobj); + dump_stack(); + return -EINVAL; + } + va_start(args, fmt); + retval = kobject_add_varg(kobj, parent, fmt, args); + va_end(args); + + return retval; +} +EXPORT_SYMBOL(kobject_add); + +/** + * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy + * @kobj: pointer to the kobject to initialize + * @ktype: pointer to the ktype for this kobject. + * @parent: pointer to the parent of this kobject. + * @fmt: the name of the kobject. + * + * This function combines the call to kobject_init() and + * kobject_add(). The same type of error handling after a call to + * kobject_add() and kobject lifetime rules are the same here. + */ +int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, + struct kobject *parent, const char *fmt, ...) +{ + va_list args; + int retval; + + kobject_init(kobj, ktype); + + va_start(args, fmt); + retval = kobject_add_varg(kobj, parent, fmt, args); + va_end(args); + + return retval; +} +EXPORT_SYMBOL_GPL(kobject_init_and_add); + +/** + * kobject_rename - change the name of an object + * @kobj: object in question. + * @new_name: object's new name + * + * It is the responsibility of the caller to provide mutual + * exclusion between two different calls of kobject_rename + * on the same kobject and to ensure that new_name is valid and + * won't conflict with other kobjects. + */ +int kobject_rename(struct kobject *kobj, const char *new_name) +{ + int error = 0; + const char *devpath = NULL; + const char *dup_name = NULL, *name; + char *devpath_string = NULL; + char *envp[2]; + + kobj = kobject_get(kobj); + if (!kobj) + return -EINVAL; + if (!kobj->parent) + return -EINVAL; + + devpath = kobject_get_path(kobj, GFP_KERNEL); + if (!devpath) { + error = -ENOMEM; + goto out; + } + devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); + if (!devpath_string) { + error = -ENOMEM; + goto out; + } + sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); + envp[0] = devpath_string; + envp[1] = NULL; + + name = dup_name = kstrdup(new_name, GFP_KERNEL); + if (!name) { + error = -ENOMEM; + goto out; + } + + error = sysfs_rename_dir(kobj, new_name); + if (error) + goto out; + + /* Install the new kobject name */ + dup_name = kobj->name; + kobj->name = name; + + /* This function is mostly/only used for network interface. + * Some hotplug package track interfaces by their name and + * therefore want to know when the name is changed by the user. */ + kobject_uevent_env(kobj, KOBJ_MOVE, envp); + +out: + kfree(dup_name); + kfree(devpath_string); + kfree(devpath); + kobject_put(kobj); + + return error; +} +EXPORT_SYMBOL_GPL(kobject_rename); + +/** + * kobject_move - move object to another parent + * @kobj: object in question. + * @new_parent: object's new parent (can be NULL) + */ +int kobject_move(struct kobject *kobj, struct kobject *new_parent) +{ + int error; + struct kobject *old_parent; + const char *devpath = NULL; + char *devpath_string = NULL; + char *envp[2]; + + kobj = kobject_get(kobj); + if (!kobj) + return -EINVAL; + new_parent = kobject_get(new_parent); + if (!new_parent) { + if (kobj->kset) + new_parent = kobject_get(&kobj->kset->kobj); + } + /* old object path */ + devpath = kobject_get_path(kobj, GFP_KERNEL); + if (!devpath) { + error = -ENOMEM; + goto out; + } + devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); + if (!devpath_string) { + error = -ENOMEM; + goto out; + } + sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); + envp[0] = devpath_string; + envp[1] = NULL; + error = sysfs_move_dir(kobj, new_parent); + if (error) + goto out; + old_parent = kobj->parent; + kobj->parent = new_parent; + new_parent = NULL; + kobject_put(old_parent); + kobject_uevent_env(kobj, KOBJ_MOVE, envp); +out: + kobject_put(new_parent); + kobject_put(kobj); + kfree(devpath_string); + kfree(devpath); + return error; +} + +/** + * kobject_del - unlink kobject from hierarchy. + * @kobj: object. + */ +void kobject_del(struct kobject *kobj) +{ + if (!kobj) + return; + + sysfs_remove_dir(kobj); + kobj->state_in_sysfs = 0; + kobj_kset_leave(kobj); + kobject_put(kobj->parent); + kobj->parent = NULL; +} + +/** + * kobject_get - increment refcount for object. + * @kobj: object. + */ +struct kobject *kobject_get(struct kobject *kobj) +{ + if (kobj) + kref_get(&kobj->kref); + return kobj; +} + +/* + * kobject_cleanup - free kobject resources. + * @kobj: object to cleanup + */ +static void kobject_cleanup(struct kobject *kobj) +{ + struct kobj_type *t = get_ktype(kobj); + const char *name = kobj->name; + + pr_debug("kobject: '%s' (%p): %s\n", + kobject_name(kobj), kobj, __func__); + + if (t && !t->release) + pr_debug("kobject: '%s' (%p): does not have a release() " + "function, it is broken and must be fixed.\n", + kobject_name(kobj), kobj); + + /* send "remove" if the caller did not do it but sent "add" */ + if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { + pr_debug("kobject: '%s' (%p): auto cleanup 'remove' event\n", + kobject_name(kobj), kobj); + kobject_uevent(kobj, KOBJ_REMOVE); + } + + /* remove from sysfs if the caller did not do it */ + if (kobj->state_in_sysfs) { + pr_debug("kobject: '%s' (%p): auto cleanup kobject_del\n", + kobject_name(kobj), kobj); + kobject_del(kobj); + } + + if (t && t->release) { + pr_debug("kobject: '%s' (%p): calling ktype release\n", + kobject_name(kobj), kobj); + t->release(kobj); + } + + /* free name if we allocated it */ + if (name) { + pr_debug("kobject: '%s': free name\n", name); + kfree(name); + } +} + +static void kobject_release(struct kref *kref) +{ + kobject_cleanup(container_of(kref, struct kobject, kref)); +} + +/** + * kobject_put - decrement refcount for object. + * @kobj: object. + * + * Decrement the refcount, and if 0, call kobject_cleanup(). + */ +void kobject_put(struct kobject *kobj) +{ + if (kobj) { + if (!kobj->state_initialized) + WARN(1, KERN_WARNING "kobject: '%s' (%p): is not " + "initialized, yet kobject_put() is being " + "called.\n", kobject_name(kobj), kobj); + kref_put(&kobj->kref, kobject_release); + } +} + +static void dynamic_kobj_release(struct kobject *kobj) +{ + pr_debug("kobject: (%p): %s\n", kobj, __func__); + kfree(kobj); +} + +static struct kobj_type dynamic_kobj_ktype = { + .release = dynamic_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +/** + * kobject_create - create a struct kobject dynamically + * + * This function creates a kobject structure dynamically and sets it up + * to be a "dynamic" kobject with a default release function set up. + * + * If the kobject was not able to be created, NULL will be returned. + * The kobject structure returned from here must be cleaned up with a + * call to kobject_put() and not kfree(), as kobject_init() has + * already been called on this structure. + */ +struct kobject *kobject_create(void) +{ + struct kobject *kobj; + + kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); + if (!kobj) + return NULL; + + kobject_init(kobj, &dynamic_kobj_ktype); + return kobj; +} + +/** + * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs + * + * @name: the name for the kset + * @parent: the parent kobject of this kobject, if any. + * + * This function creates a kobject structure dynamically and registers it + * with sysfs. When you are finished with this structure, call + * kobject_put() and the structure will be dynamically freed when + * it is no longer being used. + * + * If the kobject was not able to be created, NULL will be returned. + */ +struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) +{ + struct kobject *kobj; + int retval; + + kobj = kobject_create(); + if (!kobj) + return NULL; + + retval = kobject_add(kobj, parent, "%s", name); + if (retval) { + printk(KERN_WARNING "%s: kobject_add error: %d\n", + __func__, retval); + kobject_put(kobj); + kobj = NULL; + } + return kobj; +} +EXPORT_SYMBOL_GPL(kobject_create_and_add); + +/** + * kset_init - initialize a kset for use + * @k: kset + */ +void kset_init(struct kset *k) +{ + kobject_init_internal(&k->kobj); + INIT_LIST_HEAD(&k->list); + spin_lock_init(&k->list_lock); +} + +/* default kobject attribute operations */ +static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->show) + ret = kattr->show(kobj, kattr, buf); + return ret; +} + +static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct kobj_attribute *kattr; + ssize_t ret = -EIO; + + kattr = container_of(attr, struct kobj_attribute, attr); + if (kattr->store) + ret = kattr->store(kobj, kattr, buf, count); + return ret; +} + +const struct sysfs_ops kobj_sysfs_ops = { + .show = kobj_attr_show, + .store = kobj_attr_store, +}; + +/** + * kset_register - initialize and add a kset. + * @k: kset. + */ +int kset_register(struct kset *k) +{ + int err; + + if (!k) + return -EINVAL; + + kset_init(k); + err = kobject_add_internal(&k->kobj); + if (err) + return err; + kobject_uevent(&k->kobj, KOBJ_ADD); + return 0; +} + +/** + * kset_unregister - remove a kset. + * @k: kset. + */ +void kset_unregister(struct kset *k) +{ + if (!k) + return; + kobject_put(&k->kobj); +} + +/** + * kset_find_obj - search for object in kset. + * @kset: kset we're looking in. + * @name: object's name. + * + * Lock kset via @kset->subsys, and iterate over @kset->list, + * looking for a matching kobject. If matching object is found + * take a reference and return the object. + */ +struct kobject *kset_find_obj(struct kset *kset, const char *name) +{ + struct kobject *k; + struct kobject *ret = NULL; + + spin_lock(&kset->list_lock); + list_for_each_entry(k, &kset->list, entry) { + if (kobject_name(k) && !strcmp(kobject_name(k), name)) { + ret = kobject_get(k); + break; + } + } + spin_unlock(&kset->list_lock); + return ret; +} + +static void kset_release(struct kobject *kobj) +{ + struct kset *kset = container_of(kobj, struct kset, kobj); + pr_debug("kobject: '%s' (%p): %s\n", + kobject_name(kobj), kobj, __func__); + kfree(kset); +} + +static struct kobj_type kset_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .release = kset_release, +}; + +/** + * kset_create - create a struct kset dynamically + * + * @name: the name for the kset + * @uevent_ops: a struct kset_uevent_ops for the kset + * @parent_kobj: the parent kobject of this kset, if any. + * + * This function creates a kset structure dynamically. This structure can + * then be registered with the system and show up in sysfs with a call to + * kset_register(). When you are finished with this structure, if + * kset_register() has been called, call kset_unregister() and the + * structure will be dynamically freed when it is no longer being used. + * + * If the kset was not able to be created, NULL will be returned. + */ +static struct kset *kset_create(const char *name, + const struct kset_uevent_ops *uevent_ops, + struct kobject *parent_kobj) +{ + struct kset *kset; + int retval; + + kset = kzalloc(sizeof(*kset), GFP_KERNEL); + if (!kset) + return NULL; + retval = kobject_set_name(&kset->kobj, name); + if (retval) { + kfree(kset); + return NULL; + } + kset->uevent_ops = uevent_ops; + kset->kobj.parent = parent_kobj; + + /* + * The kobject of this kset will have a type of kset_ktype and belong to + * no kset itself. That way we can properly free it when it is + * finished being used. + */ + kset->kobj.ktype = &kset_ktype; + kset->kobj.kset = NULL; + + return kset; +} + +/** + * kset_create_and_add - create a struct kset dynamically and add it to sysfs + * + * @name: the name for the kset + * @uevent_ops: a struct kset_uevent_ops for the kset + * @parent_kobj: the parent kobject of this kset, if any. + * + * This function creates a kset structure dynamically and registers it + * with sysfs. When you are finished with this structure, call + * kset_unregister() and the structure will be dynamically freed when it + * is no longer being used. + * + * If the kset was not able to be created, NULL will be returned. + */ +struct kset *kset_create_and_add(const char *name, + const struct kset_uevent_ops *uevent_ops, + struct kobject *parent_kobj) +{ + struct kset *kset; + int error; + + kset = kset_create(name, uevent_ops, parent_kobj); + if (!kset) + return NULL; + error = kset_register(kset); + if (error) { + kfree(kset); + return NULL; + } + return kset; +} +EXPORT_SYMBOL_GPL(kset_create_and_add); + + +static DEFINE_SPINLOCK(kobj_ns_type_lock); +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; + +int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +{ + enum kobj_ns_type type = ops->type; + int error; + + spin_lock(&kobj_ns_type_lock); + + error = -EINVAL; + if (type >= KOBJ_NS_TYPES) + goto out; + + error = -EINVAL; + if (type <= KOBJ_NS_TYPE_NONE) + goto out; + + error = -EBUSY; + if (kobj_ns_ops_tbl[type]) + goto out; + + error = 0; + kobj_ns_ops_tbl[type] = ops; + +out: + spin_unlock(&kobj_ns_type_lock); + return error; +} + +int kobj_ns_type_registered(enum kobj_ns_type type) +{ + int registered = 0; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES)) + registered = kobj_ns_ops_tbl[type] != NULL; + spin_unlock(&kobj_ns_type_lock); + + return registered; +} + +const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent) +{ + const struct kobj_ns_type_operations *ops = NULL; + + if (parent && parent->ktype->child_ns_type) + ops = parent->ktype->child_ns_type(parent); + + return ops; +} + +const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) +{ + return kobj_child_ns_ops(kobj->parent); +} + + +const void *kobj_ns_current(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->current_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->netlink_ns(sk); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +const void *kobj_ns_initial(enum kobj_ns_type type) +{ + const void *ns = NULL; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + ns = kobj_ns_ops_tbl[type]->initial_ns(); + spin_unlock(&kobj_ns_type_lock); + + return ns; +} + +/* + * kobj_ns_exit - invalidate a namespace tag + * + * @type: the namespace type (i.e. KOBJ_NS_TYPE_NET) + * @ns: the actual namespace being invalidated + * + * This is called when a tag is no longer valid. For instance, + * when a network namespace exits, it uses this helper to + * make sure no sb's sysfs_info points to the now-invalidated + * netns. + */ +void kobj_ns_exit(enum kobj_ns_type type, const void *ns) +{ + sysfs_exit_ns(type, ns); +} + + +EXPORT_SYMBOL(kobject_get); +EXPORT_SYMBOL(kobject_put); +EXPORT_SYMBOL(kobject_del); + +EXPORT_SYMBOL(kset_register); +EXPORT_SYMBOL(kset_unregister); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c new file mode 100644 index 00000000..70af0a7f --- /dev/null +++ b/lib/kobject_uevent.c @@ -0,0 +1,425 @@ +/* + * kernel userspace event delivery + * + * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004 Novell, Inc. All rights reserved. + * Copyright (C) 2004 IBM, Inc. All rights reserved. + * + * Licensed under the GNU GPL v2. + * + * Authors: + * Robert Love <rml@novell.com> + * Kay Sievers <kay.sievers@vrfy.org> + * Arjan van de Ven <arjanv@redhat.com> + * Greg Kroah-Hartman <greg@kroah.com> + */ + +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/user_namespace.h> +#include <linux/socket.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <net/sock.h> +#include <net/net_namespace.h> + + +u64 uevent_seqnum; +char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH; +static DEFINE_SPINLOCK(sequence_lock); +#ifdef CONFIG_NET +struct uevent_sock { + struct list_head list; + struct sock *sk; +}; +static LIST_HEAD(uevent_sock_list); +static DEFINE_MUTEX(uevent_sock_mutex); +#endif + +/* the strings here must match the enum in include/linux/kobject.h */ +static const char *kobject_actions[] = { + [KOBJ_ADD] = "add", + [KOBJ_REMOVE] = "remove", + [KOBJ_CHANGE] = "change", + [KOBJ_MOVE] = "move", + [KOBJ_ONLINE] = "online", + [KOBJ_OFFLINE] = "offline", +}; + +/** + * kobject_action_type - translate action string to numeric type + * + * @buf: buffer containing the action string, newline is ignored + * @len: length of buffer + * @type: pointer to the location to store the action type + * + * Returns 0 if the action string was recognized. + */ +int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type) +{ + enum kobject_action action; + int ret = -EINVAL; + + if (count && (buf[count-1] == '\n' || buf[count-1] == '\0')) + count--; + + if (!count) + goto out; + + for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) { + if (strncmp(kobject_actions[action], buf, count) != 0) + continue; + if (kobject_actions[action][count] != '\0') + continue; + *type = action; + ret = 0; + break; + } +out: + return ret; +} + +#ifdef CONFIG_NET +static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct kobject *kobj = data; + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *sock_ns, *ns; + ns = kobj->ktype->namespace(kobj); + sock_ns = ops->netlink_ns(dsk); + return sock_ns != ns; + } + + return 0; +} +#endif + +static int kobj_usermode_filter(struct kobject *kobj) +{ + const struct kobj_ns_type_operations *ops; + + ops = kobj_ns_ops(kobj); + if (ops) { + const void *init_ns, *ns; + ns = kobj->ktype->namespace(kobj); + init_ns = ops->initial_ns(); + return ns != init_ns; + } + + return 0; +} + +/** + * kobject_uevent_env - send an uevent with environmental data + * + * @action: action that is happening + * @kobj: struct kobject that the action is happening to + * @envp_ext: pointer to environmental data + * + * Returns 0 if kobject_uevent_env() is completed with success or the + * corresponding error when it fails. + */ +int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, + char *envp_ext[]) +{ + struct kobj_uevent_env *env; + const char *action_string = kobject_actions[action]; + const char *devpath = NULL; + const char *subsystem; + struct kobject *top_kobj; + struct kset *kset; + const struct kset_uevent_ops *uevent_ops; + u64 seq; + int i = 0; + int retval = 0; +#ifdef CONFIG_NET + struct uevent_sock *ue_sk; +#endif + + pr_debug("kobject: '%s' (%p): %s\n", + kobject_name(kobj), kobj, __func__); + + /* search the kset we belong to */ + top_kobj = kobj; + while (!top_kobj->kset && top_kobj->parent) + top_kobj = top_kobj->parent; + + if (!top_kobj->kset) { + pr_debug("kobject: '%s' (%p): %s: attempted to send uevent " + "without kset!\n", kobject_name(kobj), kobj, + __func__); + return -EINVAL; + } + + kset = top_kobj->kset; + uevent_ops = kset->uevent_ops; + + /* skip the event, if uevent_suppress is set*/ + if (kobj->uevent_suppress) { + pr_debug("kobject: '%s' (%p): %s: uevent_suppress " + "caused the event to drop!\n", + kobject_name(kobj), kobj, __func__); + return 0; + } + /* skip the event, if the filter returns zero. */ + if (uevent_ops && uevent_ops->filter) + if (!uevent_ops->filter(kset, kobj)) { + pr_debug("kobject: '%s' (%p): %s: filter function " + "caused the event to drop!\n", + kobject_name(kobj), kobj, __func__); + return 0; + } + + /* originating subsystem */ + if (uevent_ops && uevent_ops->name) + subsystem = uevent_ops->name(kset, kobj); + else + subsystem = kobject_name(&kset->kobj); + if (!subsystem) { + pr_debug("kobject: '%s' (%p): %s: unset subsystem caused the " + "event to drop!\n", kobject_name(kobj), kobj, + __func__); + return 0; + } + + /* environment buffer */ + env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + /* complete object path */ + devpath = kobject_get_path(kobj, GFP_KERNEL); + if (!devpath) { + retval = -ENOENT; + goto exit; + } + + /* default keys */ + retval = add_uevent_var(env, "ACTION=%s", action_string); + if (retval) + goto exit; + retval = add_uevent_var(env, "DEVPATH=%s", devpath); + if (retval) + goto exit; + retval = add_uevent_var(env, "SUBSYSTEM=%s", subsystem); + if (retval) + goto exit; + + /* keys passed in from the caller */ + if (envp_ext) { + for (i = 0; envp_ext[i]; i++) { + retval = add_uevent_var(env, "%s", envp_ext[i]); + if (retval) + goto exit; + } + } + + /* let the kset specific function add its stuff */ + if (uevent_ops && uevent_ops->uevent) { + retval = uevent_ops->uevent(kset, kobj, env); + if (retval) { + pr_debug("kobject: '%s' (%p): %s: uevent() returned " + "%d\n", kobject_name(kobj), kobj, + __func__, retval); + goto exit; + } + } + + /* + * Mark "add" and "remove" events in the object to ensure proper + * events to userspace during automatic cleanup. If the object did + * send an "add" event, "remove" will automatically generated by + * the core, if not already done by the caller. + */ + if (action == KOBJ_ADD) + kobj->state_add_uevent_sent = 1; + else if (action == KOBJ_REMOVE) + kobj->state_remove_uevent_sent = 1; + + /* we will send an event, so request a new sequence number */ + spin_lock(&sequence_lock); + seq = ++uevent_seqnum; + spin_unlock(&sequence_lock); + retval = add_uevent_var(env, "SEQNUM=%llu", (unsigned long long)seq); + if (retval) + goto exit; + +#if defined(CONFIG_NET) + /* send netlink message */ + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + struct sock *uevent_sock = ue_sk->sk; + struct sk_buff *skb; + size_t len; + + /* allocate message with the maximum possible size */ + len = strlen(action_string) + strlen(devpath) + 2; + skb = alloc_skb(len + env->buflen, GFP_KERNEL); + if (skb) { + char *scratch; + + /* add header */ + scratch = skb_put(skb, len); + sprintf(scratch, "%s@%s", action_string, devpath); + + /* copy keys to our continuous event payload buffer */ + for (i = 0; i < env->envp_idx; i++) { + len = strlen(env->envp[i]) + 1; + scratch = skb_put(skb, len); + strcpy(scratch, env->envp[i]); + } + + NETLINK_CB(skb).dst_group = 1; + retval = netlink_broadcast_filtered(uevent_sock, skb, + 0, 1, GFP_KERNEL, + kobj_bcast_filter, + kobj); + /* ENOBUFS should be handled in userspace */ + if (retval == -ENOBUFS) + retval = 0; + } else + retval = -ENOMEM; + } + mutex_unlock(&uevent_sock_mutex); +#endif + + /* call uevent_helper, usually only enabled during early boot */ + if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { + char *argv [3]; + + argv [0] = uevent_helper; + argv [1] = (char *)subsystem; + argv [2] = NULL; + retval = add_uevent_var(env, "HOME=/"); + if (retval) + goto exit; + retval = add_uevent_var(env, + "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); + if (retval) + goto exit; + + retval = call_usermodehelper(argv[0], argv, + env->envp, UMH_WAIT_EXEC); + } + +exit: + kfree(devpath); + kfree(env); + return retval; +} +EXPORT_SYMBOL_GPL(kobject_uevent_env); + +/** + * kobject_uevent - notify userspace by sending an uevent + * + * @action: action that is happening + * @kobj: struct kobject that the action is happening to + * + * Returns 0 if kobject_uevent() is completed with success or the + * corresponding error when it fails. + */ +int kobject_uevent(struct kobject *kobj, enum kobject_action action) +{ + return kobject_uevent_env(kobj, action, NULL); +} +EXPORT_SYMBOL_GPL(kobject_uevent); + +/** + * add_uevent_var - add key value string to the environment buffer + * @env: environment buffer structure + * @format: printf format for the key=value pair + * + * Returns 0 if environment variable was added successfully or -ENOMEM + * if no space was available. + */ +int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...) +{ + va_list args; + int len; + + if (env->envp_idx >= ARRAY_SIZE(env->envp)) { + WARN(1, KERN_ERR "add_uevent_var: too many keys\n"); + return -ENOMEM; + } + + va_start(args, format); + len = vsnprintf(&env->buf[env->buflen], + sizeof(env->buf) - env->buflen, + format, args); + va_end(args); + + if (len >= (sizeof(env->buf) - env->buflen)) { + WARN(1, KERN_ERR "add_uevent_var: buffer size too small\n"); + return -ENOMEM; + } + + env->envp[env->envp_idx++] = &env->buf[env->buflen]; + env->buflen += len + 1; + return 0; +} +EXPORT_SYMBOL_GPL(add_uevent_var); + +#if defined(CONFIG_NET) +static int uevent_net_init(struct net *net) +{ + struct uevent_sock *ue_sk; + + ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL); + if (!ue_sk) + return -ENOMEM; + + ue_sk->sk = netlink_kernel_create(net, NETLINK_KOBJECT_UEVENT, + 1, NULL, NULL, THIS_MODULE); + if (!ue_sk->sk) { + printk(KERN_ERR + "kobject_uevent: unable to create netlink socket!\n"); + kfree(ue_sk); + return -ENODEV; + } + mutex_lock(&uevent_sock_mutex); + list_add_tail(&ue_sk->list, &uevent_sock_list); + mutex_unlock(&uevent_sock_mutex); + return 0; +} + +static void uevent_net_exit(struct net *net) +{ + struct uevent_sock *ue_sk; + + mutex_lock(&uevent_sock_mutex); + list_for_each_entry(ue_sk, &uevent_sock_list, list) { + if (sock_net(ue_sk->sk) == net) + goto found; + } + mutex_unlock(&uevent_sock_mutex); + return; + +found: + list_del(&ue_sk->list); + mutex_unlock(&uevent_sock_mutex); + + netlink_kernel_release(ue_sk->sk); + kfree(ue_sk); +} + +static struct pernet_operations uevent_net_ops = { + .init = uevent_net_init, + .exit = uevent_net_exit, +}; + +static int __init kobject_uevent_init(void) +{ + netlink_set_nonroot(NETLINK_KOBJECT_UEVENT, NL_NONROOT_RECV); + return register_pernet_subsys(&uevent_net_ops); +} + + +postcore_initcall(kobject_uevent_init); +#endif diff --git a/lib/kref.c b/lib/kref.c new file mode 100644 index 00000000..d3d227a0 --- /dev/null +++ b/lib/kref.c @@ -0,0 +1,67 @@ +/* + * kref.c - library routines for handling generic reference counted objects + * + * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2004 IBM Corp. + * + * based on lib/kobject.c which was: + * Copyright (C) 2002-2003 Patrick Mochel <mochel@osdl.org> + * + * This file is released under the GPLv2. + * + */ + +#include <linux/kref.h> +#include <linux/module.h> +#include <linux/slab.h> + +/** + * kref_init - initialize object. + * @kref: object in question. + */ +void kref_init(struct kref *kref) +{ + atomic_set(&kref->refcount, 1); + smp_mb(); +} + +/** + * kref_get - increment refcount for object. + * @kref: object. + */ +void kref_get(struct kref *kref) +{ + WARN_ON(!atomic_read(&kref->refcount)); + atomic_inc(&kref->refcount); + smp_mb__after_atomic_inc(); +} + +/** + * kref_put - decrement refcount for object. + * @kref: object. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. + * + * Decrement the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +int kref_put(struct kref *kref, void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + WARN_ON(release == (void (*)(struct kref *))kfree); + + if (atomic_dec_and_test(&kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + +EXPORT_SYMBOL(kref_init); +EXPORT_SYMBOL(kref_get); +EXPORT_SYMBOL(kref_put); diff --git a/lib/lcm.c b/lib/lcm.c new file mode 100644 index 00000000..157cd88a --- /dev/null +++ b/lib/lcm.c @@ -0,0 +1,15 @@ +#include <linux/kernel.h> +#include <linux/gcd.h> +#include <linux/module.h> + +/* Lowest common multiple */ +unsigned long lcm(unsigned long a, unsigned long b) +{ + if (a && b) + return (a * b) / gcd(a, b); + else if (b) + return b; + + return a; +} +EXPORT_SYMBOL_GPL(lcm); diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c new file mode 100644 index 00000000..244f5480 --- /dev/null +++ b/lib/libcrc32c.c @@ -0,0 +1,81 @@ +/* + * CRC32C + *@Article{castagnoli-crc, + * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, + * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits}}, + * journal = IEEE Transactions on Communication, + * year = {1993}, + * volume = {41}, + * number = {6}, + * pages = {}, + * month = {June}, + *} + * Used by the iSCSI driver, possibly others, and derived from the + * the iscsi-crc.c module of the linux-iscsi driver at + * http://linux-iscsi.sourceforge.net. + * + * Following the example of lib/crc32, this function is intended to be + * flexible and useful for all users. Modules that currently have their + * own crc32c, but hopefully may be able to use this one are: + * net/sctp (please add all your doco to here if you change to + * use this one!) + * <endoflist> + * + * Copyright (c) 2004 Cisco Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/hash.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> + +static struct crypto_shash *tfm; + +u32 crc32c(u32 crc, const void *address, unsigned int length) +{ + struct { + struct shash_desc shash; + char ctx[crypto_shash_descsize(tfm)]; + } desc; + int err; + + desc.shash.tfm = tfm; + desc.shash.flags = 0; + *(u32 *)desc.ctx = crc; + + err = crypto_shash_update(&desc.shash, address, length); + BUG_ON(err); + + return *(u32 *)desc.ctx; +} + +EXPORT_SYMBOL(crc32c); + +static int __init libcrc32c_mod_init(void) +{ + tfm = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + return 0; +} + +static void __exit libcrc32c_mod_fini(void) +{ + crypto_free_shash(tfm); +} + +module_init(libcrc32c_mod_init); +module_exit(libcrc32c_mod_fini); + +MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>"); +MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); +MODULE_LICENSE("GPL"); diff --git a/lib/list_debug.c b/lib/list_debug.c new file mode 100644 index 00000000..344c710d --- /dev/null +++ b/lib/list_debug.c @@ -0,0 +1,62 @@ +/* + * Copyright 2006, Red Hat, Inc., Dave Jones + * Released under the General Public License (GPL). + * + * This file contains the linked list implementations for + * DEBUG_LIST. + */ + +#include <linux/module.h> +#include <linux/list.h> + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ + +void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + WARN(next->prev != prev, + "list_add corruption. next->prev should be " + "prev (%p), but was %p. (next=%p).\n", + prev, next->prev, next); + WARN(prev->next != next, + "list_add corruption. prev->next should be " + "next (%p), but was %p. (prev=%p).\n", + next, prev->next, prev); + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} +EXPORT_SYMBOL(__list_add); + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +void list_del(struct list_head *entry) +{ + WARN(entry->next == LIST_POISON1, + "list_del corruption, next is LIST_POISON1 (%p)\n", + LIST_POISON1); + WARN(entry->next != LIST_POISON1 && entry->prev == LIST_POISON2, + "list_del corruption, prev is LIST_POISON2 (%p)\n", + LIST_POISON2); + WARN(entry->prev->next != entry, + "list_del corruption. prev->next should be %p, " + "but was %p\n", entry, entry->prev->next); + WARN(entry->next->prev != entry, + "list_del corruption. next->prev should be %p, " + "but was %p\n", entry, entry->next->prev); + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} +EXPORT_SYMBOL(list_del); diff --git a/lib/list_sort.c b/lib/list_sort.c new file mode 100644 index 00000000..a7616fa3 --- /dev/null +++ b/lib/list_sort.c @@ -0,0 +1,217 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/list_sort.h> +#include <linux/slab.h> +#include <linux/list.h> + +#define MAX_LIST_LENGTH_BITS 20 + +/* + * Returns a list organized in an intermediate format suited + * to chaining of merge() calls: null-terminated, no reserved or + * sentinel head node, "prev" links not maintained. + */ +static struct list_head *merge(void *priv, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b), + struct list_head *a, struct list_head *b) +{ + struct list_head head, *tail = &head; + + while (a && b) { + /* if equal, take 'a' -- important for sort stability */ + if ((*cmp)(priv, a, b) <= 0) { + tail->next = a; + a = a->next; + } else { + tail->next = b; + b = b->next; + } + tail = tail->next; + } + tail->next = a?:b; + return head.next; +} + +/* + * Combine final list merge with restoration of standard doubly-linked + * list structure. This approach duplicates code from merge(), but + * runs faster than the tidier alternatives of either a separate final + * prev-link restoration pass, or maintaining the prev links + * throughout. + */ +static void merge_and_restore_back_links(void *priv, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b), + struct list_head *head, + struct list_head *a, struct list_head *b) +{ + struct list_head *tail = head; + + while (a && b) { + /* if equal, take 'a' -- important for sort stability */ + if ((*cmp)(priv, a, b) <= 0) { + tail->next = a; + a->prev = tail; + a = a->next; + } else { + tail->next = b; + b->prev = tail; + b = b->next; + } + tail = tail->next; + } + tail->next = a ? : b; + + do { + /* + * In worst cases this loop may run many iterations. + * Continue callbacks to the client even though no + * element comparison is needed, so the client's cmp() + * routine can invoke cond_resched() periodically. + */ + (*cmp)(priv, tail->next, tail->next); + + tail->next->prev = tail; + tail = tail->next; + } while (tail->next); + + tail->next = head; + head->prev = tail; +} + +/** + * list_sort - sort a list + * @priv: private data, opaque to list_sort(), passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function implements "merge sort", which has O(nlog(n)) + * complexity. + * + * The comparison function @cmp must return a negative value if @a + * should sort before @b, and a positive value if @a should sort after + * @b. If @a and @b are equivalent, and their original relative + * ordering is to be preserved, @cmp must return 0. + */ +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) +{ + struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists + -- last slot is a sentinel */ + int lev; /* index into part[] */ + int max_lev = 0; + struct list_head *list; + + if (list_empty(head)) + return; + + memset(part, 0, sizeof(part)); + + head->prev->next = NULL; + list = head->next; + + while (list) { + struct list_head *cur = list; + list = list->next; + cur->next = NULL; + + for (lev = 0; part[lev]; lev++) { + cur = merge(priv, cmp, part[lev], cur); + part[lev] = NULL; + } + if (lev > max_lev) { + if (unlikely(lev >= ARRAY_SIZE(part)-1)) { + printk_once(KERN_DEBUG "list passed to" + " list_sort() too long for" + " efficiency\n"); + lev--; + } + max_lev = lev; + } + part[lev] = cur; + } + + for (lev = 0; lev < max_lev; lev++) + if (part[lev]) + list = merge(priv, cmp, part[lev], list); + + merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); +} +EXPORT_SYMBOL(list_sort); + +#ifdef DEBUG_LIST_SORT +struct debug_el { + struct list_head l_h; + int value; + unsigned serial; +}; + +static int cmp(void *priv, struct list_head *a, struct list_head *b) +{ + return container_of(a, struct debug_el, l_h)->value + - container_of(b, struct debug_el, l_h)->value; +} + +/* + * The pattern of set bits in the list length determines which cases + * are hit in list_sort(). + */ +#define LIST_SORT_TEST_LENGTH (512+128+2) /* not including head */ + +static int __init list_sort_test(void) +{ + int i, r = 1, count; + struct list_head *head = kmalloc(sizeof(*head), GFP_KERNEL); + struct list_head *cur; + + printk(KERN_WARNING "testing list_sort()\n"); + + cur = head; + for (i = 0; i < LIST_SORT_TEST_LENGTH; i++) { + struct debug_el *el = kmalloc(sizeof(*el), GFP_KERNEL); + BUG_ON(!el); + /* force some equivalencies */ + el->value = (r = (r * 725861) % 6599) % (LIST_SORT_TEST_LENGTH/3); + el->serial = i; + + el->l_h.prev = cur; + cur->next = &el->l_h; + cur = cur->next; + } + head->prev = cur; + + list_sort(NULL, head, cmp); + + count = 1; + for (cur = head->next; cur->next != head; cur = cur->next) { + struct debug_el *el = container_of(cur, struct debug_el, l_h); + int cmp_result = cmp(NULL, cur, cur->next); + if (cur->next->prev != cur) { + printk(KERN_EMERG "list_sort() returned " + "a corrupted list!\n"); + return 1; + } else if (cmp_result > 0) { + printk(KERN_EMERG "list_sort() failed to sort!\n"); + return 1; + } else if (cmp_result == 0 && + el->serial >= container_of(cur->next, + struct debug_el, l_h)->serial) { + printk(KERN_EMERG "list_sort() failed to preserve order" + " of equivalent elements!\n"); + return 1; + } + kfree(cur->prev); + count++; + } + kfree(cur); + if (count != LIST_SORT_TEST_LENGTH) { + printk(KERN_EMERG "list_sort() returned list of" + "different length!\n"); + return 1; + } + return 0; +} +module_init(list_sort_test); +#endif diff --git a/lib/locking-selftest-hardirq.h b/lib/locking-selftest-hardirq.h new file mode 100644 index 00000000..10d4a150 --- /dev/null +++ b/lib/locking-selftest-hardirq.h @@ -0,0 +1,9 @@ +#undef IRQ_DISABLE +#undef IRQ_ENABLE +#undef IRQ_ENTER +#undef IRQ_EXIT + +#define IRQ_ENABLE HARDIRQ_ENABLE +#define IRQ_DISABLE HARDIRQ_DISABLE +#define IRQ_ENTER HARDIRQ_ENTER +#define IRQ_EXIT HARDIRQ_EXIT diff --git a/lib/locking-selftest-mutex.h b/lib/locking-selftest-mutex.h new file mode 100644 index 00000000..68601b6f --- /dev/null +++ b/lib/locking-selftest-mutex.h @@ -0,0 +1,11 @@ +#undef LOCK +#define LOCK ML + +#undef UNLOCK +#define UNLOCK MU + +#undef RLOCK +#undef WLOCK + +#undef INIT +#define INIT MI diff --git a/lib/locking-selftest-rlock-hardirq.h b/lib/locking-selftest-rlock-hardirq.h new file mode 100644 index 00000000..9f517ebc --- /dev/null +++ b/lib/locking-selftest-rlock-hardirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-rlock.h" +#include "locking-selftest-hardirq.h" diff --git a/lib/locking-selftest-rlock-softirq.h b/lib/locking-selftest-rlock-softirq.h new file mode 100644 index 00000000..981455db --- /dev/null +++ b/lib/locking-selftest-rlock-softirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-rlock.h" +#include "locking-selftest-softirq.h" diff --git a/lib/locking-selftest-rlock.h b/lib/locking-selftest-rlock.h new file mode 100644 index 00000000..6789044f --- /dev/null +++ b/lib/locking-selftest-rlock.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK RL + +#undef UNLOCK +#define UNLOCK RU + +#undef RLOCK +#define RLOCK RL + +#undef WLOCK +#define WLOCK WL + +#undef INIT +#define INIT RWI diff --git a/lib/locking-selftest-rsem.h b/lib/locking-selftest-rsem.h new file mode 100644 index 00000000..62da8866 --- /dev/null +++ b/lib/locking-selftest-rsem.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK RSL + +#undef UNLOCK +#define UNLOCK RSU + +#undef RLOCK +#define RLOCK RSL + +#undef WLOCK +#define WLOCK WSL + +#undef INIT +#define INIT RWSI diff --git a/lib/locking-selftest-softirq.h b/lib/locking-selftest-softirq.h new file mode 100644 index 00000000..a83de2a0 --- /dev/null +++ b/lib/locking-selftest-softirq.h @@ -0,0 +1,9 @@ +#undef IRQ_DISABLE +#undef IRQ_ENABLE +#undef IRQ_ENTER +#undef IRQ_EXIT + +#define IRQ_DISABLE SOFTIRQ_DISABLE +#define IRQ_ENABLE SOFTIRQ_ENABLE +#define IRQ_ENTER SOFTIRQ_ENTER +#define IRQ_EXIT SOFTIRQ_EXIT diff --git a/lib/locking-selftest-spin-hardirq.h b/lib/locking-selftest-spin-hardirq.h new file mode 100644 index 00000000..693198dc --- /dev/null +++ b/lib/locking-selftest-spin-hardirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-spin.h" +#include "locking-selftest-hardirq.h" diff --git a/lib/locking-selftest-spin-softirq.h b/lib/locking-selftest-spin-softirq.h new file mode 100644 index 00000000..c472e2a8 --- /dev/null +++ b/lib/locking-selftest-spin-softirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-spin.h" +#include "locking-selftest-softirq.h" diff --git a/lib/locking-selftest-spin.h b/lib/locking-selftest-spin.h new file mode 100644 index 00000000..ccd1b4b0 --- /dev/null +++ b/lib/locking-selftest-spin.h @@ -0,0 +1,11 @@ +#undef LOCK +#define LOCK L + +#undef UNLOCK +#define UNLOCK U + +#undef RLOCK +#undef WLOCK + +#undef INIT +#define INIT SI diff --git a/lib/locking-selftest-wlock-hardirq.h b/lib/locking-selftest-wlock-hardirq.h new file mode 100644 index 00000000..2dd2e512 --- /dev/null +++ b/lib/locking-selftest-wlock-hardirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-wlock.h" +#include "locking-selftest-hardirq.h" diff --git a/lib/locking-selftest-wlock-softirq.h b/lib/locking-selftest-wlock-softirq.h new file mode 100644 index 00000000..cb80d1cb --- /dev/null +++ b/lib/locking-selftest-wlock-softirq.h @@ -0,0 +1,2 @@ +#include "locking-selftest-wlock.h" +#include "locking-selftest-softirq.h" diff --git a/lib/locking-selftest-wlock.h b/lib/locking-selftest-wlock.h new file mode 100644 index 00000000..0815322d --- /dev/null +++ b/lib/locking-selftest-wlock.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK WL + +#undef UNLOCK +#define UNLOCK WU + +#undef RLOCK +#define RLOCK RL + +#undef WLOCK +#define WLOCK WL + +#undef INIT +#define INIT RWI diff --git a/lib/locking-selftest-wsem.h b/lib/locking-selftest-wsem.h new file mode 100644 index 00000000..b88c5f2d --- /dev/null +++ b/lib/locking-selftest-wsem.h @@ -0,0 +1,14 @@ +#undef LOCK +#define LOCK WSL + +#undef UNLOCK +#define UNLOCK WSU + +#undef RLOCK +#define RLOCK RSL + +#undef WLOCK +#define WLOCK WSL + +#undef INIT +#define INIT RWSI diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c new file mode 100644 index 00000000..619313ed --- /dev/null +++ b/lib/locking-selftest.c @@ -0,0 +1,1218 @@ +/* + * lib/locking-selftest.c + * + * Testsuite for various locking APIs: spinlocks, rwlocks, + * mutexes and rw-semaphores. + * + * It is checking both false positives and false negatives. + * + * Started by Ingo Molnar: + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/rwsem.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/module.h> +#include <linux/lockdep.h> +#include <linux/spinlock.h> +#include <linux/kallsyms.h> +#include <linux/interrupt.h> +#include <linux/debug_locks.h> +#include <linux/irqflags.h> + +/* + * Change this to 1 if you want to see the failure printouts: + */ +static unsigned int debug_locks_verbose; + +static int __init setup_debug_locks_verbose(char *str) +{ + get_option(&str, &debug_locks_verbose); + + return 1; +} + +__setup("debug_locks_verbose=", setup_debug_locks_verbose); + +#define FAILURE 0 +#define SUCCESS 1 + +#define LOCKTYPE_SPIN 0x1 +#define LOCKTYPE_RWLOCK 0x2 +#define LOCKTYPE_MUTEX 0x4 +#define LOCKTYPE_RWSEM 0x8 + +/* + * Normal standalone locks, for the circular and irq-context + * dependency tests: + */ +static DEFINE_SPINLOCK(lock_A); +static DEFINE_SPINLOCK(lock_B); +static DEFINE_SPINLOCK(lock_C); +static DEFINE_SPINLOCK(lock_D); + +static DEFINE_RWLOCK(rwlock_A); +static DEFINE_RWLOCK(rwlock_B); +static DEFINE_RWLOCK(rwlock_C); +static DEFINE_RWLOCK(rwlock_D); + +static DEFINE_MUTEX(mutex_A); +static DEFINE_MUTEX(mutex_B); +static DEFINE_MUTEX(mutex_C); +static DEFINE_MUTEX(mutex_D); + +static DECLARE_RWSEM(rwsem_A); +static DECLARE_RWSEM(rwsem_B); +static DECLARE_RWSEM(rwsem_C); +static DECLARE_RWSEM(rwsem_D); + +/* + * Locks that we initialize dynamically as well so that + * e.g. X1 and X2 becomes two instances of the same class, + * but X* and Y* are different classes. We do this so that + * we do not trigger a real lockup: + */ +static DEFINE_SPINLOCK(lock_X1); +static DEFINE_SPINLOCK(lock_X2); +static DEFINE_SPINLOCK(lock_Y1); +static DEFINE_SPINLOCK(lock_Y2); +static DEFINE_SPINLOCK(lock_Z1); +static DEFINE_SPINLOCK(lock_Z2); + +static DEFINE_RWLOCK(rwlock_X1); +static DEFINE_RWLOCK(rwlock_X2); +static DEFINE_RWLOCK(rwlock_Y1); +static DEFINE_RWLOCK(rwlock_Y2); +static DEFINE_RWLOCK(rwlock_Z1); +static DEFINE_RWLOCK(rwlock_Z2); + +static DEFINE_MUTEX(mutex_X1); +static DEFINE_MUTEX(mutex_X2); +static DEFINE_MUTEX(mutex_Y1); +static DEFINE_MUTEX(mutex_Y2); +static DEFINE_MUTEX(mutex_Z1); +static DEFINE_MUTEX(mutex_Z2); + +static DECLARE_RWSEM(rwsem_X1); +static DECLARE_RWSEM(rwsem_X2); +static DECLARE_RWSEM(rwsem_Y1); +static DECLARE_RWSEM(rwsem_Y2); +static DECLARE_RWSEM(rwsem_Z1); +static DECLARE_RWSEM(rwsem_Z2); + +/* + * non-inlined runtime initializers, to let separate locks share + * the same lock-class: + */ +#define INIT_CLASS_FUNC(class) \ +static noinline void \ +init_class_##class(spinlock_t *lock, rwlock_t *rwlock, struct mutex *mutex, \ + struct rw_semaphore *rwsem) \ +{ \ + spin_lock_init(lock); \ + rwlock_init(rwlock); \ + mutex_init(mutex); \ + init_rwsem(rwsem); \ +} + +INIT_CLASS_FUNC(X) +INIT_CLASS_FUNC(Y) +INIT_CLASS_FUNC(Z) + +static void init_shared_classes(void) +{ + init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1); + init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2); + + init_class_Y(&lock_Y1, &rwlock_Y1, &mutex_Y1, &rwsem_Y1); + init_class_Y(&lock_Y2, &rwlock_Y2, &mutex_Y2, &rwsem_Y2); + + init_class_Z(&lock_Z1, &rwlock_Z1, &mutex_Z1, &rwsem_Z1); + init_class_Z(&lock_Z2, &rwlock_Z2, &mutex_Z2, &rwsem_Z2); +} + +/* + * For spinlocks and rwlocks we also do hardirq-safe / softirq-safe tests. + * The following functions use a lock from a simulated hardirq/softirq + * context, causing the locks to be marked as hardirq-safe/softirq-safe: + */ + +#define HARDIRQ_DISABLE local_irq_disable +#define HARDIRQ_ENABLE local_irq_enable + +#define HARDIRQ_ENTER() \ + local_irq_disable(); \ + irq_enter(); \ + WARN_ON(!in_irq()); + +#define HARDIRQ_EXIT() \ + __irq_exit(); \ + local_irq_enable(); + +#define SOFTIRQ_DISABLE local_bh_disable +#define SOFTIRQ_ENABLE local_bh_enable + +#define SOFTIRQ_ENTER() \ + local_bh_disable(); \ + local_irq_disable(); \ + lockdep_softirq_enter(); \ + WARN_ON(!in_softirq()); + +#define SOFTIRQ_EXIT() \ + lockdep_softirq_exit(); \ + local_irq_enable(); \ + local_bh_enable(); + +/* + * Shortcuts for lock/unlock API variants, to keep + * the testcases compact: + */ +#define L(x) spin_lock(&lock_##x) +#define U(x) spin_unlock(&lock_##x) +#define LU(x) L(x); U(x) +#define SI(x) spin_lock_init(&lock_##x) + +#define WL(x) write_lock(&rwlock_##x) +#define WU(x) write_unlock(&rwlock_##x) +#define WLU(x) WL(x); WU(x) + +#define RL(x) read_lock(&rwlock_##x) +#define RU(x) read_unlock(&rwlock_##x) +#define RLU(x) RL(x); RU(x) +#define RWI(x) rwlock_init(&rwlock_##x) + +#define ML(x) mutex_lock(&mutex_##x) +#define MU(x) mutex_unlock(&mutex_##x) +#define MI(x) mutex_init(&mutex_##x) + +#define WSL(x) down_write(&rwsem_##x) +#define WSU(x) up_write(&rwsem_##x) + +#define RSL(x) down_read(&rwsem_##x) +#define RSU(x) up_read(&rwsem_##x) +#define RWSI(x) init_rwsem(&rwsem_##x) + +#define LOCK_UNLOCK_2(x,y) LOCK(x); LOCK(y); UNLOCK(y); UNLOCK(x) + +/* + * Generate different permutations of the same testcase, using + * the same basic lock-dependency/state events: + */ + +#define GENERATE_TESTCASE(name) \ + \ +static void name(void) { E(); } + +#define GENERATE_PERMUTATIONS_2_EVENTS(name) \ + \ +static void name##_12(void) { E1(); E2(); } \ +static void name##_21(void) { E2(); E1(); } + +#define GENERATE_PERMUTATIONS_3_EVENTS(name) \ + \ +static void name##_123(void) { E1(); E2(); E3(); } \ +static void name##_132(void) { E1(); E3(); E2(); } \ +static void name##_213(void) { E2(); E1(); E3(); } \ +static void name##_231(void) { E2(); E3(); E1(); } \ +static void name##_312(void) { E3(); E1(); E2(); } \ +static void name##_321(void) { E3(); E2(); E1(); } + +/* + * AA deadlock: + */ + +#define E() \ + \ + LOCK(X1); \ + LOCK(X2); /* this one should fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(AA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(AA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(AA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(AA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(AA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(AA_rsem) + +#undef E + +/* + * Special-case for read-locking, they are + * allowed to recurse on the same lock class: + */ +static void rlock_AA1(void) +{ + RL(X1); + RL(X1); // this one should NOT fail +} + +static void rlock_AA1B(void) +{ + RL(X1); + RL(X2); // this one should NOT fail +} + +static void rsem_AA1(void) +{ + RSL(X1); + RSL(X1); // this one should fail +} + +static void rsem_AA1B(void) +{ + RSL(X1); + RSL(X2); // this one should fail +} +/* + * The mixing of read and write locks is not allowed: + */ +static void rlock_AA2(void) +{ + RL(X1); + WL(X2); // this one should fail +} + +static void rsem_AA2(void) +{ + RSL(X1); + WSL(X2); // this one should fail +} + +static void rlock_AA3(void) +{ + WL(X1); + RL(X2); // this one should fail +} + +static void rsem_AA3(void) +{ + WSL(X1); + RSL(X2); // this one should fail +} + +/* + * ABBA deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(B, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABBA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABBA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABBA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABBA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABBA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABBA_rsem) + +#undef E + +/* + * AB BC CA deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(B, C); \ + LOCK_UNLOCK_2(C, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABBCCA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABBCCA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABBCCA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABBCCA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABBCCA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABBCCA_rsem) + +#undef E + +/* + * AB CA BC deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(C, A); \ + LOCK_UNLOCK_2(B, C); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABCABC_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABCABC_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABCABC_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABCABC_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABCABC_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABCABC_rsem) + +#undef E + +/* + * AB BC CD DA deadlock: + */ + +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(B, C); \ + LOCK_UNLOCK_2(C, D); \ + LOCK_UNLOCK_2(D, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABBCCDDA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABBCCDDA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABBCCDDA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABBCCDDA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABBCCDDA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABBCCDDA_rsem) + +#undef E + +/* + * AB CD BD DA deadlock: + */ +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(C, D); \ + LOCK_UNLOCK_2(B, D); \ + LOCK_UNLOCK_2(D, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABCDBDDA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABCDBDDA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABCDBDDA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABCDBDDA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABCDBDDA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABCDBDDA_rsem) + +#undef E + +/* + * AB CD BC DA deadlock: + */ +#define E() \ + \ + LOCK_UNLOCK_2(A, B); \ + LOCK_UNLOCK_2(C, D); \ + LOCK_UNLOCK_2(B, C); \ + LOCK_UNLOCK_2(D, A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(ABCDBCDA_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(ABCDBCDA_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(ABCDBCDA_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(ABCDBCDA_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(ABCDBCDA_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(ABCDBCDA_rsem) + +#undef E + +/* + * Double unlock: + */ +#define E() \ + \ + LOCK(A); \ + UNLOCK(A); \ + UNLOCK(A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(double_unlock_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(double_unlock_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(double_unlock_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(double_unlock_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(double_unlock_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(double_unlock_rsem) + +#undef E + +/* + * Bad unlock ordering: + */ +#define E() \ + \ + LOCK(A); \ + LOCK(B); \ + UNLOCK(A); /* fail */ \ + UNLOCK(B); + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(bad_unlock_order_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(bad_unlock_order_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(bad_unlock_order_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(bad_unlock_order_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(bad_unlock_order_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(bad_unlock_order_rsem) + +#undef E + +/* + * initializing a held lock: + */ +#define E() \ + \ + LOCK(A); \ + INIT(A); /* fail */ + +/* + * 6 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_TESTCASE(init_held_spin) +#include "locking-selftest-wlock.h" +GENERATE_TESTCASE(init_held_wlock) +#include "locking-selftest-rlock.h" +GENERATE_TESTCASE(init_held_rlock) +#include "locking-selftest-mutex.h" +GENERATE_TESTCASE(init_held_mutex) +#include "locking-selftest-wsem.h" +GENERATE_TESTCASE(init_held_wsem) +#include "locking-selftest-rsem.h" +GENERATE_TESTCASE(init_held_rsem) + +#undef E + +/* + * locking an irq-safe lock with irqs enabled: + */ +#define E1() \ + \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +#define E2() \ + \ + LOCK(A); \ + UNLOCK(A); + +/* + * Generate 24 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) + +#undef E1 +#undef E2 + +/* + * Enabling hardirqs with a softirq-safe lock held: + */ +#define E1() \ + \ + SOFTIRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + SOFTIRQ_EXIT(); + +#define E2() \ + \ + HARDIRQ_DISABLE(); \ + LOCK(A); \ + HARDIRQ_ENABLE(); \ + UNLOCK(A); + +/* + * Generate 12 testcases: + */ +#include "locking-selftest-spin.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_spin) + +#include "locking-selftest-wlock.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_wlock) + +#include "locking-selftest-rlock.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) + +#undef E1 +#undef E2 + +/* + * Enabling irqs with an irq-safe lock held: + */ +#define E1() \ + \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +#define E2() \ + \ + IRQ_DISABLE(); \ + LOCK(A); \ + IRQ_ENABLE(); \ + UNLOCK(A); + +/* + * Generate 24 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) + +#undef E1 +#undef E2 + +/* + * Acquiring a irq-unsafe lock while holding an irq-safe-lock: + */ +#define E1() \ + \ + LOCK(A); \ + LOCK(B); \ + UNLOCK(B); \ + UNLOCK(A); \ + +#define E2() \ + \ + LOCK(B); \ + UNLOCK(B); + +#define E3() \ + \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +/* + * Generate 36 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 + +/* + * If a lock turns into softirq-safe, but earlier it took + * a softirq-unsafe lock: + */ + +#define E1() \ + IRQ_DISABLE(); \ + LOCK(A); \ + LOCK(B); \ + UNLOCK(B); \ + UNLOCK(A); \ + IRQ_ENABLE(); + +#define E2() \ + LOCK(B); \ + UNLOCK(B); + +#define E3() \ + IRQ_ENTER(); \ + LOCK(A); \ + UNLOCK(A); \ + IRQ_EXIT(); + +/* + * Generate 36 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 + +/* + * read-lock / write-lock irq inversion. + * + * Deadlock scenario: + * + * CPU#1 is at #1, i.e. it has write-locked A, but has not + * taken B yet. + * + * CPU#2 is at #2, i.e. it has locked B. + * + * Hardirq hits CPU#2 at point #2 and is trying to read-lock A. + * + * The deadlock occurs because CPU#1 will spin on B, and CPU#2 + * will spin on A. + */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + WL(A); \ + LOCK(B); \ + UNLOCK(B); \ + WU(A); \ + IRQ_ENABLE(); + +#define E2() \ + \ + LOCK(B); \ + UNLOCK(B); + +#define E3() \ + \ + IRQ_ENTER(); \ + RL(A); \ + RU(A); \ + IRQ_EXIT(); + +/* + * Generate 36 testcases: + */ +#include "locking-selftest-spin-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_spin) + +#include "locking-selftest-rlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_rlock) + +#include "locking-selftest-wlock-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_hard_wlock) + +#include "locking-selftest-spin-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_spin) + +#include "locking-selftest-rlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_rlock) + +#include "locking-selftest-wlock-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) + +#undef E1 +#undef E2 +#undef E3 + +/* + * read-lock / write-lock recursion that is actually safe. + */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + WL(A); \ + WU(A); \ + IRQ_ENABLE(); + +#define E2() \ + \ + RL(A); \ + RU(A); \ + +#define E3() \ + \ + IRQ_ENTER(); \ + RL(A); \ + L(B); \ + U(B); \ + RU(A); \ + IRQ_EXIT(); + +/* + * Generate 12 testcases: + */ +#include "locking-selftest-hardirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard) + +#include "locking-selftest-softirq.h" +GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) + +#undef E1 +#undef E2 +#undef E3 + +/* + * read-lock / write-lock recursion that is unsafe. + */ + +#define E1() \ + \ + IRQ_DISABLE(); \ + L(B); \ + WL(A); \ + WU(A); \ + U(B); \ + IRQ_ENABLE(); + +#define E2() \ + \ + RL(A); \ + RU(A); \ + +#define E3() \ + \ + IRQ_ENTER(); \ + L(B); \ + U(B); \ + IRQ_EXIT(); + +/* + * Generate 12 testcases: + */ +#include "locking-selftest-hardirq.h" +// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard) + +#include "locking-selftest-softirq.h" +// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) +# define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) +# define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map) +# define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map) +#else +# define I_SPINLOCK(x) +# define I_RWLOCK(x) +# define I_MUTEX(x) +# define I_RWSEM(x) +#endif + +#define I1(x) \ + do { \ + I_SPINLOCK(x); \ + I_RWLOCK(x); \ + I_MUTEX(x); \ + I_RWSEM(x); \ + } while (0) + +#define I2(x) \ + do { \ + spin_lock_init(&lock_##x); \ + rwlock_init(&rwlock_##x); \ + mutex_init(&mutex_##x); \ + init_rwsem(&rwsem_##x); \ + } while (0) + +static void reset_locks(void) +{ + local_irq_disable(); + I1(A); I1(B); I1(C); I1(D); + I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2); + lockdep_reset(); + I2(A); I2(B); I2(C); I2(D); + init_shared_classes(); + local_irq_enable(); +} + +#undef I + +static int testcase_total; +static int testcase_successes; +static int expected_testcase_failures; +static int unexpected_testcase_failures; + +static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) +{ + unsigned long saved_preempt_count = preempt_count(); + int expected_failure = 0; + + WARN_ON(irqs_disabled()); + + testcase_fn(); + /* + * Filter out expected failures: + */ +#ifndef CONFIG_PROVE_LOCKING + if ((lockclass_mask & LOCKTYPE_SPIN) && debug_locks != expected) + expected_failure = 1; + if ((lockclass_mask & LOCKTYPE_RWLOCK) && debug_locks != expected) + expected_failure = 1; + if ((lockclass_mask & LOCKTYPE_MUTEX) && debug_locks != expected) + expected_failure = 1; + if ((lockclass_mask & LOCKTYPE_RWSEM) && debug_locks != expected) + expected_failure = 1; +#endif + if (debug_locks != expected) { + if (expected_failure) { + expected_testcase_failures++; + printk("failed|"); + } else { + unexpected_testcase_failures++; + + printk("FAILED|"); + dump_stack(); + } + } else { + testcase_successes++; + printk(" ok |"); + } + testcase_total++; + + if (debug_locks_verbose) + printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n", + lockclass_mask, debug_locks, expected); + /* + * Some tests (e.g. double-unlock) might corrupt the preemption + * count, so restore it: + */ + preempt_count() = saved_preempt_count; +#ifdef CONFIG_TRACE_IRQFLAGS + if (softirq_count()) + current->softirqs_enabled = 0; + else + current->softirqs_enabled = 1; +#endif + + reset_locks(); +} + +static inline void print_testname(const char *testname) +{ + printk("%33s:", testname); +} + +#define DO_TESTCASE_1(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_1B(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_3(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_3RW(desc, name, nr) \ + print_testname(desc"/"#nr); \ + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\ + dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +#define DO_TESTCASE_6(desc, name) \ + print_testname(desc); \ + dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ + printk("\n"); + +#define DO_TESTCASE_6_SUCCESS(desc, name) \ + print_testname(desc); \ + dotest(name##_spin, SUCCESS, LOCKTYPE_SPIN); \ + dotest(name##_wlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \ + printk("\n"); + +/* + * 'read' variant: rlocks must not trigger. + */ +#define DO_TESTCASE_6R(desc, name) \ + print_testname(desc); \ + dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ + printk("\n"); + +#define DO_TESTCASE_2I(desc, name, nr) \ + DO_TESTCASE_1("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_1("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_2IB(desc, name, nr) \ + DO_TESTCASE_1B("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_1B("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_6I(desc, name, nr) \ + DO_TESTCASE_3("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_3("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_6IRW(desc, name, nr) \ + DO_TESTCASE_3RW("hard-"desc, name##_hard, nr); \ + DO_TESTCASE_3RW("soft-"desc, name##_soft, nr); + +#define DO_TESTCASE_2x3(desc, name) \ + DO_TESTCASE_3(desc, name, 12); \ + DO_TESTCASE_3(desc, name, 21); + +#define DO_TESTCASE_2x6(desc, name) \ + DO_TESTCASE_6I(desc, name, 12); \ + DO_TESTCASE_6I(desc, name, 21); + +#define DO_TESTCASE_6x2(desc, name) \ + DO_TESTCASE_2I(desc, name, 123); \ + DO_TESTCASE_2I(desc, name, 132); \ + DO_TESTCASE_2I(desc, name, 213); \ + DO_TESTCASE_2I(desc, name, 231); \ + DO_TESTCASE_2I(desc, name, 312); \ + DO_TESTCASE_2I(desc, name, 321); + +#define DO_TESTCASE_6x2B(desc, name) \ + DO_TESTCASE_2IB(desc, name, 123); \ + DO_TESTCASE_2IB(desc, name, 132); \ + DO_TESTCASE_2IB(desc, name, 213); \ + DO_TESTCASE_2IB(desc, name, 231); \ + DO_TESTCASE_2IB(desc, name, 312); \ + DO_TESTCASE_2IB(desc, name, 321); + +#define DO_TESTCASE_6x6(desc, name) \ + DO_TESTCASE_6I(desc, name, 123); \ + DO_TESTCASE_6I(desc, name, 132); \ + DO_TESTCASE_6I(desc, name, 213); \ + DO_TESTCASE_6I(desc, name, 231); \ + DO_TESTCASE_6I(desc, name, 312); \ + DO_TESTCASE_6I(desc, name, 321); + +#define DO_TESTCASE_6x6RW(desc, name) \ + DO_TESTCASE_6IRW(desc, name, 123); \ + DO_TESTCASE_6IRW(desc, name, 132); \ + DO_TESTCASE_6IRW(desc, name, 213); \ + DO_TESTCASE_6IRW(desc, name, 231); \ + DO_TESTCASE_6IRW(desc, name, 312); \ + DO_TESTCASE_6IRW(desc, name, 321); + + +void locking_selftest(void) +{ + /* + * Got a locking failure before the selftest ran? + */ + if (!debug_locks) { + printk("----------------------------------\n"); + printk("| Locking API testsuite disabled |\n"); + printk("----------------------------------\n"); + return; + } + + /* + * Run the testsuite: + */ + printk("------------------------\n"); + printk("| Locking API testsuite:\n"); + printk("----------------------------------------------------------------------------\n"); + printk(" | spin |wlock |rlock |mutex | wsem | rsem |\n"); + printk(" --------------------------------------------------------------------------\n"); + + init_shared_classes(); + debug_locks_silent = !debug_locks_verbose; + + DO_TESTCASE_6R("A-A deadlock", AA); + DO_TESTCASE_6R("A-B-B-A deadlock", ABBA); + DO_TESTCASE_6R("A-B-B-C-C-A deadlock", ABBCCA); + DO_TESTCASE_6R("A-B-C-A-B-C deadlock", ABCABC); + DO_TESTCASE_6R("A-B-B-C-C-D-D-A deadlock", ABBCCDDA); + DO_TESTCASE_6R("A-B-C-D-B-D-D-A deadlock", ABCDBDDA); + DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA); + DO_TESTCASE_6("double unlock", double_unlock); + DO_TESTCASE_6("initialize held", init_held); + DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order); + + printk(" --------------------------------------------------------------------------\n"); + print_testname("recursive read-lock"); + printk(" |"); + dotest(rlock_AA1, SUCCESS, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA1, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + print_testname("recursive read-lock #2"); + printk(" |"); + dotest(rlock_AA1B, SUCCESS, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA1B, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + print_testname("mixed read-write-lock"); + printk(" |"); + dotest(rlock_AA2, FAILURE, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA2, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + print_testname("mixed write-read-lock"); + printk(" |"); + dotest(rlock_AA3, FAILURE, LOCKTYPE_RWLOCK); + printk(" |"); + dotest(rsem_AA3, FAILURE, LOCKTYPE_RWSEM); + printk("\n"); + + printk(" --------------------------------------------------------------------------\n"); + + /* + * irq-context testcases: + */ + DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); + DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); + DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); + DO_TESTCASE_6x6("safe-A + unsafe-B #1", irqsafe3); + DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4); + DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion); + + DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); +// DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2); + + if (unexpected_testcase_failures) { + printk("-----------------------------------------------------------------\n"); + debug_locks = 0; + printk("BUG: %3d unexpected failures (out of %3d) - debugging disabled! |\n", + unexpected_testcase_failures, testcase_total); + printk("-----------------------------------------------------------------\n"); + } else if (expected_testcase_failures && testcase_successes) { + printk("--------------------------------------------------------\n"); + printk("%3d out of %3d testcases failed, as expected. |\n", + expected_testcase_failures, testcase_total); + printk("----------------------------------------------------\n"); + debug_locks = 1; + } else if (expected_testcase_failures && !testcase_successes) { + printk("--------------------------------------------------------\n"); + printk("All %3d testcases failed, as expected. |\n", + expected_testcase_failures); + printk("----------------------------------------\n"); + debug_locks = 1; + } else { + printk("-------------------------------------------------------\n"); + printk("Good, all %3d testcases passed! |\n", + testcase_successes); + printk("---------------------------------\n"); + debug_locks = 1; + } + debug_locks_silent = 0; +} diff --git a/lib/lru_cache.c b/lib/lru_cache.c new file mode 100644 index 00000000..270de9d3 --- /dev/null +++ b/lib/lru_cache.c @@ -0,0 +1,560 @@ +/* + lru_cache.c + + This file is part of DRBD by Philipp Reisner and Lars Ellenberg. + + Copyright (C) 2003-2008, LINBIT Information Technologies GmbH. + Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>. + Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>. + + drbd is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + drbd is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with drbd; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + */ + +#include <linux/module.h> +#include <linux/bitops.h> +#include <linux/slab.h> +#include <linux/string.h> /* for memset */ +#include <linux/seq_file.h> /* for seq_printf */ +#include <linux/lru_cache.h> + +MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " + "Lars Ellenberg <lars@linbit.com>"); +MODULE_DESCRIPTION("lru_cache - Track sets of hot objects"); +MODULE_LICENSE("GPL"); + +/* this is developers aid only. + * it catches concurrent access (lack of locking on the users part) */ +#define PARANOIA_ENTRY() do { \ + BUG_ON(!lc); \ + BUG_ON(!lc->nr_elements); \ + BUG_ON(test_and_set_bit(__LC_PARANOIA, &lc->flags)); \ +} while (0) + +#define RETURN(x...) do { \ + clear_bit(__LC_PARANOIA, &lc->flags); \ + smp_mb__after_clear_bit(); return x ; } while (0) + +/* BUG() if e is not one of the elements tracked by lc */ +#define PARANOIA_LC_ELEMENT(lc, e) do { \ + struct lru_cache *lc_ = (lc); \ + struct lc_element *e_ = (e); \ + unsigned i = e_->lc_index; \ + BUG_ON(i >= lc_->nr_elements); \ + BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/** + * lc_create - prepares to track objects in an active set + * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @e_count: number of elements allowed to be active simultaneously + * @e_size: size of the tracked objects + * @e_off: offset to the &struct lc_element member in a tracked object + * + * Returns a pointer to a newly initialized struct lru_cache on success, + * or NULL on (allocation) failure. + */ +struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned e_count, size_t e_size, size_t e_off) +{ + struct hlist_head *slot = NULL; + struct lc_element **element = NULL; + struct lru_cache *lc; + struct lc_element *e; + unsigned cache_obj_size = kmem_cache_size(cache); + unsigned i; + + WARN_ON(cache_obj_size < e_size); + if (cache_obj_size < e_size) + return NULL; + + /* e_count too big; would probably fail the allocation below anyways. + * for typical use cases, e_count should be few thousand at most. */ + if (e_count > LC_MAX_ACTIVE) + return NULL; + + slot = kzalloc(e_count * sizeof(struct hlist_head*), GFP_KERNEL); + if (!slot) + goto out_fail; + element = kzalloc(e_count * sizeof(struct lc_element *), GFP_KERNEL); + if (!element) + goto out_fail; + + lc = kzalloc(sizeof(*lc), GFP_KERNEL); + if (!lc) + goto out_fail; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + + lc->name = name; + lc->element_size = e_size; + lc->element_off = e_off; + lc->nr_elements = e_count; + lc->new_number = LC_FREE; + lc->lc_cache = cache; + lc->lc_element = element; + lc->lc_slot = slot; + + /* preallocate all objects */ + for (i = 0; i < e_count; i++) { + void *p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) + break; + memset(p, 0, lc->element_size); + e = p + e_off; + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + element[i] = e; + } + if (i == e_count) + return lc; + + /* else: could not allocate all elements, give up */ + for (i--; i; i--) { + void *p = element[i]; + kmem_cache_free(cache, p - e_off); + } + kfree(lc); +out_fail: + kfree(element); + kfree(slot); + return NULL; +} + +void lc_free_by_index(struct lru_cache *lc, unsigned i) +{ + void *p = lc->lc_element[i]; + WARN_ON(!p); + if (p) { + p -= lc->element_off; + kmem_cache_free(lc->lc_cache, p); + } +} + +/** + * lc_destroy - frees memory allocated by lc_create() + * @lc: the lru cache to destroy + */ +void lc_destroy(struct lru_cache *lc) +{ + unsigned i; + if (!lc) + return; + for (i = 0; i < lc->nr_elements; i++) + lc_free_by_index(lc, i); + kfree(lc->lc_element); + kfree(lc->lc_slot); + kfree(lc); +} + +/** + * lc_reset - does a full reset for @lc and the hash table slots. + * @lc: the lru cache to operate on + * + * It is roughly the equivalent of re-allocating a fresh lru_cache object, + * basically a short cut to lc_destroy(lc); lc = lc_create(...); + */ +void lc_reset(struct lru_cache *lc) +{ + unsigned i; + + INIT_LIST_HEAD(&lc->in_use); + INIT_LIST_HEAD(&lc->lru); + INIT_LIST_HEAD(&lc->free); + lc->used = 0; + lc->hits = 0; + lc->misses = 0; + lc->starving = 0; + lc->dirty = 0; + lc->changed = 0; + lc->flags = 0; + lc->changing_element = NULL; + lc->new_number = LC_FREE; + memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); + + for (i = 0; i < lc->nr_elements; i++) { + struct lc_element *e = lc->lc_element[i]; + void *p = e; + p -= lc->element_off; + memset(p, 0, lc->element_size); + /* re-init it */ + e->lc_index = i; + e->lc_number = LC_FREE; + list_add(&e->list, &lc->free); + } +} + +/** + * lc_seq_printf_stats - print stats about @lc into @seq + * @seq: the seq_file to print into + * @lc: the lru cache to print statistics of + */ +size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) +{ + /* NOTE: + * total calls to lc_get are + * (starving + hits + misses) + * misses include "dirty" count (update from an other thread in + * progress) and "changed", when this in fact lead to an successful + * update of the cache. + */ + return seq_printf(seq, "\t%s: used:%u/%u " + "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); +} + +static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) +{ + return lc->lc_slot + (enr % lc->nr_elements); +} + + +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +{ + struct hlist_node *n; + struct lc_element *e; + + BUG_ON(!lc); + BUG_ON(!lc->nr_elements); + hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { + if (e->lc_number == enr) + return e; + } + return NULL; +} + +/* returned element will be "recycled" immediately */ +static struct lc_element *lc_evict(struct lru_cache *lc) +{ + struct list_head *n; + struct lc_element *e; + + if (list_empty(&lc->lru)) + return NULL; + + n = lc->lru.prev; + e = list_entry(n, struct lc_element, list); + + PARANOIA_LC_ELEMENT(lc, e); + + list_del(&e->list); + hlist_del(&e->colision); + return e; +} + +/** + * lc_del - removes an element from the cache + * @lc: The lru_cache object + * @e: The element to remove + * + * @e must be unused (refcnt == 0). Moves @e from "lru" to "free" list, + * sets @e->enr to %LC_FREE. + */ +void lc_del(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt); + + e->lc_number = LC_FREE; + hlist_del_init(&e->colision); + list_move(&e->list, &lc->free); + RETURN(); +} + +static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +{ + struct list_head *n; + + if (list_empty(&lc->free)) + return lc_evict(lc); + + n = lc->free.next; + list_del(n); + return list_entry(n, struct lc_element, list); +} + +static int lc_unused_element_available(struct lru_cache *lc) +{ + if (!list_empty(&lc->free)) + return 1; /* something on the free list */ + if (!list_empty(&lc->lru)) + return 1; /* something to evict */ + + return 0; +} + + +/** + * lc_get - get element by label, maybe change the active set + * @lc: the lru cache to operate on + * @enr: the label to look up + * + * Finds an element in the cache, increases its usage count, + * "touches" and returns it. + * + * In case the requested number is not present, it needs to be added to the + * cache. Therefore it is possible that an other element becomes evicted from + * the cache. In either case, the user is notified so he is able to e.g. keep + * a persistent log of the cache changes, and therefore the objects in use. + * + * Return values: + * NULL + * The cache was marked %LC_STARVING, + * or the requested label was not in the active set + * and a changing transaction is still pending (@lc was marked %LC_DIRTY). + * Or no unused or free element could be recycled (@lc will be marked as + * %LC_STARVING, blocking further lc_get() operations). + * + * pointer to the element with the REQUESTED element number. + * In this case, it can be used right away + * + * pointer to an UNUSED element with some different element number, + * where that different number may also be %LC_FREE. + * + * In this case, the cache is marked %LC_DIRTY (blocking further changes), + * and the returned element pointer is removed from the lru list and + * hash collision chains. The user now should do whatever housekeeping + * is necessary. + * Then he must call lc_changed(lc,element_pointer), to finish + * the change. + * + * NOTE: The user needs to check the lc_number on EACH use, so he recognizes + * any cache set change. + */ +struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + RETURN(e); + } + + ++lc->misses; + + /* In case there is nothing available and we can not kick out + * the LRU element, we have to wait ... + */ + if (!lc_unused_element_available(lc)) { + __set_bit(__LC_STARVING, &lc->flags); + RETURN(NULL); + } + + /* it was not present in the active set. + * we are going to recycle an unused (or even "free") element. + * user may need to commit a transaction to record that change. + * we serialize on flags & TF_DIRTY */ + if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { + ++lc->dirty; + RETURN(NULL); + } + + e = lc_get_unused_element(lc); + BUG_ON(!e); + + clear_bit(__LC_STARVING, &lc->flags); + BUG_ON(++e->refcnt != 1); + lc->used++; + + lc->changing_element = e; + lc->new_number = enr; + + RETURN(e); +} + +/* similar to lc_get, + * but only gets a new reference on an existing element. + * you either get the requested element, or NULL. + * will be consolidated into one function. + */ +struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e; + + PARANOIA_ENTRY(); + if (lc->flags & LC_STARVING) { + ++lc->starving; + RETURN(NULL); + } + + e = lc_find(lc, enr); + if (e) { + ++lc->hits; + if (e->refcnt++ == 0) + lc->used++; + list_move(&e->list, &lc->in_use); /* Not evictable... */ + } + RETURN(e); +} + +/** + * lc_changed - tell @lc that the change has been recorded + * @lc: the lru cache to operate on + * @e: the element pending label change + */ +void lc_changed(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + BUG_ON(e != lc->changing_element); + PARANOIA_LC_ELEMENT(lc, e); + ++lc->changed; + e->lc_number = lc->new_number; + list_add(&e->list, &lc->in_use); + hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); + lc->changing_element = NULL; + lc->new_number = LC_FREE; + clear_bit(__LC_DIRTY, &lc->flags); + smp_mb__after_clear_bit(); + RETURN(); +} + + +/** + * lc_put - give up refcnt of @e + * @lc: the lru cache to operate on + * @e: the element to put + * + * If refcnt reaches zero, the element is moved to the lru list, + * and a %LC_STARVING (if set) is cleared. + * Returns the new (post-decrement) refcnt. + */ +unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_ENTRY(); + PARANOIA_LC_ELEMENT(lc, e); + BUG_ON(e->refcnt == 0); + BUG_ON(e == lc->changing_element); + if (--e->refcnt == 0) { + /* move it to the front of LRU. */ + list_move(&e->list, &lc->lru); + lc->used--; + clear_bit(__LC_STARVING, &lc->flags); + smp_mb__after_clear_bit(); + } + RETURN(e->refcnt); +} + +/** + * lc_element_by_index + * @lc: the lru cache to operate on + * @i: the index of the element to return + */ +struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i) +{ + BUG_ON(i >= lc->nr_elements); + BUG_ON(lc->lc_element[i] == NULL); + BUG_ON(lc->lc_element[i]->lc_index != i); + return lc->lc_element[i]; +} + +/** + * lc_index_of + * @lc: the lru cache to operate on + * @e: the element to query for its index position in lc->element + */ +unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) +{ + PARANOIA_LC_ELEMENT(lc, e); + return e->lc_index; +} + +/** + * lc_set - associate index with label + * @lc: the lru cache to operate on + * @enr: the label to set + * @index: the element index to associate label with. + * + * Used to initialize the active set to some previously recorded state. + */ +void lc_set(struct lru_cache *lc, unsigned int enr, int index) +{ + struct lc_element *e; + + if (index < 0 || index >= lc->nr_elements) + return; + + e = lc_element_by_index(lc, index); + e->lc_number = enr; + + hlist_del_init(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); +} + +/** + * lc_dump - Dump a complete LRU cache to seq in textual form. + * @lc: the lru cache to operate on + * @seq: the &struct seq_file pointer to seq_printf into + * @utext: user supplied "heading" or other info + * @detail: function pointer the user may provide to dump further details + * of the object the lc_element is embedded in. + */ +void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, + void (*detail) (struct seq_file *, struct lc_element *)) +{ + unsigned int nr_elements = lc->nr_elements; + struct lc_element *e; + int i; + + seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); + for (i = 0; i < nr_elements; i++) { + e = lc_element_by_index(lc, i); + if (e->lc_number == LC_FREE) { + seq_printf(seq, "\t%2d: FREE\n", i); + } else { + seq_printf(seq, "\t%2d: %4u %4u ", i, + e->lc_number, e->refcnt); + detail(seq, e); + } + } +} + +EXPORT_SYMBOL(lc_create); +EXPORT_SYMBOL(lc_reset); +EXPORT_SYMBOL(lc_destroy); +EXPORT_SYMBOL(lc_set); +EXPORT_SYMBOL(lc_del); +EXPORT_SYMBOL(lc_try_get); +EXPORT_SYMBOL(lc_find); +EXPORT_SYMBOL(lc_get); +EXPORT_SYMBOL(lc_put); +EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_element_by_index); +EXPORT_SYMBOL(lc_index_of); +EXPORT_SYMBOL(lc_seq_printf_stats); +EXPORT_SYMBOL(lc_seq_dump_details); diff --git a/lib/lzo/Makefile b/lib/lzo/Makefile new file mode 100644 index 00000000..e764116e --- /dev/null +++ b/lib/lzo/Makefile @@ -0,0 +1,5 @@ +lzo_compress-objs := lzo1x_compress.o +lzo_decompress-objs := lzo1x_decompress.o + +obj-$(CONFIG_LZO_COMPRESS) += lzo_compress.o +obj-$(CONFIG_LZO_DECOMPRESS) += lzo_decompress.o diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c new file mode 100644 index 00000000..a6040990 --- /dev/null +++ b/lib/lzo/lzo1x_compress.c @@ -0,0 +1,226 @@ +/* + * LZO1X Compressor from MiniLZO + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta <nitingupta910@gmail.com> + * Richard Purdie <rpurdie@openedhand.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/lzo.h> +#include <asm/unaligned.h> +#include "lzodefs.h" + +static noinline size_t +_lzo1x_1_do_compress(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len, void *wrkmem) +{ + const unsigned char * const in_end = in + in_len; + const unsigned char * const ip_end = in + in_len - M2_MAX_LEN - 5; + const unsigned char ** const dict = wrkmem; + const unsigned char *ip = in, *ii = ip; + const unsigned char *end, *m, *m_pos; + size_t m_off, m_len, dindex; + unsigned char *op = out; + + ip += 4; + + for (;;) { + dindex = ((size_t)(0x21 * DX3(ip, 5, 5, 6)) >> 5) & D_MASK; + m_pos = dict[dindex]; + + if (m_pos < in) + goto literal; + + if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) + goto literal; + + m_off = ip - m_pos; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + + dindex = (dindex & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f); + m_pos = dict[dindex]; + + if (m_pos < in) + goto literal; + + if (ip == m_pos || ((size_t)(ip - m_pos) > M4_MAX_OFFSET)) + goto literal; + + m_off = ip - m_pos; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + + goto literal; + +try_match: + if (get_unaligned((const unsigned short *)m_pos) + == get_unaligned((const unsigned short *)ip)) { + if (likely(m_pos[2] == ip[2])) + goto match; + } + +literal: + dict[dindex] = ip; + ++ip; + if (unlikely(ip >= ip_end)) + break; + continue; + +match: + dict[dindex] = ip; + if (ip != ii) { + size_t t = ip - ii; + + if (t <= 3) { + op[-2] |= t; + } else if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + *op++ = tt; + } + do { + *op++ = *ii++; + } while (--t > 0); + } + + ip += 3; + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ + || m_pos[5] != *ip++ || m_pos[6] != *ip++ + || m_pos[7] != *ip++ || m_pos[8] != *ip++) { + --ip; + m_len = ip - ii; + + if (m_off <= M2_MAX_OFFSET) { + m_off -= 1; + *op++ = (((m_len - 1) << 5) + | ((m_off & 7) << 2)); + *op++ = (m_off >> 3); + } else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + *op++ = (M3_MARKER | (m_len - 2)); + goto m3_m4_offset; + } else { + m_off -= 0x4000; + + *op++ = (M4_MARKER | ((m_off & 0x4000) >> 11) + | (m_len - 2)); + goto m3_m4_offset; + } + } else { + end = in_end; + m = m_pos + M2_MAX_LEN + 1; + + while (ip < end && *m == *ip) { + m++; + ip++; + } + m_len = ip - ii; + + if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= 33) { + *op++ = (M3_MARKER | (m_len - 2)); + } else { + m_len -= 33; + *op++ = M3_MARKER | 0; + goto m3_m4_len; + } + } else { + m_off -= 0x4000; + if (m_len <= M4_MAX_LEN) { + *op++ = (M4_MARKER + | ((m_off & 0x4000) >> 11) + | (m_len - 2)); + } else { + m_len -= M4_MAX_LEN; + *op++ = (M4_MARKER + | ((m_off & 0x4000) >> 11)); +m3_m4_len: + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + + *op++ = (m_len); + } + } +m3_m4_offset: + *op++ = ((m_off & 63) << 2); + *op++ = (m_off >> 6); + } + + ii = ip; + if (unlikely(ip >= ip_end)) + break; + } + + *out_len = op - out; + return in_end - ii; +} + +int lzo1x_1_compress(const unsigned char *in, size_t in_len, unsigned char *out, + size_t *out_len, void *wrkmem) +{ + const unsigned char *ii; + unsigned char *op = out; + size_t t; + + if (unlikely(in_len <= M2_MAX_LEN + 5)) { + t = in_len; + } else { + t = _lzo1x_1_do_compress(in, in_len, op, out_len, wrkmem); + op += *out_len; + } + + if (t > 0) { + ii = in + in_len - t; + + if (op == out && t <= 238) { + *op++ = (17 + t); + } else if (t <= 3) { + op[-2] |= t; + } else if (t <= 18) { + *op++ = (t - 3); + } else { + size_t tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + + *op++ = tt; + } + do { + *op++ = *ii++; + } while (--t > 0); + } + + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = op - out; + return LZO_E_OK; +} +EXPORT_SYMBOL_GPL(lzo1x_1_compress); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X-1 Compressor"); + diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c new file mode 100644 index 00000000..f2fd0985 --- /dev/null +++ b/lib/lzo/lzo1x_decompress.c @@ -0,0 +1,255 @@ +/* + * LZO1X Decompressor from MiniLZO + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta <nitingupta910@gmail.com> + * Richard Purdie <rpurdie@openedhand.com> + */ + +#ifndef STATIC +#include <linux/module.h> +#include <linux/kernel.h> +#endif + +#include <asm/unaligned.h> +#include <linux/lzo.h> +#include "lzodefs.h" + +#define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) +#define HAVE_OP(x, op_end, op) ((size_t)(op_end - op) < (x)) +#define HAVE_LB(m_pos, out, op) (m_pos < out || m_pos >= op) + +#define COPY4(dst, src) \ + put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst)) + +int lzo1x_decompress_safe(const unsigned char *in, size_t in_len, + unsigned char *out, size_t *out_len) +{ + const unsigned char * const ip_end = in + in_len; + unsigned char * const op_end = out + *out_len; + const unsigned char *ip = in, *m_pos; + unsigned char *op = out; + size_t t; + + *out_len = 0; + + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) + goto match_next; + if (HAVE_OP(t, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 1, ip_end, ip)) + goto input_overrun; + do { + *op++ = *ip++; + } while (--t > 0); + goto first_literal_run; + } + + while ((ip < ip_end)) { + t = *ip++; + if (t >= 16) + goto match; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 15 + *ip++; + } + if (HAVE_OP(t + 3, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 4, ip_end, ip)) + goto input_overrun; + + COPY4(op, ip); + op += 4; + ip += 4; + if (--t > 0) { + if (t >= 4) { + do { + COPY4(op, ip); + op += 4; + ip += 4; + t -= 4; + } while (t >= 4); + if (t > 0) { + do { + *op++ = *ip++; + } while (--t > 0); + } + } else { + do { + *op++ = *ip++; + } while (--t > 0); + } + } + +first_literal_run: + t = *ip++; + if (t >= 16) + goto match; + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + + if (HAVE_OP(3, op_end, op)) + goto output_overrun; + *op++ = *m_pos++; + *op++ = *m_pos++; + *op++ = *m_pos; + + goto match_done; + + do { +match: + if (t >= 64) { + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(t + 3 - 1, op_end, op)) + goto output_overrun; + goto copy_match; + } else if (t >= 32) { + t &= 31; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 31 + *ip++; + } + m_pos = op - 1; + m_pos -= get_unaligned_le16(ip) >> 2; + ip += 2; + } else if (t >= 16) { + m_pos = op; + m_pos -= (t & 8) << 11; + + t &= 7; + if (t == 0) { + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + while (*ip == 0) { + t += 255; + ip++; + if (HAVE_IP(1, ip_end, ip)) + goto input_overrun; + } + t += 7 + *ip++; + } + m_pos -= get_unaligned_le16(ip) >> 2; + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; + } else { + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(2, op_end, op)) + goto output_overrun; + + *op++ = *m_pos++; + *op++ = *m_pos; + goto match_done; + } + + if (HAVE_LB(m_pos, out, op)) + goto lookbehind_overrun; + if (HAVE_OP(t + 3 - 1, op_end, op)) + goto output_overrun; + + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { + COPY4(op, m_pos); + op += 4; + m_pos += 4; + t -= 4 - (3 - 1); + do { + COPY4(op, m_pos); + op += 4; + m_pos += 4; + t -= 4; + } while (t >= 4); + if (t > 0) + do { + *op++ = *m_pos++; + } while (--t > 0); + } else { +copy_match: + *op++ = *m_pos++; + *op++ = *m_pos++; + do { + *op++ = *m_pos++; + } while (--t > 0); + } +match_done: + t = ip[-2] & 3; + if (t == 0) + break; +match_next: + if (HAVE_OP(t, op_end, op)) + goto output_overrun; + if (HAVE_IP(t + 1, ip_end, ip)) + goto input_overrun; + + *op++ = *ip++; + if (t > 1) { + *op++ = *ip++; + if (t > 2) + *op++ = *ip++; + } + + t = *ip++; + } while (ip < ip_end); + } + + *out_len = op - out; + return LZO_E_EOF_NOT_FOUND; + +eof_found: + *out_len = op - out; + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); +input_overrun: + *out_len = op - out; + return LZO_E_INPUT_OVERRUN; + +output_overrun: + *out_len = op - out; + return LZO_E_OUTPUT_OVERRUN; + +lookbehind_overrun: + *out_len = op - out; + return LZO_E_LOOKBEHIND_OVERRUN; +} +#ifndef STATIC +EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZO1X Decompressor"); + +#endif diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h new file mode 100644 index 00000000..b6d482c4 --- /dev/null +++ b/lib/lzo/lzodefs.h @@ -0,0 +1,43 @@ +/* + * lzodefs.h -- architecture, OS and compiler specific defines + * + * Copyright (C) 1996-2005 Markus F.X.J. Oberhumer <markus@oberhumer.com> + * + * The full LZO package can be found at: + * http://www.oberhumer.com/opensource/lzo/ + * + * Changed for kernel use by: + * Nitin Gupta <nitingupta910@gmail.com> + * Richard Purdie <rpurdie@openedhand.com> + */ + +#define LZO_VERSION 0x2020 +#define LZO_VERSION_STRING "2.02" +#define LZO_VERSION_DATE "Oct 17 2005" + +#define M1_MAX_OFFSET 0x0400 +#define M2_MAX_OFFSET 0x0800 +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff + +#define M1_MIN_LEN 2 +#define M1_MAX_LEN 2 +#define M2_MIN_LEN 3 +#define M2_MAX_LEN 8 +#define M3_MIN_LEN 3 +#define M3_MAX_LEN 33 +#define M4_MIN_LEN 3 +#define M4_MAX_LEN 9 + +#define M1_MARKER 0 +#define M2_MARKER 64 +#define M3_MARKER 32 +#define M4_MARKER 16 + +#define D_BITS 14 +#define D_MASK ((1u << D_BITS) - 1) +#define D_HIGH ((D_MASK >> 1) + 1) + +#define DX2(p, s1, s2) (((((size_t)((p)[2]) << (s2)) ^ (p)[1]) \ + << (s1)) ^ (p)[0]) +#define DX3(p, s1, s2, s3) ((DX2((p)+1, s2, s3) << (s1)) ^ (p)[0]) diff --git a/lib/nlattr.c b/lib/nlattr.c new file mode 100644 index 00000000..c4706eb9 --- /dev/null +++ b/lib/nlattr.c @@ -0,0 +1,502 @@ +/* + * NETLINK Netlink attributes + * + * Authors: Thomas Graf <tgraf@suug.ch> + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/jiffies.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/string.h> +#include <linux/types.h> +#include <net/netlink.h> + +static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { + [NLA_U8] = sizeof(u8), + [NLA_U16] = sizeof(u16), + [NLA_U32] = sizeof(u32), + [NLA_U64] = sizeof(u64), + [NLA_NESTED] = NLA_HDRLEN, +}; + +static int validate_nla(struct nlattr *nla, int maxtype, + const struct nla_policy *policy) +{ + const struct nla_policy *pt; + int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); + + if (type <= 0 || type > maxtype) + return 0; + + pt = &policy[type]; + + BUG_ON(pt->type > NLA_TYPE_MAX); + + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; + + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; + + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + case NLA_BINARY: + if (pt->len && attrlen > pt->len) + return -ERANGE; + break; + + case NLA_NESTED_COMPAT: + if (attrlen < pt->len) + return -ERANGE; + if (attrlen < NLA_ALIGN(pt->len)) + break; + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) + return -ERANGE; + nla = nla_data(nla) + NLA_ALIGN(pt->len); + if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) + return -ERANGE; + break; + case NLA_NESTED: + /* a nested attributes is allowed to be empty; if its not, + * it must have a size of at least NLA_HDRLEN. + */ + if (attrlen == 0) + break; + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } + + return 0; +} + +/** + * nla_validate - Validate a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the specified attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +int nla_validate(struct nlattr *head, int len, int maxtype, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + nla_for_each_attr(nla, head, len, rem) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + err = 0; +errout: + return err; +} + +/** + * nla_policy_len - Determin the max. length of a policy + * @policy: policy to use + * @n: number of policies + * + * Determines the max. length of the policy. It is currently used + * to allocated Netlink buffers roughly the size of the actual + * message. + * + * Returns 0 on success or a negative error code. + */ +int +nla_policy_len(const struct nla_policy *p, int n) +{ + int i, len = 0; + + for (i = 0; i < n; i++) { + if (p->len) + len += nla_total_size(p->len); + else if (nla_attr_minlen[p->type]) + len += nla_total_size(nla_attr_minlen[p->type]); + } + + return len; +} + +/** + * nla_parse - Parse a stream of attributes into a tb buffer + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @head: head of attribute stream + * @len: length of attribute stream + * @policy: validation policy + * + * Parses a stream of attributes and stores a pointer to each attribute in + * the tb array accessable via the attribute type. Attributes with a type + * exceeding maxtype will be silently ignored for backwards compatibility + * reasons. policy may be set to NULL if no validation is required. + * + * Returns 0 on success or a negative error code. + */ +int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, + const struct nla_policy *policy) +{ + struct nlattr *nla; + int rem, err; + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + + nla_for_each_attr(nla, head, len, rem) { + u16 type = nla_type(nla); + + if (type > 0 && type <= maxtype) { + if (policy) { + err = validate_nla(nla, maxtype, policy); + if (err < 0) + goto errout; + } + + tb[type] = nla; + } + } + + if (unlikely(rem > 0)) + printk(KERN_WARNING "netlink: %d bytes leftover after parsing " + "attributes.\n", rem); + + err = 0; +errout: + return err; +} + +/** + * nla_find - Find a specific attribute in a stream of attributes + * @head: head of attribute stream + * @len: length of attribute stream + * @attrtype: type of attribute to look for + * + * Returns the first attribute in the stream matching the specified type. + */ +struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +{ + struct nlattr *nla; + int rem; + + nla_for_each_attr(nla, head, len, rem) + if (nla_type(nla) == attrtype) + return nla; + + return NULL; +} + +/** + * nla_strlcpy - Copy string attribute payload into a sized buffer + * @dst: where to copy the string to + * @nla: attribute to copy the string from + * @dstsize: size of destination buffer + * + * Copies at most dstsize - 1 bytes into the destination buffer. + * The result is always a valid NUL-terminated string. Unlike + * strlcpy the destination buffer is always padded out. + * + * Returns the length of the source buffer. + */ +size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) +{ + size_t srclen = nla_len(nla); + char *src = nla_data(nla); + + if (srclen > 0 && src[srclen - 1] == '\0') + srclen--; + + if (dstsize > 0) { + size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; + + memset(dst, 0, dstsize); + memcpy(dst, src, len); + } + + return srclen; +} + +/** + * nla_memcpy - Copy a netlink attribute into another memory area + * @dest: where to copy to memcpy + * @src: netlink attribute to copy from + * @count: size of the destination area + * + * Note: The number of bytes copied is limited by the length of + * attribute's payload. memcpy + * + * Returns the number of bytes copied. + */ +int nla_memcpy(void *dest, const struct nlattr *src, int count) +{ + int minlen = min_t(int, count, nla_len(src)); + + memcpy(dest, nla_data(src), minlen); + + return minlen; +} + +/** + * nla_memcmp - Compare an attribute with sized memory area + * @nla: netlink attribute + * @data: memory area + * @size: size of memory area + */ +int nla_memcmp(const struct nlattr *nla, const void *data, + size_t size) +{ + int d = nla_len(nla) - size; + + if (d == 0) + d = memcmp(nla_data(nla), data, size); + + return d; +} + +/** + * nla_strcmp - Compare a string attribute against a string + * @nla: netlink string attribute + * @str: another string + */ +int nla_strcmp(const struct nlattr *nla, const char *str) +{ + int len = strlen(str) + 1; + int d = nla_len(nla) - len; + + if (d == 0) + d = memcmp(nla_data(nla), str, len); + + return d; +} + +#ifdef CONFIG_NET +/** + * __nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + struct nlattr *nla; + + nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); + nla->nla_type = attrtype; + nla->nla_len = nla_attr_size(attrlen); + + memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); + + return nla; +} +EXPORT_SYMBOL(__nla_reserve); + +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. + */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} +EXPORT_SYMBOL(__nla_reserve_nohdr); + +/** + * nla_reserve - reserve room for attribute on the skb + * @skb: socket buffer to reserve room on + * @attrtype: attribute type + * @attrlen: length of attribute payload + * + * Adds a netlink attribute header to a socket buffer and reserves + * room for the payload but does not copy it. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return NULL; + + return __nla_reserve(skb, attrtype, attrlen); +} +EXPORT_SYMBOL(nla_reserve); + +/** + * nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} +EXPORT_SYMBOL(nla_reserve_nohdr); + +/** + * __nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute header and payload. + */ +void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, + const void *data) +{ + struct nlattr *nla; + + nla = __nla_reserve(skb, attrtype, attrlen); + memcpy(nla_data(nla), data, attrlen); +} +EXPORT_SYMBOL(__nla_put); + +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} +EXPORT_SYMBOL(__nla_put_nohdr); + +/** + * nla_put - Add a netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to + * @attrtype: attribute type + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute header and payload. + */ +int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) + return -EMSGSIZE; + + __nla_put(skb, attrtype, attrlen, data); + return 0; +} +EXPORT_SYMBOL(nla_put); + +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -EMSGSIZE; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} +EXPORT_SYMBOL(nla_put_nohdr); + +/** + * nla_append - Add a netlink attribute without header or padding + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_append(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -EMSGSIZE; + + memcpy(skb_put(skb, attrlen), data, attrlen); + return 0; +} +EXPORT_SYMBOL(nla_append); +#endif + +EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(nla_policy_len); +EXPORT_SYMBOL(nla_parse); +EXPORT_SYMBOL(nla_find); +EXPORT_SYMBOL(nla_strlcpy); +EXPORT_SYMBOL(nla_memcpy); +EXPORT_SYMBOL(nla_memcmp); +EXPORT_SYMBOL(nla_strcmp); diff --git a/lib/parser.c b/lib/parser.c new file mode 100644 index 00000000..fb349772 --- /dev/null +++ b/lib/parser.c @@ -0,0 +1,231 @@ +/* + * lib/parser.c - simple parser for mount, etc. options. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/ctype.h> +#include <linux/module.h> +#include <linux/parser.h> +#include <linux/slab.h> +#include <linux/string.h> + +/** + * match_one: - Determines if a string matches a simple pattern + * @s: the string to examine for presense of the pattern + * @p: the string containing the pattern + * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match + * locations. + * + * Description: Determines if the pattern @p is present in string @s. Can only + * match extremely simple token=arg style patterns. If the pattern is found, + * the location(s) of the arguments will be returned in the @args array. + */ +static int match_one(char *s, const char *p, substring_t args[]) +{ + char *meta; + int argc = 0; + + if (!p) + return 1; + + while(1) { + int len = -1; + meta = strchr(p, '%'); + if (!meta) + return strcmp(p, s) == 0; + + if (strncmp(p, s, meta-p)) + return 0; + + s += meta - p; + p = meta + 1; + + if (isdigit(*p)) + len = simple_strtoul(p, (char **) &p, 10); + else if (*p == '%') { + if (*s++ != '%') + return 0; + p++; + continue; + } + + if (argc >= MAX_OPT_ARGS) + return 0; + + args[argc].from = s; + switch (*p++) { + case 's': { + size_t str_len = strlen(s); + + if (str_len == 0) + return 0; + if (len == -1 || len > str_len) + len = str_len; + args[argc].to = s + len; + break; + } + case 'd': + simple_strtol(s, &args[argc].to, 0); + goto num; + case 'u': + simple_strtoul(s, &args[argc].to, 0); + goto num; + case 'o': + simple_strtoul(s, &args[argc].to, 8); + goto num; + case 'x': + simple_strtoul(s, &args[argc].to, 16); + num: + if (args[argc].to == args[argc].from) + return 0; + break; + default: + return 0; + } + s = args[argc].to; + argc++; + } +} + +/** + * match_token: - Find a token (and optional args) in a string + * @s: the string to examine for token/argument pairs + * @table: match_table_t describing the set of allowed option tokens and the + * arguments that may be associated with them. Must be terminated with a + * &struct match_token whose pattern is set to the NULL pointer. + * @args: array of %MAX_OPT_ARGS &substring_t elements. Used to return match + * locations. + * + * Description: Detects which if any of a set of token strings has been passed + * to it. Tokens can include up to MAX_OPT_ARGS instances of basic c-style + * format identifiers which will be taken into account when matching the + * tokens, and whose locations will be returned in the @args array. + */ +int match_token(char *s, const match_table_t table, substring_t args[]) +{ + const struct match_token *p; + + for (p = table; !match_one(s, p->pattern, args) ; p++) + ; + + return p->token; +} + +/** + * match_number: scan a number in the given base from a substring_t + * @s: substring to be scanned + * @result: resulting integer on success + * @base: base to use when converting string + * + * Description: Given a &substring_t and a base, attempts to parse the substring + * as a number in that base. On success, sets @result to the integer represented + * by the string and returns 0. Returns either -ENOMEM or -EINVAL on failure. + */ +static int match_number(substring_t *s, int *result, int base) +{ + char *endp; + char *buf; + int ret; + + buf = kmalloc(s->to - s->from + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + memcpy(buf, s->from, s->to - s->from); + buf[s->to - s->from] = '\0'; + *result = simple_strtol(buf, &endp, base); + ret = 0; + if (endp == buf) + ret = -EINVAL; + kfree(buf); + return ret; +} + +/** + * match_int: - scan a decimal representation of an integer from a substring_t + * @s: substring_t to be scanned + * @result: resulting integer on success + * + * Description: Attempts to parse the &substring_t @s as a decimal integer. On + * success, sets @result to the integer represented by the string and returns 0. + * Returns either -ENOMEM or -EINVAL on failure. + */ +int match_int(substring_t *s, int *result) +{ + return match_number(s, result, 0); +} + +/** + * match_octal: - scan an octal representation of an integer from a substring_t + * @s: substring_t to be scanned + * @result: resulting integer on success + * + * Description: Attempts to parse the &substring_t @s as an octal integer. On + * success, sets @result to the integer represented by the string and returns + * 0. Returns either -ENOMEM or -EINVAL on failure. + */ +int match_octal(substring_t *s, int *result) +{ + return match_number(s, result, 8); +} + +/** + * match_hex: - scan a hex representation of an integer from a substring_t + * @s: substring_t to be scanned + * @result: resulting integer on success + * + * Description: Attempts to parse the &substring_t @s as a hexadecimal integer. + * On success, sets @result to the integer represented by the string and + * returns 0. Returns either -ENOMEM or -EINVAL on failure. + */ +int match_hex(substring_t *s, int *result) +{ + return match_number(s, result, 16); +} + +/** + * match_strlcpy: - Copy the characters from a substring_t to a sized buffer + * @dest: where to copy to + * @src: &substring_t to copy + * @size: size of destination buffer + * + * Description: Copy the characters in &substring_t @src to the + * c-style string @dest. Copy no more than @size - 1 characters, plus + * the terminating NUL. Return length of @src. + */ +size_t match_strlcpy(char *dest, const substring_t *src, size_t size) +{ + size_t ret = src->to - src->from; + + if (size) { + size_t len = ret >= size ? size - 1 : ret; + memcpy(dest, src->from, len); + dest[len] = '\0'; + } + return ret; +} + +/** + * match_strdup: - allocate a new string with the contents of a substring_t + * @s: &substring_t to copy + * + * Description: Allocates and returns a string filled with the contents of + * the &substring_t @s. The caller is responsible for freeing the returned + * string with kfree(). + */ +char *match_strdup(const substring_t *s) +{ + size_t sz = s->to - s->from + 1; + char *p = kmalloc(sz, GFP_KERNEL); + if (p) + match_strlcpy(p, s, sz); + return p; +} + +EXPORT_SYMBOL(match_token); +EXPORT_SYMBOL(match_int); +EXPORT_SYMBOL(match_octal); +EXPORT_SYMBOL(match_hex); +EXPORT_SYMBOL(match_strlcpy); +EXPORT_SYMBOL(match_strdup); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c new file mode 100644 index 00000000..209448e1 --- /dev/null +++ b/lib/percpu_counter.c @@ -0,0 +1,174 @@ +/* + * Fast batching percpu counters. + */ + +#include <linux/percpu_counter.h> +#include <linux/notifier.h> +#include <linux/mutex.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/module.h> + +static LIST_HEAD(percpu_counters); +static DEFINE_MUTEX(percpu_counters_lock); + +void percpu_counter_set(struct percpu_counter *fbc, s64 amount) +{ + int cpu; + + spin_lock(&fbc->lock); + for_each_possible_cpu(cpu) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + *pcount = 0; + } + fbc->count = amount; + spin_unlock(&fbc->lock); +} +EXPORT_SYMBOL(percpu_counter_set); + +void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) +{ + s64 count; + s32 *pcount; + int cpu = get_cpu(); + + pcount = per_cpu_ptr(fbc->counters, cpu); + count = *pcount + amount; + if (count >= batch || count <= -batch) { + spin_lock(&fbc->lock); + fbc->count += count; + *pcount = 0; + spin_unlock(&fbc->lock); + } else { + *pcount = count; + } + put_cpu(); +} +EXPORT_SYMBOL(__percpu_counter_add); + +/* + * Add up all the per-cpu counts, return the result. This is a more accurate + * but much slower version of percpu_counter_read_positive() + */ +s64 __percpu_counter_sum(struct percpu_counter *fbc) +{ + s64 ret; + int cpu; + + spin_lock(&fbc->lock); + ret = fbc->count; + for_each_online_cpu(cpu) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + spin_unlock(&fbc->lock); + return ret; +} +EXPORT_SYMBOL(__percpu_counter_sum); + +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, + struct lock_class_key *key) +{ + spin_lock_init(&fbc->lock); + lockdep_set_class(&fbc->lock, key); + fbc->count = amount; + fbc->counters = alloc_percpu(s32); + if (!fbc->counters) + return -ENOMEM; +#ifdef CONFIG_HOTPLUG_CPU + INIT_LIST_HEAD(&fbc->list); + mutex_lock(&percpu_counters_lock); + list_add(&fbc->list, &percpu_counters); + mutex_unlock(&percpu_counters_lock); +#endif + return 0; +} +EXPORT_SYMBOL(__percpu_counter_init); + +void percpu_counter_destroy(struct percpu_counter *fbc) +{ + if (!fbc->counters) + return; + +#ifdef CONFIG_HOTPLUG_CPU + mutex_lock(&percpu_counters_lock); + list_del(&fbc->list); + mutex_unlock(&percpu_counters_lock); +#endif + free_percpu(fbc->counters); + fbc->counters = NULL; +} +EXPORT_SYMBOL(percpu_counter_destroy); + +int percpu_counter_batch __read_mostly = 32; +EXPORT_SYMBOL(percpu_counter_batch); + +static void compute_batch_value(void) +{ + int nr = num_online_cpus(); + + percpu_counter_batch = max(32, nr*2); +} + +static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, + unsigned long action, void *hcpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + unsigned int cpu; + struct percpu_counter *fbc; + + compute_batch_value(); + if (action != CPU_DEAD) + return NOTIFY_OK; + + cpu = (unsigned long)hcpu; + mutex_lock(&percpu_counters_lock); + list_for_each_entry(fbc, &percpu_counters, list) { + s32 *pcount; + unsigned long flags; + + spin_lock_irqsave(&fbc->lock, flags); + pcount = per_cpu_ptr(fbc->counters, cpu); + fbc->count += *pcount; + *pcount = 0; + spin_unlock_irqrestore(&fbc->lock, flags); + } + mutex_unlock(&percpu_counters_lock); +#endif + return NOTIFY_OK; +} + +/* + * Compare counter against given value. + * Return 1 if greater, 0 if equal and -1 if less + */ +int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) +{ + s64 count; + + count = percpu_counter_read(fbc); + /* Check to see if rough count will be sufficient for comparison */ + if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) { + if (count > rhs) + return 1; + else + return -1; + } + /* Need to use precise count */ + count = percpu_counter_sum(fbc); + if (count > rhs) + return 1; + else if (count < rhs) + return -1; + else + return 0; +} +EXPORT_SYMBOL(percpu_counter_compare); + +static int __init percpu_counter_startup(void) +{ + compute_batch_value(); + hotcpu_notifier(percpu_counter_hotcpu_callback, 0); + return 0; +} +module_init(percpu_counter_startup); diff --git a/lib/plist.c b/lib/plist.c new file mode 100644 index 00000000..1471988d --- /dev/null +++ b/lib/plist.c @@ -0,0 +1,121 @@ +/* + * lib/plist.c + * + * Descending-priority-sorted double-linked list + * + * (C) 2002-2003 Intel Corp + * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>. + * + * 2001-2005 (c) MontaVista Software, Inc. + * Daniel Walker <dwalker@mvista.com> + * + * (C) 2005 Thomas Gleixner <tglx@linutronix.de> + * + * Simplifications of the original code by + * Oleg Nesterov <oleg@tv-sign.ru> + * + * Licensed under the FSF's GNU Public License v2 or later. + * + * Based on simple lists (include/linux/list.h). + * + * This file contains the add / del functions which are considered to + * be too large to inline. See include/linux/plist.h for further + * information. + */ + +#include <linux/plist.h> +#include <linux/spinlock.h> + +#ifdef CONFIG_DEBUG_PI_LIST + +static void plist_check_prev_next(struct list_head *t, struct list_head *p, + struct list_head *n) +{ + WARN(n->prev != p || p->next != n, + "top: %p, n: %p, p: %p\n" + "prev: %p, n: %p, p: %p\n" + "next: %p, n: %p, p: %p\n", + t, t->next, t->prev, + p, p->next, p->prev, + n, n->next, n->prev); +} + +static void plist_check_list(struct list_head *top) +{ + struct list_head *prev = top, *next = top->next; + + plist_check_prev_next(top, prev, next); + while (next != top) { + prev = next; + next = prev->next; + plist_check_prev_next(top, prev, next); + } +} + +static void plist_check_head(struct plist_head *head) +{ + WARN_ON(!head->rawlock && !head->spinlock); + if (head->rawlock) + WARN_ON_SMP(!raw_spin_is_locked(head->rawlock)); + if (head->spinlock) + WARN_ON_SMP(!spin_is_locked(head->spinlock)); + plist_check_list(&head->prio_list); + plist_check_list(&head->node_list); +} + +#else +# define plist_check_head(h) do { } while (0) +#endif + +/** + * plist_add - add @node to @head + * + * @node: &struct plist_node pointer + * @head: &struct plist_head pointer + */ +void plist_add(struct plist_node *node, struct plist_head *head) +{ + struct plist_node *iter; + + plist_check_head(head); + WARN_ON(!plist_node_empty(node)); + + list_for_each_entry(iter, &head->prio_list, plist.prio_list) { + if (node->prio < iter->prio) + goto lt_prio; + else if (node->prio == iter->prio) { + iter = list_entry(iter->plist.prio_list.next, + struct plist_node, plist.prio_list); + goto eq_prio; + } + } + +lt_prio: + list_add_tail(&node->plist.prio_list, &iter->plist.prio_list); +eq_prio: + list_add_tail(&node->plist.node_list, &iter->plist.node_list); + + plist_check_head(head); +} + +/** + * plist_del - Remove a @node from plist. + * + * @node: &struct plist_node pointer - entry to be removed + * @head: &struct plist_head pointer - list head + */ +void plist_del(struct plist_node *node, struct plist_head *head) +{ + plist_check_head(head); + + if (!list_empty(&node->plist.prio_list)) { + struct plist_node *next = plist_first(&node->plist); + + list_move_tail(&next->plist.prio_list, &node->plist.prio_list); + list_del_init(&node->plist.prio_list); + } + + list_del_init(&node->plist.node_list); + + plist_check_head(head); +} diff --git a/lib/prio_heap.c b/lib/prio_heap.c new file mode 100644 index 00000000..a7af6f85 --- /dev/null +++ b/lib/prio_heap.c @@ -0,0 +1,70 @@ +/* + * Simple insertion-only static-sized priority heap containing + * pointers, based on CLR, chapter 7 + */ + +#include <linux/slab.h> +#include <linux/prio_heap.h> + +int heap_init(struct ptr_heap *heap, size_t size, gfp_t gfp_mask, + int (*gt)(void *, void *)) +{ + heap->ptrs = kmalloc(size, gfp_mask); + if (!heap->ptrs) + return -ENOMEM; + heap->size = 0; + heap->max = size / sizeof(void *); + heap->gt = gt; + return 0; +} + +void heap_free(struct ptr_heap *heap) +{ + kfree(heap->ptrs); +} + +void *heap_insert(struct ptr_heap *heap, void *p) +{ + void *res; + void **ptrs = heap->ptrs; + int pos; + + if (heap->size < heap->max) { + /* Heap insertion */ + pos = heap->size++; + while (pos > 0 && heap->gt(p, ptrs[(pos-1)/2])) { + ptrs[pos] = ptrs[(pos-1)/2]; + pos = (pos-1)/2; + } + ptrs[pos] = p; + return NULL; + } + + /* The heap is full, so something will have to be dropped */ + + /* If the new pointer is greater than the current max, drop it */ + if (heap->gt(p, ptrs[0])) + return p; + + /* Replace the current max and heapify */ + res = ptrs[0]; + ptrs[0] = p; + pos = 0; + + while (1) { + int left = 2 * pos + 1; + int right = 2 * pos + 2; + int largest = pos; + if (left < heap->size && heap->gt(ptrs[left], p)) + largest = left; + if (right < heap->size && heap->gt(ptrs[right], ptrs[largest])) + largest = right; + if (largest == pos) + break; + /* Push p down the heap one level and bump one up */ + ptrs[pos] = ptrs[largest]; + ptrs[largest] = p; + pos = largest; + } + return res; +} diff --git a/lib/prio_tree.c b/lib/prio_tree.c new file mode 100644 index 00000000..ccfd850b --- /dev/null +++ b/lib/prio_tree.c @@ -0,0 +1,484 @@ +/* + * lib/prio_tree.c - priority search tree + * + * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu> + * + * This file is released under the GPL v2. + * + * Based on the radix priority search tree proposed by Edward M. McCreight + * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985 + * + * 02Feb2004 Initial version + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/prio_tree.h> + +/* + * A clever mix of heap and radix trees forms a radix priority search tree (PST) + * which is useful for storing intervals, e.g, we can consider a vma as a closed + * interval of file pages [offset_begin, offset_end], and store all vmas that + * map a file in a PST. Then, using the PST, we can answer a stabbing query, + * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a + * given input interval X (a set of consecutive file pages), in "O(log n + m)" + * time where 'log n' is the height of the PST, and 'm' is the number of stored + * intervals (vmas) that overlap (map) with the input interval X (the set of + * consecutive file pages). + * + * In our implementation, we store closed intervals of the form [radix_index, + * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST + * is designed for storing intervals with unique radix indices, i.e., each + * interval have different radix_index. However, this limitation can be easily + * overcome by using the size, i.e., heap_index - radix_index, as part of the + * index, so we index the tree using [(radix_index,size), heap_index]. + * + * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit + * machine, the maximum height of a PST can be 64. We can use a balanced version + * of the priority search tree to optimize the tree height, but the balanced + * tree proposed by McCreight is too complex and memory-hungry for our purpose. + */ + +/* + * The following macros are used for implementing prio_tree for i_mmap + */ + +#define RADIX_INDEX(vma) ((vma)->vm_pgoff) +#define VMA_SIZE(vma) (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT) +/* avoid overflow */ +#define HEAP_INDEX(vma) ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1)) + + +static void get_index(const struct prio_tree_root *root, + const struct prio_tree_node *node, + unsigned long *radix, unsigned long *heap) +{ + if (root->raw) { + struct vm_area_struct *vma = prio_tree_entry( + node, struct vm_area_struct, shared.prio_tree_node); + + *radix = RADIX_INDEX(vma); + *heap = HEAP_INDEX(vma); + } + else { + *radix = node->start; + *heap = node->last; + } +} + +static unsigned long index_bits_to_maxindex[BITS_PER_LONG]; + +void __init prio_tree_init(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++) + index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1; + index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL; +} + +/* + * Maximum heap_index that can be stored in a PST with index_bits bits + */ +static inline unsigned long prio_tree_maxindex(unsigned int bits) +{ + return index_bits_to_maxindex[bits - 1]; +} + +/* + * Extend a priority search tree so that it can store a node with heap_index + * max_heap_index. In the worst case, this algorithm takes O((log n)^2). + * However, this function is used rarely and the common case performance is + * not bad. + */ +static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root, + struct prio_tree_node *node, unsigned long max_heap_index) +{ + struct prio_tree_node *first = NULL, *prev, *last = NULL; + + if (max_heap_index > prio_tree_maxindex(root->index_bits)) + root->index_bits++; + + while (max_heap_index > prio_tree_maxindex(root->index_bits)) { + root->index_bits++; + + if (prio_tree_empty(root)) + continue; + + if (first == NULL) { + first = root->prio_tree_node; + prio_tree_remove(root, root->prio_tree_node); + INIT_PRIO_TREE_NODE(first); + last = first; + } else { + prev = last; + last = root->prio_tree_node; + prio_tree_remove(root, root->prio_tree_node); + INIT_PRIO_TREE_NODE(last); + prev->left = last; + last->parent = prev; + } + } + + INIT_PRIO_TREE_NODE(node); + + if (first) { + node->left = first; + first->parent = node; + } else + last = node; + + if (!prio_tree_empty(root)) { + last->left = root->prio_tree_node; + last->left->parent = last; + } + + root->prio_tree_node = node; + return node; +} + +/* + * Replace a prio_tree_node with a new node and return the old node + */ +struct prio_tree_node *prio_tree_replace(struct prio_tree_root *root, + struct prio_tree_node *old, struct prio_tree_node *node) +{ + INIT_PRIO_TREE_NODE(node); + + if (prio_tree_root(old)) { + BUG_ON(root->prio_tree_node != old); + /* + * We can reduce root->index_bits here. However, it is complex + * and does not help much to improve performance (IMO). + */ + node->parent = node; + root->prio_tree_node = node; + } else { + node->parent = old->parent; + if (old->parent->left == old) + old->parent->left = node; + else + old->parent->right = node; + } + + if (!prio_tree_left_empty(old)) { + node->left = old->left; + old->left->parent = node; + } + + if (!prio_tree_right_empty(old)) { + node->right = old->right; + old->right->parent = node; + } + + return old; +} + +/* + * Insert a prio_tree_node @node into a radix priority search tree @root. The + * algorithm typically takes O(log n) time where 'log n' is the number of bits + * required to represent the maximum heap_index. In the worst case, the algo + * can take O((log n)^2) - check prio_tree_expand. + * + * If a prior node with same radix_index and heap_index is already found in + * the tree, then returns the address of the prior node. Otherwise, inserts + * @node into the tree and returns @node. + */ +struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root, + struct prio_tree_node *node) +{ + struct prio_tree_node *cur, *res = node; + unsigned long radix_index, heap_index; + unsigned long r_index, h_index, index, mask; + int size_flag = 0; + + get_index(root, node, &radix_index, &heap_index); + + if (prio_tree_empty(root) || + heap_index > prio_tree_maxindex(root->index_bits)) + return prio_tree_expand(root, node, heap_index); + + cur = root->prio_tree_node; + mask = 1UL << (root->index_bits - 1); + + while (mask) { + get_index(root, cur, &r_index, &h_index); + + if (r_index == radix_index && h_index == heap_index) + return cur; + + if (h_index < heap_index || + (h_index == heap_index && r_index > radix_index)) { + struct prio_tree_node *tmp = node; + node = prio_tree_replace(root, cur, node); + cur = tmp; + /* swap indices */ + index = r_index; + r_index = radix_index; + radix_index = index; + index = h_index; + h_index = heap_index; + heap_index = index; + } + + if (size_flag) + index = heap_index - radix_index; + else + index = radix_index; + + if (index & mask) { + if (prio_tree_right_empty(cur)) { + INIT_PRIO_TREE_NODE(node); + cur->right = node; + node->parent = cur; + return res; + } else + cur = cur->right; + } else { + if (prio_tree_left_empty(cur)) { + INIT_PRIO_TREE_NODE(node); + cur->left = node; + node->parent = cur; + return res; + } else + cur = cur->left; + } + + mask >>= 1; + + if (!mask) { + mask = 1UL << (BITS_PER_LONG - 1); + size_flag = 1; + } + } + /* Should not reach here */ + BUG(); + return NULL; +} + +/* + * Remove a prio_tree_node @node from a radix priority search tree @root. The + * algorithm takes O(log n) time where 'log n' is the number of bits required + * to represent the maximum heap_index. + */ +void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node) +{ + struct prio_tree_node *cur; + unsigned long r_index, h_index_right, h_index_left; + + cur = node; + + while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) { + if (!prio_tree_left_empty(cur)) + get_index(root, cur->left, &r_index, &h_index_left); + else { + cur = cur->right; + continue; + } + + if (!prio_tree_right_empty(cur)) + get_index(root, cur->right, &r_index, &h_index_right); + else { + cur = cur->left; + continue; + } + + /* both h_index_left and h_index_right cannot be 0 */ + if (h_index_left >= h_index_right) + cur = cur->left; + else + cur = cur->right; + } + + if (prio_tree_root(cur)) { + BUG_ON(root->prio_tree_node != cur); + __INIT_PRIO_TREE_ROOT(root, root->raw); + return; + } + + if (cur->parent->right == cur) + cur->parent->right = cur->parent; + else + cur->parent->left = cur->parent; + + while (cur != node) + cur = prio_tree_replace(root, cur->parent, cur); +} + +/* + * Following functions help to enumerate all prio_tree_nodes in the tree that + * overlap with the input interval X [radix_index, heap_index]. The enumeration + * takes O(log n + m) time where 'log n' is the height of the tree (which is + * proportional to # of bits required to represent the maximum heap_index) and + * 'm' is the number of prio_tree_nodes that overlap the interval X. + */ + +static struct prio_tree_node *prio_tree_left(struct prio_tree_iter *iter, + unsigned long *r_index, unsigned long *h_index) +{ + if (prio_tree_left_empty(iter->cur)) + return NULL; + + get_index(iter->root, iter->cur->left, r_index, h_index); + + if (iter->r_index <= *h_index) { + iter->cur = iter->cur->left; + iter->mask >>= 1; + if (iter->mask) { + if (iter->size_level) + iter->size_level++; + } else { + if (iter->size_level) { + BUG_ON(!prio_tree_left_empty(iter->cur)); + BUG_ON(!prio_tree_right_empty(iter->cur)); + iter->size_level++; + iter->mask = ULONG_MAX; + } else { + iter->size_level = 1; + iter->mask = 1UL << (BITS_PER_LONG - 1); + } + } + return iter->cur; + } + + return NULL; +} + +static struct prio_tree_node *prio_tree_right(struct prio_tree_iter *iter, + unsigned long *r_index, unsigned long *h_index) +{ + unsigned long value; + + if (prio_tree_right_empty(iter->cur)) + return NULL; + + if (iter->size_level) + value = iter->value; + else + value = iter->value | iter->mask; + + if (iter->h_index < value) + return NULL; + + get_index(iter->root, iter->cur->right, r_index, h_index); + + if (iter->r_index <= *h_index) { + iter->cur = iter->cur->right; + iter->mask >>= 1; + iter->value = value; + if (iter->mask) { + if (iter->size_level) + iter->size_level++; + } else { + if (iter->size_level) { + BUG_ON(!prio_tree_left_empty(iter->cur)); + BUG_ON(!prio_tree_right_empty(iter->cur)); + iter->size_level++; + iter->mask = ULONG_MAX; + } else { + iter->size_level = 1; + iter->mask = 1UL << (BITS_PER_LONG - 1); + } + } + return iter->cur; + } + + return NULL; +} + +static struct prio_tree_node *prio_tree_parent(struct prio_tree_iter *iter) +{ + iter->cur = iter->cur->parent; + if (iter->mask == ULONG_MAX) + iter->mask = 1UL; + else if (iter->size_level == 1) + iter->mask = 1UL; + else + iter->mask <<= 1; + if (iter->size_level) + iter->size_level--; + if (!iter->size_level && (iter->value & iter->mask)) + iter->value ^= iter->mask; + return iter->cur; +} + +static inline int overlap(struct prio_tree_iter *iter, + unsigned long r_index, unsigned long h_index) +{ + return iter->h_index >= r_index && iter->r_index <= h_index; +} + +/* + * prio_tree_first: + * + * Get the first prio_tree_node that overlaps with the interval [radix_index, + * heap_index]. Note that always radix_index <= heap_index. We do a pre-order + * traversal of the tree. + */ +static struct prio_tree_node *prio_tree_first(struct prio_tree_iter *iter) +{ + struct prio_tree_root *root; + unsigned long r_index, h_index; + + INIT_PRIO_TREE_ITER(iter); + + root = iter->root; + if (prio_tree_empty(root)) + return NULL; + + get_index(root, root->prio_tree_node, &r_index, &h_index); + + if (iter->r_index > h_index) + return NULL; + + iter->mask = 1UL << (root->index_bits - 1); + iter->cur = root->prio_tree_node; + + while (1) { + if (overlap(iter, r_index, h_index)) + return iter->cur; + + if (prio_tree_left(iter, &r_index, &h_index)) + continue; + + if (prio_tree_right(iter, &r_index, &h_index)) + continue; + + break; + } + return NULL; +} + +/* + * prio_tree_next: + * + * Get the next prio_tree_node that overlaps with the input interval in iter + */ +struct prio_tree_node *prio_tree_next(struct prio_tree_iter *iter) +{ + unsigned long r_index, h_index; + + if (iter->cur == NULL) + return prio_tree_first(iter); + +repeat: + while (prio_tree_left(iter, &r_index, &h_index)) + if (overlap(iter, r_index, h_index)) + return iter->cur; + + while (!prio_tree_right(iter, &r_index, &h_index)) { + while (!prio_tree_root(iter->cur) && + iter->cur->parent->right == iter->cur) + prio_tree_parent(iter); + + if (prio_tree_root(iter->cur)) + return NULL; + + prio_tree_parent(iter); + } + + if (overlap(iter, r_index, h_index)) + return iter->cur; + + goto repeat; +} diff --git a/lib/proportions.c b/lib/proportions.c new file mode 100644 index 00000000..d50746a7 --- /dev/null +++ b/lib/proportions.c @@ -0,0 +1,407 @@ +/* + * Floating proportions + * + * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * + * Description: + * + * The floating proportion is a time derivative with an exponentially decaying + * history: + * + * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i) + * + * Where j is an element from {prop_local}, x_{j} is j's number of events, + * and i the time period over which the differential is taken. So d/dt_{-i} is + * the differential over the i-th last period. + * + * The decaying history gives smooth transitions. The time differential carries + * the notion of speed. + * + * The denominator is 2^(1+i) because we want the series to be normalised, ie. + * + * \Sum_{i=0} 1/2^(1+i) = 1 + * + * Further more, if we measure time (t) in the same events as x; so that: + * + * t = \Sum_{j} x_{j} + * + * we get that: + * + * \Sum_{j} p_{j} = 1 + * + * Writing this in an iterative fashion we get (dropping the 'd's): + * + * if (++x_{j}, ++t > period) + * t /= 2; + * for_each (j) + * x_{j} /= 2; + * + * so that: + * + * p_{j} = x_{j} / t; + * + * We optimize away the '/= 2' for the global time delta by noting that: + * + * if (++t > period) t /= 2: + * + * Can be approximated by: + * + * period/2 + (++t % period/2) + * + * [ Furthermore, when we choose period to be 2^n it can be written in terms of + * binary operations and wraparound artefacts disappear. ] + * + * Also note that this yields a natural counter of the elapsed periods: + * + * c = t / (period/2) + * + * [ Its monotonic increasing property can be applied to mitigate the wrap- + * around issue. ] + * + * This allows us to do away with the loop over all prop_locals on each period + * expiration. By remembering the period count under which it was last accessed + * as c_{j}, we can obtain the number of 'missed' cycles from: + * + * c - c_{j} + * + * We can then lazily catch up to the global period count every time we are + * going to use x_{j}, by doing: + * + * x_{j} /= 2^(c - c_{j}), c_{j} = c + */ + +#include <linux/proportions.h> +#include <linux/rcupdate.h> + +int prop_descriptor_init(struct prop_descriptor *pd, int shift) +{ + int err; + + if (shift > PROP_MAX_SHIFT) + shift = PROP_MAX_SHIFT; + + pd->index = 0; + pd->pg[0].shift = shift; + mutex_init(&pd->mutex); + err = percpu_counter_init(&pd->pg[0].events, 0); + if (err) + goto out; + + err = percpu_counter_init(&pd->pg[1].events, 0); + if (err) + percpu_counter_destroy(&pd->pg[0].events); + +out: + return err; +} + +/* + * We have two copies, and flip between them to make it seem like an atomic + * update. The update is not really atomic wrt the events counter, but + * it is internally consistent with the bit layout depending on shift. + * + * We copy the events count, move the bits around and flip the index. + */ +void prop_change_shift(struct prop_descriptor *pd, int shift) +{ + int index; + int offset; + u64 events; + unsigned long flags; + + if (shift > PROP_MAX_SHIFT) + shift = PROP_MAX_SHIFT; + + mutex_lock(&pd->mutex); + + index = pd->index ^ 1; + offset = pd->pg[pd->index].shift - shift; + if (!offset) + goto out; + + pd->pg[index].shift = shift; + + local_irq_save(flags); + events = percpu_counter_sum(&pd->pg[pd->index].events); + if (offset < 0) + events <<= -offset; + else + events >>= offset; + percpu_counter_set(&pd->pg[index].events, events); + + /* + * ensure the new pg is fully written before the switch + */ + smp_wmb(); + pd->index = index; + local_irq_restore(flags); + + synchronize_rcu(); + +out: + mutex_unlock(&pd->mutex); +} + +/* + * wrap the access to the data in an rcu_read_lock() section; + * this is used to track the active references. + */ +static struct prop_global *prop_get_global(struct prop_descriptor *pd) +__acquires(RCU) +{ + int index; + + rcu_read_lock(); + index = pd->index; + /* + * match the wmb from vcd_flip() + */ + smp_rmb(); + return &pd->pg[index]; +} + +static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg) +__releases(RCU) +{ + rcu_read_unlock(); +} + +static void +prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) +{ + int offset = *pl_shift - new_shift; + + if (!offset) + return; + + if (offset < 0) + *pl_period <<= -offset; + else + *pl_period >>= offset; + + *pl_shift = new_shift; +} + +/* + * PERCPU + */ + +#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) + +int prop_local_init_percpu(struct prop_local_percpu *pl) +{ + spin_lock_init(&pl->lock); + pl->shift = 0; + pl->period = 0; + return percpu_counter_init(&pl->events, 0); +} + +void prop_local_destroy_percpu(struct prop_local_percpu *pl) +{ + percpu_counter_destroy(&pl->events); +} + +/* + * Catch up with missed period expirations. + * + * until (c_{j} == c) + * x_{j} -= x_{j}/2; + * c_{j}++; + */ +static +void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl) +{ + unsigned long period = 1UL << (pg->shift - 1); + unsigned long period_mask = ~(period - 1); + unsigned long global_period; + unsigned long flags; + + global_period = percpu_counter_read(&pg->events); + global_period &= period_mask; + + /* + * Fast path - check if the local and global period count still match + * outside of the lock. + */ + if (pl->period == global_period) + return; + + spin_lock_irqsave(&pl->lock, flags); + prop_adjust_shift(&pl->shift, &pl->period, pg->shift); + + /* + * For each missed period, we half the local counter. + * basically: + * pl->events >> (global_period - pl->period); + */ + period = (global_period - pl->period) >> (pg->shift - 1); + if (period < BITS_PER_LONG) { + s64 val = percpu_counter_read(&pl->events); + + if (val < (nr_cpu_ids * PROP_BATCH)) + val = percpu_counter_sum(&pl->events); + + __percpu_counter_add(&pl->events, -val + (val >> period), + PROP_BATCH); + } else + percpu_counter_set(&pl->events, 0); + + pl->period = global_period; + spin_unlock_irqrestore(&pl->lock, flags); +} + +/* + * ++x_{j}, ++t + */ +void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl) +{ + struct prop_global *pg = prop_get_global(pd); + + prop_norm_percpu(pg, pl); + __percpu_counter_add(&pl->events, 1, PROP_BATCH); + percpu_counter_add(&pg->events, 1); + prop_put_global(pd, pg); +} + +/* + * identical to __prop_inc_percpu, except that it limits this pl's fraction to + * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded. + */ +void __prop_inc_percpu_max(struct prop_descriptor *pd, + struct prop_local_percpu *pl, long frac) +{ + struct prop_global *pg = prop_get_global(pd); + + prop_norm_percpu(pg, pl); + + if (unlikely(frac != PROP_FRAC_BASE)) { + unsigned long period_2 = 1UL << (pg->shift - 1); + unsigned long counter_mask = period_2 - 1; + unsigned long global_count; + long numerator, denominator; + + numerator = percpu_counter_read_positive(&pl->events); + global_count = percpu_counter_read(&pg->events); + denominator = period_2 + (global_count & counter_mask); + + if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT)) + goto out_put; + } + + percpu_counter_add(&pl->events, 1); + percpu_counter_add(&pg->events, 1); + +out_put: + prop_put_global(pd, pg); +} + +/* + * Obtain a fraction of this proportion + * + * p_{j} = x_{j} / (period/2 + t % period/2) + */ +void prop_fraction_percpu(struct prop_descriptor *pd, + struct prop_local_percpu *pl, + long *numerator, long *denominator) +{ + struct prop_global *pg = prop_get_global(pd); + unsigned long period_2 = 1UL << (pg->shift - 1); + unsigned long counter_mask = period_2 - 1; + unsigned long global_count; + + prop_norm_percpu(pg, pl); + *numerator = percpu_counter_read_positive(&pl->events); + + global_count = percpu_counter_read(&pg->events); + *denominator = period_2 + (global_count & counter_mask); + + prop_put_global(pd, pg); +} + +/* + * SINGLE + */ + +int prop_local_init_single(struct prop_local_single *pl) +{ + spin_lock_init(&pl->lock); + pl->shift = 0; + pl->period = 0; + pl->events = 0; + return 0; +} + +void prop_local_destroy_single(struct prop_local_single *pl) +{ +} + +/* + * Catch up with missed period expirations. + */ +static +void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl) +{ + unsigned long period = 1UL << (pg->shift - 1); + unsigned long period_mask = ~(period - 1); + unsigned long global_period; + unsigned long flags; + + global_period = percpu_counter_read(&pg->events); + global_period &= period_mask; + + /* + * Fast path - check if the local and global period count still match + * outside of the lock. + */ + if (pl->period == global_period) + return; + + spin_lock_irqsave(&pl->lock, flags); + prop_adjust_shift(&pl->shift, &pl->period, pg->shift); + /* + * For each missed period, we half the local counter. + */ + period = (global_period - pl->period) >> (pg->shift - 1); + if (likely(period < BITS_PER_LONG)) + pl->events >>= period; + else + pl->events = 0; + pl->period = global_period; + spin_unlock_irqrestore(&pl->lock, flags); +} + +/* + * ++x_{j}, ++t + */ +void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl) +{ + struct prop_global *pg = prop_get_global(pd); + + prop_norm_single(pg, pl); + pl->events++; + percpu_counter_add(&pg->events, 1); + prop_put_global(pd, pg); +} + +/* + * Obtain a fraction of this proportion + * + * p_{j} = x_{j} / (period/2 + t % period/2) + */ +void prop_fraction_single(struct prop_descriptor *pd, + struct prop_local_single *pl, + long *numerator, long *denominator) +{ + struct prop_global *pg = prop_get_global(pd); + unsigned long period_2 = 1UL << (pg->shift - 1); + unsigned long counter_mask = period_2 - 1; + unsigned long global_count; + + prop_norm_single(pg, pl); + *numerator = pl->events; + + global_count = percpu_counter_read(&pg->events); + *denominator = period_2 + (global_count & counter_mask); + + prop_put_global(pd, pg); +} diff --git a/lib/radix-tree.c b/lib/radix-tree.c new file mode 100644 index 00000000..296eb810 --- /dev/null +++ b/lib/radix-tree.c @@ -0,0 +1,1419 @@ +/* + * Copyright (C) 2001 Momchil Velikov + * Portions Copyright (C) 2001 Christoph Hellwig + * Copyright (C) 2005 SGI, Christoph Lameter + * Copyright (C) 2006 Nick Piggin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/radix-tree.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <linux/rcupdate.h> + + +#ifdef __KERNEL__ +#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) +#else +#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ +#endif + +#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) +#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) + +#define RADIX_TREE_TAG_LONGS \ + ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) + +struct radix_tree_node { + unsigned int height; /* Height from the bottom */ + unsigned int count; + struct rcu_head rcu_head; + void *slots[RADIX_TREE_MAP_SIZE]; + unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; +}; + +struct radix_tree_path { + struct radix_tree_node *node; + int offset; +}; + +#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) +#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ + RADIX_TREE_MAP_SHIFT)) + +/* + * The height_to_maxindex array needs to be one deeper than the maximum + * path as height 0 holds only 1 entry. + */ +static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly; + +/* + * Radix tree node cache. + */ +static struct kmem_cache *radix_tree_node_cachep; + +/* + * Per-cpu pool of preloaded nodes + */ +struct radix_tree_preload { + int nr; + struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH]; +}; +static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, }; + +static inline void *ptr_to_indirect(void *ptr) +{ + return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); +} + +static inline gfp_t root_gfp_mask(struct radix_tree_root *root) +{ + return root->gfp_mask & __GFP_BITS_MASK; +} + +static inline void tag_set(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __set_bit(offset, node->tags[tag]); +} + +static inline void tag_clear(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + __clear_bit(offset, node->tags[tag]); +} + +static inline int tag_get(struct radix_tree_node *node, unsigned int tag, + int offset) +{ + return test_bit(offset, node->tags[tag]); +} + +static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag) +{ + root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT)); +} + +static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag) +{ + root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT)); +} + +static inline void root_tag_clear_all(struct radix_tree_root *root) +{ + root->gfp_mask &= __GFP_BITS_MASK; +} + +static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) +{ + return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); +} + +/* + * Returns 1 if any slot in the node has this tag set. + * Otherwise returns 0. + */ +static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag) +{ + int idx; + for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) { + if (node->tags[tag][idx]) + return 1; + } + return 0; +} +/* + * This assumes that the caller has performed appropriate preallocation, and + * that the caller has pinned this thread of control to the current CPU. + */ +static struct radix_tree_node * +radix_tree_node_alloc(struct radix_tree_root *root) +{ + struct radix_tree_node *ret = NULL; + gfp_t gfp_mask = root_gfp_mask(root); + + if (!(gfp_mask & __GFP_WAIT)) { + struct radix_tree_preload *rtp; + + /* + * Provided the caller has preloaded here, we will always + * succeed in getting a node here (and never reach + * kmem_cache_alloc) + */ + rtp = &__get_cpu_var(radix_tree_preloads); + if (rtp->nr) { + ret = rtp->nodes[rtp->nr - 1]; + rtp->nodes[rtp->nr - 1] = NULL; + rtp->nr--; + } + } + if (ret == NULL) + ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); + + BUG_ON(radix_tree_is_indirect_ptr(ret)); + return ret; +} + +static void radix_tree_node_rcu_free(struct rcu_head *head) +{ + struct radix_tree_node *node = + container_of(head, struct radix_tree_node, rcu_head); + int i; + + /* + * must only free zeroed nodes into the slab. radix_tree_shrink + * can leave us with a non-NULL entry in the first slot, so clear + * that here to make sure. + */ + for (i = 0; i < RADIX_TREE_MAX_TAGS; i++) + tag_clear(node, i, 0); + + node->slots[0] = NULL; + node->count = 0; + + kmem_cache_free(radix_tree_node_cachep, node); +} + +static inline void +radix_tree_node_free(struct radix_tree_node *node) +{ + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); +} + +/* + * Load up this CPU's radix_tree_node buffer with sufficient objects to + * ensure that the addition of a single element in the tree cannot fail. On + * success, return zero, with preemption disabled. On error, return -ENOMEM + * with preemption not disabled. + * + * To make use of this facility, the radix tree must be initialised without + * __GFP_WAIT being passed to INIT_RADIX_TREE(). + */ +int radix_tree_preload(gfp_t gfp_mask) +{ + struct radix_tree_preload *rtp; + struct radix_tree_node *node; + int ret = -ENOMEM; + + preempt_disable(); + rtp = &__get_cpu_var(radix_tree_preloads); + while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { + preempt_enable(); + node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); + if (node == NULL) + goto out; + preempt_disable(); + rtp = &__get_cpu_var(radix_tree_preloads); + if (rtp->nr < ARRAY_SIZE(rtp->nodes)) + rtp->nodes[rtp->nr++] = node; + else + kmem_cache_free(radix_tree_node_cachep, node); + } + ret = 0; +out: + return ret; +} +EXPORT_SYMBOL(radix_tree_preload); + +/* + * Return the maximum key which can be store into a + * radix tree with height HEIGHT. + */ +static inline unsigned long radix_tree_maxindex(unsigned int height) +{ + return height_to_maxindex[height]; +} + +/* + * Extend a radix tree so it can store key @index. + */ +static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) +{ + struct radix_tree_node *node; + unsigned int height; + int tag; + + /* Figure out what the height should be. */ + height = root->height + 1; + while (index > radix_tree_maxindex(height)) + height++; + + if (root->rnode == NULL) { + root->height = height; + goto out; + } + + do { + unsigned int newheight; + if (!(node = radix_tree_node_alloc(root))) + return -ENOMEM; + + /* Increase the height. */ + node->slots[0] = indirect_to_ptr(root->rnode); + + /* Propagate the aggregated tag info into the new root */ + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { + if (root_tag_get(root, tag)) + tag_set(node, tag, 0); + } + + newheight = root->height+1; + node->height = newheight; + node->count = 1; + node = ptr_to_indirect(node); + rcu_assign_pointer(root->rnode, node); + root->height = newheight; + } while (height > root->height); +out: + return 0; +} + +/** + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert + * + * Insert an item into the radix tree at position @index. + */ +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + struct radix_tree_node *node = NULL, *slot; + unsigned int height, shift; + int offset; + int error; + + BUG_ON(radix_tree_is_indirect_ptr(item)); + + /* Make sure the tree is high enough. */ + if (index > radix_tree_maxindex(root->height)) { + error = radix_tree_extend(root, index); + if (error) + return error; + } + + slot = indirect_to_ptr(root->rnode); + + height = root->height; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + offset = 0; /* uninitialised var warning */ + while (height > 0) { + if (slot == NULL) { + /* Have to add a child node. */ + if (!(slot = radix_tree_node_alloc(root))) + return -ENOMEM; + slot->height = height; + if (node) { + rcu_assign_pointer(node->slots[offset], slot); + node->count++; + } else + rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); + } + + /* Go a level down */ + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + node = slot; + slot = node->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + if (slot != NULL) + return -EEXIST; + + if (node) { + node->count++; + rcu_assign_pointer(node->slots[offset], item); + BUG_ON(tag_get(node, 0, offset)); + BUG_ON(tag_get(node, 1, offset)); + } else { + rcu_assign_pointer(root->rnode, item); + BUG_ON(root_tag_get(root, 0)); + BUG_ON(root_tag_get(root, 1)); + } + + return 0; +} +EXPORT_SYMBOL(radix_tree_insert); + +/* + * is_slot == 1 : search for the slot. + * is_slot == 0 : search for the node. + */ +static void *radix_tree_lookup_element(struct radix_tree_root *root, + unsigned long index, int is_slot) +{ + unsigned int height, shift; + struct radix_tree_node *node, **slot; + + node = rcu_dereference_raw(root->rnode); + if (node == NULL) + return NULL; + + if (!radix_tree_is_indirect_ptr(node)) { + if (index > 0) + return NULL; + return is_slot ? (void *)&root->rnode : node; + } + node = indirect_to_ptr(node); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return NULL; + + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + do { + slot = (struct radix_tree_node **) + (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + node = rcu_dereference_raw(*slot); + if (node == NULL) + return NULL; + + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + return is_slot ? (void *)slot : indirect_to_ptr(node); +} + +/** + * radix_tree_lookup_slot - lookup a slot in a radix tree + * @root: radix tree root + * @index: index key + * + * Returns: the slot corresponding to the position @index in the + * radix tree @root. This is useful for update-if-exists operations. + * + * This function can be called under rcu_read_lock iff the slot is not + * modified by radix_tree_replace_slot, otherwise it must be called + * exclusive from other writers. Any dereference of the slot must be done + * using radix_tree_deref_slot. + */ +void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) +{ + return (void **)radix_tree_lookup_element(root, index, 1); +} +EXPORT_SYMBOL(radix_tree_lookup_slot); + +/** + * radix_tree_lookup - perform lookup operation on a radix tree + * @root: radix tree root + * @index: index key + * + * Lookup the item at the position @index in the radix tree @root. + * + * This function can be called under rcu_read_lock, however the caller + * must manage lifetimes of leaf nodes (eg. RCU may also be used to free + * them safely). No RCU barriers are required to access or modify the + * returned item, however. + */ +void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) +{ + return radix_tree_lookup_element(root, index, 0); +} +EXPORT_SYMBOL(radix_tree_lookup); + +/** + * radix_tree_tag_set - set a tag on a radix tree node + * @root: radix tree root + * @index: index key + * @tag: tag index + * + * Set the search tag (which must be < RADIX_TREE_MAX_TAGS) + * corresponding to @index in the radix tree. From + * the root all the way down to the leaf node. + * + * Returns the address of the tagged item. Setting a tag on a not-present + * item is a bug. + */ +void *radix_tree_tag_set(struct radix_tree_root *root, + unsigned long index, unsigned int tag) +{ + unsigned int height, shift; + struct radix_tree_node *slot; + + height = root->height; + BUG_ON(index > radix_tree_maxindex(height)); + + slot = indirect_to_ptr(root->rnode); + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + + while (height > 0) { + int offset; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!tag_get(slot, tag, offset)) + tag_set(slot, tag, offset); + slot = slot->slots[offset]; + BUG_ON(slot == NULL); + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + /* set the root's tag bit */ + if (slot && !root_tag_get(root, tag)) + root_tag_set(root, tag); + + return slot; +} +EXPORT_SYMBOL(radix_tree_tag_set); + +/** + * radix_tree_tag_clear - clear a tag on a radix tree node + * @root: radix tree root + * @index: index key + * @tag: tag index + * + * Clear the search tag (which must be < RADIX_TREE_MAX_TAGS) + * corresponding to @index in the radix tree. If + * this causes the leaf node to have no tags set then clear the tag in the + * next-to-leaf node, etc. + * + * Returns the address of the tagged item on success, else NULL. ie: + * has the same return value and semantics as radix_tree_lookup(). + */ +void *radix_tree_tag_clear(struct radix_tree_root *root, + unsigned long index, unsigned int tag) +{ + /* + * The radix tree path needs to be one longer than the maximum path + * since the "list" is null terminated. + */ + struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *slot = NULL; + unsigned int height, shift; + + height = root->height; + if (index > radix_tree_maxindex(height)) + goto out; + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + pathp->node = NULL; + slot = indirect_to_ptr(root->rnode); + + while (height > 0) { + int offset; + + if (slot == NULL) + goto out; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp[1].offset = offset; + pathp[1].node = slot; + slot = slot->slots[offset]; + pathp++; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } + + if (slot == NULL) + goto out; + + while (pathp->node) { + if (!tag_get(pathp->node, tag, pathp->offset)) + goto out; + tag_clear(pathp->node, tag, pathp->offset); + if (any_tag_set(pathp->node, tag)) + goto out; + pathp--; + } + + /* clear the root's tag bit */ + if (root_tag_get(root, tag)) + root_tag_clear(root, tag); + +out: + return slot; +} +EXPORT_SYMBOL(radix_tree_tag_clear); + +/** + * radix_tree_tag_get - get a tag on a radix tree node + * @root: radix tree root + * @index: index key + * @tag: tag index (< RADIX_TREE_MAX_TAGS) + * + * Return values: + * + * 0: tag not present or not set + * 1: tag set + * + * Note that the return value of this function may not be relied on, even if + * the RCU lock is held, unless tag modification and node deletion are excluded + * from concurrency. + */ +int radix_tree_tag_get(struct radix_tree_root *root, + unsigned long index, unsigned int tag) +{ + unsigned int height, shift; + struct radix_tree_node *node; + int saw_unset_tag = 0; + + /* check the root's tag bit */ + if (!root_tag_get(root, tag)) + return 0; + + node = rcu_dereference_raw(root->rnode); + if (node == NULL) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) + return (index == 0); + node = indirect_to_ptr(node); + + height = node->height; + if (index > radix_tree_maxindex(height)) + return 0; + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + + for ( ; ; ) { + int offset; + + if (node == NULL) + return 0; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + + /* + * This is just a debug check. Later, we can bale as soon as + * we see an unset tag. + */ + if (!tag_get(node, tag, offset)) + saw_unset_tag = 1; + if (height == 1) + return !!tag_get(node, tag, offset); + node = rcu_dereference_raw(node->slots[offset]); + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } +} +EXPORT_SYMBOL(radix_tree_tag_get); + +/** + * radix_tree_range_tag_if_tagged - for each item in given range set given + * tag if item has another tag set + * @root: radix tree root + * @first_indexp: pointer to a starting index of a range to scan + * @last_index: last index of a range to scan + * @nr_to_tag: maximum number items to tag + * @iftag: tag index to test + * @settag: tag index to set if tested tag is set + * + * This function scans range of radix tree from first_index to last_index + * (inclusive). For each item in the range if iftag is set, the function sets + * also settag. The function stops either after tagging nr_to_tag items or + * after reaching last_index. + * + * The tags must be set from the leaf level only and propagated back up the + * path to the root. We must do this so that we resolve the full path before + * setting any tags on intermediate nodes. If we set tags as we descend, then + * we can get to the leaf node and find that the index that has the iftag + * set is outside the range we are scanning. This reults in dangling tags and + * can lead to problems with later tag operations (e.g. livelocks on lookups). + * + * The function returns number of leaves where the tag was set and sets + * *first_indexp to the first unscanned index. + * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must + * be prepared to handle that. + */ +unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, + unsigned long *first_indexp, unsigned long last_index, + unsigned long nr_to_tag, + unsigned int iftag, unsigned int settag) +{ + unsigned int height = root->height; + struct radix_tree_path path[height]; + struct radix_tree_path *pathp = path; + struct radix_tree_node *slot; + unsigned int shift; + unsigned long tagged = 0; + unsigned long index = *first_indexp; + + last_index = min(last_index, radix_tree_maxindex(height)); + if (index > last_index) + return 0; + if (!nr_to_tag) + return 0; + if (!root_tag_get(root, iftag)) { + *first_indexp = last_index + 1; + return 0; + } + if (height == 0) { + *first_indexp = last_index + 1; + root_tag_set(root, settag); + return 1; + } + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + slot = indirect_to_ptr(root->rnode); + + /* + * we fill the path from (root->height - 2) to 0, leaving the index at + * (root->height - 1) as a terminator. Zero the node in the terminator + * so that we can use this to end walk loops back up the path. + */ + path[height - 1].node = NULL; + + for (;;) { + int offset; + + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + if (!slot->slots[offset]) + goto next; + if (!tag_get(slot, iftag, offset)) + goto next; + if (height > 1) { + /* Go down one level */ + height--; + shift -= RADIX_TREE_MAP_SHIFT; + path[height - 1].node = slot; + path[height - 1].offset = offset; + slot = slot->slots[offset]; + continue; + } + + /* tag the leaf */ + tagged++; + tag_set(slot, settag, offset); + + /* walk back up the path tagging interior nodes */ + pathp = &path[0]; + while (pathp->node) { + /* stop if we find a node with the tag already set */ + if (tag_get(pathp->node, settag, pathp->offset)) + break; + tag_set(pathp->node, settag, pathp->offset); + pathp++; + } + +next: + /* Go to next item at level determined by 'shift' */ + index = ((index >> shift) + 1) << shift; + /* Overflow can happen when last_index is ~0UL... */ + if (index > last_index || !index) + break; + if (tagged >= nr_to_tag) + break; + while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + /* + * We've fully scanned this node. Go up. Because + * last_index is guaranteed to be in the tree, what + * we do below cannot wander astray. + */ + slot = path[height - 1].node; + height++; + shift += RADIX_TREE_MAP_SHIFT; + } + } + /* + * The iftag must have been set somewhere because otherwise + * we would return immediated at the beginning of the function + */ + root_tag_set(root, settag); + *first_indexp = index; + + return tagged; +} +EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); + + +/** + * radix_tree_next_hole - find the next hole (not-present entry) + * @root: tree root + * @index: index key + * @max_scan: maximum range to search + * + * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest + * indexed hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'return - index >= max_scan' + * will be true). In rare cases of index wrap-around, 0 will be returned. + * + * radix_tree_next_hole may be called under rcu_read_lock. However, like + * radix_tree_gang_lookup, this will not atomically search a snapshot of + * the tree at a single point in time. For example, if a hole is created + * at index 5, then subsequently a hole is created at index 10, + * radix_tree_next_hole covering both indexes may return 10 if called + * under rcu_read_lock. + */ +unsigned long radix_tree_next_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + if (!radix_tree_lookup(root, index)) + break; + index++; + if (index == 0) + break; + } + + return index; +} +EXPORT_SYMBOL(radix_tree_next_hole); + +/** + * radix_tree_prev_hole - find the prev hole (not-present entry) + * @root: tree root + * @index: index key + * @max_scan: maximum range to search + * + * Search backwards in the range [max(index-max_scan+1, 0), index] + * for the first hole. + * + * Returns: the index of the hole if found, otherwise returns an index + * outside of the set specified (in which case 'index - return >= max_scan' + * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. + * + * radix_tree_next_hole may be called under rcu_read_lock. However, like + * radix_tree_gang_lookup, this will not atomically search a snapshot of + * the tree at a single point in time. For example, if a hole is created + * at index 10, then subsequently a hole is created at index 5, + * radix_tree_prev_hole covering both indexes may return 5 if called under + * rcu_read_lock. + */ +unsigned long radix_tree_prev_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan) +{ + unsigned long i; + + for (i = 0; i < max_scan; i++) { + if (!radix_tree_lookup(root, index)) + break; + index--; + if (index == ULONG_MAX) + break; + } + + return index; +} +EXPORT_SYMBOL(radix_tree_prev_hole); + +static unsigned int +__lookup(struct radix_tree_node *slot, void ***results, unsigned long index, + unsigned int max_items, unsigned long *next_index) +{ + unsigned int nr_found = 0; + unsigned int shift, height; + unsigned long i; + + height = slot->height; + if (height == 0) + goto out; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + for ( ; height > 1; height--) { + i = (index >> shift) & RADIX_TREE_MAP_MASK; + for (;;) { + if (slot->slots[i] != NULL) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + goto out; + } + + /* Bottom level: grab some items */ + for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) { + index++; + if (slot->slots[i]) { + results[nr_found++] = &(slot->slots[i]); + if (nr_found == max_items) + goto out; + } + } +out: + *next_index = index; + return nr_found; +} + +/** + * radix_tree_gang_lookup - perform multiple lookup on a radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * them at *@results and returns the number of items which were placed at + * *@results. + * + * The implementation is naive. + * + * Like radix_tree_lookup, radix_tree_gang_lookup may be called under + * rcu_read_lock. In this case, rather than the returned results being + * an atomic snapshot of the tree at a single point in time, the semantics + * of an RCU protected gang lookup are as though multiple radix_tree_lookups + * have been issued in individual locks, and results stored in 'results'. + */ +unsigned int +radix_tree_gang_lookup(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items) +{ + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; + + node = rcu_dereference_raw(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = node; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int nr_found, slots_found, i; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, (void ***)results + ret, cur_index, + max_items - ret, &next_index); + nr_found = 0; + for (i = 0; i < slots_found; i++) { + struct radix_tree_node *slot; + slot = *(((void ***)results)[ret + i]); + if (!slot) + continue; + results[ret + nr_found] = + indirect_to_ptr(rcu_dereference_raw(slot)); + nr_found++; + } + ret += nr_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup); + +/** + * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * + * Performs an index-ascending scan of the tree for present items. Places + * their slots at *@results and returns the number of items which were + * placed at *@results. + * + * The implementation is naive. + * + * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must + * be dereferenced with radix_tree_deref_slot, and if using only RCU + * protection, radix_tree_deref_slot may fail requiring a retry. + */ +unsigned int +radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items) +{ + unsigned long max_index; + struct radix_tree_node *node; + unsigned long cur_index = first_index; + unsigned int ret; + + node = rcu_dereference_raw(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = (void **)&root->rnode; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int slots_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup(node, results + ret, cur_index, + max_items - ret, &next_index); + ret += slots_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup_slot); + +/* + * FIXME: the two tag_get()s here should use find_next_bit() instead of + * open-coding the search. + */ +static unsigned int +__lookup_tag(struct radix_tree_node *slot, void ***results, unsigned long index, + unsigned int max_items, unsigned long *next_index, unsigned int tag) +{ + unsigned int nr_found = 0; + unsigned int shift, height; + + height = slot->height; + if (height == 0) + goto out; + shift = (height-1) * RADIX_TREE_MAP_SHIFT; + + while (height > 0) { + unsigned long i = (index >> shift) & RADIX_TREE_MAP_MASK ; + + for (;;) { + if (tag_get(slot, tag, i)) + break; + index &= ~((1UL << shift) - 1); + index += 1UL << shift; + if (index == 0) + goto out; /* 32-bit wraparound */ + i++; + if (i == RADIX_TREE_MAP_SIZE) + goto out; + } + height--; + if (height == 0) { /* Bottom level: grab some items */ + unsigned long j = index & RADIX_TREE_MAP_MASK; + + for ( ; j < RADIX_TREE_MAP_SIZE; j++) { + index++; + if (!tag_get(slot, tag, j)) + continue; + /* + * Even though the tag was found set, we need to + * recheck that we have a non-NULL node, because + * if this lookup is lockless, it may have been + * subsequently deleted. + * + * Similar care must be taken in any place that + * lookup ->slots[x] without a lock (ie. can't + * rely on its value remaining the same). + */ + if (slot->slots[j]) { + results[nr_found++] = &(slot->slots[j]); + if (nr_found == max_items) + goto out; + } + } + } + shift -= RADIX_TREE_MAP_SHIFT; + slot = rcu_dereference_raw(slot->slots[i]); + if (slot == NULL) + break; + } +out: + *next_index = index; + return nr_found; +} + +/** + * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree + * based on a tag + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * @tag: the tag index (< RADIX_TREE_MAX_TAGS) + * + * Performs an index-ascending scan of the tree for present items which + * have the tag indexed by @tag set. Places the items at *@results and + * returns the number of items which were placed at *@results. + */ +unsigned int +radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, + unsigned long first_index, unsigned int max_items, + unsigned int tag) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = first_index; + unsigned int ret; + + /* check the root's tag bit */ + if (!root_tag_get(root, tag)) + return 0; + + node = rcu_dereference_raw(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = node; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int nr_found, slots_found, i; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup_tag(node, (void ***)results + ret, + cur_index, max_items - ret, &next_index, tag); + nr_found = 0; + for (i = 0; i < slots_found; i++) { + struct radix_tree_node *slot; + slot = *(((void ***)results)[ret + i]); + if (!slot) + continue; + results[ret + nr_found] = + indirect_to_ptr(rcu_dereference_raw(slot)); + nr_found++; + } + ret += nr_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup_tag); + +/** + * radix_tree_gang_lookup_tag_slot - perform multiple slot lookup on a + * radix tree based on a tag + * @root: radix tree root + * @results: where the results of the lookup are placed + * @first_index: start the lookup from this key + * @max_items: place up to this many items at *results + * @tag: the tag index (< RADIX_TREE_MAX_TAGS) + * + * Performs an index-ascending scan of the tree for present items which + * have the tag indexed by @tag set. Places the slots at *@results and + * returns the number of slots which were placed at *@results. + */ +unsigned int +radix_tree_gang_lookup_tag_slot(struct radix_tree_root *root, void ***results, + unsigned long first_index, unsigned int max_items, + unsigned int tag) +{ + struct radix_tree_node *node; + unsigned long max_index; + unsigned long cur_index = first_index; + unsigned int ret; + + /* check the root's tag bit */ + if (!root_tag_get(root, tag)) + return 0; + + node = rcu_dereference_raw(root->rnode); + if (!node) + return 0; + + if (!radix_tree_is_indirect_ptr(node)) { + if (first_index > 0) + return 0; + results[0] = (void **)&root->rnode; + return 1; + } + node = indirect_to_ptr(node); + + max_index = radix_tree_maxindex(node->height); + + ret = 0; + while (ret < max_items) { + unsigned int slots_found; + unsigned long next_index; /* Index of next search */ + + if (cur_index > max_index) + break; + slots_found = __lookup_tag(node, results + ret, + cur_index, max_items - ret, &next_index, tag); + ret += slots_found; + if (next_index == 0) + break; + cur_index = next_index; + } + + return ret; +} +EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); + + +/** + * radix_tree_shrink - shrink height of a radix tree to minimal + * @root radix tree root + */ +static inline void radix_tree_shrink(struct radix_tree_root *root) +{ + /* try to shrink tree height */ + while (root->height > 0) { + struct radix_tree_node *to_free = root->rnode; + void *newptr; + + BUG_ON(!radix_tree_is_indirect_ptr(to_free)); + to_free = indirect_to_ptr(to_free); + + /* + * The candidate node has more than one child, or its child + * is not at the leftmost slot, we cannot shrink. + */ + if (to_free->count != 1) + break; + if (!to_free->slots[0]) + break; + + /* + * We don't need rcu_assign_pointer(), since we are simply + * moving the node from one part of the tree to another: if it + * was safe to dereference the old pointer to it + * (to_free->slots[0]), it will be safe to dereference the new + * one (root->rnode) as far as dependent read barriers go. + */ + newptr = to_free->slots[0]; + if (root->height > 1) + newptr = ptr_to_indirect(newptr); + root->rnode = newptr; + root->height--; + + /* + * We have a dilemma here. The node's slot[0] must not be + * NULLed in case there are concurrent lookups expecting to + * find the item. However if this was a bottom-level node, + * then it may be subject to the slot pointer being visible + * to callers dereferencing it. If item corresponding to + * slot[0] is subsequently deleted, these callers would expect + * their slot to become empty sooner or later. + * + * For example, lockless pagecache will look up a slot, deref + * the page pointer, and if the page is 0 refcount it means it + * was concurrently deleted from pagecache so try the deref + * again. Fortunately there is already a requirement for logic + * to retry the entire slot lookup -- the indirect pointer + * problem (replacing direct root node with an indirect pointer + * also results in a stale slot). So tag the slot as indirect + * to force callers to retry. + */ + if (root->height == 0) + *((unsigned long *)&to_free->slots[0]) |= + RADIX_TREE_INDIRECT_PTR; + + radix_tree_node_free(to_free); + } +} + +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + /* + * The radix tree path needs to be one longer than the maximum path + * since the "list" is null terminated. + */ + struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; + struct radix_tree_node *slot = NULL; + struct radix_tree_node *to_free; + unsigned int height, shift; + int tag; + int offset; + + height = root->height; + if (index > radix_tree_maxindex(height)) + goto out; + + slot = root->rnode; + if (height == 0) { + root_tag_clear_all(root); + root->rnode = NULL; + goto out; + } + slot = indirect_to_ptr(slot); + + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + pathp->node = NULL; + + do { + if (slot == NULL) + goto out; + + pathp++; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + pathp->offset = offset; + pathp->node = slot; + slot = slot->slots[offset]; + shift -= RADIX_TREE_MAP_SHIFT; + height--; + } while (height > 0); + + if (slot == NULL) + goto out; + + /* + * Clear all tags associated with the just-deleted item + */ + for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { + if (tag_get(pathp->node, tag, pathp->offset)) + radix_tree_tag_clear(root, index, tag); + } + + to_free = NULL; + /* Now free the nodes we do not need anymore */ + while (pathp->node) { + pathp->node->slots[pathp->offset] = NULL; + pathp->node->count--; + /* + * Queue the node for deferred freeing after the + * last reference to it disappears (set NULL, above). + */ + if (to_free) + radix_tree_node_free(to_free); + + if (pathp->node->count) { + if (pathp->node == indirect_to_ptr(root->rnode)) + radix_tree_shrink(root); + goto out; + } + + /* Node with zero slots in use so free it */ + to_free = pathp->node; + pathp--; + + } + root_tag_clear_all(root); + root->height = 0; + root->rnode = NULL; + if (to_free) + radix_tree_node_free(to_free); + +out: + return slot; +} +EXPORT_SYMBOL(radix_tree_delete); + +/** + * radix_tree_tagged - test whether any items in the tree are tagged + * @root: radix tree root + * @tag: tag to test + */ +int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) +{ + return root_tag_get(root, tag); +} +EXPORT_SYMBOL(radix_tree_tagged); + +static void +radix_tree_node_ctor(void *node) +{ + memset(node, 0, sizeof(struct radix_tree_node)); +} + +static __init unsigned long __maxindex(unsigned int height) +{ + unsigned int width = height * RADIX_TREE_MAP_SHIFT; + int shift = RADIX_TREE_INDEX_BITS - width; + + if (shift < 0) + return ~0UL; + if (shift >= BITS_PER_LONG) + return 0UL; + return ~0UL >> shift; +} + +static __init void radix_tree_init_maxindex(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) + height_to_maxindex[i] = __maxindex(i); +} + +static int radix_tree_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + int cpu = (long)hcpu; + struct radix_tree_preload *rtp; + + /* Free per-cpu pool of perloaded nodes */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + rtp = &per_cpu(radix_tree_preloads, cpu); + while (rtp->nr) { + kmem_cache_free(radix_tree_node_cachep, + rtp->nodes[rtp->nr-1]); + rtp->nodes[rtp->nr-1] = NULL; + rtp->nr--; + } + } + return NOTIFY_OK; +} + +void __init radix_tree_init(void) +{ + radix_tree_node_cachep = kmem_cache_create("radix_tree_node", + sizeof(struct radix_tree_node), 0, + SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, + radix_tree_node_ctor); + radix_tree_init_maxindex(); + hotcpu_notifier(radix_tree_callback, 0); +} diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile new file mode 100644 index 00000000..8a381027 --- /dev/null +++ b/lib/raid6/Makefile @@ -0,0 +1,75 @@ +obj-$(CONFIG_RAID6_PQ) += raid6_pq.o + +raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ + int8.o int16.o int32.o altivec1.o altivec2.o altivec4.o \ + altivec8.o mmx.o sse1.o sse2.o +hostprogs-y += mktables + +quiet_cmd_unroll = UNROLL $@ + cmd_unroll = $(AWK) -f$(srctree)/$(src)/unroll.awk -vN=$(UNROLL) \ + < $< > $@ || ( rm -f $@ && exit 1 ) + +ifeq ($(CONFIG_ALTIVEC),y) +altivec_flags := -maltivec -mabi=altivec +endif + +targets += int1.c +$(obj)/int1.c: UNROLL := 1 +$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int2.c +$(obj)/int2.c: UNROLL := 2 +$(obj)/int2.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int4.c +$(obj)/int4.c: UNROLL := 4 +$(obj)/int4.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int8.c +$(obj)/int8.c: UNROLL := 8 +$(obj)/int8.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int16.c +$(obj)/int16.c: UNROLL := 16 +$(obj)/int16.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +targets += int32.c +$(obj)/int32.c: UNROLL := 32 +$(obj)/int32.c: $(src)/int.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec1.o += $(altivec_flags) +targets += altivec1.c +$(obj)/altivec1.c: UNROLL := 1 +$(obj)/altivec1.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec2.o += $(altivec_flags) +targets += altivec2.c +$(obj)/altivec2.c: UNROLL := 2 +$(obj)/altivec2.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec4.o += $(altivec_flags) +targets += altivec4.c +$(obj)/altivec4.c: UNROLL := 4 +$(obj)/altivec4.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_altivec8.o += $(altivec_flags) +targets += altivec8.c +$(obj)/altivec8.c: UNROLL := 8 +$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +quiet_cmd_mktable = TABLE $@ + cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) + +targets += tables.c +$(obj)/tables.c: $(obj)/mktables FORCE + $(call if_changed,mktable) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c new file mode 100644 index 00000000..b595f560 --- /dev/null +++ b/lib/raid6/algos.c @@ -0,0 +1,154 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/algos.c + * + * Algorithm list and algorithm selection for RAID-6 + */ + +#include <linux/raid/pq.h> +#ifndef __KERNEL__ +#include <sys/mman.h> +#include <stdio.h> +#else +#include <linux/gfp.h> +#if !RAID6_USE_EMPTY_ZERO_PAGE +/* In .bss so it's zeroed */ +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +EXPORT_SYMBOL(raid6_empty_zero_page); +#endif +#endif + +struct raid6_calls raid6_call; +EXPORT_SYMBOL_GPL(raid6_call); + +const struct raid6_calls * const raid6_algos[] = { + &raid6_intx1, + &raid6_intx2, + &raid6_intx4, + &raid6_intx8, +#if defined(__ia64__) + &raid6_intx16, + &raid6_intx32, +#endif +#if defined(__i386__) && !defined(__arch_um__) + &raid6_mmxx1, + &raid6_mmxx2, + &raid6_sse1x1, + &raid6_sse1x2, + &raid6_sse2x1, + &raid6_sse2x2, +#endif +#if defined(__x86_64__) && !defined(__arch_um__) + &raid6_sse2x1, + &raid6_sse2x2, + &raid6_sse2x4, +#endif +#ifdef CONFIG_ALTIVEC + &raid6_altivec1, + &raid6_altivec2, + &raid6_altivec4, + &raid6_altivec8, +#endif + NULL +}; + +#ifdef __KERNEL__ +#define RAID6_TIME_JIFFIES_LG2 4 +#else +/* Need more time to be stable in userspace */ +#define RAID6_TIME_JIFFIES_LG2 9 +#define time_before(x, y) ((x) < (y)) +#endif + +/* Try to pick the best algorithm */ +/* This code uses the gfmul table as convenient data set to abuse */ + +int __init raid6_select_algo(void) +{ + const struct raid6_calls * const * algo; + const struct raid6_calls * best; + char *syndromes; + void *dptrs[(65536/PAGE_SIZE)+2]; + int i, disks; + unsigned long perf, bestperf; + int bestprefer; + unsigned long j0, j1; + + disks = (65536/PAGE_SIZE)+2; + for ( i = 0 ; i < disks-2 ; i++ ) { + dptrs[i] = ((char *)raid6_gfmul) + PAGE_SIZE*i; + } + + /* Normal code - use a 2-page allocation to avoid D$ conflict */ + syndromes = (void *) __get_free_pages(GFP_KERNEL, 1); + + if ( !syndromes ) { + printk("raid6: Yikes! No memory available.\n"); + return -ENOMEM; + } + + dptrs[disks-2] = syndromes; + dptrs[disks-1] = syndromes + PAGE_SIZE; + + bestperf = 0; bestprefer = 0; best = NULL; + + for ( algo = raid6_algos ; *algo ; algo++ ) { + if ( !(*algo)->valid || (*algo)->valid() ) { + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ( (j1 = jiffies) == j0 ) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + (*algo)->gen_syndrome(disks, PAGE_SIZE, dptrs); + perf++; + } + preempt_enable(); + + if ( (*algo)->prefer > bestprefer || + ((*algo)->prefer == bestprefer && + perf > bestperf) ) { + best = *algo; + bestprefer = best->prefer; + bestperf = perf; + } + printk("raid6: %-8s %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + } + } + + if (best) { + printk("raid6: using algorithm %s (%ld MB/s)\n", + best->name, + (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + raid6_call = *best; + } else + printk("raid6: Yikes! No algorithm found!\n"); + + free_pages((unsigned long)syndromes, 1); + + return best ? 0 : -EINVAL; +} + +static void raid6_exit(void) +{ + do { } while (0); +} + +subsys_initcall(raid6_select_algo); +module_exit(raid6_exit); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("RAID6 Q-syndrome calculations"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc new file mode 100644 index 00000000..2654d5c8 --- /dev/null +++ b/lib/raid6/altivec.uc @@ -0,0 +1,130 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6altivec$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + * + * <benh> hpa: in process, + * you can just "steal" the vec unit with enable_kernel_altivec() (but + * bracked this with preempt_disable/enable or in a lock) + */ + +#include <linux/raid/pq.h> + +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +# include <asm/system.h> +# include <asm/cputable.h> +#endif + +/* + * This is the C data type to use. We use a vector of + * signed char so vec_cmpgt() will generate the right + * instruction. + */ + +typedef vector signed char unative_t; + +#define NBYTES(x) ((vector signed char) {x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + return vec_add(v,v); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t zv = NBYTES(0); + + /* vec_cmpgt returns a vector bool char; thus the need for the cast */ + return (unative_t)vec_cmpgt(zv, v); +} + + +/* This is noinline to make damned sure that gcc doesn't move any of the + Altivec code around the enable/disable code */ +static void noinline +raid6_altivec$#_gen_syndrome_real(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + unative_t x1d = NBYTES(0x1d); + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ = vec_xor(wp$$, wd$$); + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ = vec_and(w2$$, x1d); + w1$$ = vec_xor(w1$$, w2$$); + wq$$ = vec_xor(w1$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_altivec$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_altivec$#_gen_syndrome_real(disks, bytes, ptrs); + + preempt_enable(); +} + +int raid6_have_altivec(void); +#if $# == 1 +int raid6_have_altivec(void) +{ + /* This assumes either all CPUs have Altivec or none does */ +# ifdef __KERNEL__ + return cpu_has_feature(CPU_FTR_ALTIVEC); +# else + return 1; +# endif +} +#endif + +const struct raid6_calls raid6_altivec$# = { + raid6_altivec$#_gen_syndrome, + raid6_have_altivec, + "altivecx$#", + 0 +}; + +#endif /* CONFIG_ALTIVEC */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc new file mode 100644 index 00000000..d1e276a1 --- /dev/null +++ b/lib/raid6/int.uc @@ -0,0 +1,117 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6int$#.c + * + * $#-way unrolled portable integer math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <linux/raid/pq.h> + +/* + * This is the C data type to use + */ + +/* Change this from BITS_PER_LONG if there is something better... */ +#if BITS_PER_LONG == 64 +# define NBYTES(x) ((x) * 0x0101010101010101UL) +# define NSIZE 8 +# define NSHIFT 3 +# define NSTRING "64" +typedef u64 unative_t; +#else +# define NBYTES(x) ((x) * 0x01010101U) +# define NSIZE 4 +# define NSHIFT 2 +# define NSTRING "32" +typedef u32 unative_t; +#endif + + + +/* + * IA-64 wants insane amounts of unrolling. On other architectures that + * is just a waste of space. + */ +#if ($# <= 8) || defined(__ia64__) + + +/* + * These sub-operations are separate inlines since they can sometimes be + * specially optimized using architecture-specific hacks. + */ + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ unative_t SHLBYTE(unative_t v) +{ + unative_t vv; + + vv = (v << 1) & NBYTES(0xfe); + return vv; +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ unative_t MASK(unative_t v) +{ + unative_t vv; + + vv = v & NBYTES(0x80); + vv = (vv << 1) - (vv >> 7); /* Overflow on the top bit is OK */ + return vv; +} + + +static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +const struct raid6_calls raid6_intx$# = { + raid6_int$#_gen_syndrome, + NULL, /* always valid */ + "int" NSTRING "x$#", + 0 +}; + +#endif diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c new file mode 100644 index 00000000..3b150084 --- /dev/null +++ b/lib/raid6/mktables.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * mktables.c + * + * Make RAID-6 tables. This is a host user space program to be run at + * compile time. + */ + +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <stdlib.h> +#include <time.h> + +static uint8_t gfmul(uint8_t a, uint8_t b) +{ + uint8_t v = 0; + + while (b) { + if (b & 1) + v ^= a; + a = (a << 1) ^ (a & 0x80 ? 0x1d : 0); + b >>= 1; + } + + return v; +} + +static uint8_t gfpow(uint8_t a, int b) +{ + uint8_t v = 1; + + b %= 255; + if (b < 0) + b += 255; + + while (b) { + if (b & 1) + v = gfmul(v, a); + a = gfmul(a, a); + b >>= 1; + } + + return v; +} + +int main(int argc, char *argv[]) +{ + int i, j, k; + uint8_t v; + uint8_t exptbl[256], invtbl[256]; + + printf("#include <linux/raid/pq.h>\n"); + + /* Compute multiplication table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfmul[256][256] =\n" + "{\n"); + for (i = 0; i < 256; i++) { + printf("\t{\n"); + for (j = 0; j < 256; j += 8) { + printf("\t\t"); + for (k = 0; k < 8; k++) + printf("0x%02x,%c", gfmul(i, j + k), + (k == 7) ? '\n' : ' '); + } + printf("\t},\n"); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfmul);\n"); + printf("#endif\n"); + + /* Compute power-of-2 table (exponent) */ + v = 1; + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfexp[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + exptbl[i + j] = v; + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); + v = gfmul(v, 2); + if (v == 1) + v = 0; /* For entry 255, not a real entry */ + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfexp);\n"); + printf("#endif\n"); + + /* Compute inverse table x^-1 == x^254 */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfinv[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + invtbl[i + j] = v = gfpow(i + j, 254); + printf("0x%02x,%c", v, (j == 7) ? '\n' : ' '); + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfinv);\n"); + printf("#endif\n"); + + /* Compute inv(2^x + 1) (exponent-xor-inverse) table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gfexi[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) + printf("0x%02x,%c", invtbl[exptbl[i + j] ^ 1], + (j == 7) ? '\n' : ' '); + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gfexi);\n"); + printf("#endif\n"); + + return 0; +} diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c new file mode 100644 index 00000000..279347f2 --- /dev/null +++ b/lib/raid6/mmx.c @@ -0,0 +1,142 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/mmx.c + * + * MMX implementation of RAID-6 syndrome functions + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +/* Shared with raid6/sse1.c */ +const struct raid6_mmx_constants { + u64 x1d; +} raid6_mmx_constants = { + 0x1d1d1d1d1d1d1d1dULL, +}; + +static int raid6_have_mmx(void) +{ + /* Not really "boot_cpu" but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX); +} + +/* + * Plain MMX implementation + */ +static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 8 ) { + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + } + asm volatile("movq %%mm2,%0" : "=m" (p[d])); + asm volatile("pxor %mm2,%mm2"); + asm volatile("movq %%mm4,%0" : "=m" (q[d])); + asm volatile("pxor %mm4,%mm4"); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx1 = { + raid6_mmx1_gen_syndrome, + raid6_have_mmx, + "mmxx1", + 0 +}; + +/* + * Unrolled-by-2 MMX implementation + */ +static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %mm3,%mm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("pcmpgtb %mm6,%mm7"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("paddb %mm6,%mm6"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pand %mm0,%mm7"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); + asm volatile("pxor %mm5,%mm2"); + asm volatile("pxor %mm7,%mm3"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm7,%mm7"); + } + asm volatile("movq %%mm2,%0" : "=m" (p[d])); + asm volatile("movq %%mm3,%0" : "=m" (p[d+8])); + asm volatile("movq %%mm4,%0" : "=m" (q[d])); + asm volatile("movq %%mm6,%0" : "=m" (q[d+8])); + } + + kernel_fpu_end(); +} + +const struct raid6_calls raid6_mmxx2 = { + raid6_mmx2_gen_syndrome, + raid6_have_mmx, + "mmxx2", + 0 +}; + +#endif diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c new file mode 100644 index 00000000..8590d19c --- /dev/null +++ b/lib/raid6/recov.c @@ -0,0 +1,132 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/recov.c + * + * RAID-6 data recovery in dual failure mode. In single failure mode, + * use the RAID-5 algorithm (or, in the case of Q failure, just reconstruct + * the syndrome.) + */ + +#include <linux/raid/pq.h> + +/* Recover two failed data blocks. */ +void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + void **ptrs) +{ + u8 *p, *q, *dp, *dq; + u8 px, qx, db; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data pages + Use the dead data pages as temporary storage for + delta p and delta q */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks-2] = p; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_gfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]]; + + /* Now do it... */ + while ( bytes-- ) { + px = *p ^ *dp; + qx = qmul[*q ^ *dq]; + *dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */ + *dp++ = db ^ px; /* Reconstructed A */ + p++; q++; + } +} +EXPORT_SYMBOL_GPL(raid6_2data_recov); + +/* Recover failure of one data block plus the P block */ +void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks-2]; + q = (u8 *)ptrs[disks-1]; + + /* Compute syndrome with zero for the missing data page + Use the dead data page as temporary storage for delta q */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks-1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks-1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + /* Now do it... */ + while ( bytes-- ) { + *p++ ^= *dq = qmul[*q ^ *dq]; + q++; dq++; + } +} +EXPORT_SYMBOL_GPL(raid6_datap_recov); + +#ifndef __KERNEL__ +/* Testing only */ + +/* Recover two failed blocks. */ +void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs) +{ + if ( faila > failb ) { + int tmp = faila; + faila = failb; + failb = tmp; + } + + if ( failb == disks-1 ) { + if ( faila == disks-2 ) { + /* P+Q failure. Just rebuild the syndrome. */ + raid6_call.gen_syndrome(disks, bytes, ptrs); + } else { + /* data+Q failure. Reconstruct data from P, + then rebuild syndrome. */ + /* NOT IMPLEMENTED - equivalent to RAID-5 */ + } + } else { + if ( failb == disks-2 ) { + /* data+P failure. */ + raid6_datap_recov(disks, bytes, faila, ptrs); + } else { + /* data+data failure. */ + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + } + } +} + +#endif diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c new file mode 100644 index 00000000..10dd9194 --- /dev/null +++ b/lib/raid6/sse1.c @@ -0,0 +1,162 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/sse1.c + * + * SSE-1/MMXEXT implementation of RAID-6 syndrome functions + * + * This is really an MMX implementation, but it requires SSE-1 or + * AMD MMXEXT for prefetch support and a few other features. The + * support for nontemporal memory accesses is enough to make this + * worthwhile as a separate implementation. + */ + +#if defined(__i386__) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +/* Defined in raid6/mmx.c */ +extern const struct raid6_mmx_constants { + u64 x1d; +} raid6_mmx_constants; + +static int raid6_have_sse1_or_mmxext(void) +{ + /* Not really boot_cpu but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX) && + (boot_cpu_has(X86_FEATURE_XMM) || + boot_cpu_has(X86_FEATURE_MMXEXT)); +} + +/* + * Plain SSE1 implementation + */ +static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 8 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d])); + for ( z = z0-2 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d])); + } + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm6,%mm2"); + asm volatile("pxor %mm6,%mm4"); + + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x1 = { + raid6_sse11_gen_syndrome, + raid6_have_sse1_or_mmxext, + "sse1x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE1 implementation + */ +static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d)); + asm volatile("pxor %mm5,%mm5"); /* Zero temp */ + asm volatile("pxor %mm7,%mm7"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 16 bytes */ + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */ + asm volatile("movq %mm2,%mm4"); /* Q[0] */ + asm volatile("movq %mm3,%mm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %mm4,%mm5"); + asm volatile("pcmpgtb %mm6,%mm7"); + asm volatile("paddb %mm4,%mm4"); + asm volatile("paddb %mm6,%mm6"); + asm volatile("pand %mm0,%mm5"); + asm volatile("pand %mm0,%mm7"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d])); + asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8])); + asm volatile("pxor %mm5,%mm2"); + asm volatile("pxor %mm7,%mm3"); + asm volatile("pxor %mm5,%mm4"); + asm volatile("pxor %mm7,%mm6"); + asm volatile("pxor %mm5,%mm5"); + asm volatile("pxor %mm7,%mm7"); + } + asm volatile("movntq %%mm2,%0" : "=m" (p[d])); + asm volatile("movntq %%mm3,%0" : "=m" (p[d+8])); + asm volatile("movntq %%mm4,%0" : "=m" (q[d])); + asm volatile("movntq %%mm6,%0" : "=m" (q[d+8])); + } + + asm volatile("sfence" : :: "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse1x2 = { + raid6_sse12_gen_syndrome, + raid6_have_sse1_or_mmxext, + "sse1x2", + 1 /* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c new file mode 100644 index 00000000..bc2d57da --- /dev/null +++ b/lib/raid6/sse2.c @@ -0,0 +1,262 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/sse2.c + * + * SSE-2 implementation of RAID-6 syndrome functions + * + */ + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#include <linux/raid/pq.h> +#include "x86.h" + +static const struct raid6_sse_constants { + u64 x1d[2]; +} raid6_sse_constants __attribute__((aligned(16))) = { + { 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL }, +}; + +static int raid6_have_sse2(void) +{ + /* Not really boot_cpu but "all_cpus" */ + return boot_cpu_has(X86_FEATURE_MMX) && + boot_cpu_has(X86_FEATURE_FXSR) && + boot_cpu_has(X86_FEATURE_XMM) && + boot_cpu_has(X86_FEATURE_XMM2); +} + +/* + * Plain SSE2 implementation + */ +static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d])); + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d])); + for ( z = z0-2 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm6,%xmm4"); + asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d])); + } + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm6,%xmm2"); + asm volatile("pxor %xmm6,%xmm4"); + + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("pxor %xmm2,%xmm2"); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("pxor %xmm4,%xmm4"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x1 = { + raid6_sse21_gen_syndrome, + raid6_have_sse2, + "sse2x1", + 1 /* Has cache hints */ +}; + +/* + * Unrolled-by-2 SSE2 implementation + */ +static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + + /* We uniformly assume a single prefetch covers at least 32 bytes */ + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */ + asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */ + asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */ + asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */ + for ( z = z0-1 ; z >= 0 ; z-- ) { + asm volatile("prefetchnta %0" : : "m" (dptr[z][d])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x2 = { + raid6_sse22_gen_syndrome, + raid6_have_sse2, + "sse2x2", + 1 /* Has cache hints */ +}; + +#endif + +#if defined(__x86_64__) && !defined(__arch_um__) + +/* + * Unrolled-by-4 SSE2 implementation + */ +static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + asm volatile("pxor %xmm2,%xmm2"); /* P[0] */ + asm volatile("pxor %xmm3,%xmm3"); /* P[1] */ + asm volatile("pxor %xmm4,%xmm4"); /* Q[0] */ + asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */ + asm volatile("pxor %xmm6,%xmm6"); /* Q[1] */ + asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */ + asm volatile("pxor %xmm10,%xmm10"); /* P[2] */ + asm volatile("pxor %xmm11,%xmm11"); /* P[3] */ + asm volatile("pxor %xmm12,%xmm12"); /* Q[2] */ + asm volatile("pxor %xmm13,%xmm13"); /* Zero temp */ + asm volatile("pxor %xmm14,%xmm14"); /* Q[3] */ + asm volatile("pxor %xmm15,%xmm15"); /* Zero temp */ + + for ( d = 0 ; d < bytes ; d += 64 ) { + for ( z = z0 ; z >= 0 ; z-- ) { + /* The second prefetch seems to improve performance... */ + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("pxor %xmm2,%xmm2"); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("pxor %xmm3,%xmm3"); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("pxor %xmm10,%xmm10"); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %xmm11,%xmm11"); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("pxor %xmm4,%xmm4"); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("pxor %xmm6,%xmm6"); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("pxor %xmm12,%xmm12"); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + asm volatile("pxor %xmm14,%xmm14"); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + +const struct raid6_calls raid6_sse2x4 = { + raid6_sse24_gen_syndrome, + raid6_have_sse2, + "sse2x4", + 1 /* Has cache hints */ +}; + +#endif diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile new file mode 100644 index 00000000..aa651697 --- /dev/null +++ b/lib/raid6/test/Makefile @@ -0,0 +1,72 @@ +# +# This is a simple Makefile to test some of the RAID-6 code +# from userspace. +# + +CC = gcc +OPTFLAGS = -O2 # Adjust as desired +CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS) +LD = ld +AWK = awk -f +AR = ar +RANLIB = ranlib + +.c.o: + $(CC) $(CFLAGS) -c -o $@ $< + +%.c: ../%.c + cp -f $< $@ + +%.uc: ../%.uc + cp -f $< $@ + +all: raid6.a raid6test + +raid6.a: int1.o int2.o int4.o int8.o int16.o int32.o mmx.o sse1.o sse2.o \ + altivec1.o altivec2.o altivec4.o altivec8.o recov.o algos.o \ + tables.o + rm -f $@ + $(AR) cq $@ $^ + $(RANLIB) $@ + +raid6test: test.c raid6.a + $(CC) $(CFLAGS) -o raid6test $^ + +altivec1.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ + +altivec2.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < altivec.uc > $@ + +altivec4.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < altivec.uc > $@ + +altivec8.c: altivec.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ + +int1.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < int.uc > $@ + +int2.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < int.uc > $@ + +int4.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < int.uc > $@ + +int8.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < int.uc > $@ + +int16.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=16 < int.uc > $@ + +int32.c: int.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=32 < int.uc > $@ + +tables.c: mktables + ./mktables > tables.c + +clean: + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test + +spotless: clean + rm -f *~ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c new file mode 100644 index 00000000..7a930318 --- /dev/null +++ b/lib/raid6/test/test.c @@ -0,0 +1,124 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002-2007 H. Peter Anvin - All Rights Reserved + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2 or (at your + * option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6test.c + * + * Test RAID-6 recovery with various algorithms + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <linux/raid/pq.h> + +#define NDISKS 16 /* Including P and Q */ + +const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); +struct raid6_calls raid6_call; + +char *dataptrs[NDISKS]; +char data[NDISKS][PAGE_SIZE]; +char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; + +static void makedata(void) +{ + int i, j; + + for (i = 0; i < NDISKS; i++) { + for (j = 0; j < PAGE_SIZE; j++) + data[i][j] = rand(); + + dataptrs[i] = data[i]; + } +} + +static char disk_type(int d) +{ + switch (d) { + case NDISKS-2: + return 'P'; + case NDISKS-1: + return 'Q'; + default: + return 'D'; + } +} + +static int test_disks(int i, int j) +{ + int erra, errb; + + memset(recovi, 0xf0, PAGE_SIZE); + memset(recovj, 0xba, PAGE_SIZE); + + dataptrs[i] = recovi; + dataptrs[j] = recovj; + + raid6_dual_recov(NDISKS, PAGE_SIZE, i, j, (void **)&dataptrs); + + erra = memcmp(data[i], recovi, PAGE_SIZE); + errb = memcmp(data[j], recovj, PAGE_SIZE); + + if (i < NDISKS-2 && j == NDISKS-1) { + /* We don't implement the DQ failure scenario, since it's + equivalent to a RAID-5 failure (XOR, then recompute Q) */ + erra = errb = 0; + } else { + printf("algo=%-8s faila=%3d(%c) failb=%3d(%c) %s\n", + raid6_call.name, + i, disk_type(i), + j, disk_type(j), + (!erra && !errb) ? "OK" : + !erra ? "ERRB" : + !errb ? "ERRA" : "ERRAB"); + } + + dataptrs[i] = data[i]; + dataptrs[j] = data[j]; + + return erra || errb; +} + +int main(int argc, char *argv[]) +{ + const struct raid6_calls *const *algo; + int i, j; + int err = 0; + + makedata(); + + for (algo = raid6_algos; *algo; algo++) { + if (!(*algo)->valid || (*algo)->valid()) { + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } + printf("\n"); + } + + printf("\n"); + /* Pick the best algorithm test */ + raid6_select_algo(); + + if (err) + printf("\n*** ERRORS FOUND ***\n"); + + return err; +} diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk new file mode 100644 index 00000000..c6aa0363 --- /dev/null +++ b/lib/raid6/unroll.awk @@ -0,0 +1,20 @@ + +# This filter requires one command line option of form -vN=n +# where n must be a decimal number. +# +# Repeat each input line containing $$ n times, replacing $$ with 0...n-1. +# Replace each $# with n, and each $* with a single $. + +BEGIN { + n = N + 0 +} +{ + if (/\$\$/) { rep = n } else { rep = 1 } + for (i = 0; i < rep; ++i) { + tmp = $0 + gsub(/\$\$/, i, tmp) + gsub(/\$\#/, n, tmp) + gsub(/\$\*/, "$", tmp) + print tmp + } +} diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h new file mode 100644 index 00000000..cb2a8c91 --- /dev/null +++ b/lib/raid6/x86.h @@ -0,0 +1,61 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * raid6/x86.h + * + * Definitions common to x86 and x86-64 RAID-6 code only + */ + +#ifndef LINUX_RAID_RAID6X86_H +#define LINUX_RAID_RAID6X86_H + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) + +#ifdef __KERNEL__ /* Real code */ + +#include <asm/i387.h> + +#else /* Dummy code for user space testing */ + +static inline void kernel_fpu_begin(void) +{ +} + +static inline void kernel_fpu_end(void) +{ +} + +#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ +#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions + * (fast save and restore) */ +#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ +#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ +#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ + +/* Should work well enough on modern CPUs for testing */ +static inline int boot_cpu_has(int flag) +{ + u32 eax = (flag >> 5) ? 0x80000001 : 1; + u32 edx; + + asm volatile("cpuid" + : "+a" (eax), "=d" (edx) + : : "ecx", "ebx"); + + return (edx >> (flag & 31)) & 1; +} + +#endif /* ndef __KERNEL__ */ + +#endif +#endif diff --git a/lib/random32.c b/lib/random32.c new file mode 100644 index 00000000..fc3545a3 --- /dev/null +++ b/lib/random32.c @@ -0,0 +1,150 @@ +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. (Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) + + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. + +*/ + +#include <linux/types.h> +#include <linux/percpu.h> +#include <linux/module.h> +#include <linux/jiffies.h> +#include <linux/random.h> + +static DEFINE_PER_CPU(struct rnd_state, net_rand_state); + +/** + * prandom32 - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use random32(). + */ +u32 prandom32(struct rnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); +} +EXPORT_SYMBOL(prandom32); + +/** + * random32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 random32(void) +{ + unsigned long r; + struct rnd_state *state = &get_cpu_var(net_rand_state); + r = prandom32(state); + put_cpu_var(state); + return r; +} +EXPORT_SYMBOL(random32); + +/** + * srandom32 - add entropy to pseudo random number generator + * @seed: seed value + * + * Add some additional seeding to the random32() pool. + */ +void srandom32(u32 entropy) +{ + int i; + /* + * No locking on the CPUs, but then somewhat random results are, well, + * expected. + */ + for_each_possible_cpu (i) { + struct rnd_state *state = &per_cpu(net_rand_state, i); + state->s1 = __seed(state->s1 ^ entropy, 1); + } +} +EXPORT_SYMBOL(srandom32); + +/* + * Generate some initially weak seeding values to allow + * to start the random32() engine. + */ +static int __init random32_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct rnd_state *state = &per_cpu(net_rand_state,i); + +#define LCG(x) ((x) * 69069) /* super-duper LCG */ + state->s1 = __seed(LCG(i + jiffies), 1); + state->s2 = __seed(LCG(state->s1), 7); + state->s3 = __seed(LCG(state->s2), 15); + + /* "warm it up" */ + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + prandom32(state); + } + return 0; +} +core_initcall(random32_init); + +/* + * Generate better values after random number generator + * is fully initialized. + */ +static int __init random32_reseed(void) +{ + int i; + + for_each_possible_cpu(i) { + struct rnd_state *state = &per_cpu(net_rand_state,i); + u32 seeds[3]; + + get_random_bytes(&seeds, sizeof(seeds)); + state->s1 = __seed(seeds[0], 1); + state->s2 = __seed(seeds[1], 7); + state->s3 = __seed(seeds[2], 15); + + /* mix it in */ + prandom32(state); + } + return 0; +} +late_initcall(random32_reseed); diff --git a/lib/ratelimit.c b/lib/ratelimit.c new file mode 100644 index 00000000..027a03f4 --- /dev/null +++ b/lib/ratelimit.c @@ -0,0 +1,67 @@ +/* + * ratelimit.c - Do something with rate limit. + * + * Isolated from kernel/printk.c by Dave Young <hidave.darkstar@gmail.com> + * + * 2008-05-01 rewrite the function and use a ratelimit_state data struct as + * parameter. Now every user can use their own standalone ratelimit_state. + * + * This file is released under the GPLv2. + */ + +#include <linux/ratelimit.h> +#include <linux/jiffies.h> +#include <linux/module.h> + +/* + * __ratelimit - rate limiting + * @rs: ratelimit_state data + * @func: name of calling function + * + * This enforces a rate limit: not more than @rs->burst callbacks + * in every @rs->interval + * + * RETURNS: + * 0 means callbacks will be suppressed. + * 1 means go ahead and do it. + */ +int ___ratelimit(struct ratelimit_state *rs, const char *func) +{ + unsigned long flags; + int ret; + + if (!rs->interval) + return 1; + + /* + * If we contend on this state's lock then almost + * by definition we are too busy to print a message, + * in addition to the one that will be printed by + * the entity that is holding the lock already: + */ + if (!spin_trylock_irqsave(&rs->lock, flags)) + return 0; + + if (!rs->begin) + rs->begin = jiffies; + + if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (rs->missed) + printk(KERN_WARNING "%s: %d callbacks suppressed\n", + func, rs->missed); + rs->begin = 0; + rs->printed = 0; + rs->missed = 0; + } + if (rs->burst && rs->burst > rs->printed) { + rs->printed++; + ret = 1; + } else { + rs->missed++; + ret = 0; + } + spin_unlock_irqrestore(&rs->lock, flags); + + return ret; +} +EXPORT_SYMBOL(___ratelimit); diff --git a/lib/rational.c b/lib/rational.c new file mode 100644 index 00000000..3ed247b8 --- /dev/null +++ b/lib/rational.c @@ -0,0 +1,63 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * + * helper functions when coping with rational numbers + */ + +#include <linux/rational.h> +#include <linux/module.h> + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * http://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + unsigned long n, d, n0, d0, n1, d1; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + for (;;) { + unsigned long t, a; + if ((n1 > max_numerator) || (d1 > max_denominator)) { + n1 = n0; + d1 = d0; + break; + } + if (d == 0) + break; + t = d; + a = n / d; + d = n % d; + n = t; + t = n0 + a * n1; + n0 = n1; + n1 = t; + t = d0 + a * d1; + d0 = d1; + d1 = t; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); diff --git a/lib/rbtree.c b/lib/rbtree.c new file mode 100644 index 00000000..4693f791 --- /dev/null +++ b/lib/rbtree.c @@ -0,0 +1,459 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include <linux/rbtree.h> +#include <linux/module.h> + +static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *right = node->rb_right; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_right = right->rb_left)) + rb_set_parent(right->rb_left, node); + right->rb_left = node; + + rb_set_parent(right, parent); + + if (parent) + { + if (node == parent->rb_left) + parent->rb_left = right; + else + parent->rb_right = right; + } + else + root->rb_node = right; + rb_set_parent(node, right); +} + +static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *left = node->rb_left; + struct rb_node *parent = rb_parent(node); + + if ((node->rb_left = left->rb_right)) + rb_set_parent(left->rb_right, node); + left->rb_right = node; + + rb_set_parent(left, parent); + + if (parent) + { + if (node == parent->rb_right) + parent->rb_right = left; + else + parent->rb_left = left; + } + else + root->rb_node = left; + rb_set_parent(node, left); +} + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *parent, *gparent; + + while ((parent = rb_parent(node)) && rb_is_red(parent)) + { + gparent = rb_parent(parent); + + if (parent == gparent->rb_left) + { + { + register struct rb_node *uncle = gparent->rb_right; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_right == node) + { + register struct rb_node *tmp; + __rb_rotate_left(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_right(gparent, root); + } else { + { + register struct rb_node *uncle = gparent->rb_left; + if (uncle && rb_is_red(uncle)) + { + rb_set_black(uncle); + rb_set_black(parent); + rb_set_red(gparent); + node = gparent; + continue; + } + } + + if (parent->rb_left == node) + { + register struct rb_node *tmp; + __rb_rotate_right(parent, root); + tmp = parent; + parent = node; + node = tmp; + } + + rb_set_black(parent); + rb_set_red(gparent); + __rb_rotate_left(gparent, root); + } + } + + rb_set_black(root->rb_node); +} +EXPORT_SYMBOL(rb_insert_color); + +static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, + struct rb_root *root) +{ + struct rb_node *other; + + while ((!node || rb_is_black(node)) && node != root->rb_node) + { + if (parent->rb_left == node) + { + other = parent->rb_right; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_left(parent, root); + other = parent->rb_right; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_right || rb_is_black(other->rb_right)) + { + rb_set_black(other->rb_left); + rb_set_red(other); + __rb_rotate_right(other, root); + other = parent->rb_right; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + rb_set_black(other->rb_right); + __rb_rotate_left(parent, root); + node = root->rb_node; + break; + } + } + else + { + other = parent->rb_left; + if (rb_is_red(other)) + { + rb_set_black(other); + rb_set_red(parent); + __rb_rotate_right(parent, root); + other = parent->rb_left; + } + if ((!other->rb_left || rb_is_black(other->rb_left)) && + (!other->rb_right || rb_is_black(other->rb_right))) + { + rb_set_red(other); + node = parent; + parent = rb_parent(node); + } + else + { + if (!other->rb_left || rb_is_black(other->rb_left)) + { + rb_set_black(other->rb_right); + rb_set_red(other); + __rb_rotate_left(other, root); + other = parent->rb_left; + } + rb_set_color(other, rb_color(parent)); + rb_set_black(parent); + rb_set_black(other->rb_left); + __rb_rotate_right(parent, root); + node = root->rb_node; + break; + } + } + } + if (node) + rb_set_black(node); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *child, *parent; + int color; + + if (!node->rb_left) + child = node->rb_right; + else if (!node->rb_right) + child = node->rb_left; + else + { + struct rb_node *old = node, *left; + + node = node->rb_right; + while ((left = node->rb_left) != NULL) + node = left; + + if (rb_parent(old)) { + if (rb_parent(old)->rb_left == old) + rb_parent(old)->rb_left = node; + else + rb_parent(old)->rb_right = node; + } else + root->rb_node = node; + + child = node->rb_right; + parent = rb_parent(node); + color = rb_color(node); + + if (parent == old) { + parent = node; + } else { + if (child) + rb_set_parent(child, parent); + parent->rb_left = child; + + node->rb_right = old->rb_right; + rb_set_parent(old->rb_right, node); + } + + node->rb_parent_color = old->rb_parent_color; + node->rb_left = old->rb_left; + rb_set_parent(old->rb_left, node); + + goto color; + } + + parent = rb_parent(node); + color = rb_color(node); + + if (child) + rb_set_parent(child, parent); + if (parent) + { + if (parent->rb_left == node) + parent->rb_left = child; + else + parent->rb_right = child; + } + else + root->rb_node = child; + + color: + if (color == RB_BLACK) + __rb_erase_color(child, parent, root); +} +EXPORT_SYMBOL(rb_erase); + +static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data) +{ + struct rb_node *parent; + +up: + func(node, data); + parent = rb_parent(node); + if (!parent) + return; + + if (node == parent->rb_left && parent->rb_right) + func(parent->rb_right, data); + else if (parent->rb_left) + func(parent->rb_left, data); + + node = parent; + goto up; +} + +/* + * after inserting @node into the tree, update the tree to account for + * both the new entry and any damage done by rebalance + */ +void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + + rb_augment_path(node, func, data); +} + +/* + * before removing the node, find the deepest node on the rebalance path + * that will still be there after @node gets removed + */ +struct rb_node *rb_augment_erase_begin(struct rb_node *node) +{ + struct rb_node *deepest; + + if (!node->rb_right && !node->rb_left) + deepest = rb_parent(node); + else if (!node->rb_right) + deepest = node->rb_left; + else if (!node->rb_left) + deepest = node->rb_right; + else { + deepest = rb_next(node); + if (deepest->rb_right) + deepest = deepest->rb_right; + else if (rb_parent(deepest) != node) + deepest = rb_parent(deepest); + } + + return deepest; +} + +/* + * after removal, update the tree to account for the removed entry + * and any rebalance damage. + */ +void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data) +{ + if (node) + rb_augment_path(node, func, data); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} +EXPORT_SYMBOL(rb_first); + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} +EXPORT_SYMBOL(rb_last); + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a right-hand child, go down and then left as far + as we can. */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* No right-hand children. Everything down and left is + smaller than us, so any 'next' node must be in the general + direction of our parent. Go up the tree; any time the + ancestor is a right-hand child of its parent, keep going + up. First time it's a left-hand child of its parent, said + parent is our 'next' node. */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} +EXPORT_SYMBOL(rb_next); + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (rb_parent(node) == node) + return NULL; + + /* If we have a left-hand child, go down and then right as far + as we can. */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* No left-hand children. Go up till we find an ancestor which + is a right-hand child of its parent */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} +EXPORT_SYMBOL(rb_prev); + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + if (parent) { + if (victim == parent->rb_left) + parent->rb_left = new; + else + parent->rb_right = new; + } else { + root->rb_node = new; + } + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} +EXPORT_SYMBOL(rb_replace_node); diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c new file mode 100644 index 00000000..6a3bd48f --- /dev/null +++ b/lib/reciprocal_div.c @@ -0,0 +1,9 @@ +#include <asm/div64.h> +#include <linux/reciprocal_div.h> + +u32 reciprocal_value(u32 k) +{ + u64 val = (1LL << 32) + (k - 1); + do_div(val, k); + return (u32)val; +} diff --git a/lib/reed_solomon/Makefile b/lib/reed_solomon/Makefile new file mode 100644 index 00000000..c3d71368 --- /dev/null +++ b/lib/reed_solomon/Makefile @@ -0,0 +1,6 @@ +# +# This is a modified version of reed solomon lib, +# + +obj-$(CONFIG_REED_SOLOMON) += reed_solomon.o + diff --git a/lib/reed_solomon/decode_rs.c b/lib/reed_solomon/decode_rs.c new file mode 100644 index 00000000..0ec3f257 --- /dev/null +++ b/lib/reed_solomon/decode_rs.c @@ -0,0 +1,271 @@ +/* + * lib/reed_solomon/decode_rs.c + * + * Overview: + * Generic Reed Solomon encoder / decoder library + * + * Copyright 2002, Phil Karn, KA9Q + * May be used under the terms of the GNU General Public License (GPL) + * + * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) + * + * $Id: decode_rs.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $ + * + */ + +/* Generic data width independent code which is included by the + * wrappers. + */ +{ + int deg_lambda, el, deg_omega; + int i, j, r, k, pad; + int nn = rs->nn; + int nroots = rs->nroots; + int fcr = rs->fcr; + int prim = rs->prim; + int iprim = rs->iprim; + uint16_t *alpha_to = rs->alpha_to; + uint16_t *index_of = rs->index_of; + uint16_t u, q, tmp, num1, num2, den, discr_r, syn_error; + /* Err+Eras Locator poly and syndrome poly The maximum value + * of nroots is 8. So the necessary stack size will be about + * 220 bytes max. + */ + uint16_t lambda[nroots + 1], syn[nroots]; + uint16_t b[nroots + 1], t[nroots + 1], omega[nroots + 1]; + uint16_t root[nroots], reg[nroots + 1], loc[nroots]; + int count = 0; + uint16_t msk = (uint16_t) rs->nn; + + /* Check length parameter for validity */ + pad = nn - nroots - len; + BUG_ON(pad < 0 || pad >= nn); + + /* Does the caller provide the syndrome ? */ + if (s != NULL) + goto decode; + + /* form the syndromes; i.e., evaluate data(x) at roots of + * g(x) */ + for (i = 0; i < nroots; i++) + syn[i] = (((uint16_t) data[0]) ^ invmsk) & msk; + + for (j = 1; j < len; j++) { + for (i = 0; i < nroots; i++) { + if (syn[i] == 0) { + syn[i] = (((uint16_t) data[j]) ^ + invmsk) & msk; + } else { + syn[i] = ((((uint16_t) data[j]) ^ + invmsk) & msk) ^ + alpha_to[rs_modnn(rs, index_of[syn[i]] + + (fcr + i) * prim)]; + } + } + } + + for (j = 0; j < nroots; j++) { + for (i = 0; i < nroots; i++) { + if (syn[i] == 0) { + syn[i] = ((uint16_t) par[j]) & msk; + } else { + syn[i] = (((uint16_t) par[j]) & msk) ^ + alpha_to[rs_modnn(rs, index_of[syn[i]] + + (fcr+i)*prim)]; + } + } + } + s = syn; + + /* Convert syndromes to index form, checking for nonzero condition */ + syn_error = 0; + for (i = 0; i < nroots; i++) { + syn_error |= s[i]; + s[i] = index_of[s[i]]; + } + + if (!syn_error) { + /* if syndrome is zero, data[] is a codeword and there are no + * errors to correct. So return data[] unmodified + */ + count = 0; + goto finish; + } + + decode: + memset(&lambda[1], 0, nroots * sizeof(lambda[0])); + lambda[0] = 1; + + if (no_eras > 0) { + /* Init lambda to be the erasure locator polynomial */ + lambda[1] = alpha_to[rs_modnn(rs, + prim * (nn - 1 - eras_pos[0]))]; + for (i = 1; i < no_eras; i++) { + u = rs_modnn(rs, prim * (nn - 1 - eras_pos[i])); + for (j = i + 1; j > 0; j--) { + tmp = index_of[lambda[j - 1]]; + if (tmp != nn) { + lambda[j] ^= + alpha_to[rs_modnn(rs, u + tmp)]; + } + } + } + } + + for (i = 0; i < nroots + 1; i++) + b[i] = index_of[lambda[i]]; + + /* + * Begin Berlekamp-Massey algorithm to determine error+erasure + * locator polynomial + */ + r = no_eras; + el = no_eras; + while (++r <= nroots) { /* r is the step number */ + /* Compute discrepancy at the r-th step in poly-form */ + discr_r = 0; + for (i = 0; i < r; i++) { + if ((lambda[i] != 0) && (s[r - i - 1] != nn)) { + discr_r ^= + alpha_to[rs_modnn(rs, + index_of[lambda[i]] + + s[r - i - 1])]; + } + } + discr_r = index_of[discr_r]; /* Index form */ + if (discr_r == nn) { + /* 2 lines below: B(x) <-- x*B(x) */ + memmove (&b[1], b, nroots * sizeof (b[0])); + b[0] = nn; + } else { + /* 7 lines below: T(x) <-- lambda(x)-discr_r*x*b(x) */ + t[0] = lambda[0]; + for (i = 0; i < nroots; i++) { + if (b[i] != nn) { + t[i + 1] = lambda[i + 1] ^ + alpha_to[rs_modnn(rs, discr_r + + b[i])]; + } else + t[i + 1] = lambda[i + 1]; + } + if (2 * el <= r + no_eras - 1) { + el = r + no_eras - el; + /* + * 2 lines below: B(x) <-- inv(discr_r) * + * lambda(x) + */ + for (i = 0; i <= nroots; i++) { + b[i] = (lambda[i] == 0) ? nn : + rs_modnn(rs, index_of[lambda[i]] + - discr_r + nn); + } + } else { + /* 2 lines below: B(x) <-- x*B(x) */ + memmove(&b[1], b, nroots * sizeof(b[0])); + b[0] = nn; + } + memcpy(lambda, t, (nroots + 1) * sizeof(t[0])); + } + } + + /* Convert lambda to index form and compute deg(lambda(x)) */ + deg_lambda = 0; + for (i = 0; i < nroots + 1; i++) { + lambda[i] = index_of[lambda[i]]; + if (lambda[i] != nn) + deg_lambda = i; + } + /* Find roots of error+erasure locator polynomial by Chien search */ + memcpy(®[1], &lambda[1], nroots * sizeof(reg[0])); + count = 0; /* Number of roots of lambda(x) */ + for (i = 1, k = iprim - 1; i <= nn; i++, k = rs_modnn(rs, k + iprim)) { + q = 1; /* lambda[0] is always 0 */ + for (j = deg_lambda; j > 0; j--) { + if (reg[j] != nn) { + reg[j] = rs_modnn(rs, reg[j] + j); + q ^= alpha_to[reg[j]]; + } + } + if (q != 0) + continue; /* Not a root */ + /* store root (index-form) and error location number */ + root[count] = i; + loc[count] = k; + /* If we've already found max possible roots, + * abort the search to save time + */ + if (++count == deg_lambda) + break; + } + if (deg_lambda != count) { + /* + * deg(lambda) unequal to number of roots => uncorrectable + * error detected + */ + count = -EBADMSG; + goto finish; + } + /* + * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo + * x**nroots). in index form. Also find deg(omega). + */ + deg_omega = deg_lambda - 1; + for (i = 0; i <= deg_omega; i++) { + tmp = 0; + for (j = i; j >= 0; j--) { + if ((s[i - j] != nn) && (lambda[j] != nn)) + tmp ^= + alpha_to[rs_modnn(rs, s[i - j] + lambda[j])]; + } + omega[i] = index_of[tmp]; + } + + /* + * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 = + * inv(X(l))**(fcr-1) and den = lambda_pr(inv(X(l))) all in poly-form + */ + for (j = count - 1; j >= 0; j--) { + num1 = 0; + for (i = deg_omega; i >= 0; i--) { + if (omega[i] != nn) + num1 ^= alpha_to[rs_modnn(rs, omega[i] + + i * root[j])]; + } + num2 = alpha_to[rs_modnn(rs, root[j] * (fcr - 1) + nn)]; + den = 0; + + /* lambda[i+1] for i even is the formal derivative + * lambda_pr of lambda[i] */ + for (i = min(deg_lambda, nroots - 1) & ~1; i >= 0; i -= 2) { + if (lambda[i + 1] != nn) { + den ^= alpha_to[rs_modnn(rs, lambda[i + 1] + + i * root[j])]; + } + } + /* Apply error to data */ + if (num1 != 0 && loc[j] >= pad) { + uint16_t cor = alpha_to[rs_modnn(rs,index_of[num1] + + index_of[num2] + + nn - index_of[den])]; + /* Store the error correction pattern, if a + * correction buffer is available */ + if (corr) { + corr[j] = cor; + } else { + /* If a data buffer is given and the + * error is inside the message, + * correct it */ + if (data && (loc[j] < (nn - nroots))) + data[loc[j] - pad] ^= cor; + } + } + } + +finish: + if (eras_pos != NULL) { + for (i = 0; i < count; i++) + eras_pos[i] = loc[i] - pad; + } + return count; + +} diff --git a/lib/reed_solomon/encode_rs.c b/lib/reed_solomon/encode_rs.c new file mode 100644 index 00000000..0b5b1a67 --- /dev/null +++ b/lib/reed_solomon/encode_rs.c @@ -0,0 +1,54 @@ +/* + * lib/reed_solomon/encode_rs.c + * + * Overview: + * Generic Reed Solomon encoder / decoder library + * + * Copyright 2002, Phil Karn, KA9Q + * May be used under the terms of the GNU General Public License (GPL) + * + * Adaption to the kernel by Thomas Gleixner (tglx@linutronix.de) + * + * $Id: encode_rs.c,v 1.5 2005/11/07 11:14:59 gleixner Exp $ + * + */ + +/* Generic data width independent code which is included by the + * wrappers. + * int encode_rsX (struct rs_control *rs, uintX_t *data, int len, uintY_t *par) + */ +{ + int i, j, pad; + int nn = rs->nn; + int nroots = rs->nroots; + uint16_t *alpha_to = rs->alpha_to; + uint16_t *index_of = rs->index_of; + uint16_t *genpoly = rs->genpoly; + uint16_t fb; + uint16_t msk = (uint16_t) rs->nn; + + /* Check length parameter for validity */ + pad = nn - nroots - len; + if (pad < 0 || pad >= nn) + return -ERANGE; + + for (i = 0; i < len; i++) { + fb = index_of[((((uint16_t) data[i])^invmsk) & msk) ^ par[0]]; + /* feedback term is non-zero */ + if (fb != nn) { + for (j = 1; j < nroots; j++) { + par[j] ^= alpha_to[rs_modnn(rs, fb + + genpoly[nroots - j])]; + } + } + /* Shift */ + memmove(&par[0], &par[1], sizeof(uint16_t) * (nroots - 1)); + if (fb != nn) { + par[nroots - 1] = alpha_to[rs_modnn(rs, + fb + genpoly[0])]; + } else { + par[nroots - 1] = 0; + } + } + return 0; +} diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c new file mode 100644 index 00000000..06d04cfa --- /dev/null +++ b/lib/reed_solomon/reed_solomon.c @@ -0,0 +1,384 @@ +/* + * lib/reed_solomon/reed_solomon.c + * + * Overview: + * Generic Reed Solomon encoder / decoder library + * + * Copyright (C) 2004 Thomas Gleixner (tglx@linutronix.de) + * + * Reed Solomon code lifted from reed solomon library written by Phil Karn + * Copyright 2002 Phil Karn, KA9Q + * + * $Id: rslib.c,v 1.7 2005/11/07 11:14:59 gleixner Exp $ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Description: + * + * The generic Reed Solomon library provides runtime configurable + * encoding / decoding of RS codes. + * Each user must call init_rs to get a pointer to a rs_control + * structure for the given rs parameters. This structure is either + * generated or a already available matching control structure is used. + * If a structure is generated then the polynomial arrays for + * fast encoding / decoding are built. This can take some time so + * make sure not to call this function from a time critical path. + * Usually a module / driver should initialize the necessary + * rs_control structure on module / driver init and release it + * on exit. + * The encoding puts the calculated syndrome into a given syndrome + * buffer. + * The decoding is a two step process. The first step calculates + * the syndrome over the received (data + syndrome) and calls the + * second stage, which does the decoding / error correction itself. + * Many hw encoders provide a syndrome calculation over the received + * data + syndrome and can call the second stage directly. + * + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/rslib.h> +#include <linux/slab.h> +#include <linux/mutex.h> + +/* This list holds all currently allocated rs control structures */ +static LIST_HEAD (rslist); +/* Protection for the list */ +static DEFINE_MUTEX(rslistlock); + +/** + * rs_init - Initialize a Reed-Solomon codec + * @symsize: symbol size, bits (1-8) + * @gfpoly: Field generator polynomial coefficients + * @gffunc: Field generator function + * @fcr: first root of RS code generator polynomial, index form + * @prim: primitive element to generate polynomial roots + * @nroots: RS code generator polynomial degree (number of roots) + * + * Allocate a control structure and the polynom arrays for faster + * en/decoding. Fill the arrays according to the given parameters. + */ +static struct rs_control *rs_init(int symsize, int gfpoly, int (*gffunc)(int), + int fcr, int prim, int nroots) +{ + struct rs_control *rs; + int i, j, sr, root, iprim; + + /* Allocate the control structure */ + rs = kmalloc(sizeof (struct rs_control), GFP_KERNEL); + if (rs == NULL) + return NULL; + + INIT_LIST_HEAD(&rs->list); + + rs->mm = symsize; + rs->nn = (1 << symsize) - 1; + rs->fcr = fcr; + rs->prim = prim; + rs->nroots = nroots; + rs->gfpoly = gfpoly; + rs->gffunc = gffunc; + + /* Allocate the arrays */ + rs->alpha_to = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL); + if (rs->alpha_to == NULL) + goto errrs; + + rs->index_of = kmalloc(sizeof(uint16_t) * (rs->nn + 1), GFP_KERNEL); + if (rs->index_of == NULL) + goto erralp; + + rs->genpoly = kmalloc(sizeof(uint16_t) * (rs->nroots + 1), GFP_KERNEL); + if(rs->genpoly == NULL) + goto erridx; + + /* Generate Galois field lookup tables */ + rs->index_of[0] = rs->nn; /* log(zero) = -inf */ + rs->alpha_to[rs->nn] = 0; /* alpha**-inf = 0 */ + if (gfpoly) { + sr = 1; + for (i = 0; i < rs->nn; i++) { + rs->index_of[sr] = i; + rs->alpha_to[i] = sr; + sr <<= 1; + if (sr & (1 << symsize)) + sr ^= gfpoly; + sr &= rs->nn; + } + } else { + sr = gffunc(0); + for (i = 0; i < rs->nn; i++) { + rs->index_of[sr] = i; + rs->alpha_to[i] = sr; + sr = gffunc(sr); + } + } + /* If it's not primitive, exit */ + if(sr != rs->alpha_to[0]) + goto errpol; + + /* Find prim-th root of 1, used in decoding */ + for(iprim = 1; (iprim % prim) != 0; iprim += rs->nn); + /* prim-th root of 1, index form */ + rs->iprim = iprim / prim; + + /* Form RS code generator polynomial from its roots */ + rs->genpoly[0] = 1; + for (i = 0, root = fcr * prim; i < nroots; i++, root += prim) { + rs->genpoly[i + 1] = 1; + /* Multiply rs->genpoly[] by @**(root + x) */ + for (j = i; j > 0; j--) { + if (rs->genpoly[j] != 0) { + rs->genpoly[j] = rs->genpoly[j -1] ^ + rs->alpha_to[rs_modnn(rs, + rs->index_of[rs->genpoly[j]] + root)]; + } else + rs->genpoly[j] = rs->genpoly[j - 1]; + } + /* rs->genpoly[0] can never be zero */ + rs->genpoly[0] = + rs->alpha_to[rs_modnn(rs, + rs->index_of[rs->genpoly[0]] + root)]; + } + /* convert rs->genpoly[] to index form for quicker encoding */ + for (i = 0; i <= nroots; i++) + rs->genpoly[i] = rs->index_of[rs->genpoly[i]]; + return rs; + + /* Error exit */ +errpol: + kfree(rs->genpoly); +erridx: + kfree(rs->index_of); +erralp: + kfree(rs->alpha_to); +errrs: + kfree(rs); + return NULL; +} + + +/** + * free_rs - Free the rs control structure, if it is no longer used + * @rs: the control structure which is not longer used by the + * caller + */ +void free_rs(struct rs_control *rs) +{ + mutex_lock(&rslistlock); + rs->users--; + if(!rs->users) { + list_del(&rs->list); + kfree(rs->alpha_to); + kfree(rs->index_of); + kfree(rs->genpoly); + kfree(rs); + } + mutex_unlock(&rslistlock); +} + +/** + * init_rs_internal - Find a matching or allocate a new rs control structure + * @symsize: the symbol size (number of bits) + * @gfpoly: the extended Galois field generator polynomial coefficients, + * with the 0th coefficient in the low order bit. The polynomial + * must be primitive; + * @gffunc: pointer to function to generate the next field element, + * or the multiplicative identity element if given 0. Used + * instead of gfpoly if gfpoly is 0 + * @fcr: the first consecutive root of the rs code generator polynomial + * in index form + * @prim: primitive element to generate polynomial roots + * @nroots: RS code generator polynomial degree (number of roots) + */ +static struct rs_control *init_rs_internal(int symsize, int gfpoly, + int (*gffunc)(int), int fcr, + int prim, int nroots) +{ + struct list_head *tmp; + struct rs_control *rs; + + /* Sanity checks */ + if (symsize < 1) + return NULL; + if (fcr < 0 || fcr >= (1<<symsize)) + return NULL; + if (prim <= 0 || prim >= (1<<symsize)) + return NULL; + if (nroots < 0 || nroots >= (1<<symsize)) + return NULL; + + mutex_lock(&rslistlock); + + /* Walk through the list and look for a matching entry */ + list_for_each(tmp, &rslist) { + rs = list_entry(tmp, struct rs_control, list); + if (symsize != rs->mm) + continue; + if (gfpoly != rs->gfpoly) + continue; + if (gffunc != rs->gffunc) + continue; + if (fcr != rs->fcr) + continue; + if (prim != rs->prim) + continue; + if (nroots != rs->nroots) + continue; + /* We have a matching one already */ + rs->users++; + goto out; + } + + /* Create a new one */ + rs = rs_init(symsize, gfpoly, gffunc, fcr, prim, nroots); + if (rs) { + rs->users = 1; + list_add(&rs->list, &rslist); + } +out: + mutex_unlock(&rslistlock); + return rs; +} + +/** + * init_rs - Find a matching or allocate a new rs control structure + * @symsize: the symbol size (number of bits) + * @gfpoly: the extended Galois field generator polynomial coefficients, + * with the 0th coefficient in the low order bit. The polynomial + * must be primitive; + * @fcr: the first consecutive root of the rs code generator polynomial + * in index form + * @prim: primitive element to generate polynomial roots + * @nroots: RS code generator polynomial degree (number of roots) + */ +struct rs_control *init_rs(int symsize, int gfpoly, int fcr, int prim, + int nroots) +{ + return init_rs_internal(symsize, gfpoly, NULL, fcr, prim, nroots); +} + +/** + * init_rs_non_canonical - Find a matching or allocate a new rs control + * structure, for fields with non-canonical + * representation + * @symsize: the symbol size (number of bits) + * @gffunc: pointer to function to generate the next field element, + * or the multiplicative identity element if given 0. Used + * instead of gfpoly if gfpoly is 0 + * @fcr: the first consecutive root of the rs code generator polynomial + * in index form + * @prim: primitive element to generate polynomial roots + * @nroots: RS code generator polynomial degree (number of roots) + */ +struct rs_control *init_rs_non_canonical(int symsize, int (*gffunc)(int), + int fcr, int prim, int nroots) +{ + return init_rs_internal(symsize, 0, gffunc, fcr, prim, nroots); +} + +#ifdef CONFIG_REED_SOLOMON_ENC8 +/** + * encode_rs8 - Calculate the parity for data values (8bit data width) + * @rs: the rs control structure + * @data: data field of a given type + * @len: data length + * @par: parity data, must be initialized by caller (usually all 0) + * @invmsk: invert data mask (will be xored on data) + * + * The parity uses a uint16_t data type to enable + * symbol size > 8. The calling code must take care of encoding of the + * syndrome result for storage itself. + */ +int encode_rs8(struct rs_control *rs, uint8_t *data, int len, uint16_t *par, + uint16_t invmsk) +{ +#include "encode_rs.c" +} +EXPORT_SYMBOL_GPL(encode_rs8); +#endif + +#ifdef CONFIG_REED_SOLOMON_DEC8 +/** + * decode_rs8 - Decode codeword (8bit data width) + * @rs: the rs control structure + * @data: data field of a given type + * @par: received parity data field + * @len: data length + * @s: syndrome data field (if NULL, syndrome is calculated) + * @no_eras: number of erasures + * @eras_pos: position of erasures, can be NULL + * @invmsk: invert data mask (will be xored on data, not on parity!) + * @corr: buffer to store correction bitmask on eras_pos + * + * The syndrome and parity uses a uint16_t data type to enable + * symbol size > 8. The calling code must take care of decoding of the + * syndrome result and the received parity before calling this code. + * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. + */ +int decode_rs8(struct rs_control *rs, uint8_t *data, uint16_t *par, int len, + uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, + uint16_t *corr) +{ +#include "decode_rs.c" +} +EXPORT_SYMBOL_GPL(decode_rs8); +#endif + +#ifdef CONFIG_REED_SOLOMON_ENC16 +/** + * encode_rs16 - Calculate the parity for data values (16bit data width) + * @rs: the rs control structure + * @data: data field of a given type + * @len: data length + * @par: parity data, must be initialized by caller (usually all 0) + * @invmsk: invert data mask (will be xored on data, not on parity!) + * + * Each field in the data array contains up to symbol size bits of valid data. + */ +int encode_rs16(struct rs_control *rs, uint16_t *data, int len, uint16_t *par, + uint16_t invmsk) +{ +#include "encode_rs.c" +} +EXPORT_SYMBOL_GPL(encode_rs16); +#endif + +#ifdef CONFIG_REED_SOLOMON_DEC16 +/** + * decode_rs16 - Decode codeword (16bit data width) + * @rs: the rs control structure + * @data: data field of a given type + * @par: received parity data field + * @len: data length + * @s: syndrome data field (if NULL, syndrome is calculated) + * @no_eras: number of erasures + * @eras_pos: position of erasures, can be NULL + * @invmsk: invert data mask (will be xored on data, not on parity!) + * @corr: buffer to store correction bitmask on eras_pos + * + * Each field in the data array contains up to symbol size bits of valid data. + * Returns the number of corrected bits or -EBADMSG for uncorrectable errors. + */ +int decode_rs16(struct rs_control *rs, uint16_t *data, uint16_t *par, int len, + uint16_t *s, int no_eras, int *eras_pos, uint16_t invmsk, + uint16_t *corr) +{ +#include "decode_rs.c" +} +EXPORT_SYMBOL_GPL(decode_rs16); +#endif + +EXPORT_SYMBOL_GPL(init_rs); +EXPORT_SYMBOL_GPL(init_rs_non_canonical); +EXPORT_SYMBOL_GPL(free_rs); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Reed Solomon encoder/decoder"); +MODULE_AUTHOR("Phil Karn, Thomas Gleixner"); + diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c new file mode 100644 index 00000000..ffc9fc7f --- /dev/null +++ b/lib/rwsem-spinlock.c @@ -0,0 +1,323 @@ +/* rwsem-spinlock.c: R/W semaphores: contention handling functions for + * generic spinlock implementation + * + * Copyright (c) 2001 David Howells (dhowells@redhat.com). + * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> + * - Derived also from comments by Linus + */ +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/module.h> + +struct rwsem_waiter { + struct list_head list; + struct task_struct *task; + unsigned int flags; +#define RWSEM_WAITING_FOR_READ 0x00000001 +#define RWSEM_WAITING_FOR_WRITE 0x00000002 +}; + +int rwsem_is_locked(struct rw_semaphore *sem) +{ + int ret = 1; + unsigned long flags; + + if (spin_trylock_irqsave(&sem->wait_lock, flags)) { + ret = (sem->activity != 0); + spin_unlock_irqrestore(&sem->wait_lock, flags); + } + return ret; +} +EXPORT_SYMBOL(rwsem_is_locked); + +/* + * initialise the semaphore + */ +void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held semaphore: + */ + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key, 0); +#endif + sem->activity = 0; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} +EXPORT_SYMBOL(__init_rwsem); + +/* + * handle the lock release when processes blocked on it that can now run + * - if we come here, then: + * - the 'active count' _reached_ zero + * - the 'waiting count' is non-zero + * - the spinlock must be held by the caller + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if wakewrite is non-zero + */ +static inline struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) +{ + struct rwsem_waiter *waiter; + struct task_struct *tsk; + int woken; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + + if (!wakewrite) { + if (waiter->flags & RWSEM_WAITING_FOR_WRITE) + goto out; + goto dont_wake_writers; + } + + /* if we are allowed to wake writers try to grant a single write lock + * if there's a writer at the front of the queue + * - we leave the 'waiting count' incremented to signify potential + * contention + */ + if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { + sem->activity = -1; + list_del(&waiter->list); + tsk = waiter->task; + /* Don't touch waiter after ->task has been NULLed */ + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + goto out; + } + + /* grant an infinite number of read locks to the front of the queue */ + dont_wake_writers: + woken = 0; + while (waiter->flags & RWSEM_WAITING_FOR_READ) { + struct list_head *next = waiter->list.next; + + list_del(&waiter->list); + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + woken++; + if (list_empty(&sem->wait_list)) + break; + waiter = list_entry(next, struct rwsem_waiter, list); + } + + sem->activity += woken; + + out: + return sem; +} + +/* + * wake a single writer + */ +static inline struct rw_semaphore * +__rwsem_wake_one_writer(struct rw_semaphore *sem) +{ + struct rwsem_waiter *waiter; + struct task_struct *tsk; + + sem->activity = -1; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + list_del(&waiter->list); + + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + return sem; +} + +/* + * get a read lock on the semaphore + */ +void __sched __down_read(struct rw_semaphore *sem) +{ + struct rwsem_waiter waiter; + struct task_struct *tsk; + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity++; + spin_unlock_irqrestore(&sem->wait_lock, flags); + goto out; + } + + tsk = current; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.flags = RWSEM_WAITING_FOR_READ; + get_task_struct(tsk); + + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ + spin_unlock_irqrestore(&sem->wait_lock, flags); + + /* wait to be given the lock */ + for (;;) { + if (!waiter.task) + break; + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + + tsk->state = TASK_RUNNING; + out: + ; +} + +/* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +int __down_read_trylock(struct rw_semaphore *sem) +{ + unsigned long flags; + int ret = 0; + + + spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity >= 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity++; + ret = 1; + } + + spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; +} + +/* + * get a write lock on the semaphore + * - we increment the waiting count anyway to indicate an exclusive lock + */ +void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) +{ + struct rwsem_waiter waiter; + struct task_struct *tsk; + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity = -1; + spin_unlock_irqrestore(&sem->wait_lock, flags); + goto out; + } + + tsk = current; + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + + /* set up my own style of waitqueue */ + waiter.task = tsk; + waiter.flags = RWSEM_WAITING_FOR_WRITE; + get_task_struct(tsk); + + list_add_tail(&waiter.list, &sem->wait_list); + + /* we don't need to touch the semaphore struct anymore */ + spin_unlock_irqrestore(&sem->wait_lock, flags); + + /* wait to be given the lock */ + for (;;) { + if (!waiter.task) + break; + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + + tsk->state = TASK_RUNNING; + out: + ; +} + +void __sched __down_write(struct rw_semaphore *sem) +{ + __down_write_nested(sem, 0); +} + +/* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +int __down_write_trylock(struct rw_semaphore *sem) +{ + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&sem->wait_lock, flags); + + if (sem->activity == 0 && list_empty(&sem->wait_list)) { + /* granted */ + sem->activity = -1; + ret = 1; + } + + spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; +} + +/* + * release a read lock on the semaphore + */ +void __up_read(struct rw_semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + if (--sem->activity == 0 && !list_empty(&sem->wait_list)) + sem = __rwsem_wake_one_writer(sem); + + spin_unlock_irqrestore(&sem->wait_lock, flags); +} + +/* + * release a write lock on the semaphore + */ +void __up_write(struct rw_semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 0; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 1); + + spin_unlock_irqrestore(&sem->wait_lock, flags); +} + +/* + * downgrade a write lock into a read lock + * - just wake up any readers at the front of the queue + */ +void __downgrade_write(struct rw_semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + sem->activity = 1; + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, 0); + + spin_unlock_irqrestore(&sem->wait_lock, flags); +} + diff --git a/lib/rwsem.c b/lib/rwsem.c new file mode 100644 index 00000000..f236d7cd --- /dev/null +++ b/lib/rwsem.c @@ -0,0 +1,284 @@ +/* rwsem.c: R/W semaphores: contention handling functions + * + * Written by David Howells (dhowells@redhat.com). + * Derived from arch/i386/kernel/semaphore.c + */ +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> + +/* + * Initialize an rwsem: + */ +void __init_rwsem(struct rw_semaphore *sem, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held semaphore: + */ + debug_check_no_locks_freed((void *)sem, sizeof(*sem)); + lockdep_init_map(&sem->dep_map, name, key, 0); +#endif + sem->count = RWSEM_UNLOCKED_VALUE; + spin_lock_init(&sem->wait_lock); + INIT_LIST_HEAD(&sem->wait_list); +} + +EXPORT_SYMBOL(__init_rwsem); + +struct rwsem_waiter { + struct list_head list; + struct task_struct *task; + unsigned int flags; +#define RWSEM_WAITING_FOR_READ 0x00000001 +#define RWSEM_WAITING_FOR_WRITE 0x00000002 +}; + +/* Wake types for __rwsem_do_wake(). Note that RWSEM_WAKE_NO_ACTIVE and + * RWSEM_WAKE_READ_OWNED imply that the spinlock must have been kept held + * since the rwsem value was observed. + */ +#define RWSEM_WAKE_ANY 0 /* Wake whatever's at head of wait list */ +#define RWSEM_WAKE_NO_ACTIVE 1 /* rwsem was observed with no active thread */ +#define RWSEM_WAKE_READ_OWNED 2 /* rwsem was observed to be read owned */ + +/* + * handle the lock release when processes blocked on it that can now run + * - if we come here from up_xxxx(), then: + * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) + * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) + * - there must be someone on the queue + * - the spinlock must be held by the caller + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if downgrading is false + */ +static struct rw_semaphore * +__rwsem_do_wake(struct rw_semaphore *sem, int wake_type) +{ + struct rwsem_waiter *waiter; + struct task_struct *tsk; + struct list_head *next; + signed long oldcount, woken, loop, adjustment; + + waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); + if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) + goto readers_only; + + if (wake_type == RWSEM_WAKE_READ_OWNED) + /* Another active reader was observed, so wakeup is not + * likely to succeed. Save the atomic op. + */ + goto out; + + /* There's a writer at the front of the queue - try to grant it the + * write lock. However, we only wake this writer if we can transition + * the active part of the count from 0 -> 1 + */ + adjustment = RWSEM_ACTIVE_WRITE_BIAS; + if (waiter->list.next == &sem->wait_list) + adjustment -= RWSEM_WAITING_BIAS; + + try_again_write: + oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + if (oldcount & RWSEM_ACTIVE_MASK) + /* Someone grabbed the sem already */ + goto undo_write; + + /* We must be careful not to touch 'waiter' after we set ->task = NULL. + * It is an allocated on the waiter's stack and may become invalid at + * any time after that point (due to a wakeup from another source). + */ + list_del(&waiter->list); + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + goto out; + + readers_only: + /* If we come here from up_xxxx(), another thread might have reached + * rwsem_down_failed_common() before we acquired the spinlock and + * woken up a waiter, making it now active. We prefer to check for + * this first in order to not spend too much time with the spinlock + * held if we're not going to be able to wake up readers in the end. + * + * Note that we do not need to update the rwsem count: any writer + * trying to acquire rwsem will run rwsem_down_write_failed() due + * to the waiting threads and block trying to acquire the spinlock. + * + * We use a dummy atomic update in order to acquire the cache line + * exclusively since we expect to succeed and run the final rwsem + * count adjustment pretty soon. + */ + if (wake_type == RWSEM_WAKE_ANY && + rwsem_atomic_update(0, sem) < RWSEM_WAITING_BIAS) + /* Someone grabbed the sem for write already */ + goto out; + + /* Grant an infinite number of read locks to the readers at the front + * of the queue. Note we increment the 'active part' of the count by + * the number of readers before waking any processes up. + */ + woken = 0; + do { + woken++; + + if (waiter->list.next == &sem->wait_list) + break; + + waiter = list_entry(waiter->list.next, + struct rwsem_waiter, list); + + } while (waiter->flags & RWSEM_WAITING_FOR_READ); + + adjustment = woken * RWSEM_ACTIVE_READ_BIAS; + if (waiter->flags & RWSEM_WAITING_FOR_READ) + /* hit end of list above */ + adjustment -= RWSEM_WAITING_BIAS; + + rwsem_atomic_add(adjustment, sem); + + next = sem->wait_list.next; + for (loop = woken; loop > 0; loop--) { + waiter = list_entry(next, struct rwsem_waiter, list); + next = waiter->list.next; + tsk = waiter->task; + smp_mb(); + waiter->task = NULL; + wake_up_process(tsk); + put_task_struct(tsk); + } + + sem->wait_list.next = next; + next->prev = &sem->wait_list; + + out: + return sem; + + /* undo the change to the active count, but check for a transition + * 1->0 */ + undo_write: + if (rwsem_atomic_update(-adjustment, sem) & RWSEM_ACTIVE_MASK) + goto out; + goto try_again_write; +} + +/* + * wait for a lock to be granted + */ +static struct rw_semaphore __sched * +rwsem_down_failed_common(struct rw_semaphore *sem, + unsigned int flags, signed long adjustment) +{ + struct rwsem_waiter waiter; + struct task_struct *tsk = current; + signed long count; + + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + + /* set up my own style of waitqueue */ + spin_lock_irq(&sem->wait_lock); + waiter.task = tsk; + waiter.flags = flags; + get_task_struct(tsk); + + if (list_empty(&sem->wait_list)) + adjustment += RWSEM_WAITING_BIAS; + list_add_tail(&waiter.list, &sem->wait_list); + + /* we're now waiting on the lock, but no longer actively locking */ + count = rwsem_atomic_update(adjustment, sem); + + /* If there are no active locks, wake the front queued process(es) up. + * + * Alternatively, if we're called from a failed down_write(), there + * were already threads queued before us and there are no active + * writers, the lock must be read owned; so we try to wake any read + * locks that were queued ahead of us. */ + if (count == RWSEM_WAITING_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_NO_ACTIVE); + else if (count > RWSEM_WAITING_BIAS && + adjustment == -RWSEM_ACTIVE_WRITE_BIAS) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + + spin_unlock_irq(&sem->wait_lock); + + /* wait to be given the lock */ + for (;;) { + if (!waiter.task) + break; + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + + tsk->state = TASK_RUNNING; + + return sem; +} + +/* + * wait for the read lock to be granted + */ +asmregparm struct rw_semaphore __sched * +rwsem_down_read_failed(struct rw_semaphore *sem) +{ + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_READ, + -RWSEM_ACTIVE_READ_BIAS); +} + +/* + * wait for the write lock to be granted + */ +asmregparm struct rw_semaphore __sched * +rwsem_down_write_failed(struct rw_semaphore *sem) +{ + return rwsem_down_failed_common(sem, RWSEM_WAITING_FOR_WRITE, + -RWSEM_ACTIVE_WRITE_BIAS); +} + +/* + * handle waking up a waiter on the semaphore + * - up_read/up_write has decremented the active part of count if we come here + */ +asmregparm struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + + spin_unlock_irqrestore(&sem->wait_lock, flags); + + return sem; +} + +/* + * downgrade a write lock into a read lock + * - caller incremented waiting part of count and discovered it still negative + * - just wake up any readers at the front of the queue + */ +asmregparm struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) +{ + unsigned long flags; + + spin_lock_irqsave(&sem->wait_lock, flags); + + /* do nothing if list empty */ + if (!list_empty(&sem->wait_list)) + sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + + spin_unlock_irqrestore(&sem->wait_lock, flags); + + return sem; +} + +EXPORT_SYMBOL(rwsem_down_read_failed); +EXPORT_SYMBOL(rwsem_down_write_failed); +EXPORT_SYMBOL(rwsem_wake); +EXPORT_SYMBOL(rwsem_downgrade_wake); diff --git a/lib/scatterlist.c b/lib/scatterlist.c new file mode 100644 index 00000000..4ceb05d7 --- /dev/null +++ b/lib/scatterlist.c @@ -0,0 +1,519 @@ +/* + * Copyright (C) 2007 Jens Axboe <jens.axboe@oracle.com> + * + * Scatterlist handling helpers. + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/scatterlist.h> +#include <linux/highmem.h> +#include <linux/kmemleak.h> + +/** + * sg_next - return the next scatterlist entry in a list + * @sg: The current sg entry + * + * Description: + * Usually the next entry will be @sg@ + 1, but if this sg element is part + * of a chained scatterlist, it could jump to the start of a new + * scatterlist array. + * + **/ +struct scatterlist *sg_next(struct scatterlist *sg) +{ +#ifdef CONFIG_DEBUG_SG + BUG_ON(sg->sg_magic != SG_MAGIC); +#endif + if (sg_is_last(sg)) + return NULL; + + sg++; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + + return sg; +} +EXPORT_SYMBOL(sg_next); + +/** + * sg_last - return the last scatterlist entry in a list + * @sgl: First entry in the scatterlist + * @nents: Number of entries in the scatterlist + * + * Description: + * Should only be used casually, it (currently) scans the entire list + * to get the last entry. + * + * Note that the @sgl@ pointer passed in need not be the first one, + * the important bit is that @nents@ denotes the number of entries that + * exist from @sgl@. + * + **/ +struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) +{ +#ifndef ARCH_HAS_SG_CHAIN + struct scatterlist *ret = &sgl[nents - 1]; +#else + struct scatterlist *sg, *ret = NULL; + unsigned int i; + + for_each_sg(sgl, sg, nents, i) + ret = sg; + +#endif +#ifdef CONFIG_DEBUG_SG + BUG_ON(sgl[0].sg_magic != SG_MAGIC); + BUG_ON(!sg_is_last(ret)); +#endif + return ret; +} +EXPORT_SYMBOL(sg_last); + +/** + * sg_init_table - Initialize SG table + * @sgl: The SG table + * @nents: Number of entries in table + * + * Notes: + * If this is part of a chained sg table, sg_mark_end() should be + * used only on the last table part. + * + **/ +void sg_init_table(struct scatterlist *sgl, unsigned int nents) +{ + memset(sgl, 0, sizeof(*sgl) * nents); +#ifdef CONFIG_DEBUG_SG + { + unsigned int i; + for (i = 0; i < nents; i++) + sgl[i].sg_magic = SG_MAGIC; + } +#endif + sg_mark_end(&sgl[nents - 1]); +} +EXPORT_SYMBOL(sg_init_table); + +/** + * sg_init_one - Initialize a single entry sg list + * @sg: SG entry + * @buf: Virtual address for IO + * @buflen: IO length + * + **/ +void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen) +{ + sg_init_table(sg, 1); + sg_set_buf(sg, buf, buflen); +} +EXPORT_SYMBOL(sg_init_one); + +/* + * The default behaviour of sg_alloc_table() is to use these kmalloc/kfree + * helpers. + */ +static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask) +{ + if (nents == SG_MAX_SINGLE_ALLOC) { + /* + * Kmemleak doesn't track page allocations as they are not + * commonly used (in a raw form) for kernel data structures. + * As we chain together a list of pages and then a normal + * kmalloc (tracked by kmemleak), in order to for that last + * allocation not to become decoupled (and thus a + * false-positive) we need to inform kmemleak of all the + * intermediate allocations. + */ + void *ptr = (void *) __get_free_page(gfp_mask); + kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask); + return ptr; + } else + return kmalloc(nents * sizeof(struct scatterlist), gfp_mask); +} + +static void sg_kfree(struct scatterlist *sg, unsigned int nents) +{ + if (nents == SG_MAX_SINGLE_ALLOC) { + kmemleak_free(sg); + free_page((unsigned long) sg); + } else + kfree(sg); +} + +/** + * __sg_free_table - Free a previously mapped sg table + * @table: The sg table header to use + * @max_ents: The maximum number of entries per single scatterlist + * @free_fn: Free function + * + * Description: + * Free an sg table previously allocated and setup with + * __sg_alloc_table(). The @max_ents value must be identical to + * that previously used with __sg_alloc_table(). + * + **/ +void __sg_free_table(struct sg_table *table, unsigned int max_ents, + sg_free_fn *free_fn) +{ + struct scatterlist *sgl, *next; + + if (unlikely(!table->sgl)) + return; + + sgl = table->sgl; + while (table->orig_nents) { + unsigned int alloc_size = table->orig_nents; + unsigned int sg_size; + + /* + * If we have more than max_ents segments left, + * then assign 'next' to the sg table after the current one. + * sg_size is then one less than alloc size, since the last + * element is the chain pointer. + */ + if (alloc_size > max_ents) { + next = sg_chain_ptr(&sgl[max_ents - 1]); + alloc_size = max_ents; + sg_size = alloc_size - 1; + } else { + sg_size = alloc_size; + next = NULL; + } + + table->orig_nents -= sg_size; + free_fn(sgl, alloc_size); + sgl = next; + } + + table->sgl = NULL; +} +EXPORT_SYMBOL(__sg_free_table); + +/** + * sg_free_table - Free a previously allocated sg table + * @table: The mapped sg table header + * + **/ +void sg_free_table(struct sg_table *table) +{ + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree); +} +EXPORT_SYMBOL(sg_free_table); + +/** + * __sg_alloc_table - Allocate and initialize an sg table with given allocator + * @table: The sg table header to use + * @nents: Number of entries in sg list + * @max_ents: The maximum number of entries the allocator returns per call + * @gfp_mask: GFP allocation mask + * @alloc_fn: Allocator to use + * + * Description: + * This function returns a @table @nents long. The allocator is + * defined to return scatterlist chunks of maximum size @max_ents. + * Thus if @nents is bigger than @max_ents, the scatterlists will be + * chained in units of @max_ents. + * + * Notes: + * If this function returns non-0 (eg failure), the caller must call + * __sg_free_table() to cleanup any leftover allocations. + * + **/ +int __sg_alloc_table(struct sg_table *table, unsigned int nents, + unsigned int max_ents, gfp_t gfp_mask, + sg_alloc_fn *alloc_fn) +{ + struct scatterlist *sg, *prv; + unsigned int left; + +#ifndef ARCH_HAS_SG_CHAIN + BUG_ON(nents > max_ents); +#endif + + memset(table, 0, sizeof(*table)); + + left = nents; + prv = NULL; + do { + unsigned int sg_size, alloc_size = left; + + if (alloc_size > max_ents) { + alloc_size = max_ents; + sg_size = alloc_size - 1; + } else + sg_size = alloc_size; + + left -= sg_size; + + sg = alloc_fn(alloc_size, gfp_mask); + if (unlikely(!sg)) { + /* + * Adjust entry count to reflect that the last + * entry of the previous table won't be used for + * linkage. Without this, sg_kfree() may get + * confused. + */ + if (prv) + table->nents = ++table->orig_nents; + + return -ENOMEM; + } + + sg_init_table(sg, alloc_size); + table->nents = table->orig_nents += sg_size; + + /* + * If this is the first mapping, assign the sg table header. + * If this is not the first mapping, chain previous part. + */ + if (prv) + sg_chain(prv, max_ents, sg); + else + table->sgl = sg; + + /* + * If no more entries after this one, mark the end + */ + if (!left) + sg_mark_end(&sg[sg_size - 1]); + + /* + * only really needed for mempool backed sg allocations (like + * SCSI), a possible improvement here would be to pass the + * table pointer into the allocator and let that clear these + * flags + */ + gfp_mask &= ~__GFP_WAIT; + gfp_mask |= __GFP_HIGH; + prv = sg; + } while (left); + + return 0; +} +EXPORT_SYMBOL(__sg_alloc_table); + +/** + * sg_alloc_table - Allocate and initialize an sg table + * @table: The sg table header to use + * @nents: Number of entries in sg list + * @gfp_mask: GFP allocation mask + * + * Description: + * Allocate and initialize an sg table. If @nents@ is larger than + * SG_MAX_SINGLE_ALLOC a chained sg table will be setup. + * + **/ +int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) +{ + int ret; + + ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, + gfp_mask, sg_kmalloc); + if (unlikely(ret)) + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, sg_kfree); + + return ret; +} +EXPORT_SYMBOL(sg_alloc_table); + +/** + * sg_miter_start - start mapping iteration over a sg list + * @miter: sg mapping iter to be started + * @sgl: sg list to iterate over + * @nents: number of sg entries + * + * Description: + * Starts mapping iterator @miter. + * + * Context: + * Don't care. + */ +void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, + unsigned int nents, unsigned int flags) +{ + memset(miter, 0, sizeof(struct sg_mapping_iter)); + + miter->__sg = sgl; + miter->__nents = nents; + miter->__offset = 0; + WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); + miter->__flags = flags; +} +EXPORT_SYMBOL(sg_miter_start); + +/** + * sg_miter_next - proceed mapping iterator to the next mapping + * @miter: sg mapping iter to proceed + * + * Description: + * Proceeds @miter@ to the next mapping. @miter@ should have been + * started using sg_miter_start(). On successful return, + * @miter@->page, @miter@->addr and @miter@->length point to the + * current mapping. + * + * Context: + * IRQ disabled if SG_MITER_ATOMIC. IRQ must stay disabled till + * @miter@ is stopped. May sleep if !SG_MITER_ATOMIC. + * + * Returns: + * true if @miter contains the next mapping. false if end of sg + * list is reached. + */ +bool sg_miter_next(struct sg_mapping_iter *miter) +{ + unsigned int off, len; + + /* check for end and drop resources from the last iteration */ + if (!miter->__nents) + return false; + + sg_miter_stop(miter); + + /* get to the next sg if necessary. __offset is adjusted by stop */ + while (miter->__offset == miter->__sg->length) { + if (--miter->__nents) { + miter->__sg = sg_next(miter->__sg); + miter->__offset = 0; + } else + return false; + } + + /* map the next page */ + off = miter->__sg->offset + miter->__offset; + len = miter->__sg->length - miter->__offset; + + miter->page = nth_page(sg_page(miter->__sg), off >> PAGE_SHIFT); + off &= ~PAGE_MASK; + miter->length = min_t(unsigned int, len, PAGE_SIZE - off); + miter->consumed = miter->length; + + if (miter->__flags & SG_MITER_ATOMIC) + miter->addr = kmap_atomic(miter->page, KM_BIO_SRC_IRQ) + off; + else + miter->addr = kmap(miter->page) + off; + + return true; +} +EXPORT_SYMBOL(sg_miter_next); + +/** + * sg_miter_stop - stop mapping iteration + * @miter: sg mapping iter to be stopped + * + * Description: + * Stops mapping iterator @miter. @miter should have been started + * started using sg_miter_start(). A stopped iteration can be + * resumed by calling sg_miter_next() on it. This is useful when + * resources (kmap) need to be released during iteration. + * + * Context: + * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise. + */ +void sg_miter_stop(struct sg_mapping_iter *miter) +{ + WARN_ON(miter->consumed > miter->length); + + /* drop resources from the last iteration */ + if (miter->addr) { + miter->__offset += miter->consumed; + + if (miter->__flags & SG_MITER_TO_SG) + flush_kernel_dcache_page(miter->page); + + if (miter->__flags & SG_MITER_ATOMIC) { + WARN_ON(!irqs_disabled()); + kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ); + } else + kunmap(miter->page); + + miter->page = NULL; + miter->addr = NULL; + miter->length = 0; + miter->consumed = 0; + } +} +EXPORT_SYMBOL(sg_miter_stop); + +/** + * sg_copy_buffer - Copy data between a linear buffer and an SG list + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy from + * @buflen: The number of bytes to copy + * @to_buffer: transfer direction (non zero == from an sg list to a + * buffer, 0 == from a buffer to an sg list + * + * Returns the number of copied bytes. + * + **/ +static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen, int to_buffer) +{ + unsigned int offset = 0; + struct sg_mapping_iter miter; + unsigned long flags; + unsigned int sg_flags = SG_MITER_ATOMIC; + + if (to_buffer) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + local_irq_save(flags); + + while (sg_miter_next(&miter) && offset < buflen) { + unsigned int len; + + len = min(miter.length, buflen - offset); + + if (to_buffer) + memcpy(buf + offset, miter.addr, len); + else + memcpy(miter.addr, buf + offset, len); + + offset += len; + } + + sg_miter_stop(&miter); + + local_irq_restore(flags); + return offset; +} + +/** + * sg_copy_from_buffer - Copy from a linear buffer to an SG list + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy from + * @buflen: The number of bytes to copy + * + * Returns the number of copied bytes. + * + **/ +size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen) +{ + return sg_copy_buffer(sgl, nents, buf, buflen, 0); +} +EXPORT_SYMBOL(sg_copy_from_buffer); + +/** + * sg_copy_to_buffer - Copy from an SG list to a linear buffer + * @sgl: The SG list + * @nents: Number of SG entries + * @buf: Where to copy to + * @buflen: The number of bytes to copy + * + * Returns the number of copied bytes. + * + **/ +size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, + void *buf, size_t buflen) +{ + return sg_copy_buffer(sgl, nents, buf, buflen, 1); +} +EXPORT_SYMBOL(sg_copy_to_buffer); diff --git a/lib/sha1.c b/lib/sha1.c new file mode 100644 index 00000000..4c45fd50 --- /dev/null +++ b/lib/sha1.c @@ -0,0 +1,95 @@ +/* + * SHA transform algorithm, originally taken from code written by + * Peter Gutmann, and placed in the public domain. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cryptohash.h> + +/* The SHA f()-functions. */ + +#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */ +#define f2(x,y,z) (x ^ y ^ z) /* XOR */ +#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */ + +/* The SHA Mysterious Constants */ + +#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ +#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ +#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ +#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ + +/** + * sha_transform - single block SHA1 transform + * + * @digest: 160 bit digest to update + * @data: 512 bits of data to hash + * @W: 80 words of workspace (see note) + * + * This function generates a SHA1 digest for a single 512-bit block. + * Be warned, it does not handle padding and message digest, do not + * confuse it with the full FIPS 180-1 digest algorithm for variable + * length messages. + * + * Note: If the hash is security sensitive, the caller should be sure + * to clear the workspace. This is left to the caller to avoid + * unnecessary clears between chained hashing operations. + */ +void sha_transform(__u32 *digest, const char *in, __u32 *W) +{ + __u32 a, b, c, d, e, t, i; + + for (i = 0; i < 16; i++) + W[i] = be32_to_cpu(((const __be32 *)in)[i]); + + for (i = 0; i < 64; i++) + W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1); + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + + for (i = 0; i < 20; i++) { + t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + for (; i < 40; i ++) { + t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + for (; i < 60; i ++) { + t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + for (; i < 80; i ++) { + t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} +EXPORT_SYMBOL(sha_transform); + +/** + * sha_init - initialize the vectors for a SHA1 digest + * @buf: vector to initialize + */ +void sha_init(__u32 *buf) +{ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + buf[4] = 0xc3d2e1f0; +} + diff --git a/lib/show_mem.c b/lib/show_mem.c new file mode 100644 index 00000000..fdc77c82 --- /dev/null +++ b/lib/show_mem.c @@ -0,0 +1,63 @@ +/* + * Generic show_mem() implementation + * + * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de> + * All code subject to the GPL version 2. + */ + +#include <linux/mm.h> +#include <linux/nmi.h> +#include <linux/quicklist.h> + +void show_mem(void) +{ + pg_data_t *pgdat; + unsigned long total = 0, reserved = 0, shared = 0, + nonshared = 0, highmem = 0; + + printk("Mem-Info:\n"); + show_free_areas(); + + for_each_online_pgdat(pgdat) { + unsigned long i, flags; + + pgdat_resize_lock(pgdat, &flags); + for (i = 0; i < pgdat->node_spanned_pages; i++) { + struct page *page; + unsigned long pfn = pgdat->node_start_pfn + i; + + if (unlikely(!(i % MAX_ORDER_NR_PAGES))) + touch_nmi_watchdog(); + + if (!pfn_valid(pfn)) + continue; + + page = pfn_to_page(pfn); + + if (PageHighMem(page)) + highmem++; + + if (PageReserved(page)) + reserved++; + else if (page_count(page) == 1) + nonshared++; + else if (page_count(page) > 1) + shared += page_count(page) - 1; + + total++; + } + pgdat_resize_unlock(pgdat, &flags); + } + + printk("%lu pages RAM\n", total); +#ifdef CONFIG_HIGHMEM + printk("%lu pages HighMem\n", highmem); +#endif + printk("%lu pages reserved\n", reserved); + printk("%lu pages shared\n", shared); + printk("%lu pages non-shared\n", nonshared); +#ifdef CONFIG_QUICKLIST + printk("%lu pages in pagetable cache\n", + quicklist_total_size()); +#endif +} diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c new file mode 100644 index 00000000..4689cb07 --- /dev/null +++ b/lib/smp_processor_id.c @@ -0,0 +1,55 @@ +/* + * lib/smp_processor_id.c + * + * DEBUG_PREEMPT variant of smp_processor_id(). + */ +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/sched.h> + +notrace unsigned int debug_smp_processor_id(void) +{ + unsigned long preempt_count = preempt_count(); + int this_cpu = raw_smp_processor_id(); + + if (likely(preempt_count)) + goto out; + + if (irqs_disabled()) + goto out; + + /* + * Kernel threads bound to a single CPU can safely use + * smp_processor_id(): + */ + if (cpumask_equal(¤t->cpus_allowed, cpumask_of(this_cpu))) + goto out; + + /* + * It is valid to assume CPU-locality during early bootup: + */ + if (system_state != SYSTEM_RUNNING) + goto out; + + /* + * Avoid recursion: + */ + preempt_disable_notrace(); + + if (!printk_ratelimit()) + goto out_enable; + + printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] " + "code: %s/%d\n", + preempt_count() - 1, current->comm, current->pid); + print_symbol("caller is %s\n", (long)__builtin_return_address(0)); + dump_stack(); + +out_enable: + preempt_enable_no_resched_notrace(); +out: + return this_cpu; +} + +EXPORT_SYMBOL(debug_smp_processor_id); + diff --git a/lib/sort.c b/lib/sort.c new file mode 100644 index 00000000..926d0042 --- /dev/null +++ b/lib/sort.c @@ -0,0 +1,123 @@ +/* + * A fast, small, non-recursive O(nlog n) sort for the Linux kernel + * + * Jan 23 2005 Matt Mackall <mpm@selenic.com> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/sort.h> +#include <linux/slab.h> + +static void u32_swap(void *a, void *b, int size) +{ + u32 t = *(u32 *)a; + *(u32 *)a = *(u32 *)b; + *(u32 *)b = t; +} + +static void generic_swap(void *a, void *b, int size) +{ + char t; + + do { + t = *(char *)a; + *(char *)a++ = *(char *)b; + *(char *)b++ = t; + } while (--size > 0); +} + +/** + * sort - sort an array of elements + * @base: pointer to data to sort + * @num: number of elements + * @size: size of each element + * @cmp_func: pointer to comparison function + * @swap_func: pointer to swap function or NULL + * + * This function does a heapsort on the given array. You may provide a + * swap_func function optimized to your element type. + * + * Sorting time is O(n log n) both on average and worst-case. While + * qsort is about 20% faster on average, it suffers from exploitable + * O(n*n) worst-case behavior and extra memory requirements that make + * it less suitable for kernel use. + */ + +void sort(void *base, size_t num, size_t size, + int (*cmp_func)(const void *, const void *), + void (*swap_func)(void *, void *, int size)) +{ + /* pre-scale counters for performance */ + int i = (num/2 - 1) * size, n = num * size, c, r; + + if (!swap_func) + swap_func = (size == 4 ? u32_swap : generic_swap); + + /* heapify */ + for ( ; i >= 0; i -= size) { + for (r = i; r * 2 + size < n; r = c) { + c = r * 2 + size; + if (c < n - size && + cmp_func(base + c, base + c + size) < 0) + c += size; + if (cmp_func(base + r, base + c) >= 0) + break; + swap_func(base + r, base + c, size); + } + } + + /* sort */ + for (i = n - size; i > 0; i -= size) { + swap_func(base, base + i, size); + for (r = 0; r * 2 + size < i; r = c) { + c = r * 2 + size; + if (c < i - size && + cmp_func(base + c, base + c + size) < 0) + c += size; + if (cmp_func(base + r, base + c) >= 0) + break; + swap_func(base + r, base + c, size); + } + } +} + +EXPORT_SYMBOL(sort); + +#if 0 +/* a simple boot-time regression test */ + +int cmpint(const void *a, const void *b) +{ + return *(int *)a - *(int *)b; +} + +static int sort_test(void) +{ + int *a, i, r = 1; + + a = kmalloc(1000 * sizeof(int), GFP_KERNEL); + BUG_ON(!a); + + printk("testing sort()\n"); + + for (i = 0; i < 1000; i++) { + r = (r * 725861) % 6599; + a[i] = r; + } + + sort(a, 1000, sizeof(int), cmpint, NULL); + + for (i = 0; i < 999; i++) + if (a[i] > a[i+1]) { + printk("sort() failed!\n"); + break; + } + + kfree(a); + + return 0; +} + +module_init(sort_test); +#endif diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c new file mode 100644 index 00000000..4755b98b --- /dev/null +++ b/lib/spinlock_debug.c @@ -0,0 +1,297 @@ +/* + * Copyright 2005, Red Hat, Inc., Ingo Molnar + * Released under the General Public License (GPL). + * + * This file contains the spinlock/rwlock implementations for + * DEBUG_SPINLOCK. + */ + +#include <linux/spinlock.h> +#include <linux/nmi.h> +#include <linux/interrupt.h> +#include <linux/debug_locks.h> +#include <linux/delay.h> +#include <linux/module.h> + +void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; + lock->magic = SPINLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +EXPORT_SYMBOL(__raw_spin_lock_init); + +void __rwlock_init(rwlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + /* + * Make sure we are not reinitializing a held lock: + */ + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); +#endif + lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED; + lock->magic = RWLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +EXPORT_SYMBOL(__rwlock_init); + +static void spin_bug(raw_spinlock_t *lock, const char *msg) +{ + struct task_struct *owner = NULL; + + if (!debug_locks_off()) + return; + + if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) + owner = lock->owner; + printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", + msg, raw_smp_processor_id(), + current->comm, task_pid_nr(current)); + printk(KERN_EMERG " lock: %p, .magic: %08x, .owner: %s/%d, " + ".owner_cpu: %d\n", + lock, lock->magic, + owner ? owner->comm : "<none>", + owner ? task_pid_nr(owner) : -1, + lock->owner_cpu); + dump_stack(); +} + +#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg) + +static inline void +debug_spin_lock_before(raw_spinlock_t *lock) +{ + SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(lock->owner == current, lock, "recursion"); + SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + lock, "cpu recursion"); +} + +static inline void debug_spin_lock_after(raw_spinlock_t *lock) +{ + lock->owner_cpu = raw_smp_processor_id(); + lock->owner = current; +} + +static inline void debug_spin_unlock(raw_spinlock_t *lock) +{ + SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked"); + SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); + SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), + lock, "wrong CPU"); + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +static void __spin_lock_debug(raw_spinlock_t *lock) +{ + u64 i; + u64 loops = loops_per_jiffy * HZ; + int print_once = 1; + + for (;;) { + for (i = 0; i < loops; i++) { + if (arch_spin_trylock(&lock->raw_lock)) + return; + __delay(1); + } + /* lockup suspected: */ + if (print_once) { + print_once = 0; + printk(KERN_EMERG "BUG: spinlock lockup on CPU#%d, " + "%s/%d, %p\n", + raw_smp_processor_id(), current->comm, + task_pid_nr(current), lock); + dump_stack(); +#ifdef CONFIG_SMP + trigger_all_cpu_backtrace(); +#endif + } + } +} + +void do_raw_spin_lock(raw_spinlock_t *lock) +{ + debug_spin_lock_before(lock); + if (unlikely(!arch_spin_trylock(&lock->raw_lock))) + __spin_lock_debug(lock); + debug_spin_lock_after(lock); +} + +int do_raw_spin_trylock(raw_spinlock_t *lock) +{ + int ret = arch_spin_trylock(&lock->raw_lock); + + if (ret) + debug_spin_lock_after(lock); +#ifndef CONFIG_SMP + /* + * Must not happen on UP: + */ + SPIN_BUG_ON(!ret, lock, "trylock failure on UP"); +#endif + return ret; +} + +void do_raw_spin_unlock(raw_spinlock_t *lock) +{ + debug_spin_unlock(lock); + arch_spin_unlock(&lock->raw_lock); +} + +static void rwlock_bug(rwlock_t *lock, const char *msg) +{ + if (!debug_locks_off()) + return; + + printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n", + msg, raw_smp_processor_id(), current->comm, + task_pid_nr(current), lock); + dump_stack(); +} + +#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) + +#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ +static void __read_lock_debug(rwlock_t *lock) +{ + u64 i; + u64 loops = loops_per_jiffy * HZ; + int print_once = 1; + + for (;;) { + for (i = 0; i < loops; i++) { + if (arch_read_trylock(&lock->raw_lock)) + return; + __delay(1); + } + /* lockup suspected: */ + if (print_once) { + print_once = 0; + printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, " + "%s/%d, %p\n", + raw_smp_processor_id(), current->comm, + current->pid, lock); + dump_stack(); + } + } +} +#endif + +void do_raw_read_lock(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + arch_read_lock(&lock->raw_lock); +} + +int do_raw_read_trylock(rwlock_t *lock) +{ + int ret = arch_read_trylock(&lock->raw_lock); + +#ifndef CONFIG_SMP + /* + * Must not happen on UP: + */ + RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); +#endif + return ret; +} + +void do_raw_read_unlock(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + arch_read_unlock(&lock->raw_lock); +} + +static inline void debug_write_lock_before(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); + RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + lock, "cpu recursion"); +} + +static inline void debug_write_lock_after(rwlock_t *lock) +{ + lock->owner_cpu = raw_smp_processor_id(); + lock->owner = current; +} + +static inline void debug_write_unlock(rwlock_t *lock) +{ + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); + RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), + lock, "wrong CPU"); + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; +} + +#if 0 /* This can cause lockups */ +static void __write_lock_debug(rwlock_t *lock) +{ + u64 i; + u64 loops = loops_per_jiffy * HZ; + int print_once = 1; + + for (;;) { + for (i = 0; i < loops; i++) { + if (arch_write_trylock(&lock->raw_lock)) + return; + __delay(1); + } + /* lockup suspected: */ + if (print_once) { + print_once = 0; + printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, " + "%s/%d, %p\n", + raw_smp_processor_id(), current->comm, + current->pid, lock); + dump_stack(); + } + } +} +#endif + +void do_raw_write_lock(rwlock_t *lock) +{ + debug_write_lock_before(lock); + arch_write_lock(&lock->raw_lock); + debug_write_lock_after(lock); +} + +int do_raw_write_trylock(rwlock_t *lock) +{ + int ret = arch_write_trylock(&lock->raw_lock); + + if (ret) + debug_write_lock_after(lock); +#ifndef CONFIG_SMP + /* + * Must not happen on UP: + */ + RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP"); +#endif + return ret; +} + +void do_raw_write_unlock(rwlock_t *lock) +{ + debug_write_unlock(lock); + arch_write_unlock(&lock->raw_lock); +} diff --git a/lib/string.c b/lib/string.c new file mode 100644 index 00000000..f71bead1 --- /dev/null +++ b/lib/string.c @@ -0,0 +1,729 @@ +/* + * linux/lib/string.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * stupid library routines.. The optimized versions should generally be found + * as inline code in <asm-xx/string.h> + * + * These are buggy as well.. + * + * * Fri Jun 25 1999, Ingo Oeser <ioe@informatik.tu-chemnitz.de> + * - Added strsep() which will replace strtok() soon (because strsep() is + * reentrant and should be faster). Use only strsep() in new code, please. + * + * * Sat Feb 09 2002, Jason Thomas <jason@topic.com.au>, + * Matthew Hawkins <matt@mh.dropbear.id.au> + * - Kissed strtok() goodbye + */ + +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/module.h> + +#ifndef __HAVE_ARCH_STRNICMP +/** + * strnicmp - Case insensitive, length-limited string comparison + * @s1: One string + * @s2: The other string + * @len: the maximum number of characters to compare + */ +int strnicmp(const char *s1, const char *s2, size_t len) +{ + /* Yes, Virginia, it had better be unsigned */ + unsigned char c1, c2; + + if (!len) + return 0; + + do { + c1 = *s1++; + c2 = *s2++; + if (!c1 || !c2) + break; + if (c1 == c2) + continue; + c1 = tolower(c1); + c2 = tolower(c2); + if (c1 != c2) + break; + } while (--len); + return (int)c1 - (int)c2; +} +EXPORT_SYMBOL(strnicmp); +#endif + +#ifndef __HAVE_ARCH_STRCASECMP +int strcasecmp(const char *s1, const char *s2) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while (c1 == c2 && c1 != 0); + return c1 - c2; +} +EXPORT_SYMBOL(strcasecmp); +#endif + +#ifndef __HAVE_ARCH_STRNCASECMP +int strncasecmp(const char *s1, const char *s2, size_t n) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while ((--n > 0) && c1 == c2 && c1 != 0); + return c1 - c2; +} +EXPORT_SYMBOL(strncasecmp); +#endif + +#ifndef __HAVE_ARCH_STRCPY +/** + * strcpy - Copy a %NUL terminated string + * @dest: Where to copy the string to + * @src: Where to copy the string from + */ +#undef strcpy +char *strcpy(char *dest, const char *src) +{ + char *tmp = dest; + + while ((*dest++ = *src++) != '\0') + /* nothing */; + return tmp; +} +EXPORT_SYMBOL(strcpy); +#endif + +#ifndef __HAVE_ARCH_STRNCPY +/** + * strncpy - Copy a length-limited, %NUL-terminated string + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @count: The maximum number of bytes to copy + * + * The result is not %NUL-terminated if the source exceeds + * @count bytes. + * + * In the case where the length of @src is less than that of + * count, the remainder of @dest will be padded with %NUL. + * + */ +char *strncpy(char *dest, const char *src, size_t count) +{ + char *tmp = dest; + + while (count) { + if ((*tmp = *src) != 0) + src++; + tmp++; + count--; + } + return dest; +} +EXPORT_SYMBOL(strncpy); +#endif + +#ifndef __HAVE_ARCH_STRLCPY +/** + * strlcpy - Copy a %NUL terminated string into a sized buffer + * @dest: Where to copy the string to + * @src: Where to copy the string from + * @size: size of destination buffer + * + * Compatible with *BSD: the result is always a valid + * NUL-terminated string that fits in the buffer (unless, + * of course, the buffer size is zero). It does not pad + * out the result like strncpy() does. + */ +size_t strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} +EXPORT_SYMBOL(strlcpy); +#endif + +#ifndef __HAVE_ARCH_STRCAT +/** + * strcat - Append one %NUL-terminated string to another + * @dest: The string to be appended to + * @src: The string to append to it + */ +#undef strcat +char *strcat(char *dest, const char *src) +{ + char *tmp = dest; + + while (*dest) + dest++; + while ((*dest++ = *src++) != '\0') + ; + return tmp; +} +EXPORT_SYMBOL(strcat); +#endif + +#ifndef __HAVE_ARCH_STRNCAT +/** + * strncat - Append a length-limited, %NUL-terminated string to another + * @dest: The string to be appended to + * @src: The string to append to it + * @count: The maximum numbers of bytes to copy + * + * Note that in contrast to strncpy(), strncat() ensures the result is + * terminated. + */ +char *strncat(char *dest, const char *src, size_t count) +{ + char *tmp = dest; + + if (count) { + while (*dest) + dest++; + while ((*dest++ = *src++) != 0) { + if (--count == 0) { + *dest = '\0'; + break; + } + } + } + return tmp; +} +EXPORT_SYMBOL(strncat); +#endif + +#ifndef __HAVE_ARCH_STRLCAT +/** + * strlcat - Append a length-limited, %NUL-terminated string to another + * @dest: The string to be appended to + * @src: The string to append to it + * @count: The size of the destination buffer. + */ +size_t strlcat(char *dest, const char *src, size_t count) +{ + size_t dsize = strlen(dest); + size_t len = strlen(src); + size_t res = dsize + len; + + /* This would be a bug */ + BUG_ON(dsize >= count); + + dest += dsize; + count -= dsize; + if (len >= count) + len = count-1; + memcpy(dest, src, len); + dest[len] = 0; + return res; +} +EXPORT_SYMBOL(strlcat); +#endif + +#ifndef __HAVE_ARCH_STRCMP +/** + * strcmp - Compare two strings + * @cs: One string + * @ct: Another string + */ +#undef strcmp +int strcmp(const char *cs, const char *ct) +{ + unsigned char c1, c2; + + while (1) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + } + return 0; +} +EXPORT_SYMBOL(strcmp); +#endif + +#ifndef __HAVE_ARCH_STRNCMP +/** + * strncmp - Compare two length-limited strings + * @cs: One string + * @ct: Another string + * @count: The maximum number of bytes to compare + */ +int strncmp(const char *cs, const char *ct, size_t count) +{ + unsigned char c1, c2; + + while (count) { + c1 = *cs++; + c2 = *ct++; + if (c1 != c2) + return c1 < c2 ? -1 : 1; + if (!c1) + break; + count--; + } + return 0; +} +EXPORT_SYMBOL(strncmp); +#endif + +#ifndef __HAVE_ARCH_STRCHR +/** + * strchr - Find the first occurrence of a character in a string + * @s: The string to be searched + * @c: The character to search for + */ +char *strchr(const char *s, int c) +{ + for (; *s != (char)c; ++s) + if (*s == '\0') + return NULL; + return (char *)s; +} +EXPORT_SYMBOL(strchr); +#endif + +#ifndef __HAVE_ARCH_STRRCHR +/** + * strrchr - Find the last occurrence of a character in a string + * @s: The string to be searched + * @c: The character to search for + */ +char *strrchr(const char *s, int c) +{ + const char *p = s + strlen(s); + do { + if (*p == (char)c) + return (char *)p; + } while (--p >= s); + return NULL; +} +EXPORT_SYMBOL(strrchr); +#endif + +#ifndef __HAVE_ARCH_STRNCHR +/** + * strnchr - Find a character in a length limited string + * @s: The string to be searched + * @count: The number of characters to be searched + * @c: The character to search for + */ +char *strnchr(const char *s, size_t count, int c) +{ + for (; count-- && *s != '\0'; ++s) + if (*s == (char)c) + return (char *)s; + return NULL; +} +EXPORT_SYMBOL(strnchr); +#endif + +/** + * skip_spaces - Removes leading whitespace from @str. + * @str: The string to be stripped. + * + * Returns a pointer to the first non-whitespace character in @str. + */ +char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} +EXPORT_SYMBOL(skip_spaces); + +/** + * strim - Removes leading and trailing whitespace from @s. + * @s: The string to be stripped. + * + * Note that the first trailing whitespace is replaced with a %NUL-terminator + * in the given string @s. Returns a pointer to the first non-whitespace + * character in @s. + */ +char *strim(char *s) +{ + size_t size; + char *end; + + s = skip_spaces(s); + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return s; +} +EXPORT_SYMBOL(strim); + +#ifndef __HAVE_ARCH_STRLEN +/** + * strlen - Find the length of a string + * @s: The string to be sized + */ +size_t strlen(const char *s) +{ + const char *sc; + + for (sc = s; *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} +EXPORT_SYMBOL(strlen); +#endif + +#ifndef __HAVE_ARCH_STRNLEN +/** + * strnlen - Find the length of a length-limited string + * @s: The string to be sized + * @count: The maximum number of bytes to search + */ +size_t strnlen(const char *s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} +EXPORT_SYMBOL(strnlen); +#endif + +#ifndef __HAVE_ARCH_STRSPN +/** + * strspn - Calculate the length of the initial substring of @s which only contain letters in @accept + * @s: The string to be searched + * @accept: The string to search for + */ +size_t strspn(const char *s, const char *accept) +{ + const char *p; + const char *a; + size_t count = 0; + + for (p = s; *p != '\0'; ++p) { + for (a = accept; *a != '\0'; ++a) { + if (*p == *a) + break; + } + if (*a == '\0') + return count; + ++count; + } + return count; +} + +EXPORT_SYMBOL(strspn); +#endif + +#ifndef __HAVE_ARCH_STRCSPN +/** + * strcspn - Calculate the length of the initial substring of @s which does not contain letters in @reject + * @s: The string to be searched + * @reject: The string to avoid + */ +size_t strcspn(const char *s, const char *reject) +{ + const char *p; + const char *r; + size_t count = 0; + + for (p = s; *p != '\0'; ++p) { + for (r = reject; *r != '\0'; ++r) { + if (*p == *r) + return count; + } + ++count; + } + return count; +} +EXPORT_SYMBOL(strcspn); +#endif + +#ifndef __HAVE_ARCH_STRPBRK +/** + * strpbrk - Find the first occurrence of a set of characters + * @cs: The string to be searched + * @ct: The characters to search for + */ +char *strpbrk(const char *cs, const char *ct) +{ + const char *sc1, *sc2; + + for (sc1 = cs; *sc1 != '\0'; ++sc1) { + for (sc2 = ct; *sc2 != '\0'; ++sc2) { + if (*sc1 == *sc2) + return (char *)sc1; + } + } + return NULL; +} +EXPORT_SYMBOL(strpbrk); +#endif + +#ifndef __HAVE_ARCH_STRSEP +/** + * strsep - Split a string into tokens + * @s: The string to be searched + * @ct: The characters to search for + * + * strsep() updates @s to point after the token, ready for the next call. + * + * It returns empty tokens, too, behaving exactly like the libc function + * of that name. In fact, it was stolen from glibc2 and de-fancy-fied. + * Same semantics, slimmer shape. ;) + */ +char *strsep(char **s, const char *ct) +{ + char *sbegin = *s; + char *end; + + if (sbegin == NULL) + return NULL; + + end = strpbrk(sbegin, ct); + if (end) + *end++ = '\0'; + *s = end; + return sbegin; +} +EXPORT_SYMBOL(strsep); +#endif + +/** + * sysfs_streq - return true if strings are equal, modulo trailing newline + * @s1: one string + * @s2: another string + * + * This routine returns true iff two strings are equal, treating both + * NUL and newline-then-NUL as equivalent string terminations. It's + * geared for use with sysfs input strings, which generally terminate + * with newlines but are compared against values without newlines. + */ +bool sysfs_streq(const char *s1, const char *s2) +{ + while (*s1 && *s1 == *s2) { + s1++; + s2++; + } + + if (*s1 == *s2) + return true; + if (!*s1 && *s2 == '\n' && !s2[1]) + return true; + if (*s1 == '\n' && !s1[1] && !*s2) + return true; + return false; +} +EXPORT_SYMBOL(sysfs_streq); + +#ifndef __HAVE_ARCH_MEMSET +/** + * memset - Fill a region of memory with the given value + * @s: Pointer to the start of the area. + * @c: The byte to fill the area with + * @count: The size of the area. + * + * Do not use memset() to access IO space, use memset_io() instead. + */ +void *memset(void *s, int c, size_t count) +{ + char *xs = s; + + while (count--) + *xs++ = c; + return s; +} +EXPORT_SYMBOL(memset); +#endif + +#ifndef __HAVE_ARCH_MEMCPY +/** + * memcpy - Copy one area of memory to another + * @dest: Where to copy to + * @src: Where to copy from + * @count: The size of the area. + * + * You should not use this function to access IO space, use memcpy_toio() + * or memcpy_fromio() instead. + */ +void *memcpy(void *dest, const void *src, size_t count) +{ + char *tmp = dest; + const char *s = src; + + while (count--) + *tmp++ = *s++; + return dest; +} +EXPORT_SYMBOL(memcpy); +#endif + +#ifndef __HAVE_ARCH_MEMMOVE +/** + * memmove - Copy one area of memory to another + * @dest: Where to copy to + * @src: Where to copy from + * @count: The size of the area. + * + * Unlike memcpy(), memmove() copes with overlapping areas. + */ +void *memmove(void *dest, const void *src, size_t count) +{ + char *tmp; + const char *s; + + if (dest <= src) { + tmp = dest; + s = src; + while (count--) + *tmp++ = *s++; + } else { + tmp = dest; + tmp += count; + s = src; + s += count; + while (count--) + *--tmp = *--s; + } + return dest; +} +EXPORT_SYMBOL(memmove); +#endif + +#ifndef __HAVE_ARCH_MEMCMP +/** + * memcmp - Compare two areas of memory + * @cs: One area of memory + * @ct: Another area of memory + * @count: The size of the area. + */ +#undef memcmp +int memcmp(const void *cs, const void *ct, size_t count) +{ + const unsigned char *su1, *su2; + int res = 0; + + for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) + if ((res = *su1 - *su2) != 0) + break; + return res; +} +EXPORT_SYMBOL(memcmp); +#endif + +#ifndef __HAVE_ARCH_MEMSCAN +/** + * memscan - Find a character in an area of memory. + * @addr: The memory area + * @c: The byte to search for + * @size: The size of the area. + * + * returns the address of the first occurrence of @c, or 1 byte past + * the area if @c is not found + */ +void *memscan(void *addr, int c, size_t size) +{ + unsigned char *p = addr; + + while (size) { + if (*p == c) + return (void *)p; + p++; + size--; + } + return (void *)p; +} +EXPORT_SYMBOL(memscan); +#endif + +#ifndef __HAVE_ARCH_STRSTR +/** + * strstr - Find the first substring in a %NUL terminated string + * @s1: The string to be searched + * @s2: The string to search for + */ +char *strstr(const char *s1, const char *s2) +{ + size_t l1, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + l1 = strlen(s1); + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +EXPORT_SYMBOL(strstr); +#endif + +#ifndef __HAVE_ARCH_STRNSTR +/** + * strnstr - Find the first substring in a length-limited string + * @s1: The string to be searched + * @s2: The string to search for + * @len: the maximum number of characters to search + */ +char *strnstr(const char *s1, const char *s2, size_t len) +{ + size_t l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + while (len >= l2) { + len--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +EXPORT_SYMBOL(strnstr); +#endif + +#ifndef __HAVE_ARCH_MEMCHR +/** + * memchr - Find a character in an area of memory. + * @s: The memory area + * @c: The byte to search for + * @n: The size of the area. + * + * returns the address of the first occurrence of @c, or %NULL + * if @c is not found + */ +void *memchr(const void *s, int c, size_t n) +{ + const unsigned char *p = s; + while (n-- != 0) { + if ((unsigned char)c == *p++) { + return (void *)(p - 1); + } + } + return NULL; +} +EXPORT_SYMBOL(memchr); +#endif diff --git a/lib/string_helpers.c b/lib/string_helpers.c new file mode 100644 index 00000000..ab431d4c --- /dev/null +++ b/lib/string_helpers.c @@ -0,0 +1,68 @@ +/* + * Helpers for formatting and printing strings + * + * Copyright 31 August 2008 James Bottomley + */ +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/module.h> +#include <linux/string_helpers.h> + +/** + * string_get_size - get the size in the specified units + * @size: The size to be converted + * @units: units to use (powers of 1000 or 1024) + * @buf: buffer to format to + * @len: length of buffer + * + * This function returns a string formatted to 3 significant figures + * giving the size in the required units. Returns 0 on success or + * error on failure. @buf is always zero terminated. + * + */ +int string_get_size(u64 size, const enum string_size_units units, + char *buf, int len) +{ + const char *units_10[] = { "B", "kB", "MB", "GB", "TB", "PB", + "EB", "ZB", "YB", NULL}; + const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", + "EiB", "ZiB", "YiB", NULL }; + const char **units_str[] = { + [STRING_UNITS_10] = units_10, + [STRING_UNITS_2] = units_2, + }; + const unsigned int divisor[] = { + [STRING_UNITS_10] = 1000, + [STRING_UNITS_2] = 1024, + }; + int i, j; + u64 remainder = 0, sf_cap; + char tmp[8]; + + tmp[0] = '\0'; + i = 0; + if (size >= divisor[units]) { + while (size >= divisor[units] && units_str[units][i]) { + remainder = do_div(size, divisor[units]); + i++; + } + + sf_cap = size; + for (j = 0; sf_cap*10 < 1000; j++) + sf_cap *= 10; + + if (j) { + remainder *= 1000; + do_div(remainder, divisor[units]); + snprintf(tmp, sizeof(tmp), ".%03lld", + (unsigned long long)remainder); + tmp[j+1] = '\0'; + } + } + + snprintf(buf, len, "%lld%s %s", (unsigned long long)size, + tmp, units_str[units][i]); + + return 0; +} +EXPORT_SYMBOL(string_get_size); diff --git a/lib/swiotlb.c b/lib/swiotlb.c new file mode 100644 index 00000000..34e30826 --- /dev/null +++ b/lib/swiotlb.c @@ -0,0 +1,923 @@ +/* + * Dynamic DMA mapping support. + * + * This implementation is a fallback for platforms that do not support + * I/O TLBs (aka DMA address translation hardware). + * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> + * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> + * Copyright (C) 2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * + * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. + * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid + * unnecessary i-cache flushing. + * 04/07/.. ak Better overflow handling. Assorted fixes. + * 05/09/10 linville Add support for syncing ranges, support syncing for + * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. + * 08/12/11 beckyb Add highmem support + */ + +#include <linux/cache.h> +#include <linux/dma-mapping.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/swiotlb.h> +#include <linux/pfn.h> +#include <linux/types.h> +#include <linux/ctype.h> +#include <linux/highmem.h> +#include <linux/gfp.h> + +#include <asm/io.h> +#include <asm/dma.h> +#include <asm/scatterlist.h> + +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/iommu-helper.h> + +#define OFFSET(val,align) ((unsigned long) \ + ( (val) & ( (align) - 1))) + +#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) + +/* + * Minimum IO TLB size to bother booting with. Systems with mainly + * 64bit capable cards will only lightly use the swiotlb. If we can't + * allocate a contiguous 1MB, we're probably in trouble anyway. + */ +#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) + +int swiotlb_force; + +/* + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this + * API. + */ +static char *io_tlb_start, *io_tlb_end; + +/* + * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and + * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. + */ +static unsigned long io_tlb_nslabs; + +/* + * When the IOMMU overflows we return a fallback buffer. This sets the size. + */ +static unsigned long io_tlb_overflow = 32*1024; + +void *io_tlb_overflow_buffer; + +/* + * This is a free list describing the number of free entries available from + * each index + */ +static unsigned int *io_tlb_list; +static unsigned int io_tlb_index; + +/* + * We need to save away the original address corresponding to a mapped entry + * for the sync operations. + */ +static phys_addr_t *io_tlb_orig_addr; + +/* + * Protect the above data structures in the map and unmap calls + */ +static DEFINE_SPINLOCK(io_tlb_lock); + +static int late_alloc; + +static int __init +setup_io_tlb_npages(char *str) +{ + if (isdigit(*str)) { + io_tlb_nslabs = simple_strtoul(str, &str, 0); + /* avoid tail segment of size < IO_TLB_SEGSIZE */ + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + if (*str == ',') + ++str; + if (!strcmp(str, "force")) + swiotlb_force = 1; + + return 1; +} +__setup("swiotlb=", setup_io_tlb_npages); +/* make io_tlb_overflow tunable too? */ + +/* Note that this doesn't work with highmem page */ +static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, + volatile void *address) +{ + return phys_to_dma(hwdev, virt_to_phys(address)); +} + +void swiotlb_print_info(void) +{ + unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; + phys_addr_t pstart, pend; + + pstart = virt_to_phys(io_tlb_start); + pend = virt_to_phys(io_tlb_end); + + printk(KERN_INFO "Placing %luMB software IO TLB between %p - %p\n", + bytes >> 20, io_tlb_start, io_tlb_end); + printk(KERN_INFO "software IO TLB at phys %#llx - %#llx\n", + (unsigned long long)pstart, + (unsigned long long)pend); +} + +void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) +{ + unsigned long i, bytes; + + bytes = nslabs << IO_TLB_SHIFT; + + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; + io_tlb_end = io_tlb_start + bytes; + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int)); + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow); + if (!io_tlb_overflow_buffer) + panic("Cannot allocate SWIOTLB overflow buffer!\n"); + if (verbose) + swiotlb_print_info(); +} + +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init_with_default_size(size_t default_size, int verbose) +{ + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_low_pages(bytes); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); +} + +void __init +swiotlb_init(int verbose) +{ + swiotlb_init_with_default_size(64 * (1<<20), verbose); /* default to 64MB */ +} + +/* + * Systems with larger DMA zones (those that don't support ISA) can + * initialize the swiotlb later using the slab allocator if needed. + * This should be just like above, but with some error catching. + */ +int +swiotlb_late_init_with_default_size(size_t default_size) +{ + unsigned long i, bytes, req_nslabs = io_tlb_nslabs; + unsigned int order; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + /* + * Get IO TLB memory from the low pages + */ + order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); + io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { + io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); + if (io_tlb_start) + break; + order--; + } + + if (!io_tlb_start) + goto cleanup1; + + if (order != get_order(bytes)) { + printk(KERN_WARNING "Warning: only able to allocate %ld MB " + "for software IO TLB\n", (PAGE_SIZE << order) >> 20); + io_tlb_nslabs = SLABS_PER_PAGE << order; + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + } + io_tlb_end = io_tlb_start + bytes; + memset(io_tlb_start, 0, bytes); + + /* + * Allocate and initialize the free list array. This array is used + * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE + * between io_tlb_start and io_tlb_end. + */ + io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * sizeof(int))); + if (!io_tlb_list) + goto cleanup2; + + for (i = 0; i < io_tlb_nslabs; i++) + io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_index = 0; + + io_tlb_orig_addr = (phys_addr_t *) + __get_free_pages(GFP_KERNEL, + get_order(io_tlb_nslabs * + sizeof(phys_addr_t))); + if (!io_tlb_orig_addr) + goto cleanup3; + + memset(io_tlb_orig_addr, 0, io_tlb_nslabs * sizeof(phys_addr_t)); + + /* + * Get the overflow emergency buffer + */ + io_tlb_overflow_buffer = (void *)__get_free_pages(GFP_DMA, + get_order(io_tlb_overflow)); + if (!io_tlb_overflow_buffer) + goto cleanup4; + + swiotlb_print_info(); + + late_alloc = 1; + + return 0; + +cleanup4: + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + io_tlb_orig_addr = NULL; +cleanup3: + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + io_tlb_list = NULL; +cleanup2: + io_tlb_end = NULL; + free_pages((unsigned long)io_tlb_start, order); + io_tlb_start = NULL; +cleanup1: + io_tlb_nslabs = req_nslabs; + return -ENOMEM; +} + +void __init swiotlb_free(void) +{ + if (!io_tlb_overflow_buffer) + return; + + if (late_alloc) { + free_pages((unsigned long)io_tlb_overflow_buffer, + get_order(io_tlb_overflow)); + free_pages((unsigned long)io_tlb_orig_addr, + get_order(io_tlb_nslabs * sizeof(phys_addr_t))); + free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * + sizeof(int))); + free_pages((unsigned long)io_tlb_start, + get_order(io_tlb_nslabs << IO_TLB_SHIFT)); + } else { + free_bootmem_late(__pa(io_tlb_overflow_buffer), + io_tlb_overflow); + free_bootmem_late(__pa(io_tlb_orig_addr), + io_tlb_nslabs * sizeof(phys_addr_t)); + free_bootmem_late(__pa(io_tlb_list), + io_tlb_nslabs * sizeof(int)); + free_bootmem_late(__pa(io_tlb_start), + io_tlb_nslabs << IO_TLB_SHIFT); + } +} + +static int is_swiotlb_buffer(phys_addr_t paddr) +{ + return paddr >= virt_to_phys(io_tlb_start) && + paddr < virt_to_phys(io_tlb_end); +} + +/* + * Bounce: copy the swiotlb buffer back to the original dma location + */ +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, + enum dma_data_direction dir) +{ + unsigned long pfn = PFN_DOWN(phys); + + if (PageHighMem(pfn_to_page(pfn))) { + /* The buffer does not have a mapping. Map it in and copy */ + unsigned int offset = phys & ~PAGE_MASK; + char *buffer; + unsigned int sz = 0; + unsigned long flags; + + while (size) { + sz = min_t(size_t, PAGE_SIZE - offset, size); + + local_irq_save(flags); + buffer = kmap_atomic(pfn_to_page(pfn), + KM_BOUNCE_READ); + if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, buffer + offset, sz); + else + memcpy(buffer + offset, dma_addr, sz); + kunmap_atomic(buffer, KM_BOUNCE_READ); + local_irq_restore(flags); + + size -= sz; + pfn++; + dma_addr += sz; + offset = 0; + } + } else { + if (dir == DMA_TO_DEVICE) + memcpy(dma_addr, phys_to_virt(phys), size); + else + memcpy(phys_to_virt(phys), dma_addr, size); + } +} +EXPORT_SYMBOL_GPL(swiotlb_bounce); + +void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + unsigned long flags; + char *dma_addr; + unsigned int nslots, stride, index, wrap; + int i; + unsigned long mask; + unsigned long offset_slots; + unsigned long max_slots; + + mask = dma_get_seg_boundary(hwdev); + + tbl_dma_addr &= mask; + + offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + + /* + * Carefully handle integer overflow which can occur when mask == ~0UL. + */ + max_slots = mask + 1 + ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT + : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + + /* + * For mappings greater than a page, we limit the stride (and + * hence alignment) to a page size. + */ + nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + if (size > PAGE_SIZE) + stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); + else + stride = 1; + + BUG_ON(!nslots); + + /* + * Find suitable number of IO TLB entries size that will fit this + * request and allocate a buffer from that IO TLB pool. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + index = ALIGN(io_tlb_index, stride); + if (index >= io_tlb_nslabs) + index = 0; + wrap = index; + + do { + while (iommu_is_span_boundary(index, nslots, offset_slots, + max_slots)) { + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + if (index == wrap) + goto not_found; + } + + /* + * If we find a slot that indicates we have 'nslots' number of + * contiguous buffers, we allocate the buffers from that slot + * and mark the entries as '0' indicating unavailable. + */ + if (io_tlb_list[index] >= nslots) { + int count = 0; + + for (i = index; i < (int) (index + nslots); i++) + io_tlb_list[i] = 0; + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + dma_addr = io_tlb_start + (index << IO_TLB_SHIFT); + + /* + * Update the indices to avoid searching in the next + * round. + */ + io_tlb_index = ((index + nslots) < io_tlb_nslabs + ? (index + nslots) : 0); + + goto found; + } + index += stride; + if (index >= io_tlb_nslabs) + index = 0; + } while (index != wrap); + +not_found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + return NULL; +found: + spin_unlock_irqrestore(&io_tlb_lock, flags); + + /* + * Save away the mapping from the original address to the DMA address. + * This is needed when we sync the memory. Then we sync the buffer if + * needed. + */ + for (i = 0; i < nslots; i++) + io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT); + if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL) + swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + + return dma_addr; +} +EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ + +static void * +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); +} + +/* + * dma_addr is the kernel virtual address of the bounce buffer to unmap. + */ +void +swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir) +{ + unsigned long flags; + int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t phys = io_tlb_orig_addr[index]; + + /* + * First, sync the memory before unmapping the entry + */ + if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) + swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + + /* + * Return the buffer to the free list by setting the corresponding + * entries to indicate the number of contiguous entries available. + * While returning the entries to the free list, we merge the entries + * with slots below and above the pool being returned. + */ + spin_lock_irqsave(&io_tlb_lock, flags); + { + count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? + io_tlb_list[index + nslots] : 0); + /* + * Step 1: return the slots to the free list, merging the + * slots with superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) + io_tlb_list[i] = ++count; + /* + * Step 2: merge the returned slots with the preceding slots, + * if available (non zero) + */ + for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + } + spin_unlock_irqrestore(&io_tlb_lock, flags); +} +EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); + +void +swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir, + enum dma_sync_target target) +{ + int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; + phys_addr_t phys = io_tlb_orig_addr[index]; + + phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1)); + + switch (target) { + case SYNC_FOR_CPU: + if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE); + else + BUG_ON(dir != DMA_TO_DEVICE); + break; + case SYNC_FOR_DEVICE: + if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE); + else + BUG_ON(dir != DMA_FROM_DEVICE); + break; + default: + BUG(); + } +} +EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); + +void * +swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + dma_addr_t dev_addr; + void *ret; + int order = get_order(size); + u64 dma_mask = DMA_BIT_MASK(32); + + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = hwdev->coherent_dma_mask; + + ret = (void *)__get_free_pages(flags, order); + if (ret && swiotlb_virt_to_bus(hwdev, ret) + size - 1 > dma_mask) { + /* + * The allocated memory isn't reachable by the device. + */ + free_pages((unsigned long) ret, order); + ret = NULL; + } + if (!ret) { + /* + * We are either out of memory or the device can't DMA to + * GFP_DMA memory; fall back on map_single(), which + * will grab memory from the lowest available address range. + */ + ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE); + if (!ret) + return NULL; + } + + memset(ret, 0, size); + dev_addr = swiotlb_virt_to_bus(hwdev, ret); + + /* Confirm address can be DMA'd by device */ + if (dev_addr + size - 1 > dma_mask) { + printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", + (unsigned long long)dma_mask, + (unsigned long long)dev_addr); + + /* DMA_TO_DEVICE to avoid memcpy in unmap_single */ + swiotlb_tbl_unmap_single(hwdev, ret, size, DMA_TO_DEVICE); + return NULL; + } + *dma_handle = dev_addr; + return ret; +} +EXPORT_SYMBOL(swiotlb_alloc_coherent); + +void +swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dev_addr) +{ + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + + WARN_ON(irqs_disabled()); + if (!is_swiotlb_buffer(paddr)) + free_pages((unsigned long)vaddr, get_order(size)); + else + /* DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single */ + swiotlb_tbl_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE); +} +EXPORT_SYMBOL(swiotlb_free_coherent); + +static void +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) +{ + /* + * Ran out of IOMMU space for this operation. This is very bad. + * Unfortunately the drivers cannot handle this operation properly. + * unless they check for dma_mapping_error (most don't) + * When the mapping is small enough return a static buffer to limit + * the damage, or panic when the transfer is too big. + */ + printk(KERN_ERR "DMA: Out of SW-IOMMU space for %zu bytes at " + "device %s\n", size, dev ? dev_name(dev) : "?"); + + if (size <= io_tlb_overflow || !do_panic) + return; + + if (dir == DMA_BIDIRECTIONAL) + panic("DMA: Random memory could be DMA accessed\n"); + if (dir == DMA_FROM_DEVICE) + panic("DMA: Random memory could be DMA written\n"); + if (dir == DMA_TO_DEVICE) + panic("DMA: Random memory could be DMA read\n"); +} + +/* + * Map a single buffer of the indicated size for DMA in streaming mode. The + * physical address to use is returned. + * + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. + */ +dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dev_addr = phys_to_dma(dev, phys); + void *map; + + BUG_ON(dir == DMA_NONE); + /* + * If the address happens to be in the device's DMA window, + * we can safely return the device addr and not worry about bounce + * buffering it. + */ + if (dma_capable(dev, dev_addr, size) && !swiotlb_force) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ + map = map_single(dev, phys, size, dir); + if (!map) { + swiotlb_full(dev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + + dev_addr = swiotlb_virt_to_bus(dev, map); + + /* + * Ensure that the address returned is DMA'ble + */ + if (!dma_capable(dev, dev_addr, size)) + panic("map_single: bounce buffer is not DMA'ble"); + + return dev_addr; +} +EXPORT_SYMBOL_GPL(swiotlb_map_page); + +/* + * Unmap a single streaming mode DMA translation. The dma_addr and size must + * match what was provided for in a previous swiotlb_map_page call. All + * other usages are undefined. + * + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + + BUG_ON(dir == DMA_NONE); + + if (is_swiotlb_buffer(paddr)) { + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + /* + * phys_to_virt doesn't work with hihgmem page but we could + * call dma_mark_clean() with hihgmem page here. However, we + * are fine since dma_mark_clean() is null on POWERPC. We can + * make dma_mark_clean() take a physical address if necessary. + */ + dma_mark_clean(phys_to_virt(paddr), size); +} + +void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + unmap_single(hwdev, dev_addr, size, dir); +} +EXPORT_SYMBOL_GPL(swiotlb_unmap_page); + +/* + * Make physical memory consistent for a single streaming mode DMA translation + * after a transfer. + * + * If you perform a swiotlb_map_page() but wish to interrogate the buffer + * using the cpu, yet do not wish to teardown the dma mapping, you must + * call this function before doing so. At the next point you give the dma + * address back to the card, you must first perform a + * swiotlb_dma_sync_for_device, and then the device again owns the buffer + */ +static void +swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) +{ + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); + + BUG_ON(dir == DMA_NONE); + + if (is_swiotlb_buffer(paddr)) { + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); + return; + } + + if (dir != DMA_FROM_DEVICE) + return; + + dma_mark_clean(phys_to_virt(paddr), size); +} + +void +swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); +} +EXPORT_SYMBOL(swiotlb_sync_single_for_cpu); + +void +swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir) +{ + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); +} +EXPORT_SYMBOL(swiotlb_sync_single_for_device); + +/* + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * This is the scatter-gather version of the above swiotlb_map_page + * interface. Here the scatter gather list elements are each tagged with the + * appropriate dma address and length. They are obtained via + * sg_dma_{address,length}(SG). + * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for swiotlb_map_page are the + * same here. + */ +int +swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { + phys_addr_t paddr = sg_phys(sg); + dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); + + if (swiotlb_force || + !dma_capable(hwdev, dev_addr, sg->length)) { + void *map = map_single(hwdev, sg_phys(sg), + sg->length, dir); + if (!map) { + /* Don't panic here, we expect map_sg users + to do proper error handling. */ + swiotlb_full(hwdev, sg->length, dir, 0); + swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, + attrs); + sgl[0].dma_length = 0; + return 0; + } + sg->dma_address = swiotlb_virt_to_bus(hwdev, map); + } else + sg->dma_address = dev_addr; + sg->dma_length = sg->length; + } + return nelems; +} +EXPORT_SYMBOL(swiotlb_map_sg_attrs); + +int +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); +} +EXPORT_SYMBOL(swiotlb_map_sg); + +/* + * Unmap a set of streaming mode DMA translations. Again, cpu read rules + * concerning calls here are the same as for swiotlb_unmap_page() above. + */ +void +swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *sg; + int i; + + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) + unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + +} +EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); + +void +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); +} +EXPORT_SYMBOL(swiotlb_unmap_sg); + +/* + * Make physical memory consistent for a set of streaming mode DMA translations + * after a transfer. + * + * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules + * and usage. + */ +static void +swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + swiotlb_sync_single(hwdev, sg->dma_address, + sg->dma_length, dir, target); +} + +void +swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); +} +EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); + +void +swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, enum dma_data_direction dir) +{ + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); +} +EXPORT_SYMBOL(swiotlb_sync_sg_for_device); + +int +swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) +{ + return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer)); +} +EXPORT_SYMBOL(swiotlb_dma_mapping_error); + +/* + * Return whether the given device DMA address mask can be supported + * properly. For example, if your device can only drive the low 24-bits + * during bus mastering, then you would pass 0x00ffffff as the mask to + * this function. + */ +int +swiotlb_dma_supported(struct device *hwdev, u64 mask) +{ + return swiotlb_virt_to_bus(hwdev, io_tlb_end - 1) <= mask; +} +EXPORT_SYMBOL(swiotlb_dma_supported); diff --git a/lib/syscall.c b/lib/syscall.c new file mode 100644 index 00000000..a4f7067f --- /dev/null +++ b/lib/syscall.c @@ -0,0 +1,75 @@ +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <asm/syscall.h> + +static int collect_syscall(struct task_struct *target, long *callno, + unsigned long args[6], unsigned int maxargs, + unsigned long *sp, unsigned long *pc) +{ + struct pt_regs *regs = task_pt_regs(target); + if (unlikely(!regs)) + return -EAGAIN; + + *sp = user_stack_pointer(regs); + *pc = instruction_pointer(regs); + + *callno = syscall_get_nr(target, regs); + if (*callno != -1L && maxargs > 0) + syscall_get_arguments(target, regs, 0, maxargs, args); + + return 0; +} + +/** + * task_current_syscall - Discover what a blocked task is doing. + * @target: thread to examine + * @callno: filled with system call number or -1 + * @args: filled with @maxargs system call arguments + * @maxargs: number of elements in @args to fill + * @sp: filled with user stack pointer + * @pc: filled with user PC + * + * If @target is blocked in a system call, returns zero with *@callno + * set to the the call's number and @args filled in with its arguments. + * Registers not used for system call arguments may not be available and + * it is not kosher to use &struct user_regset calls while the system + * call is still in progress. Note we may get this result if @target + * has finished its system call but not yet returned to user mode, such + * as when it's stopped for signal handling or syscall exit tracing. + * + * If @target is blocked in the kernel during a fault or exception, + * returns zero with *@callno set to -1 and does not fill in @args. + * If so, it's now safe to examine @target using &struct user_regset + * get() calls as long as we're sure @target won't return to user mode. + * + * Returns -%EAGAIN if @target does not remain blocked. + * + * Returns -%EINVAL if @maxargs is too large (maximum is six). + */ +int task_current_syscall(struct task_struct *target, long *callno, + unsigned long args[6], unsigned int maxargs, + unsigned long *sp, unsigned long *pc) +{ + long state; + unsigned long ncsw; + + if (unlikely(maxargs > 6)) + return -EINVAL; + + if (target == current) + return collect_syscall(target, callno, args, maxargs, sp, pc); + + state = target->state; + if (unlikely(!state)) + return -EAGAIN; + + ncsw = wait_task_inactive(target, state); + if (unlikely(!ncsw) || + unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) || + unlikely(wait_task_inactive(target, state) != ncsw)) + return -EAGAIN; + + return 0; +} +EXPORT_SYMBOL_GPL(task_current_syscall); diff --git a/lib/textsearch.c b/lib/textsearch.c new file mode 100644 index 00000000..d608331b --- /dev/null +++ b/lib/textsearch.c @@ -0,0 +1,324 @@ +/* + * lib/textsearch.c Generic text search interface + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + * Pablo Neira Ayuso <pablo@netfilter.org> + * + * ========================================================================== + * + * INTRODUCTION + * + * The textsearch infrastructure provides text searching facitilies for + * both linear and non-linear data. Individual search algorithms are + * implemented in modules and chosen by the user. + * + * ARCHITECTURE + * + * User + * +----------------+ + * | finish()|<--------------(6)-----------------+ + * |get_next_block()|<--------------(5)---------------+ | + * | | Algorithm | | + * | | +------------------------------+ + * | | | init() find() destroy() | + * | | +------------------------------+ + * | | Core API ^ ^ ^ + * | | +---------------+ (2) (4) (8) + * | (1)|----->| prepare() |---+ | | + * | (3)|----->| find()/next() |-----------+ | + * | (7)|----->| destroy() |----------------------+ + * +----------------+ +---------------+ + * + * (1) User configures a search by calling _prepare() specifying the + * search parameters such as the pattern and algorithm name. + * (2) Core requests the algorithm to allocate and initialize a search + * configuration according to the specified parameters. + * (3) User starts the search(es) by calling _find() or _next() to + * fetch subsequent occurrences. A state variable is provided + * to the algorithm to store persistent variables. + * (4) Core eventually resets the search offset and forwards the find() + * request to the algorithm. + * (5) Algorithm calls get_next_block() provided by the user continously + * to fetch the data to be searched in block by block. + * (6) Algorithm invokes finish() after the last call to get_next_block + * to clean up any leftovers from get_next_block. (Optional) + * (7) User destroys the configuration by calling _destroy(). + * (8) Core notifies the algorithm to destroy algorithm specific + * allocations. (Optional) + * + * USAGE + * + * Before a search can be performed, a configuration must be created + * by calling textsearch_prepare() specifying the searching algorithm, + * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE + * to perform case insensitive matching. But it might slow down + * performance of algorithm, so you should use it at own your risk. + * The returned configuration may then be used for an arbitary + * amount of times and even in parallel as long as a separate struct + * ts_state variable is provided to every instance. + * + * The actual search is performed by either calling textsearch_find_- + * continuous() for linear data or by providing an own get_next_block() + * implementation and calling textsearch_find(). Both functions return + * the position of the first occurrence of the patern or UINT_MAX if + * no match was found. Subsequent occurences can be found by calling + * textsearch_next() regardless of the linearity of the data. + * + * Once you're done using a configuration it must be given back via + * textsearch_destroy. + * + * EXAMPLE + * + * int pos; + * struct ts_config *conf; + * struct ts_state state; + * const char *pattern = "chicken"; + * const char *example = "We dance the funky chicken"; + * + * conf = textsearch_prepare("kmp", pattern, strlen(pattern), + * GFP_KERNEL, TS_AUTOLOAD); + * if (IS_ERR(conf)) { + * err = PTR_ERR(conf); + * goto errout; + * } + * + * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); + * if (pos != UINT_MAX) + * panic("Oh my god, dancing chickens at %d\n", pos); + * + * textsearch_destroy(conf); + * ========================================================================== + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> +#include <linux/err.h> +#include <linux/textsearch.h> +#include <linux/slab.h> + +static LIST_HEAD(ts_ops); +static DEFINE_SPINLOCK(ts_mod_lock); + +static inline struct ts_ops *lookup_ts_algo(const char *name) +{ + struct ts_ops *o; + + rcu_read_lock(); + list_for_each_entry_rcu(o, &ts_ops, list) { + if (!strcmp(name, o->name)) { + if (!try_module_get(o->owner)) + o = NULL; + rcu_read_unlock(); + return o; + } + } + rcu_read_unlock(); + + return NULL; +} + +/** + * textsearch_register - register a textsearch module + * @ops: operations lookup table + * + * This function must be called by textsearch modules to announce + * their presence. The specified &@ops must have %name set to a + * unique identifier and the callbacks find(), init(), get_pattern(), + * and get_pattern_len() must be implemented. + * + * Returns 0 or -EEXISTS if another module has already registered + * with same name. + */ +int textsearch_register(struct ts_ops *ops) +{ + int err = -EEXIST; + struct ts_ops *o; + + if (ops->name == NULL || ops->find == NULL || ops->init == NULL || + ops->get_pattern == NULL || ops->get_pattern_len == NULL) + return -EINVAL; + + spin_lock(&ts_mod_lock); + list_for_each_entry(o, &ts_ops, list) { + if (!strcmp(ops->name, o->name)) + goto errout; + } + + list_add_tail_rcu(&ops->list, &ts_ops); + err = 0; +errout: + spin_unlock(&ts_mod_lock); + return err; +} + +/** + * textsearch_unregister - unregister a textsearch module + * @ops: operations lookup table + * + * This function must be called by textsearch modules to announce + * their disappearance for examples when the module gets unloaded. + * The &ops parameter must be the same as the one during the + * registration. + * + * Returns 0 on success or -ENOENT if no matching textsearch + * registration was found. + */ +int textsearch_unregister(struct ts_ops *ops) +{ + int err = 0; + struct ts_ops *o; + + spin_lock(&ts_mod_lock); + list_for_each_entry(o, &ts_ops, list) { + if (o == ops) { + list_del_rcu(&o->list); + goto out; + } + } + + err = -ENOENT; +out: + spin_unlock(&ts_mod_lock); + return err; +} + +struct ts_linear_state +{ + unsigned int len; + const void *data; +}; + +static unsigned int get_linear_data(unsigned int consumed, const u8 **dst, + struct ts_config *conf, + struct ts_state *state) +{ + struct ts_linear_state *st = (struct ts_linear_state *) state->cb; + + if (likely(consumed < st->len)) { + *dst = st->data + consumed; + return st->len - consumed; + } + + return 0; +} + +/** + * textsearch_find_continuous - search a pattern in continuous/linear data + * @conf: search configuration + * @state: search state + * @data: data to search in + * @len: length of data + * + * A simplified version of textsearch_find() for continuous/linear data. + * Call textsearch_next() to retrieve subsequent matches. + * + * Returns the position of first occurrence of the pattern or + * %UINT_MAX if no occurrence was found. + */ +unsigned int textsearch_find_continuous(struct ts_config *conf, + struct ts_state *state, + const void *data, unsigned int len) +{ + struct ts_linear_state *st = (struct ts_linear_state *) state->cb; + + conf->get_next_block = get_linear_data; + st->data = data; + st->len = len; + + return textsearch_find(conf, state); +} + +/** + * textsearch_prepare - Prepare a search + * @algo: name of search algorithm + * @pattern: pattern data + * @len: length of pattern + * @gfp_mask: allocation mask + * @flags: search flags + * + * Looks up the search algorithm module and creates a new textsearch + * configuration for the specified pattern. Upon completion all + * necessary refcnts are held and the configuration must be put back + * using textsearch_put() after usage. + * + * Note: The format of the pattern may not be compatible between + * the various search algorithms. + * + * Returns a new textsearch configuration according to the specified + * parameters or a ERR_PTR(). If a zero length pattern is passed, this + * function returns EINVAL. + */ +struct ts_config *textsearch_prepare(const char *algo, const void *pattern, + unsigned int len, gfp_t gfp_mask, int flags) +{ + int err = -ENOENT; + struct ts_config *conf; + struct ts_ops *ops; + + if (len == 0) + return ERR_PTR(-EINVAL); + + ops = lookup_ts_algo(algo); +#ifdef CONFIG_MODULES + /* + * Why not always autoload you may ask. Some users are + * in a situation where requesting a module may deadlock, + * especially when the module is located on a NFS mount. + */ + if (ops == NULL && flags & TS_AUTOLOAD) { + request_module("ts_%s", algo); + ops = lookup_ts_algo(algo); + } +#endif + + if (ops == NULL) + goto errout; + + conf = ops->init(pattern, len, gfp_mask, flags); + if (IS_ERR(conf)) { + err = PTR_ERR(conf); + goto errout; + } + + conf->ops = ops; + return conf; + +errout: + if (ops) + module_put(ops->owner); + + return ERR_PTR(err); +} + +/** + * textsearch_destroy - destroy a search configuration + * @conf: search configuration + * + * Releases all references of the configuration and frees + * up the memory. + */ +void textsearch_destroy(struct ts_config *conf) +{ + if (conf->ops) { + if (conf->ops->destroy) + conf->ops->destroy(conf); + module_put(conf->ops->owner); + } + + kfree(conf); +} + +EXPORT_SYMBOL(textsearch_register); +EXPORT_SYMBOL(textsearch_unregister); +EXPORT_SYMBOL(textsearch_prepare); +EXPORT_SYMBOL(textsearch_find_continuous); +EXPORT_SYMBOL(textsearch_destroy); diff --git a/lib/ts_bm.c b/lib/ts_bm.c new file mode 100644 index 00000000..9e66ee40 --- /dev/null +++ b/lib/ts_bm.c @@ -0,0 +1,207 @@ +/* + * lib/ts_bm.c Boyer-Moore text search implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Pablo Neira Ayuso <pablo@eurodev.net> + * + * ========================================================================== + * + * Implements Boyer-Moore string matching algorithm: + * + * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore. + * Communications of the Association for Computing Machinery, + * 20(10), 1977, pp. 762-772. + * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf + * + * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004 + * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf + * + * Note: Since Boyer-Moore (BM) performs searches for matchings from right + * to left, it's still possible that a matching could be spread over + * multiple blocks, in that case this algorithm won't find any coincidence. + * + * If you're willing to ensure that such thing won't ever happen, use the + * Knuth-Pratt-Morris (KMP) implementation instead. In conclusion, choose + * the proper string search algorithm depending on your setting. + * + * Say you're using the textsearch infrastructure for filtering, NIDS or + * any similar security focused purpose, then go KMP. Otherwise, if you + * really care about performance, say you're classifying packets to apply + * Quality of Service (QoS) policies, and you don't mind about possible + * matchings spread over multiple fragments, then go BM. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/textsearch.h> + +/* Alphabet size, use ASCII */ +#define ASIZE 256 + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(args, format...) +#endif + +struct ts_bm +{ + u8 * pattern; + unsigned int patlen; + unsigned int bad_shift[ASIZE]; + unsigned int good_shift[0]; +}; + +static unsigned int bm_find(struct ts_config *conf, struct ts_state *state) +{ + struct ts_bm *bm = ts_config_priv(conf); + unsigned int i, text_len, consumed = state->offset; + const u8 *text; + int shift = bm->patlen - 1, bs; + const u8 icase = conf->flags & TS_IGNORECASE; + + for (;;) { + text_len = conf->get_next_block(consumed, &text, conf, state); + + if (unlikely(text_len == 0)) + break; + + while (shift < text_len) { + DEBUGP("Searching in position %d (%c)\n", + shift, text[shift]); + for (i = 0; i < bm->patlen; i++) + if ((icase ? toupper(text[shift-i]) + : text[shift-i]) + != bm->pattern[bm->patlen-1-i]) + goto next; + + /* London calling... */ + DEBUGP("found!\n"); + return consumed += (shift-(bm->patlen-1)); + +next: bs = bm->bad_shift[text[shift-i]]; + + /* Now jumping to... */ + shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]); + } + consumed += text_len; + } + + return UINT_MAX; +} + +static int subpattern(u8 *pattern, int i, int j, int g) +{ + int x = i+g-1, y = j+g-1, ret = 0; + + while(pattern[x--] == pattern[y--]) { + if (y < 0) { + ret = 1; + break; + } + if (--g == 0) { + ret = pattern[i-1] != pattern[j-1]; + break; + } + } + + return ret; +} + +static void compute_prefix_tbl(struct ts_bm *bm, int flags) +{ + int i, j, g; + + for (i = 0; i < ASIZE; i++) + bm->bad_shift[i] = bm->patlen; + for (i = 0; i < bm->patlen - 1; i++) { + bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i; + if (flags & TS_IGNORECASE) + bm->bad_shift[tolower(bm->pattern[i])] + = bm->patlen - 1 - i; + } + + /* Compute the good shift array, used to match reocurrences + * of a subpattern */ + bm->good_shift[0] = 1; + for (i = 1; i < bm->patlen; i++) + bm->good_shift[i] = bm->patlen; + for (i = bm->patlen-1, g = 1; i > 0; g++, i--) { + for (j = i-1; j >= 1-g ; j--) + if (subpattern(bm->pattern, i, j, g)) { + bm->good_shift[g] = bm->patlen-j-g; + break; + } + } +} + +static struct ts_config *bm_init(const void *pattern, unsigned int len, + gfp_t gfp_mask, int flags) +{ + struct ts_config *conf; + struct ts_bm *bm; + int i; + unsigned int prefix_tbl_len = len * sizeof(unsigned int); + size_t priv_size = sizeof(*bm) + len + prefix_tbl_len; + + conf = alloc_ts_config(priv_size, gfp_mask); + if (IS_ERR(conf)) + return conf; + + conf->flags = flags; + bm = ts_config_priv(conf); + bm->patlen = len; + bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len; + if (flags & TS_IGNORECASE) + for (i = 0; i < len; i++) + bm->pattern[i] = toupper(((u8 *)pattern)[i]); + else + memcpy(bm->pattern, pattern, len); + compute_prefix_tbl(bm, flags); + + return conf; +} + +static void *bm_get_pattern(struct ts_config *conf) +{ + struct ts_bm *bm = ts_config_priv(conf); + return bm->pattern; +} + +static unsigned int bm_get_pattern_len(struct ts_config *conf) +{ + struct ts_bm *bm = ts_config_priv(conf); + return bm->patlen; +} + +static struct ts_ops bm_ops = { + .name = "bm", + .find = bm_find, + .init = bm_init, + .get_pattern = bm_get_pattern, + .get_pattern_len = bm_get_pattern_len, + .owner = THIS_MODULE, + .list = LIST_HEAD_INIT(bm_ops.list) +}; + +static int __init init_bm(void) +{ + return textsearch_register(&bm_ops); +} + +static void __exit exit_bm(void) +{ + textsearch_unregister(&bm_ops); +} + +MODULE_LICENSE("GPL"); + +module_init(init_bm); +module_exit(exit_bm); diff --git a/lib/ts_fsm.c b/lib/ts_fsm.c new file mode 100644 index 00000000..5696a351 --- /dev/null +++ b/lib/ts_fsm.c @@ -0,0 +1,341 @@ +/* + * lib/ts_fsm.c A naive finite state machine text search approach + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + * + * ========================================================================== + * + * A finite state machine consists of n states (struct ts_fsm_token) + * representing the pattern as a finite automation. The data is read + * sequentially on an octet basis. Every state token specifies the number + * of recurrences and the type of value accepted which can be either a + * specific character or ctype based set of characters. The available + * type of recurrences include 1, (0|1), [0 n], and [1 n]. + * + * The algorithm differs between strict/non-strict mode specifying + * whether the pattern has to start at the first octet. Strict mode + * is enabled by default and can be disabled by inserting + * TS_FSM_HEAD_IGNORE as the first token in the chain. + * + * The runtime performance of the algorithm should be around O(n), + * however while in strict mode the average runtime can be better. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/textsearch.h> +#include <linux/textsearch_fsm.h> + +struct ts_fsm +{ + unsigned int ntokens; + struct ts_fsm_token tokens[0]; +}; + +/* other values derived from ctype.h */ +#define _A 0x100 /* ascii */ +#define _W 0x200 /* wildcard */ + +/* Map to _ctype flags and some magic numbers */ +static const u16 token_map[TS_FSM_TYPE_MAX+1] = { + [TS_FSM_SPECIFIC] = 0, + [TS_FSM_WILDCARD] = _W, + [TS_FSM_CNTRL] = _C, + [TS_FSM_LOWER] = _L, + [TS_FSM_UPPER] = _U, + [TS_FSM_PUNCT] = _P, + [TS_FSM_SPACE] = _S, + [TS_FSM_DIGIT] = _D, + [TS_FSM_XDIGIT] = _D | _X, + [TS_FSM_ALPHA] = _U | _L, + [TS_FSM_ALNUM] = _U | _L | _D, + [TS_FSM_PRINT] = _P | _U | _L | _D | _SP, + [TS_FSM_GRAPH] = _P | _U | _L | _D, + [TS_FSM_ASCII] = _A, +}; + +static const u16 token_lookup_tbl[256] = { +_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 0- 3 */ +_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 4- 7 */ +_W|_A|_C, _W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C|_S, /* 8- 11 */ +_W|_A|_C|_S, _W|_A|_C|_S, _W|_A|_C, _W|_A|_C, /* 12- 15 */ +_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 16- 19 */ +_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 20- 23 */ +_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 24- 27 */ +_W|_A|_C, _W|_A|_C, _W|_A|_C, _W|_A|_C, /* 28- 31 */ +_W|_A|_S|_SP, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 32- 35 */ +_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 36- 39 */ +_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 40- 43 */ +_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 44- 47 */ +_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 48- 51 */ +_W|_A|_D, _W|_A|_D, _W|_A|_D, _W|_A|_D, /* 52- 55 */ +_W|_A|_D, _W|_A|_D, _W|_A|_P, _W|_A|_P, /* 56- 59 */ +_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 60- 63 */ +_W|_A|_P, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, /* 64- 67 */ +_W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U|_X, _W|_A|_U, /* 68- 71 */ +_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 72- 75 */ +_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 76- 79 */ +_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 80- 83 */ +_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_U, /* 84- 87 */ +_W|_A|_U, _W|_A|_U, _W|_A|_U, _W|_A|_P, /* 88- 91 */ +_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_P, /* 92- 95 */ +_W|_A|_P, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, /* 96- 99 */ +_W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L|_X, _W|_A|_L, /* 100-103 */ +_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 104-107 */ +_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 108-111 */ +_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 112-115 */ +_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_L, /* 116-119 */ +_W|_A|_L, _W|_A|_L, _W|_A|_L, _W|_A|_P, /* 120-123 */ +_W|_A|_P, _W|_A|_P, _W|_A|_P, _W|_A|_C, /* 124-127 */ +_W, _W, _W, _W, /* 128-131 */ +_W, _W, _W, _W, /* 132-135 */ +_W, _W, _W, _W, /* 136-139 */ +_W, _W, _W, _W, /* 140-143 */ +_W, _W, _W, _W, /* 144-147 */ +_W, _W, _W, _W, /* 148-151 */ +_W, _W, _W, _W, /* 152-155 */ +_W, _W, _W, _W, /* 156-159 */ +_W|_S|_SP, _W|_P, _W|_P, _W|_P, /* 160-163 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 164-167 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 168-171 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 172-175 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 176-179 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 180-183 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 184-187 */ +_W|_P, _W|_P, _W|_P, _W|_P, /* 188-191 */ +_W|_U, _W|_U, _W|_U, _W|_U, /* 192-195 */ +_W|_U, _W|_U, _W|_U, _W|_U, /* 196-199 */ +_W|_U, _W|_U, _W|_U, _W|_U, /* 200-203 */ +_W|_U, _W|_U, _W|_U, _W|_U, /* 204-207 */ +_W|_U, _W|_U, _W|_U, _W|_U, /* 208-211 */ +_W|_U, _W|_U, _W|_U, _W|_P, /* 212-215 */ +_W|_U, _W|_U, _W|_U, _W|_U, /* 216-219 */ +_W|_U, _W|_U, _W|_U, _W|_L, /* 220-223 */ +_W|_L, _W|_L, _W|_L, _W|_L, /* 224-227 */ +_W|_L, _W|_L, _W|_L, _W|_L, /* 228-231 */ +_W|_L, _W|_L, _W|_L, _W|_L, /* 232-235 */ +_W|_L, _W|_L, _W|_L, _W|_L, /* 236-239 */ +_W|_L, _W|_L, _W|_L, _W|_L, /* 240-243 */ +_W|_L, _W|_L, _W|_L, _W|_P, /* 244-247 */ +_W|_L, _W|_L, _W|_L, _W|_L, /* 248-251 */ +_W|_L, _W|_L, _W|_L, _W|_L}; /* 252-255 */ + +static inline int match_token(struct ts_fsm_token *t, u8 d) +{ + if (t->type) + return (token_lookup_tbl[d] & t->type) != 0; + else + return t->value == d; +} + +static unsigned int fsm_find(struct ts_config *conf, struct ts_state *state) +{ + struct ts_fsm *fsm = ts_config_priv(conf); + struct ts_fsm_token *cur = NULL, *next; + unsigned int match_start, block_idx = 0, tok_idx; + unsigned block_len = 0, strict, consumed = state->offset; + const u8 *data; + +#define GET_NEXT_BLOCK() \ +({ consumed += block_idx; \ + block_idx = 0; \ + block_len = conf->get_next_block(consumed, &data, conf, state); }) + +#define TOKEN_MISMATCH() \ + do { \ + if (strict) \ + goto no_match; \ + block_idx++; \ + goto startover; \ + } while(0) + +#define end_of_data() unlikely(block_idx >= block_len && !GET_NEXT_BLOCK()) + + if (end_of_data()) + goto no_match; + + strict = fsm->tokens[0].recur != TS_FSM_HEAD_IGNORE; + +startover: + match_start = consumed + block_idx; + + for (tok_idx = 0; tok_idx < fsm->ntokens; tok_idx++) { + cur = &fsm->tokens[tok_idx]; + + if (likely(tok_idx < (fsm->ntokens - 1))) + next = &fsm->tokens[tok_idx + 1]; + else + next = NULL; + + switch (cur->recur) { + case TS_FSM_SINGLE: + if (end_of_data()) + goto no_match; + + if (!match_token(cur, data[block_idx])) + TOKEN_MISMATCH(); + break; + + case TS_FSM_PERHAPS: + if (end_of_data() || + !match_token(cur, data[block_idx])) + continue; + break; + + case TS_FSM_MULTI: + if (end_of_data()) + goto no_match; + + if (!match_token(cur, data[block_idx])) + TOKEN_MISMATCH(); + + block_idx++; + /* fall through */ + + case TS_FSM_ANY: + if (next == NULL) + goto found_match; + + if (end_of_data()) + continue; + + while (!match_token(next, data[block_idx])) { + if (!match_token(cur, data[block_idx])) + TOKEN_MISMATCH(); + block_idx++; + if (end_of_data()) + goto no_match; + } + continue; + + /* + * Optimization: Prefer small local loop over jumping + * back and forth until garbage at head is munched. + */ + case TS_FSM_HEAD_IGNORE: + if (end_of_data()) + continue; + + while (!match_token(next, data[block_idx])) { + /* + * Special case, don't start over upon + * a mismatch, give the user the + * chance to specify the type of data + * allowed to be ignored. + */ + if (!match_token(cur, data[block_idx])) + goto no_match; + + block_idx++; + if (end_of_data()) + goto no_match; + } + + match_start = consumed + block_idx; + continue; + } + + block_idx++; + } + + if (end_of_data()) + goto found_match; + +no_match: + return UINT_MAX; + +found_match: + state->offset = consumed + block_idx; + return match_start; +} + +static struct ts_config *fsm_init(const void *pattern, unsigned int len, + gfp_t gfp_mask, int flags) +{ + int i, err = -EINVAL; + struct ts_config *conf; + struct ts_fsm *fsm; + struct ts_fsm_token *tokens = (struct ts_fsm_token *) pattern; + unsigned int ntokens = len / sizeof(*tokens); + size_t priv_size = sizeof(*fsm) + len; + + if (len % sizeof(struct ts_fsm_token) || ntokens < 1) + goto errout; + + if (flags & TS_IGNORECASE) + goto errout; + + for (i = 0; i < ntokens; i++) { + struct ts_fsm_token *t = &tokens[i]; + + if (t->type > TS_FSM_TYPE_MAX || t->recur > TS_FSM_RECUR_MAX) + goto errout; + + if (t->recur == TS_FSM_HEAD_IGNORE && + (i != 0 || i == (ntokens - 1))) + goto errout; + } + + conf = alloc_ts_config(priv_size, gfp_mask); + if (IS_ERR(conf)) + return conf; + + conf->flags = flags; + fsm = ts_config_priv(conf); + fsm->ntokens = ntokens; + memcpy(fsm->tokens, pattern, len); + + for (i = 0; i < fsm->ntokens; i++) { + struct ts_fsm_token *t = &fsm->tokens[i]; + t->type = token_map[t->type]; + } + + return conf; + +errout: + return ERR_PTR(err); +} + +static void *fsm_get_pattern(struct ts_config *conf) +{ + struct ts_fsm *fsm = ts_config_priv(conf); + return fsm->tokens; +} + +static unsigned int fsm_get_pattern_len(struct ts_config *conf) +{ + struct ts_fsm *fsm = ts_config_priv(conf); + return fsm->ntokens * sizeof(struct ts_fsm_token); +} + +static struct ts_ops fsm_ops = { + .name = "fsm", + .find = fsm_find, + .init = fsm_init, + .get_pattern = fsm_get_pattern, + .get_pattern_len = fsm_get_pattern_len, + .owner = THIS_MODULE, + .list = LIST_HEAD_INIT(fsm_ops.list) +}; + +static int __init init_fsm(void) +{ + return textsearch_register(&fsm_ops); +} + +static void __exit exit_fsm(void) +{ + textsearch_unregister(&fsm_ops); +} + +MODULE_LICENSE("GPL"); + +module_init(init_fsm); +module_exit(exit_fsm); diff --git a/lib/ts_kmp.c b/lib/ts_kmp.c new file mode 100644 index 00000000..632f783e --- /dev/null +++ b/lib/ts_kmp.c @@ -0,0 +1,157 @@ +/* + * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf <tgraf@suug.ch> + * + * ========================================================================== + * + * Implements a linear-time string-matching algorithm due to Knuth, + * Morris, and Pratt [1]. Their algorithm avoids the explicit + * computation of the transition function DELTA altogether. Its + * matching time is O(n), for n being length(text), using just an + * auxiliary function PI[1..m], for m being length(pattern), + * precomputed from the pattern in time O(m). The array PI allows + * the transition function DELTA to be computed efficiently + * "on the fly" as needed. Roughly speaking, for any state + * "q" = 0,1,...,m and any character "a" in SIGMA, the value + * PI["q"] contains the information that is independent of "a" and + * is needed to compute DELTA("q", "a") [2]. Since the array PI + * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we + * save a factor of |SIGMA| in the preprocessing time by computing + * PI rather than DELTA. + * + * [1] Cormen, Leiserson, Rivest, Stein + * Introdcution to Algorithms, 2nd Edition, MIT Press + * [2] See finite automation theory + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/textsearch.h> + +struct ts_kmp +{ + u8 * pattern; + unsigned int pattern_len; + unsigned int prefix_tbl[0]; +}; + +static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) +{ + struct ts_kmp *kmp = ts_config_priv(conf); + unsigned int i, q = 0, text_len, consumed = state->offset; + const u8 *text; + const int icase = conf->flags & TS_IGNORECASE; + + for (;;) { + text_len = conf->get_next_block(consumed, &text, conf, state); + + if (unlikely(text_len == 0)) + break; + + for (i = 0; i < text_len; i++) { + while (q > 0 && kmp->pattern[q] + != (icase ? toupper(text[i]) : text[i])) + q = kmp->prefix_tbl[q - 1]; + if (kmp->pattern[q] + == (icase ? toupper(text[i]) : text[i])) + q++; + if (unlikely(q == kmp->pattern_len)) { + state->offset = consumed + i + 1; + return state->offset - kmp->pattern_len; + } + } + + consumed += text_len; + } + + return UINT_MAX; +} + +static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, + unsigned int *prefix_tbl, int flags) +{ + unsigned int k, q; + const u8 icase = flags & TS_IGNORECASE; + + for (k = 0, q = 1; q < len; q++) { + while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k]) + != (icase ? toupper(pattern[q]) : pattern[q])) + k = prefix_tbl[k-1]; + if ((icase ? toupper(pattern[k]) : pattern[k]) + == (icase ? toupper(pattern[q]) : pattern[q])) + k++; + prefix_tbl[q] = k; + } +} + +static struct ts_config *kmp_init(const void *pattern, unsigned int len, + gfp_t gfp_mask, int flags) +{ + struct ts_config *conf; + struct ts_kmp *kmp; + int i; + unsigned int prefix_tbl_len = len * sizeof(unsigned int); + size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; + + conf = alloc_ts_config(priv_size, gfp_mask); + if (IS_ERR(conf)) + return conf; + + conf->flags = flags; + kmp = ts_config_priv(conf); + kmp->pattern_len = len; + compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags); + kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; + if (flags & TS_IGNORECASE) + for (i = 0; i < len; i++) + kmp->pattern[i] = toupper(((u8 *)pattern)[i]); + else + memcpy(kmp->pattern, pattern, len); + + return conf; +} + +static void *kmp_get_pattern(struct ts_config *conf) +{ + struct ts_kmp *kmp = ts_config_priv(conf); + return kmp->pattern; +} + +static unsigned int kmp_get_pattern_len(struct ts_config *conf) +{ + struct ts_kmp *kmp = ts_config_priv(conf); + return kmp->pattern_len; +} + +static struct ts_ops kmp_ops = { + .name = "kmp", + .find = kmp_find, + .init = kmp_init, + .get_pattern = kmp_get_pattern, + .get_pattern_len = kmp_get_pattern_len, + .owner = THIS_MODULE, + .list = LIST_HEAD_INIT(kmp_ops.list) +}; + +static int __init init_kmp(void) +{ + return textsearch_register(&kmp_ops); +} + +static void __exit exit_kmp(void) +{ + textsearch_unregister(&kmp_ops); +} + +MODULE_LICENSE("GPL"); + +module_init(init_kmp); +module_exit(exit_kmp); diff --git a/lib/uuid.c b/lib/uuid.c new file mode 100644 index 00000000..8fadd7ce --- /dev/null +++ b/lib/uuid.c @@ -0,0 +1,53 @@ +/* + * Unified UUID/GUID definition + * + * Copyright (C) 2009, Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/uuid.h> +#include <linux/random.h> + +static void __uuid_gen_common(__u8 b[16]) +{ + int i; + u32 r; + + for (i = 0; i < 4; i++) { + r = random32(); + memcpy(b + i * 4, &r, 4); + } + /* reversion 0b10 */ + b[8] = (b[8] & 0x3F) | 0x80; +} + +void uuid_le_gen(uuid_le *lu) +{ + __uuid_gen_common(lu->b); + /* version 4 : random generation */ + lu->b[7] = (lu->b[7] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_le_gen); + +void uuid_be_gen(uuid_be *bu) +{ + __uuid_gen_common(bu->b); + /* version 4 : random generation */ + bu->b[6] = (bu->b[6] & 0x0F) | 0x40; +} +EXPORT_SYMBOL_GPL(uuid_be_gen); diff --git a/lib/vsprintf.c b/lib/vsprintf.c new file mode 100644 index 00000000..7af9d841 --- /dev/null +++ b/lib/vsprintf.c @@ -0,0 +1,2152 @@ +/* + * linux/lib/vsprintf.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* vsprintf.c -- Lars Wirzenius & Linus Torvalds. */ +/* + * Wirzenius wrote this portably, Torvalds fucked it up :-) + */ + +/* + * Fri Jul 13 2001 Crutcher Dunnavant <crutcher+kernel@datastacks.com> + * - changed to provide snprintf and vsnprintf functions + * So Feb 1 16:51:32 CET 2004 Juergen Quade <quade@hsnr.de> + * - scnprintf and vscnprintf + */ + +#include <stdarg.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/kernel.h> +#include <linux/kallsyms.h> +#include <linux/uaccess.h> +#include <linux/ioport.h> +#include <net/addrconf.h> + +#include <asm/page.h> /* for PAGE_SIZE */ +#include <asm/div64.h> +#include <asm/sections.h> /* for dereference_function_descriptor() */ + +/* Works only for digits and letters, but small and fast */ +#define TOLOWER(x) ((x) | 0x20) + +static unsigned int simple_guess_base(const char *cp) +{ + if (cp[0] == '0') { + if (TOLOWER(cp[1]) == 'x' && isxdigit(cp[2])) + return 16; + else + return 8; + } else { + return 10; + } +} + +/** + * simple_strtoull - convert a string to an unsigned long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) +{ + unsigned long long result = 0; + + if (!base) + base = simple_guess_base(cp); + + if (base == 16 && cp[0] == '0' && TOLOWER(cp[1]) == 'x') + cp += 2; + + while (isxdigit(*cp)) { + unsigned int value; + + value = isdigit(*cp) ? *cp - '0' : TOLOWER(*cp) - 'a' + 10; + if (value >= base) + break; + result = result * base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + + return result; +} +EXPORT_SYMBOL(simple_strtoull); + +/** + * simple_strtoul - convert a string to an unsigned long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) +{ + return simple_strtoull(cp, endp, base); +} +EXPORT_SYMBOL(simple_strtoul); + +/** + * simple_strtol - convert a string to a signed long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long simple_strtol(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoul(cp + 1, endp, base); + + return simple_strtoul(cp, endp, base); +} +EXPORT_SYMBOL(simple_strtol); + +/** + * simple_strtoll - convert a string to a signed long long + * @cp: The start of the string + * @endp: A pointer to the end of the parsed string will be placed here + * @base: The number base to use + */ +long long simple_strtoll(const char *cp, char **endp, unsigned int base) +{ + if (*cp == '-') + return -simple_strtoull(cp + 1, endp, base); + + return simple_strtoull(cp, endp, base); +} +EXPORT_SYMBOL(simple_strtoll); + +/** + * strict_strtoul - convert a string to an unsigned long strictly + * @cp: The string to be converted + * @base: The number base to use + * @res: The converted result value + * + * strict_strtoul converts a string to an unsigned long only if the + * string is really an unsigned long string, any string containing + * any invalid char at the tail will be rejected and -EINVAL is returned, + * only a newline char at the tail is acceptible because people generally + * change a module parameter in the following way: + * + * echo 1024 > /sys/module/e1000/parameters/copybreak + * + * echo will append a newline to the tail. + * + * It returns 0 if conversion is successful and *res is set to the converted + * value, otherwise it returns -EINVAL and *res is set to 0. + * + * simple_strtoul just ignores the successive invalid characters and + * return the converted value of prefix part of the string. + */ +int strict_strtoul(const char *cp, unsigned int base, unsigned long *res) +{ + char *tail; + unsigned long val; + + *res = 0; + if (!*cp) + return -EINVAL; + + val = simple_strtoul(cp, &tail, base); + if (tail == cp) + return -EINVAL; + + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { + *res = val; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(strict_strtoul); + +/** + * strict_strtol - convert a string to a long strictly + * @cp: The string to be converted + * @base: The number base to use + * @res: The converted result value + * + * strict_strtol is similiar to strict_strtoul, but it allows the first + * character of a string is '-'. + * + * It returns 0 if conversion is successful and *res is set to the converted + * value, otherwise it returns -EINVAL and *res is set to 0. + */ +int strict_strtol(const char *cp, unsigned int base, long *res) +{ + int ret; + if (*cp == '-') { + ret = strict_strtoul(cp + 1, base, (unsigned long *)res); + if (!ret) + *res = -(*res); + } else { + ret = strict_strtoul(cp, base, (unsigned long *)res); + } + + return ret; +} +EXPORT_SYMBOL(strict_strtol); + +/** + * strict_strtoull - convert a string to an unsigned long long strictly + * @cp: The string to be converted + * @base: The number base to use + * @res: The converted result value + * + * strict_strtoull converts a string to an unsigned long long only if the + * string is really an unsigned long long string, any string containing + * any invalid char at the tail will be rejected and -EINVAL is returned, + * only a newline char at the tail is acceptible because people generally + * change a module parameter in the following way: + * + * echo 1024 > /sys/module/e1000/parameters/copybreak + * + * echo will append a newline to the tail of the string. + * + * It returns 0 if conversion is successful and *res is set to the converted + * value, otherwise it returns -EINVAL and *res is set to 0. + * + * simple_strtoull just ignores the successive invalid characters and + * return the converted value of prefix part of the string. + */ +int strict_strtoull(const char *cp, unsigned int base, unsigned long long *res) +{ + char *tail; + unsigned long long val; + + *res = 0; + if (!*cp) + return -EINVAL; + + val = simple_strtoull(cp, &tail, base); + if (tail == cp) + return -EINVAL; + if ((tail[0] == '\0') || (tail[0] == '\n' && tail[1] == '\0')) { + *res = val; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(strict_strtoull); + +/** + * strict_strtoll - convert a string to a long long strictly + * @cp: The string to be converted + * @base: The number base to use + * @res: The converted result value + * + * strict_strtoll is similiar to strict_strtoull, but it allows the first + * character of a string is '-'. + * + * It returns 0 if conversion is successful and *res is set to the converted + * value, otherwise it returns -EINVAL and *res is set to 0. + */ +int strict_strtoll(const char *cp, unsigned int base, long long *res) +{ + int ret; + if (*cp == '-') { + ret = strict_strtoull(cp + 1, base, (unsigned long long *)res); + if (!ret) + *res = -(*res); + } else { + ret = strict_strtoull(cp, base, (unsigned long long *)res); + } + + return ret; +} +EXPORT_SYMBOL(strict_strtoll); + +static noinline_for_stack +int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + + return i; +} + +/* Decimal conversion is by far the most typical, and is used + * for /proc and /sys data. This directly impacts e.g. top performance + * with many processes running. We optimize it for speed + * using code from + * http://www.cs.uiowa.edu/~jones/bcd/decimal.html + * (with permission from the author, Douglas W. Jones). */ + +/* Formats correctly any integer in [0,99999]. + * Outputs from one to five digits depending on input. + * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ +static noinline_for_stack +char *put_dec_trunc(char *buf, unsigned q) +{ + unsigned d3, d2, d1, d0; + d1 = (q>>4) & 0xf; + d2 = (q>>8) & 0xf; + d3 = (q>>12); + + d0 = 6*(d3 + d2 + d1) + (q & 0xf); + q = (d0 * 0xcd) >> 11; + d0 = d0 - 10*q; + *buf++ = d0 + '0'; /* least significant digit */ + d1 = q + 9*d3 + 5*d2 + d1; + if (d1 != 0) { + q = (d1 * 0xcd) >> 11; + d1 = d1 - 10*q; + *buf++ = d1 + '0'; /* next digit */ + + d2 = q + 2*d2; + if ((d2 != 0) || (d3 != 0)) { + q = (d2 * 0xd) >> 7; + d2 = d2 - 10*q; + *buf++ = d2 + '0'; /* next digit */ + + d3 = q + 4*d3; + if (d3 != 0) { + q = (d3 * 0xcd) >> 11; + d3 = d3 - 10*q; + *buf++ = d3 + '0'; /* next digit */ + if (q != 0) + *buf++ = q + '0'; /* most sign. digit */ + } + } + } + + return buf; +} +/* Same with if's removed. Always emits five digits */ +static noinline_for_stack +char *put_dec_full(char *buf, unsigned q) +{ + /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ + /* but anyway, gcc produces better code with full-sized ints */ + unsigned d3, d2, d1, d0; + d1 = (q>>4) & 0xf; + d2 = (q>>8) & 0xf; + d3 = (q>>12); + + /* + * Possible ways to approx. divide by 10 + * gcc -O2 replaces multiply with shifts and adds + * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) + * (x * 0x67) >> 10: 1100111 + * (x * 0x34) >> 9: 110100 - same + * (x * 0x1a) >> 8: 11010 - same + * (x * 0x0d) >> 7: 1101 - same, shortest code (on i386) + */ + d0 = 6*(d3 + d2 + d1) + (q & 0xf); + q = (d0 * 0xcd) >> 11; + d0 = d0 - 10*q; + *buf++ = d0 + '0'; + d1 = q + 9*d3 + 5*d2 + d1; + q = (d1 * 0xcd) >> 11; + d1 = d1 - 10*q; + *buf++ = d1 + '0'; + + d2 = q + 2*d2; + q = (d2 * 0xd) >> 7; + d2 = d2 - 10*q; + *buf++ = d2 + '0'; + + d3 = q + 4*d3; + q = (d3 * 0xcd) >> 11; /* - shorter code */ + /* q = (d3 * 0x67) >> 10; - would also work */ + d3 = d3 - 10*q; + *buf++ = d3 + '0'; + *buf++ = q + '0'; + + return buf; +} +/* No inlining helps gcc to use registers better */ +static noinline_for_stack +char *put_dec(char *buf, unsigned long long num) +{ + while (1) { + unsigned rem; + if (num < 100000) + return put_dec_trunc(buf, num); + rem = do_div(num, 100000); + buf = put_dec_full(buf, rem); + } +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ +#define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ + +enum format_type { + FORMAT_TYPE_NONE, /* Just a string part */ + FORMAT_TYPE_WIDTH, + FORMAT_TYPE_PRECISION, + FORMAT_TYPE_CHAR, + FORMAT_TYPE_STR, + FORMAT_TYPE_PTR, + FORMAT_TYPE_PERCENT_CHAR, + FORMAT_TYPE_INVALID, + FORMAT_TYPE_LONG_LONG, + FORMAT_TYPE_ULONG, + FORMAT_TYPE_LONG, + FORMAT_TYPE_UBYTE, + FORMAT_TYPE_BYTE, + FORMAT_TYPE_USHORT, + FORMAT_TYPE_SHORT, + FORMAT_TYPE_UINT, + FORMAT_TYPE_INT, + FORMAT_TYPE_NRCHARS, + FORMAT_TYPE_SIZE_T, + FORMAT_TYPE_PTRDIFF +}; + +struct printf_spec { + u8 type; /* format_type enum */ + u8 flags; /* flags to number() */ + u8 base; /* number base, 8, 10 or 16 only */ + u8 qualifier; /* number qualifier, one of 'hHlLtzZ' */ + s16 field_width; /* width of output field */ + s16 precision; /* # of digits/chars */ +}; + +static noinline_for_stack +char *number(char *buf, char *end, unsigned long long num, + struct printf_spec spec) +{ + /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ + static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ + + char tmp[66]; + char sign; + char locase; + int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); + int i; + + /* locase = 0 or 0x20. ORing digits or letters with 'locase' + * produces same digits or (maybe lowercased) letters */ + locase = (spec.flags & SMALL); + if (spec.flags & LEFT) + spec.flags &= ~ZEROPAD; + sign = 0; + if (spec.flags & SIGN) { + if ((signed long long)num < 0) { + sign = '-'; + num = -(signed long long)num; + spec.field_width--; + } else if (spec.flags & PLUS) { + sign = '+'; + spec.field_width--; + } else if (spec.flags & SPACE) { + sign = ' '; + spec.field_width--; + } + } + if (need_pfx) { + spec.field_width--; + if (spec.base == 16) + spec.field_width--; + } + + /* generate full string in tmp[], in reverse order */ + i = 0; + if (num == 0) + tmp[i++] = '0'; + /* Generic code, for any base: + else do { + tmp[i++] = (digits[do_div(num,base)] | locase); + } while (num != 0); + */ + else if (spec.base != 10) { /* 8 or 16 */ + int mask = spec.base - 1; + int shift = 3; + + if (spec.base == 16) + shift = 4; + do { + tmp[i++] = (digits[((unsigned char)num) & mask] | locase); + num >>= shift; + } while (num); + } else { /* base 10 */ + i = put_dec(tmp, num) - tmp; + } + + /* printing 100 using %2d gives "100", not "00" */ + if (i > spec.precision) + spec.precision = i; + /* leading space padding */ + spec.field_width -= spec.precision; + if (!(spec.flags & (ZEROPAD+LEFT))) { + while (--spec.field_width >= 0) { + if (buf < end) + *buf = ' '; + ++buf; + } + } + /* sign */ + if (sign) { + if (buf < end) + *buf = sign; + ++buf; + } + /* "0x" / "0" prefix */ + if (need_pfx) { + if (buf < end) + *buf = '0'; + ++buf; + if (spec.base == 16) { + if (buf < end) + *buf = ('X' | locase); + ++buf; + } + } + /* zero or space padding */ + if (!(spec.flags & LEFT)) { + char c = (spec.flags & ZEROPAD) ? '0' : ' '; + while (--spec.field_width >= 0) { + if (buf < end) + *buf = c; + ++buf; + } + } + /* hmm even more zero padding? */ + while (i <= --spec.precision) { + if (buf < end) + *buf = '0'; + ++buf; + } + /* actual digits of result */ + while (--i >= 0) { + if (buf < end) + *buf = tmp[i]; + ++buf; + } + /* trailing space padding */ + while (--spec.field_width >= 0) { + if (buf < end) + *buf = ' '; + ++buf; + } + + return buf; +} + +static noinline_for_stack +char *string(char *buf, char *end, const char *s, struct printf_spec spec) +{ + int len, i; + + if ((unsigned long)s < PAGE_SIZE) + s = "(null)"; + + len = strnlen(s, spec.precision); + + if (!(spec.flags & LEFT)) { + while (len < spec.field_width--) { + if (buf < end) + *buf = ' '; + ++buf; + } + } + for (i = 0; i < len; ++i) { + if (buf < end) + *buf = *s; + ++buf; ++s; + } + while (len < spec.field_width--) { + if (buf < end) + *buf = ' '; + ++buf; + } + + return buf; +} + +static noinline_for_stack +char *symbol_string(char *buf, char *end, void *ptr, + struct printf_spec spec, char ext) +{ + unsigned long value = (unsigned long) ptr; +#ifdef CONFIG_KALLSYMS + char sym[KSYM_SYMBOL_LEN]; + if (ext != 'f' && ext != 's') + sprint_symbol(sym, value); + else + kallsyms_lookup(value, NULL, NULL, NULL, sym); + + return string(buf, end, sym, spec); +#else + spec.field_width = 2 * sizeof(void *); + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + + return number(buf, end, value, spec); +#endif +} + +static noinline_for_stack +char *resource_string(char *buf, char *end, struct resource *res, + struct printf_spec spec, const char *fmt) +{ +#ifndef IO_RSRC_PRINTK_SIZE +#define IO_RSRC_PRINTK_SIZE 6 +#endif + +#ifndef MEM_RSRC_PRINTK_SIZE +#define MEM_RSRC_PRINTK_SIZE 10 +#endif + static const struct printf_spec io_spec = { + .base = 16, + .field_width = IO_RSRC_PRINTK_SIZE, + .precision = -1, + .flags = SPECIAL | SMALL | ZEROPAD, + }; + static const struct printf_spec mem_spec = { + .base = 16, + .field_width = MEM_RSRC_PRINTK_SIZE, + .precision = -1, + .flags = SPECIAL | SMALL | ZEROPAD, + }; + static const struct printf_spec bus_spec = { + .base = 16, + .field_width = 2, + .precision = -1, + .flags = SMALL | ZEROPAD, + }; + static const struct printf_spec dec_spec = { + .base = 10, + .precision = -1, + .flags = 0, + }; + static const struct printf_spec str_spec = { + .field_width = -1, + .precision = 10, + .flags = LEFT, + }; + static const struct printf_spec flag_spec = { + .base = 16, + .precision = -1, + .flags = SPECIAL | SMALL, + }; + + /* 32-bit res (sizeof==4): 10 chars in dec, 10 in hex ("0x" + 8) + * 64-bit res (sizeof==8): 20 chars in dec, 18 in hex ("0x" + 16) */ +#define RSRC_BUF_SIZE ((2 * sizeof(resource_size_t)) + 4) +#define FLAG_BUF_SIZE (2 * sizeof(res->flags)) +#define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") +#define RAW_BUF_SIZE sizeof("[mem - flags 0x]") + char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; + + char *p = sym, *pend = sym + sizeof(sym); + int decode = (fmt[0] == 'R') ? 1 : 0; + const struct printf_spec *specp; + + *p++ = '['; + if (res->flags & IORESOURCE_IO) { + p = string(p, pend, "io ", str_spec); + specp = &io_spec; + } else if (res->flags & IORESOURCE_MEM) { + p = string(p, pend, "mem ", str_spec); + specp = &mem_spec; + } else if (res->flags & IORESOURCE_IRQ) { + p = string(p, pend, "irq ", str_spec); + specp = &dec_spec; + } else if (res->flags & IORESOURCE_DMA) { + p = string(p, pend, "dma ", str_spec); + specp = &dec_spec; + } else if (res->flags & IORESOURCE_BUS) { + p = string(p, pend, "bus ", str_spec); + specp = &bus_spec; + } else { + p = string(p, pend, "??? ", str_spec); + specp = &mem_spec; + decode = 0; + } + p = number(p, pend, res->start, *specp); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, *specp); + } + if (decode) { + if (res->flags & IORESOURCE_MEM_64) + p = string(p, pend, " 64bit", str_spec); + if (res->flags & IORESOURCE_PREFETCH) + p = string(p, pend, " pref", str_spec); + if (res->flags & IORESOURCE_WINDOW) + p = string(p, pend, " window", str_spec); + if (res->flags & IORESOURCE_DISABLED) + p = string(p, pend, " disabled", str_spec); + } else { + p = string(p, pend, " flags ", str_spec); + p = number(p, pend, res->flags, flag_spec); + } + *p++ = ']'; + *p = '\0'; + + return string(buf, end, sym, spec); +} + +static noinline_for_stack +char *mac_address_string(char *buf, char *end, u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char mac_addr[sizeof("xx:xx:xx:xx:xx:xx")]; + char *p = mac_addr; + int i; + char separator; + + if (fmt[1] == 'F') { /* FDDI canonical format */ + separator = '-'; + } else { + separator = ':'; + } + + for (i = 0; i < 6; i++) { + p = pack_hex_byte(p, addr[i]); + if (fmt[0] == 'M' && i != 5) + *p++ = separator; + } + *p = '\0'; + + return string(buf, end, mac_addr, spec); +} + +static noinline_for_stack +char *ip4_string(char *p, const u8 *addr, const char *fmt) +{ + int i; + bool leading_zeros = (fmt[0] == 'i'); + int index; + int step; + + switch (fmt[2]) { + case 'h': +#ifdef __BIG_ENDIAN + index = 0; + step = 1; +#else + index = 3; + step = -1; +#endif + break; + case 'l': + index = 3; + step = -1; + break; + case 'n': + case 'b': + default: + index = 0; + step = 1; + break; + } + for (i = 0; i < 4; i++) { + char temp[3]; /* hold each IP quad in reverse order */ + int digits = put_dec_trunc(temp, addr[index]) - temp; + if (leading_zeros) { + if (digits < 3) + *p++ = '0'; + if (digits < 2) + *p++ = '0'; + } + /* reverse the digits in the quad */ + while (digits--) + *p++ = temp[digits]; + if (i < 3) + *p++ = '.'; + index += step; + } + *p = '\0'; + + return p; +} + +static noinline_for_stack +char *ip6_compressed_string(char *p, const char *addr) +{ + int i, j, range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + u16 word; + u8 hi, lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + *p++ = ':'; + *p++ = ':'; + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + *p++ = ':'; + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) { + if (hi > 0x0f) + p = pack_hex_byte(p, hi); + else + *p++ = hex_asc_lo(hi); + p = pack_hex_byte(p, lo); + } + else if (lo > 0x0f) + p = pack_hex_byte(p, lo); + else + *p++ = hex_asc_lo(lo); + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + *p++ = ':'; + p = ip4_string(p, &in6.s6_addr[12], "I4"); + } + *p = '\0'; + + return p; +} + +static noinline_for_stack +char *ip6_string(char *p, const char *addr, const char *fmt) +{ + int i; + + for (i = 0; i < 8; i++) { + p = pack_hex_byte(p, *addr++); + p = pack_hex_byte(p, *addr++); + if (fmt[0] == 'I' && i != 7) + *p++ = ':'; + } + *p = '\0'; + + return p; +} + +static noinline_for_stack +char *ip6_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char ip6_addr[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255")]; + + if (fmt[0] == 'I' && fmt[2] == 'c') + ip6_compressed_string(ip6_addr, addr); + else + ip6_string(ip6_addr, addr, fmt); + + return string(buf, end, ip6_addr, spec); +} + +static noinline_for_stack +char *ip4_addr_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char ip4_addr[sizeof("255.255.255.255")]; + + ip4_string(ip4_addr, addr, fmt); + + return string(buf, end, ip4_addr, spec); +} + +static noinline_for_stack +char *uuid_string(char *buf, char *end, const u8 *addr, + struct printf_spec spec, const char *fmt) +{ + char uuid[sizeof("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx")]; + char *p = uuid; + int i; + static const u8 be[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + static const u8 le[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15}; + const u8 *index = be; + bool uc = false; + + switch (*(++fmt)) { + case 'L': + uc = true; /* fall-through */ + case 'l': + index = le; + break; + case 'B': + uc = true; + break; + } + + for (i = 0; i < 16; i++) { + p = pack_hex_byte(p, addr[index[i]]); + switch (i) { + case 3: + case 5: + case 7: + case 9: + *p++ = '-'; + break; + } + } + + *p = 0; + + if (uc) { + p = uuid; + do { + *p = toupper(*p); + } while (*(++p)); + } + + return string(buf, end, uuid, spec); +} + +/* + * Show a '%p' thing. A kernel extension is that the '%p' is followed + * by an extra set of alphanumeric characters that are extended format + * specifiers. + * + * Right now we handle: + * + * - 'F' For symbolic function descriptor pointers with offset + * - 'f' For simple symbolic function names without offset + * - 'S' For symbolic direct pointers with offset + * - 's' For symbolic direct pointers without offset + * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] + * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'M' For a 6-byte MAC address, it prints the address in the + * usual colon-separated hex notation + * - 'm' For a 6-byte MAC address, it prints the hex address without colons + * - 'MF' For a 6-byte MAC FDDI address, it prints the address + * with a dash-separated hex notation + * - 'I' [46] for IPv4/IPv6 addresses printed in the usual way + * IPv4 uses dot-separated decimal without leading 0's (1.2.3.4) + * IPv6 uses colon separated network-order 16 bit hex with leading 0's + * - 'i' [46] for 'raw' IPv4/IPv6 addresses + * IPv6 omits the colons (01020304...0f) + * IPv4 uses dot-separated decimal with leading 0's (010.123.045.006) + * - '[Ii]4[hnbl]' IPv4 addresses in host, network, big or little endian order + * - 'I6c' for IPv6 addresses printed as specified by + * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00 + * - 'U' For a 16 byte UUID/GUID, it prints the UUID/GUID in the form + * "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" + * Options for %pU are: + * b big endian lower case hex (default) + * B big endian UPPER case hex + * l little endian lower case hex + * L little endian UPPER case hex + * big endian output byte order is: + * [0][1][2][3]-[4][5]-[6][7]-[8][9]-[10][11][12][13][14][15] + * little endian output byte order is: + * [3][2][1][0]-[5][4]-[7][6]-[8][9]-[10][11][12][13][14][15] + * - 'V' For a struct va_format which contains a format string * and va_list *, + * call vsnprintf(->format, *->va_list). + * Implements a "recursive vsnprintf". + * Do not use this feature without some mechanism to verify the + * correctness of the format string and va_list arguments. + * + * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 + * function pointers are really function descriptors, which contain a + * pointer to the real address. + */ +static noinline_for_stack +char *pointer(const char *fmt, char *buf, char *end, void *ptr, + struct printf_spec spec) +{ + if (!ptr) + return string(buf, end, "(null)", spec); + + switch (*fmt) { + case 'F': + case 'f': + ptr = dereference_function_descriptor(ptr); + /* Fallthrough */ + case 'S': + case 's': + return symbol_string(buf, end, ptr, spec, *fmt); + case 'R': + case 'r': + return resource_string(buf, end, ptr, spec, fmt); + case 'M': /* Colon separated: 00:01:02:03:04:05 */ + case 'm': /* Contiguous: 000102030405 */ + /* [mM]F (FDDI, bit reversed) */ + return mac_address_string(buf, end, ptr, spec, fmt); + case 'I': /* Formatted IP supported + * 4: 1.2.3.4 + * 6: 0001:0203:...:0708 + * 6c: 1::708 or 1::1.2.3.4 + */ + case 'i': /* Contiguous: + * 4: 001.002.003.004 + * 6: 000102...0f + */ + switch (fmt[1]) { + case '6': + return ip6_addr_string(buf, end, ptr, spec, fmt); + case '4': + return ip4_addr_string(buf, end, ptr, spec, fmt); + } + break; + case 'U': + return uuid_string(buf, end, ptr, spec, fmt); + case 'V': + return buf + vsnprintf(buf, end - buf, + ((struct va_format *)ptr)->fmt, + *(((struct va_format *)ptr)->va)); + } + spec.flags |= SMALL; + if (spec.field_width == -1) { + spec.field_width = 2*sizeof(void *); + spec.flags |= ZEROPAD; + } + spec.base = 16; + + return number(buf, end, (unsigned long) ptr, spec); +} + +/* + * Helper function to decode printf style format. + * Each call decode a token from the format and return the + * number of characters read (or likely the delta where it wants + * to go on the next call). + * The decoded token is returned through the parameters + * + * 'h', 'l', or 'L' for integer fields + * 'z' support added 23/7/1999 S.H. + * 'z' changed to 'Z' --davidm 1/25/99 + * 't' added for ptrdiff_t + * + * @fmt: the format string + * @type of the token returned + * @flags: various flags such as +, -, # tokens.. + * @field_width: overwritten width + * @base: base of the number (octal, hex, ...) + * @precision: precision of a number + * @qualifier: qualifier of a number (long, size_t, ...) + */ +static noinline_for_stack +int format_decode(const char *fmt, struct printf_spec *spec) +{ + const char *start = fmt; + + /* we finished early by reading the field width */ + if (spec->type == FORMAT_TYPE_WIDTH) { + if (spec->field_width < 0) { + spec->field_width = -spec->field_width; + spec->flags |= LEFT; + } + spec->type = FORMAT_TYPE_NONE; + goto precision; + } + + /* we finished early by reading the precision */ + if (spec->type == FORMAT_TYPE_PRECISION) { + if (spec->precision < 0) + spec->precision = 0; + + spec->type = FORMAT_TYPE_NONE; + goto qualifier; + } + + /* By default */ + spec->type = FORMAT_TYPE_NONE; + + for (; *fmt ; ++fmt) { + if (*fmt == '%') + break; + } + + /* Return the current non-format string */ + if (fmt != start || !*fmt) + return fmt - start; + + /* Process flags */ + spec->flags = 0; + + while (1) { /* this also skips first '%' */ + bool found = true; + + ++fmt; + + switch (*fmt) { + case '-': spec->flags |= LEFT; break; + case '+': spec->flags |= PLUS; break; + case ' ': spec->flags |= SPACE; break; + case '#': spec->flags |= SPECIAL; break; + case '0': spec->flags |= ZEROPAD; break; + default: found = false; + } + + if (!found) + break; + } + + /* get field width */ + spec->field_width = -1; + + if (isdigit(*fmt)) + spec->field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + /* it's the next argument */ + spec->type = FORMAT_TYPE_WIDTH; + return ++fmt - start; + } + +precision: + /* get the precision */ + spec->precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) { + spec->precision = skip_atoi(&fmt); + if (spec->precision < 0) + spec->precision = 0; + } else if (*fmt == '*') { + /* it's the next argument */ + spec->type = FORMAT_TYPE_PRECISION; + return ++fmt - start; + } + } + +qualifier: + /* get the conversion qualifier */ + spec->qualifier = -1; + if (*fmt == 'h' || TOLOWER(*fmt) == 'l' || + TOLOWER(*fmt) == 'z' || *fmt == 't') { + spec->qualifier = *fmt++; + if (unlikely(spec->qualifier == *fmt)) { + if (spec->qualifier == 'l') { + spec->qualifier = 'L'; + ++fmt; + } else if (spec->qualifier == 'h') { + spec->qualifier = 'H'; + ++fmt; + } + } + } + + /* default base */ + spec->base = 10; + switch (*fmt) { + case 'c': + spec->type = FORMAT_TYPE_CHAR; + return ++fmt - start; + + case 's': + spec->type = FORMAT_TYPE_STR; + return ++fmt - start; + + case 'p': + spec->type = FORMAT_TYPE_PTR; + return fmt - start; + /* skip alnum */ + + case 'n': + spec->type = FORMAT_TYPE_NRCHARS; + return ++fmt - start; + + case '%': + spec->type = FORMAT_TYPE_PERCENT_CHAR; + return ++fmt - start; + + /* integer number formats - set up the flags and "break" */ + case 'o': + spec->base = 8; + break; + + case 'x': + spec->flags |= SMALL; + + case 'X': + spec->base = 16; + break; + + case 'd': + case 'i': + spec->flags |= SIGN; + case 'u': + break; + + default: + spec->type = FORMAT_TYPE_INVALID; + return fmt - start; + } + + if (spec->qualifier == 'L') + spec->type = FORMAT_TYPE_LONG_LONG; + else if (spec->qualifier == 'l') { + if (spec->flags & SIGN) + spec->type = FORMAT_TYPE_LONG; + else + spec->type = FORMAT_TYPE_ULONG; + } else if (TOLOWER(spec->qualifier) == 'z') { + spec->type = FORMAT_TYPE_SIZE_T; + } else if (spec->qualifier == 't') { + spec->type = FORMAT_TYPE_PTRDIFF; + } else if (spec->qualifier == 'H') { + if (spec->flags & SIGN) + spec->type = FORMAT_TYPE_BYTE; + else + spec->type = FORMAT_TYPE_UBYTE; + } else if (spec->qualifier == 'h') { + if (spec->flags & SIGN) + spec->type = FORMAT_TYPE_SHORT; + else + spec->type = FORMAT_TYPE_USHORT; + } else { + if (spec->flags & SIGN) + spec->type = FORMAT_TYPE_INT; + else + spec->type = FORMAT_TYPE_UINT; + } + + return ++fmt - start; +} + +/** + * vsnprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @args: Arguments for the format string + * + * This function follows C99 vsnprintf, but has some extensions: + * %pS output the name of a text symbol with offset + * %ps output the name of a text symbol without offset + * %pF output the name of a function pointer with its offset + * %pf output the name of a function pointer without its offset + * %pR output the address range in a struct resource with decoded flags + * %pr output the address range in a struct resource with raw flags + * %pM output a 6-byte MAC address with colons + * %pm output a 6-byte MAC address without colons + * %pI4 print an IPv4 address without leading zeros + * %pi4 print an IPv4 address with leading zeros + * %pI6 print an IPv6 address with colons + * %pi6 print an IPv6 address without colons + * %pI6c print an IPv6 address as specified by + * http://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-00 + * %pU[bBlL] print a UUID/GUID in big or little endian using lower or upper + * case. + * %n is ignored + * + * The return value is the number of characters which would + * be generated for the given input, excluding the trailing + * '\0', as per ISO C99. If you want to have the exact + * number of characters written into @buf as return value + * (not including the trailing '\0'), use vscnprintf(). If the + * return is greater than or equal to @size, the resulting + * string is truncated. + * + * Call this function if you are already dealing with a va_list. + * You probably want snprintf() instead. + */ +int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + unsigned long long num; + char *str, *end; + struct printf_spec spec = {0}; + + /* Reject out-of-range values early. Large positive sizes are + used for unknown buffer sizes. */ + if (WARN_ON_ONCE((int) size < 0)) + return 0; + + str = buf; + end = buf + size; + + /* Make sure end is always >= buf */ + if (end < buf) { + end = ((void *)-1); + size = end - buf; + } + + while (*fmt) { + const char *old_fmt = fmt; + int read = format_decode(fmt, &spec); + + fmt += read; + + switch (spec.type) { + case FORMAT_TYPE_NONE: { + int copy = read; + if (str < end) { + if (copy > end - str) + copy = end - str; + memcpy(str, old_fmt, copy); + } + str += read; + break; + } + + case FORMAT_TYPE_WIDTH: + spec.field_width = va_arg(args, int); + break; + + case FORMAT_TYPE_PRECISION: + spec.precision = va_arg(args, int); + break; + + case FORMAT_TYPE_CHAR: { + char c; + + if (!(spec.flags & LEFT)) { + while (--spec.field_width > 0) { + if (str < end) + *str = ' '; + ++str; + + } + } + c = (unsigned char) va_arg(args, int); + if (str < end) + *str = c; + ++str; + while (--spec.field_width > 0) { + if (str < end) + *str = ' '; + ++str; + } + break; + } + + case FORMAT_TYPE_STR: + str = string(str, end, va_arg(args, char *), spec); + break; + + case FORMAT_TYPE_PTR: + str = pointer(fmt+1, str, end, va_arg(args, void *), + spec); + while (isalnum(*fmt)) + fmt++; + break; + + case FORMAT_TYPE_PERCENT_CHAR: + if (str < end) + *str = '%'; + ++str; + break; + + case FORMAT_TYPE_INVALID: + if (str < end) + *str = '%'; + ++str; + break; + + case FORMAT_TYPE_NRCHARS: { + u8 qualifier = spec.qualifier; + + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else if (TOLOWER(qualifier) == 'z') { + size_t *ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + break; + } + + default: + switch (spec.type) { + case FORMAT_TYPE_LONG_LONG: + num = va_arg(args, long long); + break; + case FORMAT_TYPE_ULONG: + num = va_arg(args, unsigned long); + break; + case FORMAT_TYPE_LONG: + num = va_arg(args, long); + break; + case FORMAT_TYPE_SIZE_T: + num = va_arg(args, size_t); + break; + case FORMAT_TYPE_PTRDIFF: + num = va_arg(args, ptrdiff_t); + break; + case FORMAT_TYPE_UBYTE: + num = (unsigned char) va_arg(args, int); + break; + case FORMAT_TYPE_BYTE: + num = (signed char) va_arg(args, int); + break; + case FORMAT_TYPE_USHORT: + num = (unsigned short) va_arg(args, int); + break; + case FORMAT_TYPE_SHORT: + num = (short) va_arg(args, int); + break; + case FORMAT_TYPE_INT: + num = (int) va_arg(args, int); + break; + default: + num = va_arg(args, unsigned int); + } + + str = number(str, end, num, spec); + } + } + + if (size > 0) { + if (str < end) + *str = '\0'; + else + end[-1] = '\0'; + } + + /* the trailing null byte doesn't count towards the total */ + return str-buf; + +} +EXPORT_SYMBOL(vsnprintf); + +/** + * vscnprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @args: Arguments for the format string + * + * The return value is the number of characters which have been written into + * the @buf not including the trailing '\0'. If @size is <= 0 the function + * returns 0. + * + * Call this function if you are already dealing with a va_list. + * You probably want scnprintf() instead. + * + * See the vsnprintf() documentation for format string extensions over C99. + */ +int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int i; + + i = vsnprintf(buf, size, fmt, args); + + return (i >= size) ? (size - 1) : i; +} +EXPORT_SYMBOL(vscnprintf); + +/** + * snprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + * + * The return value is the number of characters which would be + * generated for the given input, excluding the trailing null, + * as per ISO C99. If the return is greater than or equal to + * @size, the resulting string is truncated. + * + * See the vsnprintf() documentation for format string extensions over C99. + */ +int snprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + + return i; +} +EXPORT_SYMBOL(snprintf); + +/** + * scnprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @...: Arguments for the format string + * + * The return value is the number of characters written into @buf not including + * the trailing '\0'. If @size is <= 0 the function returns 0. + */ + +int scnprintf(char *buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + + return (i >= size) ? (size - 1) : i; +} +EXPORT_SYMBOL(scnprintf); + +/** + * vsprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @args: Arguments for the format string + * + * The function returns the number of characters written + * into @buf. Use vsnprintf() or vscnprintf() in order to avoid + * buffer overflows. + * + * Call this function if you are already dealing with a va_list. + * You probably want sprintf() instead. + * + * See the vsnprintf() documentation for format string extensions over C99. + */ +int vsprintf(char *buf, const char *fmt, va_list args) +{ + return vsnprintf(buf, INT_MAX, fmt, args); +} +EXPORT_SYMBOL(vsprintf); + +/** + * sprintf - Format a string and place it in a buffer + * @buf: The buffer to place the result into + * @fmt: The format string to use + * @...: Arguments for the format string + * + * The function returns the number of characters written + * into @buf. Use snprintf() or scnprintf() in order to avoid + * buffer overflows. + * + * See the vsnprintf() documentation for format string extensions over C99. + */ +int sprintf(char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, INT_MAX, fmt, args); + va_end(args); + + return i; +} +EXPORT_SYMBOL(sprintf); + +#ifdef CONFIG_BINARY_PRINTF +/* + * bprintf service: + * vbin_printf() - VA arguments to binary data + * bstr_printf() - Binary data to text string + */ + +/** + * vbin_printf - Parse a format string and place args' binary value in a buffer + * @bin_buf: The buffer to place args' binary value + * @size: The size of the buffer(by words(32bits), not characters) + * @fmt: The format string to use + * @args: Arguments for the format string + * + * The format follows C99 vsnprintf, except %n is ignored, and its argument + * is skiped. + * + * The return value is the number of words(32bits) which would be generated for + * the given input. + * + * NOTE: + * If the return value is greater than @size, the resulting bin_buf is NOT + * valid for bstr_printf(). + */ +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +{ + struct printf_spec spec = {0}; + char *str, *end; + + str = (char *)bin_buf; + end = (char *)(bin_buf + size); + +#define save_arg(type) \ +do { \ + if (sizeof(type) == 8) { \ + unsigned long long value; \ + str = PTR_ALIGN(str, sizeof(u32)); \ + value = va_arg(args, unsigned long long); \ + if (str + sizeof(type) <= end) { \ + *(u32 *)str = *(u32 *)&value; \ + *(u32 *)(str + 4) = *((u32 *)&value + 1); \ + } \ + } else { \ + unsigned long value; \ + str = PTR_ALIGN(str, sizeof(type)); \ + value = va_arg(args, int); \ + if (str + sizeof(type) <= end) \ + *(typeof(type) *)str = (type)value; \ + } \ + str += sizeof(type); \ +} while (0) + + while (*fmt) { + int read = format_decode(fmt, &spec); + + fmt += read; + + switch (spec.type) { + case FORMAT_TYPE_NONE: + case FORMAT_TYPE_INVALID: + case FORMAT_TYPE_PERCENT_CHAR: + break; + + case FORMAT_TYPE_WIDTH: + case FORMAT_TYPE_PRECISION: + save_arg(int); + break; + + case FORMAT_TYPE_CHAR: + save_arg(char); + break; + + case FORMAT_TYPE_STR: { + const char *save_str = va_arg(args, char *); + size_t len; + + if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE + || (unsigned long)save_str < PAGE_SIZE) + save_str = "(null)"; + len = strlen(save_str) + 1; + if (str + len < end) + memcpy(str, save_str, len); + str += len; + break; + } + + case FORMAT_TYPE_PTR: + save_arg(void *); + /* skip all alphanumeric pointer suffixes */ + while (isalnum(*fmt)) + fmt++; + break; + + case FORMAT_TYPE_NRCHARS: { + /* skip %n 's argument */ + u8 qualifier = spec.qualifier; + void *skip_arg; + if (qualifier == 'l') + skip_arg = va_arg(args, long *); + else if (TOLOWER(qualifier) == 'z') + skip_arg = va_arg(args, size_t *); + else + skip_arg = va_arg(args, int *); + break; + } + + default: + switch (spec.type) { + + case FORMAT_TYPE_LONG_LONG: + save_arg(long long); + break; + case FORMAT_TYPE_ULONG: + case FORMAT_TYPE_LONG: + save_arg(unsigned long); + break; + case FORMAT_TYPE_SIZE_T: + save_arg(size_t); + break; + case FORMAT_TYPE_PTRDIFF: + save_arg(ptrdiff_t); + break; + case FORMAT_TYPE_UBYTE: + case FORMAT_TYPE_BYTE: + save_arg(char); + break; + case FORMAT_TYPE_USHORT: + case FORMAT_TYPE_SHORT: + save_arg(short); + break; + default: + save_arg(int); + } + } + } + + return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; +#undef save_arg +} +EXPORT_SYMBOL_GPL(vbin_printf); + +/** + * bstr_printf - Format a string from binary arguments and place it in a buffer + * @buf: The buffer to place the result into + * @size: The size of the buffer, including the trailing null space + * @fmt: The format string to use + * @bin_buf: Binary arguments for the format string + * + * This function like C99 vsnprintf, but the difference is that vsnprintf gets + * arguments from stack, and bstr_printf gets arguments from @bin_buf which is + * a binary buffer that generated by vbin_printf. + * + * The format follows C99 vsnprintf, but has some extensions: + * see vsnprintf comment for details. + * + * The return value is the number of characters which would + * be generated for the given input, excluding the trailing + * '\0', as per ISO C99. If you want to have the exact + * number of characters written into @buf as return value + * (not including the trailing '\0'), use vscnprintf(). If the + * return is greater than or equal to @size, the resulting + * string is truncated. + */ +int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +{ + struct printf_spec spec = {0}; + char *str, *end; + const char *args = (const char *)bin_buf; + + if (WARN_ON_ONCE((int) size < 0)) + return 0; + + str = buf; + end = buf + size; + +#define get_arg(type) \ +({ \ + typeof(type) value; \ + if (sizeof(type) == 8) { \ + args = PTR_ALIGN(args, sizeof(u32)); \ + *(u32 *)&value = *(u32 *)args; \ + *((u32 *)&value + 1) = *(u32 *)(args + 4); \ + } else { \ + args = PTR_ALIGN(args, sizeof(type)); \ + value = *(typeof(type) *)args; \ + } \ + args += sizeof(type); \ + value; \ +}) + + /* Make sure end is always >= buf */ + if (end < buf) { + end = ((void *)-1); + size = end - buf; + } + + while (*fmt) { + const char *old_fmt = fmt; + int read = format_decode(fmt, &spec); + + fmt += read; + + switch (spec.type) { + case FORMAT_TYPE_NONE: { + int copy = read; + if (str < end) { + if (copy > end - str) + copy = end - str; + memcpy(str, old_fmt, copy); + } + str += read; + break; + } + + case FORMAT_TYPE_WIDTH: + spec.field_width = get_arg(int); + break; + + case FORMAT_TYPE_PRECISION: + spec.precision = get_arg(int); + break; + + case FORMAT_TYPE_CHAR: { + char c; + + if (!(spec.flags & LEFT)) { + while (--spec.field_width > 0) { + if (str < end) + *str = ' '; + ++str; + } + } + c = (unsigned char) get_arg(char); + if (str < end) + *str = c; + ++str; + while (--spec.field_width > 0) { + if (str < end) + *str = ' '; + ++str; + } + break; + } + + case FORMAT_TYPE_STR: { + const char *str_arg = args; + args += strlen(str_arg) + 1; + str = string(str, end, (char *)str_arg, spec); + break; + } + + case FORMAT_TYPE_PTR: + str = pointer(fmt+1, str, end, get_arg(void *), spec); + while (isalnum(*fmt)) + fmt++; + break; + + case FORMAT_TYPE_PERCENT_CHAR: + case FORMAT_TYPE_INVALID: + if (str < end) + *str = '%'; + ++str; + break; + + case FORMAT_TYPE_NRCHARS: + /* skip */ + break; + + default: { + unsigned long long num; + + switch (spec.type) { + + case FORMAT_TYPE_LONG_LONG: + num = get_arg(long long); + break; + case FORMAT_TYPE_ULONG: + case FORMAT_TYPE_LONG: + num = get_arg(unsigned long); + break; + case FORMAT_TYPE_SIZE_T: + num = get_arg(size_t); + break; + case FORMAT_TYPE_PTRDIFF: + num = get_arg(ptrdiff_t); + break; + case FORMAT_TYPE_UBYTE: + num = get_arg(unsigned char); + break; + case FORMAT_TYPE_BYTE: + num = get_arg(signed char); + break; + case FORMAT_TYPE_USHORT: + num = get_arg(unsigned short); + break; + case FORMAT_TYPE_SHORT: + num = get_arg(short); + break; + case FORMAT_TYPE_UINT: + num = get_arg(unsigned int); + break; + default: + num = get_arg(int); + } + + str = number(str, end, num, spec); + } /* default: */ + } /* switch(spec.type) */ + } /* while(*fmt) */ + + if (size > 0) { + if (str < end) + *str = '\0'; + else + end[-1] = '\0'; + } + +#undef get_arg + + /* the trailing null byte doesn't count towards the total */ + return str - buf; +} +EXPORT_SYMBOL_GPL(bstr_printf); + +/** + * bprintf - Parse a format string and place args' binary value in a buffer + * @bin_buf: The buffer to place args' binary value + * @size: The size of the buffer(by words(32bits), not characters) + * @fmt: The format string to use + * @...: Arguments for the format string + * + * The function returns the number of words(u32) written + * into @bin_buf. + */ +int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = vbin_printf(bin_buf, size, fmt, args); + va_end(args); + + return ret; +} +EXPORT_SYMBOL_GPL(bprintf); + +#endif /* CONFIG_BINARY_PRINTF */ + +/** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: format of buffer + * @args: arguments + */ +int vsscanf(const char *buf, const char *fmt, va_list args) +{ + const char *str = buf; + char *next; + char digit; + int num = 0; + u8 qualifier; + u8 base; + s16 field_width; + bool is_sign; + + while (*fmt && *str) { + /* skip any white space in format */ + /* white space in format matchs any amount of + * white space, including none, in the input. + */ + if (isspace(*fmt)) { + fmt = skip_spaces(++fmt); + str = skip_spaces(str); + } + + /* anything that is not a conversion must match exactly */ + if (*fmt != '%' && *fmt) { + if (*fmt++ != *str++) + break; + continue; + } + + if (!*fmt) + break; + ++fmt; + + /* skip this conversion. + * advance both strings to next white space + */ + if (*fmt == '*') { + while (!isspace(*fmt) && *fmt != '%' && *fmt) + fmt++; + while (!isspace(*str) && *str) + str++; + continue; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + + /* get conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || TOLOWER(*fmt) == 'l' || + TOLOWER(*fmt) == 'z') { + qualifier = *fmt++; + if (unlikely(qualifier == *fmt)) { + if (qualifier == 'h') { + qualifier = 'H'; + fmt++; + } else if (qualifier == 'l') { + qualifier = 'L'; + fmt++; + } + } + } + + if (!*fmt || !*str) + break; + + base = 10; + is_sign = 0; + + switch (*fmt++) { + case 'c': + { + char *s = (char *)va_arg(args, char*); + if (field_width == -1) + field_width = 1; + do { + *s++ = *str++; + } while (--field_width > 0 && *str); + num++; + } + continue; + case 's': + { + char *s = (char *)va_arg(args, char *); + if (field_width == -1) + field_width = SHRT_MAX; + /* first, skip leading white space in buffer */ + str = skip_spaces(str); + + /* now copy until next white space */ + while (*str && !isspace(*str) && field_width--) + *s++ = *str++; + *s = '\0'; + num++; + } + continue; + case 'n': + /* return number of characters read so far */ + { + int *i = (int *)va_arg(args, int*); + *i = str - buf; + } + continue; + case 'o': + base = 8; + break; + case 'x': + case 'X': + base = 16; + break; + case 'i': + base = 0; + case 'd': + is_sign = 1; + case 'u': + break; + case '%': + /* looking for '%' in str */ + if (*str++ != '%') + return num; + continue; + default: + /* invalid format; stop here */ + return num; + } + + /* have some sort of integer conversion. + * first, skip white space in buffer. + */ + str = skip_spaces(str); + + digit = *str; + if (is_sign && digit == '-') + digit = *(str + 1); + + if (!digit + || (base == 16 && !isxdigit(digit)) + || (base == 10 && !isdigit(digit)) + || (base == 8 && (!isdigit(digit) || digit > '7')) + || (base == 0 && !isdigit(digit))) + break; + + switch (qualifier) { + case 'H': /* that's 'hh' in format */ + if (is_sign) { + signed char *s = (signed char *)va_arg(args, signed char *); + *s = (signed char)simple_strtol(str, &next, base); + } else { + unsigned char *s = (unsigned char *)va_arg(args, unsigned char *); + *s = (unsigned char)simple_strtoul(str, &next, base); + } + break; + case 'h': + if (is_sign) { + short *s = (short *)va_arg(args, short *); + *s = (short)simple_strtol(str, &next, base); + } else { + unsigned short *s = (unsigned short *)va_arg(args, unsigned short *); + *s = (unsigned short)simple_strtoul(str, &next, base); + } + break; + case 'l': + if (is_sign) { + long *l = (long *)va_arg(args, long *); + *l = simple_strtol(str, &next, base); + } else { + unsigned long *l = (unsigned long *)va_arg(args, unsigned long *); + *l = simple_strtoul(str, &next, base); + } + break; + case 'L': + if (is_sign) { + long long *l = (long long *)va_arg(args, long long *); + *l = simple_strtoll(str, &next, base); + } else { + unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *); + *l = simple_strtoull(str, &next, base); + } + break; + case 'Z': + case 'z': + { + size_t *s = (size_t *)va_arg(args, size_t *); + *s = (size_t)simple_strtoul(str, &next, base); + } + break; + default: + if (is_sign) { + int *i = (int *)va_arg(args, int *); + *i = (int)simple_strtol(str, &next, base); + } else { + unsigned int *i = (unsigned int *)va_arg(args, unsigned int*); + *i = (unsigned int)simple_strtoul(str, &next, base); + } + break; + } + num++; + + if (!next) + break; + str = next; + } + + /* + * Now we've come all the way through so either the input string or the + * format ended. In the former case, there can be a %n at the current + * position in the format that needs to be filled. + */ + if (*fmt == '%' && *(fmt + 1) == 'n') { + int *p = (int *)va_arg(args, int *); + *p = str - buf; + } + + return num; +} +EXPORT_SYMBOL(vsscanf); + +/** + * sscanf - Unformat a buffer into a list of arguments + * @buf: input buffer + * @fmt: formatting of buffer + * @...: resulting arguments + */ +int sscanf(const char *buf, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsscanf(buf, fmt, args); + va_end(args); + + return i; +} +EXPORT_SYMBOL(sscanf); diff --git a/lib/zlib_deflate/Makefile b/lib/zlib_deflate/Makefile new file mode 100644 index 00000000..86275e3f --- /dev/null +++ b/lib/zlib_deflate/Makefile @@ -0,0 +1,11 @@ +# +# This is a modified version of zlib, which does all memory +# allocation ahead of time. +# +# This is the compression code, see zlib_inflate for the +# decompression code. +# + +obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate.o + +zlib_deflate-objs := deflate.o deftree.o deflate_syms.o diff --git a/lib/zlib_deflate/deflate.c b/lib/zlib_deflate/deflate.c new file mode 100644 index 00000000..46a31e5f --- /dev/null +++ b/lib/zlib_deflate/deflate.c @@ -0,0 +1,1253 @@ +/* +++ deflate.c */ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-1996 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired from that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". + * Available in ftp://ds.internic.net/rfc/rfc1951.txt + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala,E.R., and Greene,D.H. + * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 + * + */ + +#include <linux/module.h> +#include <linux/zutil.h> +#include "defutil.h" + + +/* =========================================================================== + * Function prototypes. + */ +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func) (deflate_state *s, int flush); +/* Compression function. Returns the block state after the call. */ + +static void fill_window (deflate_state *s); +static block_state deflate_stored (deflate_state *s, int flush); +static block_state deflate_fast (deflate_state *s, int flush); +static block_state deflate_slow (deflate_state *s, int flush); +static void lm_init (deflate_state *s); +static void putShortMSB (deflate_state *s, uInt b); +static void flush_pending (z_streamp strm); +static int read_buf (z_streamp strm, Byte *buf, unsigned size); +static uInt longest_match (deflate_state *s, IPos cur_match); + +#ifdef DEBUG_ZLIB +static void check_match (deflate_state *s, IPos start, IPos match, + int length); +#endif + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +static const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* maximum speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* maximum compression */ + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * IN assertion: all calls to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of str are valid + * (except for the last MIN_MATCH-1 bytes of the input file). + */ +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + s->prev[(str) & s->w_mask] = match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + s->head[s->hash_size-1] = NIL; \ + memset((char *)s->head, 0, (unsigned)(s->hash_size-1)*sizeof(*s->head)); + +/* ========================================================================= */ +int zlib_deflateInit2( + z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy +) +{ + deflate_state *s; + int noheader = 0; + deflate_workspace *mem; + + ush *overlay; + /* We overlay pending_buf and d_buf+l_buf. This works since the average + * output size for (length,distance) codes is <= 24 bits. + */ + + if (strm == NULL) return Z_STREAM_ERROR; + + strm->msg = NULL; + + if (level == Z_DEFAULT_COMPRESSION) level = 6; + + mem = (deflate_workspace *) strm->workspace; + + if (windowBits < 0) { /* undocumented feature: suppress zlib header */ + noheader = 1; + windowBits = -windowBits; + } + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 9 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + s = (deflate_state *) &(mem->deflate_memory); + strm->state = (struct internal_state *)s; + s->strm = strm; + + s->noheader = noheader; + s->w_bits = windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); + + s->window = (Byte *) mem->window_memory; + s->prev = (Pos *) mem->prev_memory; + s->head = (Pos *) mem->head_memory; + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + overlay = (ush *) mem->overlay_memory; + s->pending_buf = (uch *) overlay; + s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); + + s->d_buf = overlay + s->lit_bufsize/sizeof(ush); + s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return zlib_deflateReset(strm); +} + +/* ========================================================================= */ +#if 0 +int zlib_deflateSetDictionary( + z_streamp strm, + const Byte *dictionary, + uInt dictLength +) +{ + deflate_state *s; + uInt length = dictLength; + uInt n; + IPos hash_head = 0; + + if (strm == NULL || strm->state == NULL || dictionary == NULL) + return Z_STREAM_ERROR; + + s = (deflate_state *) strm->state; + if (s->status != INIT_STATE) return Z_STREAM_ERROR; + + strm->adler = zlib_adler32(strm->adler, dictionary, dictLength); + + if (length < MIN_MATCH) return Z_OK; + if (length > MAX_DIST(s)) { + length = MAX_DIST(s); +#ifndef USE_DICT_HEAD + dictionary += dictLength - length; /* use the tail of the dictionary */ +#endif + } + memcpy((char *)s->window, dictionary, length); + s->strstart = length; + s->block_start = (long)length; + + /* Insert all strings in the hash table (except for the last two bytes). + * s->lookahead stays null, so s->ins_h will be recomputed at the next + * call of fill_window. + */ + s->ins_h = s->window[0]; + UPDATE_HASH(s, s->ins_h, s->window[1]); + for (n = 0; n <= length - MIN_MATCH; n++) { + INSERT_STRING(s, n, hash_head); + } + if (hash_head) hash_head = 0; /* to make compiler happy */ + return Z_OK; +} +#endif /* 0 */ + +/* ========================================================================= */ +int zlib_deflateReset( + z_streamp strm +) +{ + deflate_state *s; + + if (strm == NULL || strm->state == NULL) + return Z_STREAM_ERROR; + + strm->total_in = strm->total_out = 0; + strm->msg = NULL; + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->noheader < 0) { + s->noheader = 0; /* was set to -1 by deflate(..., Z_FINISH); */ + } + s->status = s->noheader ? BUSY_STATE : INIT_STATE; + strm->adler = 1; + s->last_flush = Z_NO_FLUSH; + + zlib_tr_init(s); + lm_init(s); + + return Z_OK; +} + +/* ========================================================================= */ +#if 0 +int zlib_deflateParams( + z_streamp strm, + int level, + int strategy +) +{ + deflate_state *s; + compress_func func; + int err = Z_OK; + + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + s = (deflate_state *) strm->state; + + if (level == Z_DEFAULT_COMPRESSION) { + level = 6; + } + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_HUFFMAN_ONLY) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if (func != configuration_table[level].func && strm->total_in != 0) { + /* Flush the last buffer: */ + err = zlib_deflate(strm, Z_PARTIAL_FLUSH); + } + if (s->level != level) { + s->level = level; + s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return err; +} +#endif /* 0 */ + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +static void putShortMSB( + deflate_state *s, + uInt b +) +{ + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output goes + * through this function so some applications may wish to modify it + * to avoid allocating a large strm->next_out buffer and copying into it. + * (See also read_buf()). + */ +static void flush_pending( + z_streamp strm +) +{ + deflate_state *s = (deflate_state *) strm->state; + unsigned len = s->pending; + + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + if (strm->next_out != NULL) { + memcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + } + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* ========================================================================= */ +int zlib_deflate( + z_streamp strm, + int flush +) +{ + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (strm == NULL || strm->state == NULL || + flush > Z_FINISH || flush < 0) { + return Z_STREAM_ERROR; + } + s = (deflate_state *) strm->state; + + if ((strm->next_in == NULL && strm->avail_in != 0) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + return Z_STREAM_ERROR; + } + if (strm->avail_out == 0) return Z_BUF_ERROR; + + s->strm = strm; /* just in case */ + old_flush = s->last_flush; + s->last_flush = flush; + + /* Write the zlib header */ + if (s->status == INIT_STATE) { + + uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; + uInt level_flags = (s->level-1) >> 1; + + if (level_flags > 3) level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + s->status = BUSY_STATE; + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = 1L; + } + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUFF_ERROR. + */ + } else if (strm->avail_in == 0 && flush <= old_flush && + flush != Z_FINISH) { + return Z_BUF_ERROR; + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + return Z_BUF_ERROR; + } + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. + */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + zlib_tr_align(s); + } else if (flush == Z_PACKET_FLUSH) { + /* Output just the 3-bit `stored' block type value, + but not a zero length. */ + zlib_tr_stored_type_only(s); + } else { /* FULL_FLUSH or SYNC_FLUSH */ + zlib_tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + Assert(strm->avail_out > 0, "bug2"); + + if (flush != Z_FINISH) return Z_OK; + if (s->noheader) return Z_STREAM_END; + + /* Write the zlib trailer (adler32) */ + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + s->noheader = -1; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int zlib_deflateEnd( + z_streamp strm +) +{ + int status; + deflate_state *s; + + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + s = (deflate_state *) strm->state; + + status = s->status; + if (status != INIT_STATE && status != BUSY_STATE && + status != FINISH_STATE) { + return Z_STREAM_ERROR; + } + + strm->state = NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + */ +#if 0 +int zlib_deflateCopy ( + z_streamp dest, + z_streamp source +) +{ +#ifdef MAXSEG_64K + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + ush *overlay; + deflate_workspace *mem; + + + if (source == NULL || dest == NULL || source->state == NULL) { + return Z_STREAM_ERROR; + } + + ss = (deflate_state *) source->state; + + *dest = *source; + + mem = (deflate_workspace *) dest->workspace; + + ds = &(mem->deflate_memory); + + dest->state = (struct internal_state *) ds; + *ds = *ss; + ds->strm = dest; + + ds->window = (Byte *) mem->window_memory; + ds->prev = (Pos *) mem->prev_memory; + ds->head = (Pos *) mem->head_memory; + overlay = (ush *) mem->overlay_memory; + ds->pending_buf = (uch *) overlay; + + memcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + memcpy(ds->prev, ss->prev, ds->w_size * sizeof(Pos)); + memcpy(ds->head, ss->head, ds->hash_size * sizeof(Pos)); + memcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); + ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); + ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif +} +#endif /* 0 */ + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +static int read_buf( + z_streamp strm, + Byte *buf, + unsigned size +) +{ + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + if (!((deflate_state *)(strm->state))->noheader) { + strm->adler = zlib_adler32(strm->adler, strm->next_in, len); + } + memcpy(buf, strm->next_in, len); + strm->next_in += len; + strm->total_in += len; + + return (int)len; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +static void lm_init( + deflate_state *s +) +{ + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +} + +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +/* For 80x86 and 680x0, an optimized version will be provided in match.asm or + * match.S. The code will be functionally equivalent. + */ +static uInt longest_match( + deflate_state *s, + IPos cur_match /* current match */ +) +{ + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Byte *scan = s->window + s->strstart; /* current string */ + register Byte *match; /* matched string */ + register int len; /* length of current match */ + int best_len = s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Pos *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check. + */ + register Byte *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ush*)scan; + register ush scan_end = *(ush*)(scan+best_len-1); +#else + register Byte *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len-1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; + + Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2: + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ush*)(match+best_len-1) != scan_end || + *(ush*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart+3, +5, ... up to strstart+257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ush*)(scan+=2) == *(ush*)(match+=2) && + *(ush*)(scan+=2) == *(ush*)(match+=2) && + *(ush*)(scan+=2) == *(ush*)(match+=2) && + *(ush*)(scan+=2) == *(ush*)(match+=2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window+strstart+257 */ + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend-scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len-1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ush*)(scan+best_len-1); +#else + scan_end1 = scan[best_len-1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return best_len; + return s->lookahead; +} + +#ifdef DEBUG_ZLIB +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +static void check_match( + deflate_state *s, + IPos start, + IPos match, + int length +) +{ + /* check that the match is indeed a match */ + if (memcmp((char *)s->window + match, + (char *)s->window + start, length) != EQUAL) { + fprintf(stderr, " start %u, match %u, length %d\n", + start, match, length); + do { + fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); + } while (--length != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start-match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +static void fill_window( + deflate_state *s +) +{ + register unsigned n, m; + register Pos *p; + unsigned more; /* Amount of free space at the end of the window. */ + uInt wsize = s->w_size; + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if strstart == 0 + * and lookahead == 1 (input done one byte at time) + */ + more--; + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + } else if (s->strstart >= wsize+MAX_DIST(s)) { + + memcpy((char *)s->window, (char *)s->window+wsize, + (unsigned)wsize); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + + /* Slide the hash table (could be avoided with 32 bit values + at the expense of memory usage). We slide even when level == 0 + to keep the hash table consistent if we switch back to level > 0 + later. (Using level 0 permanently is not an optimal usage of + zlib, so we don't care about this pathological case.) + */ + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + } while (--n); + + n = wsize; + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m-wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); + more += wsize; + } + if (s->strm->avail_in == 0) return; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead >= MIN_MATCH) { + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + } + /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); +} + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, eof) { \ + zlib_tr_flush_block(s, (s->block_start >= 0L ? \ + (char *)&s->window[(unsigned)s->block_start] : \ + NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (eof)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, eof) { \ + FLUSH_BLOCK_ONLY(s, eof); \ + if (s->strm->avail_out == 0) return (eof) ? finish_started : need_more; \ +} + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * This function does not insert new strings in the dictionary since + * uncompressible data is probably not useful. This function is used + * only for the level=0 compression option. + * NOTE: this function should be optimized to avoid extra copying from + * window to pending_buf. + */ +static block_state deflate_stored( + deflate_state *s, + int flush +) +{ + /* Stored blocks are limited to 0xffff bytes, pending_buf is limited + * to pending_buf_size, and each stored block has a 5 byte header: + */ + ulg max_block_size = 0xffff; + ulg max_start; + + if (max_block_size > s->pending_buf_size - 5) { + max_block_size = s->pending_buf_size - 5; + } + + /* Copy as much as possible from input to output: */ + for (;;) { + /* Fill the window as much as possible: */ + if (s->lookahead <= 1) { + + Assert(s->strstart < s->w_size+MAX_DIST(s) || + s->block_start >= (long)s->w_size, "slide too late"); + + fill_window(s); + if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more; + + if (s->lookahead == 0) break; /* flush the current block */ + } + Assert(s->block_start >= 0L, "block gone"); + + s->strstart += s->lookahead; + s->lookahead = 0; + + /* Emit a stored block if pending_buf will be full: */ + max_start = s->block_start + max_block_size; + if (s->strstart == 0 || (ulg)s->strstart >= max_start) { + /* strstart == 0 is possible when wraparound on 16-bit machine */ + s->lookahead = (uInt)(s->strstart - max_start); + s->strstart = (uInt)max_start; + FLUSH_BLOCK(s, 0); + } + /* Flush if we may have to slide, otherwise block_start may become + * negative and the data will be gone: + */ + if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) { + FLUSH_BLOCK(s, 0); + } + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +static block_state deflate_fast( + deflate_state *s, + int flush +) +{ + IPos hash_head = NIL; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + if (s->strategy != Z_HUFFMAN_ONLY) { + s->match_length = longest_match (s, hash_head); + } + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + bflush = zlib_tr_tally(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in hash table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + bflush = zlib_tr_tally (s, 0, s->window[s->strstart]); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +static block_state deflate_slow( + deflate_state *s, + int flush +) +{ + IPos hash_head = NIL; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + if (s->strategy != Z_HUFFMAN_ONLY) { + s->match_length = longest_match (s, hash_head); + } + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED || + (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR))) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart-1, s->prev_match, s->prev_length); + + bflush = zlib_tr_tally(s, s->strstart -1 - s->prev_match, + s->prev_length - MIN_MATCH); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length-1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart-1])); + if (zlib_tr_tally (s, 0, s->window[s->strstart-1])) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart-1])); + zlib_tr_tally (s, 0, s->window[s->strstart-1]); + s->match_available = 0; + } + FLUSH_BLOCK(s, flush == Z_FINISH); + return flush == Z_FINISH ? finish_done : block_done; +} + +int zlib_deflate_workspacesize(void) +{ + return sizeof(deflate_workspace); +} diff --git a/lib/zlib_deflate/deflate_syms.c b/lib/zlib_deflate/deflate_syms.c new file mode 100644 index 00000000..ccfe25f3 --- /dev/null +++ b/lib/zlib_deflate/deflate_syms.c @@ -0,0 +1,18 @@ +/* + * linux/lib/zlib_deflate/deflate_syms.c + * + * Exported symbols for the deflate functionality. + * + */ + +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/zlib.h> + +EXPORT_SYMBOL(zlib_deflate_workspacesize); +EXPORT_SYMBOL(zlib_deflate); +EXPORT_SYMBOL(zlib_deflateInit2); +EXPORT_SYMBOL(zlib_deflateEnd); +EXPORT_SYMBOL(zlib_deflateReset); +MODULE_LICENSE("GPL"); diff --git a/lib/zlib_deflate/deftree.c b/lib/zlib_deflate/deftree.c new file mode 100644 index 00000000..ddf34829 --- /dev/null +++ b/lib/zlib_deflate/deftree.c @@ -0,0 +1,1113 @@ +/* +++ trees.c */ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-1996 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + */ + +/* From: trees.c,v 1.11 1996/07/24 13:41:06 me Exp $ */ + +/* #include "deflate.h" */ + +#include <linux/zutil.h> +#include "defutil.h" + +#ifdef DEBUG_ZLIB +# include <ctype.h> +#endif + +/* =========================================================================== + * Constants + */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ + +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ + +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +static const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ + = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; + +static const int extra_dbits[D_CODES] /* extra bits for each distance code */ + = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ + = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; + +static const uch bl_order[BL_CODES] + = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + +#define Buf_size (8 * 2*sizeof(char)) +/* Number of bits used within bi_buf. (bi_buf might be implemented on + * more than 16 bits on some systems.) + */ + +/* =========================================================================== + * Local data. These are initialized only once. + */ + +static ct_data static_ltree[L_CODES+2]; +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see zlib_tr_init + * below). + */ + +static ct_data static_dtree[D_CODES]; +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + +static uch dist_code[512]; +/* distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + +static uch length_code[MAX_MATCH-MIN_MATCH+1]; +/* length code for each normalized match length (0 == MIN_MATCH) */ + +static int base_length[LENGTH_CODES]; +/* First normalized length for each code (0 = MIN_MATCH) */ + +static int base_dist[D_CODES]; +/* First normalized distance for each code (0 = distance of 1) */ + +struct static_tree_desc_s { + const ct_data *static_tree; /* static tree or NULL */ + const int *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ +}; + +static static_tree_desc static_l_desc = +{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; + +static static_tree_desc static_d_desc = +{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; + +static static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; + +/* =========================================================================== + * Local (static) routines in this file. + */ + +static void tr_static_init (void); +static void init_block (deflate_state *s); +static void pqdownheap (deflate_state *s, ct_data *tree, int k); +static void gen_bitlen (deflate_state *s, tree_desc *desc); +static void gen_codes (ct_data *tree, int max_code, ush *bl_count); +static void build_tree (deflate_state *s, tree_desc *desc); +static void scan_tree (deflate_state *s, ct_data *tree, int max_code); +static void send_tree (deflate_state *s, ct_data *tree, int max_code); +static int build_bl_tree (deflate_state *s); +static void send_all_trees (deflate_state *s, int lcodes, int dcodes, + int blcodes); +static void compress_block (deflate_state *s, ct_data *ltree, + ct_data *dtree); +static void set_data_type (deflate_state *s); +static unsigned bi_reverse (unsigned value, int length); +static void bi_windup (deflate_state *s); +static void bi_flush (deflate_state *s); +static void copy_block (deflate_state *s, char *buf, unsigned len, + int header); + +#ifndef DEBUG_ZLIB +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* DEBUG_ZLIB */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +#define d_code(dist) \ + ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. dist_code[256] and dist_code[257] are never + * used. + */ + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +#ifdef DEBUG_ZLIB +static void send_bits (deflate_state *s, int value, int length); + +static void send_bits( + deflate_state *s, + int value, /* value to send */ + int length /* number of bits */ +) +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (value << s->bi_valid); + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !DEBUG_ZLIB */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = value;\ + s->bi_buf |= (val << s->bi_valid);\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* DEBUG_ZLIB */ + +/* =========================================================================== + * Initialize the various 'constant' tables. In a multi-threaded environment, + * this function may be called by two threads concurrently, but this is + * harmless since both invocations do exactly the same thing. + */ +static void tr_static_init(void) +{ + static int static_init_done; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1<<extra_lbits[code]); n++) { + length_code[length++] = (uch)code; + } + } + Assert (length == 256, "tr_static_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + length_code[length-1] = (uch)code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1<<extra_dbits[code]); n++) { + dist_code[dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { + dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse((unsigned)n, 5); + } + static_init_done = 1; +} + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void zlib_tr_init( + deflate_state *s +) +{ + tr_static_init(); + + s->compressed_len = 0L; + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; + s->last_eob_len = 8; /* enough lookahead for inflate */ +#ifdef DEBUG_ZLIB + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +/* =========================================================================== + * Initialize a new block. + */ +static void init_block( + deflate_state *s +) +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; + + s->dyn_ltree[END_BLOCK].Freq = 1; + s->opt_len = s->static_len = 0L; + s->last_lit = s->matches = 0; +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +static void pqdownheap( + deflate_state *s, + ct_data *tree, /* the tree to restore */ + int k /* node to move down */ +) +{ + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +static void gen_bitlen( + deflate_state *s, + tree_desc *desc /* the tree descriptor */ +) +{ + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const int *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ + + for (h = s->heap_max+1; h < HEAP_SIZE; h++) { + n = s->heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) bits = max_length, overflow++; + tree[n].Len = (ush)bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) continue; /* not a leaf node */ + + s->bl_count[bits]++; + xbits = 0; + if (n >= base) xbits = extra[n-base]; + f = tree[n].Freq; + s->opt_len += (ulg)f * (bits + xbits); + if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + } + if (overflow == 0) return; + + Trace((stderr,"\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length-1; + while (s->bl_count[bits] == 0) bits--; + s->bl_count[bits]--; /* move one leaf down the tree */ + s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ + s->bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = s->bl_count[bits]; + while (n != 0) { + m = s->heap[--h]; + if (m > max_code) continue; + if (tree[m].Len != (unsigned) bits) { + Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((long)bits - (long)tree[m].Len) + *(long)tree[m].Freq; + tree[m].Len = (ush)bits; + } + n--; + } + } +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +static void gen_codes( + ct_data *tree, /* the tree to decorate */ + int max_code, /* largest code with non zero frequency */ + ush *bl_count /* number of codes at each bit length */ +) +{ + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + bl_count[bits-1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = bi_reverse(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); + } +} + +/* =========================================================================== + * Construct one Huffman tree and assigns the code bit strings and lengths. + * Update the total bit length for the current block. + * IN assertion: the field freq is set for all tree elements. + * OUT assertions: the fields len and code are set to the optimal bit length + * and corresponding code. The length opt_len is updated; static_len is + * also updated if stree is not null. The field max_code is set. + */ +static void build_tree( + deflate_state *s, + tree_desc *desc /* the tree descriptor */ +) +{ + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node; /* new node being created */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + s->heap_len = 0, s->heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + s->heap[++(s->heap_len)] = max_code = n; + s->depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (s->heap_len < 2) { + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch) (max(s->depth[n], s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +static void scan_tree( + deflate_state *s, + ct_data *tree, /* the tree to be scanned */ + int max_code /* and its largest code of non zero frequency */ +) +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code+1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +static void send_tree( + deflate_state *s, + ct_data *tree, /* the tree to be scanned */ + int max_code /* and its largest code of non zero frequency */ +) +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code+1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n+1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +static int build_bl_tree( + deflate_state *s +) +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*(max_blindex+1) + 5+5+4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +static void send_all_trees( + deflate_state *s, + int lcodes, /* number of codes for each tree */ + int dcodes, /* number of codes for each tree */ + int blcodes /* number of codes for each tree */ +) +{ + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes-1, 5); + send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void zlib_tr_stored_block( + deflate_state *s, + char *buf, /* input block */ + ulg stored_len, /* length of input block */ + int eof /* true if this is the last block for a file */ +) +{ + send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + + copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +} + +/* Send just the `stored block' type code without any length bytes or data. + */ +void zlib_tr_stored_type_only( + deflate_state *s +) +{ + send_bits(s, (STORED_BLOCK << 1), 3); + bi_windup(s); + s->compressed_len = (s->compressed_len + 3) & ~7L; +} + + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + * The current inflate code requires 9 bits of lookahead. If the + * last two codes for the previous block (real code plus EOB) were coded + * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode + * the last real code. In this case we send two empty static blocks instead + * of one. (There are no problems if the previous block is stored or fixed.) + * To simplify the code, we assume the worst case of last real code encoded + * on one bit only. + */ +void zlib_tr_align( + deflate_state *s +) +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ + bi_flush(s); + /* Of the 10 bits for the empty block, we have already sent + * (10 - bi_valid) bits. The lookahead for the last real code (before + * the EOB of the previous block) was thus at least one plus the length + * of the EOB plus what we have just sent of the empty static block. + */ + if (1 + s->last_eob_len + 10 - s->bi_valid < 9) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); + s->compressed_len += 10L; + bi_flush(s); + } + s->last_eob_len = 7; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. This function + * returns the total compressed length for the file so far. + */ +ulg zlib_tr_flush_block( + deflate_state *s, + char *buf, /* input block, or NULL if too old */ + ulg stored_len, /* length of input block */ + int eof /* true if this is the last block for a file */ +) +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is ascii or binary */ + if (s->data_type == Z_UNKNOWN) set_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute first the block length in bytes*/ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->last_lit)); + + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + + /* If compression failed and this is the first and last block, + * and if the .zip file can be seeked (to rewrite the local header), + * the whole file is transformed into a stored file: + */ +#ifdef STORED_FILE_OK +# ifdef FORCE_STORED_FILE + if (eof && s->compressed_len == 0L) { /* force stored file */ +# else + if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) { +# endif + /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ + if (buf == (char*)0) error ("block vanished"); + + copy_block(s, buf, (unsigned)stored_len, 0); /* without header */ + s->compressed_len = stored_len << 3; + s->method = STORED; + } else +#endif /* STORED_FILE_OK */ + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len+4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + zlib_tr_stored_block(s, buf, stored_len, eof); + +#ifdef FORCE_STATIC + } else if (static_lenb >= 0) { /* force static trees */ +#else + } else if (static_lenb == opt_lenb) { +#endif + send_bits(s, (STATIC_TREES<<1)+eof, 3); + compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); + s->compressed_len += 3 + s->static_len; + } else { + send_bits(s, (DYN_TREES<<1)+eof, 3); + send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, + max_blindex+1); + compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); + s->compressed_len += 3 + s->opt_len; + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + init_block(s); + + if (eof) { + bi_windup(s); + s->compressed_len += 7; /* align on byte boundary */ + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, + s->compressed_len-7*eof)); + + return s->compressed_len >> 3; +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int zlib_tr_tally( + deflate_state *s, + unsigned dist, /* distance of matched string */ + unsigned lc /* match length-MIN_MATCH or unmatched char (if dist==0) */ +) +{ + s->d_buf[s->last_lit] = (ush)dist; + s->l_buf[s->last_lit++] = (uch)lc; + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "zlib_tr_tally: bad match"); + + s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + + /* Try to guess if it is profitable to stop the current block here */ + if ((s->last_lit & 0xfff) == 0 && s->level > 2) { + /* Compute an upper bound for the compressed length */ + ulg out_length = (ulg)s->last_lit*8L; + ulg in_length = (ulg)((long)s->strstart - s->block_start); + int dcode; + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += (ulg)s->dyn_dtree[dcode].Freq * + (5L+extra_dbits[dcode]); + } + out_length >>= 3; + Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", + s->last_lit, in_length, out_length, + 100L - out_length*100L/in_length)); + if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; + } + return (s->last_lit == s->lit_bufsize-1); + /* We avoid equality with lit_bufsize because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +static void compress_block( + deflate_state *s, + ct_data *ltree, /* literal tree */ + ct_data *dtree /* distance tree */ +) +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->last_lit != 0) do { + dist = s->d_buf[lx]; + lc = s->l_buf[lx++]; + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = length_code[lc]; + send_code(s, code+LITERALS+1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ + Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); + + } while (lx < s->last_lit); + + send_code(s, END_BLOCK, ltree); + s->last_eob_len = ltree[END_BLOCK].Len; +} + +/* =========================================================================== + * Set the data type to ASCII or BINARY, using a crude approximation: + * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. + * IN assertion: the fields freq of dyn_ltree are set and the total of all + * frequencies does not exceed 64K (to fit in an int on 16 bit machines). + */ +static void set_data_type( + deflate_state *s +) +{ + int n = 0; + unsigned ascii_freq = 0; + unsigned bin_freq = 0; + while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; + while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; + while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; + s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII); +} + +/* =========================================================================== + * Copy a stored block, storing first the length and its + * one's complement if requested. + */ +static void copy_block( + deflate_state *s, + char *buf, /* the input data */ + unsigned len, /* its length */ + int header /* true if block header must be written */ +) +{ + bi_windup(s); /* align on byte boundary */ + s->last_eob_len = 8; /* enough lookahead for inflate */ + + if (header) { + put_short(s, (ush)len); + put_short(s, (ush)~len); +#ifdef DEBUG_ZLIB + s->bits_sent += 2*16; +#endif + } +#ifdef DEBUG_ZLIB + s->bits_sent += (ulg)len<<3; +#endif + /* bundle up the put_byte(s, *buf++) calls */ + memcpy(&s->pending_buf[s->pending], buf, len); + s->pending += len; +} + diff --git a/lib/zlib_deflate/defutil.h b/lib/zlib_deflate/defutil.h new file mode 100644 index 00000000..6b15a909 --- /dev/null +++ b/lib/zlib_deflate/defutil.h @@ -0,0 +1,334 @@ + + + +#define Assert(err, str) +#define Trace(dummy) +#define Tracev(dummy) +#define Tracecv(err, dummy) +#define Tracevv(dummy) + + + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define INIT_STATE 42 +#define BUSY_STATE 113 +#define FINISH_STATE 666 +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + static_tree_desc *stat_desc; /* the corresponding static tree */ +} tree_desc; + +typedef ush Pos; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + +typedef struct deflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Byte *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Byte *pending_out; /* next pending byte to output to the stream */ + int pending; /* nb of bytes in the pending buffer */ + int noheader; /* suppress zlib header and adler32 */ + Byte data_type; /* UNKNOWN, BINARY or ASCII */ + Byte method; /* STORED (for zip only) or DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Byte *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Pos *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Pos *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + + uch *l_buf; /* buffer for literals or lengths */ + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt last_lit; /* running index in l_buf */ + + ush *d_buf; + /* Buffer for distances. To simplify the code, d_buf and l_buf have + * the same number of elements. To use different lengths, an extra flag + * array would be necessary. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + ulg compressed_len; /* total bit length of compressed file */ + uInt matches; /* number of string matches in current block */ + int last_eob_len; /* bit length of EOB code for last block */ + +#ifdef DEBUG_ZLIB + ulg bits_sent; /* bit length of the compressed data */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + +} deflate_state; + +typedef struct deflate_workspace { + /* State memory for the deflator */ + deflate_state deflate_memory; + Byte window_memory[2 * (1 << MAX_WBITS)]; + Pos prev_memory[1 << MAX_WBITS]; + Pos head_memory[1 << (MAX_MEM_LEVEL + 7)]; + char overlay_memory[(1 << (MAX_MEM_LEVEL + 6)) * (sizeof(ush)+2)]; +} deflate_workspace; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + + /* in trees.c */ +void zlib_tr_init (deflate_state *s); +int zlib_tr_tally (deflate_state *s, unsigned dist, unsigned lc); +ulg zlib_tr_flush_block (deflate_state *s, char *buf, ulg stored_len, + int eof); +void zlib_tr_align (deflate_state *s); +void zlib_tr_stored_block (deflate_state *s, char *buf, ulg stored_len, + int eof); +void zlib_tr_stored_type_only (deflate_state *); + + +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +static inline unsigned bi_reverse(unsigned code, /* the value to invert */ + int len) /* its bit length */ +{ + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +static inline void bi_flush(deflate_state *s) +{ + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +static inline void bi_windup(deflate_state *s) +{ + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef DEBUG_ZLIB + s->bits_sent = (s->bits_sent+7) & ~7; +#endif +} + diff --git a/lib/zlib_inflate/Makefile b/lib/zlib_inflate/Makefile new file mode 100644 index 00000000..49f8ce57 --- /dev/null +++ b/lib/zlib_inflate/Makefile @@ -0,0 +1,19 @@ +# +# This is a modified version of zlib, which does all memory +# allocation ahead of time. +# +# This is only the decompression, see zlib_deflate for the +# the compression +# +# Decompression needs to be serialized for each memory +# allocation. +# +# (The upsides of the simplification is that you can't get in +# any nasty situations wrt memory management, and that the +# uncompression can be done without blocking on allocation). +# + +obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate.o + +zlib_inflate-objs := inffast.o inflate.o infutil.o \ + inftrees.o inflate_syms.o diff --git a/lib/zlib_inflate/inffast.c b/lib/zlib_inflate/inffast.c new file mode 100644 index 00000000..2c13ecc5 --- /dev/null +++ b/lib/zlib_inflate/inffast.c @@ -0,0 +1,363 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include <linux/zutil.h> +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifndef ASMINF + +/* Allow machine dependent optimization for post-increment or pre-increment. + Based on testing to date, + Pre-increment preferred for: + - PowerPC G3 (Adler) + - MIPS R5000 (Randers-Pehrson) + Post-increment preferred for: + - none + No measurable difference: + - Pentium III (Anderson) + - M68060 (Nikl) + */ +union uu { + unsigned short us; + unsigned char b[2]; +}; + +/* Endian independed version */ +static inline unsigned short +get_unaligned16(const unsigned short *p) +{ + union uu mm; + unsigned char *b = (unsigned char *)p; + + mm.b[0] = b[0]; + mm.b[1] = b[1]; + return mm.us; +} + +#ifdef POSTINC +# define OFF 0 +# define PUP(a) *(a)++ +# define UP_UNALIGNED(a) get_unaligned16((a)++) +#else +# define OFF 1 +# define PUP(a) *++(a) +# define UP_UNALIGNED(a) get_unaligned16(++(a)) +#endif + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. + + - @start: inflate()'s starting value for strm->avail_out + */ +void inflate_fast(z_streamp strm, unsigned start) +{ + struct inflate_state *state; + const unsigned char *in; /* local strm->next_in */ + const unsigned char *last; /* while in < last, enough input available */ + unsigned char *out; /* local strm->next_out */ + unsigned char *beg; /* inflate()'s initial strm->next_out */ + unsigned char *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned write; /* window write index */ + unsigned char *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const *lcode; /* local strm->lencode */ + code const *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code this; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state *)strm->state; + in = strm->next_in - OFF; + last = in + (strm->avail_in - 5); + out = strm->next_out - OFF; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + write = state->write; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + this = lcode[hold & lmask]; + dolen: + op = (unsigned)(this.bits); + hold >>= op; + bits -= op; + op = (unsigned)(this.op); + if (op == 0) { /* literal */ + PUP(out) = (unsigned char)(this.val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(this.val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + if (bits < 15) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + this = dcode[hold & dmask]; + dodist: + op = (unsigned)(this.bits); + hold >>= op; + bits -= op; + op = (unsigned)(this.op); + if (op & 16) { /* distance base */ + dist = (unsigned)(this.val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(PUP(in)) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + from = window - OFF; + if (write == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (write < op) { /* wrap around window */ + from += wsize + write - op; + op -= write; + if (op < len) { /* some from end of window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = window - OFF; + if (write < len) { /* some from start of window */ + op = write; + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += write - op; + if (op < len) { /* some from window */ + len -= op; + do { + PUP(out) = PUP(from); + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + PUP(out) = PUP(from); + PUP(out) = PUP(from); + PUP(out) = PUP(from); + len -= 3; + } + if (len) { + PUP(out) = PUP(from); + if (len > 1) + PUP(out) = PUP(from); + } + } + else { + unsigned short *sout; + unsigned long loops; + + from = out - dist; /* copy direct from output */ + /* minimum length is three */ + /* Align out addr */ + if (!((long)(out - 1 + OFF) & 1)) { + PUP(out) = PUP(from); + len--; + } + sout = (unsigned short *)(out - OFF); + if (dist > 2) { + unsigned short *sfrom; + + sfrom = (unsigned short *)(from - OFF); + loops = len >> 1; + do +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + PUP(sout) = PUP(sfrom); +#else + PUP(sout) = UP_UNALIGNED(sfrom); +#endif + while (--loops); + out = (unsigned char *)sout + OFF; + from = (unsigned char *)sfrom + OFF; + } else { /* dist == 1 or dist == 2 */ + unsigned short pat16; + + pat16 = *(sout-1+OFF); + if (dist == 1) { + union uu mm; + /* copy one char pattern to both bytes */ + mm.us = pat16; + mm.b[0] = mm.b[1]; + pat16 = mm.us; + } + loops = len >> 1; + do + PUP(sout) = pat16; + while (--loops); + out = (unsigned char *)sout + OFF; + } + if (len & 1) + PUP(out) = PUP(from); + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + this = dcode[this.val + (hold & ((1U << op) - 1))]; + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + this = lcode[this.val + (hold & ((1U << op) - 1))]; + goto dolen; + } + else if (op & 32) { /* end-of-block */ + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in + OFF; + strm->next_out = out + OFF; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and write == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/lib/zlib_inflate/inffast.h b/lib/zlib_inflate/inffast.h new file mode 100644 index 00000000..40315d9f --- /dev/null +++ b/lib/zlib_inflate/inffast.h @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void inflate_fast (z_streamp strm, unsigned start); diff --git a/lib/zlib_inflate/inffixed.h b/lib/zlib_inflate/inffixed.h new file mode 100644 index 00000000..75ed4b59 --- /dev/null +++ b/lib/zlib_inflate/inffixed.h @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. It + is part of the implementation of the compression library and + is subject to change. Applications should only use zlib.h. + */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/lib/zlib_inflate/inflate.c b/lib/zlib_inflate/inflate.c new file mode 100644 index 00000000..f5ce87b0 --- /dev/null +++ b/lib/zlib_inflate/inflate.c @@ -0,0 +1,918 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * Based on zlib 1.2.3 but modified for the Linux Kernel by + * Richard Purdie <richard@openedhand.com> + * + * Changes mainly for static instead of dynamic memory allocation + * + */ + +#include <linux/zutil.h> +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" +#include "infutil.h" + +int zlib_inflate_workspacesize(void) +{ + return sizeof(struct inflate_workspace); +} + +int zlib_inflateReset(z_streamp strm) +{ + struct inflate_state *state; + + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + state = (struct inflate_state *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = NULL; + strm->adler = 1; /* to support ill-conceived Java test suite */ + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->dmax = 32768U; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + + /* Initialise Window */ + state->wsize = 1U << state->wbits; + state->write = 0; + state->whave = 0; + + return Z_OK; +} + +#if 0 +int zlib_inflatePrime(z_streamp strm, int bits, int value) +{ + struct inflate_state *state; + + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + state = (struct inflate_state *)strm->state; + if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += value << state->bits; + state->bits += bits; + return Z_OK; +} +#endif + +int zlib_inflateInit2(z_streamp strm, int windowBits) +{ + struct inflate_state *state; + + if (strm == NULL) return Z_STREAM_ERROR; + strm->msg = NULL; /* in case we return an error */ + + state = &WS(strm)->inflate_state; + strm->state = (struct internal_state *)state; + + if (windowBits < 0) { + state->wrap = 0; + windowBits = -windowBits; + } + else { + state->wrap = (windowBits >> 4) + 1; + } + if (windowBits < 8 || windowBits > 15) { + return Z_STREAM_ERROR; + } + state->wbits = (unsigned)windowBits; + state->window = &WS(strm)->working_window[0]; + + return zlib_inflateReset(strm); +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. This returns fixed tables from inffixed.h. + */ +static void zlib_fixedtables(struct inflate_state *state) +{ +# include "inffixed.h" + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. This is only called when a window is already in use, or when + output has been written during this inflate call, but the end of the deflate + stream has not been reached yet. It is also called to window dictionary data + when a dictionary is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +static void zlib_updatewindow(z_streamp strm, unsigned out) +{ + struct inflate_state *state; + unsigned copy, dist; + + state = (struct inflate_state *)strm->state; + + /* copy state->wsize or less output bytes into the circular window */ + copy = out - strm->avail_out; + if (copy >= state->wsize) { + memcpy(state->window, strm->next_out - state->wsize, state->wsize); + state->write = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->write; + if (dist > copy) dist = copy; + memcpy(state->window + state->write, strm->next_out - copy, dist); + copy -= dist; + if (copy) { + memcpy(state->window, strm->next_out - copy, copy); + state->write = copy; + state->whave = state->wsize; + } + else { + state->write += dist; + if (state->write == state->wsize) state->write = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } +} + + +/* + * At the end of a Deflate-compressed PPP packet, we expect to have seen + * a `stored' block type value but not the (zero) length bytes. + */ +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. + */ +static int zlib_inflateSyncPacket(z_streamp strm) +{ + struct inflate_state *state; + + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + state = (struct inflate_state *)strm->state; + + if (state->mode == STORED && state->bits == 0) { + state->mode = TYPE; + return Z_OK; + } + return Z_DATA_ERROR; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#define UPDATE(check, buf, len) zlib_adler32(check, buf, len) + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* Reverse the bytes in a 32-bit value */ +#define REVERSE(q) \ + ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ + (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. + + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. + */ + +int zlib_inflate(z_streamp strm, int flush) +{ + struct inflate_state *state; + const unsigned char *next; /* next input */ + unsigned char *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char *from; /* where to copy match bytes from */ + code this; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Do not check for strm->next_out == NULL here as ppc zImage + inflates to strm->next_out = 0 */ + + if (strm == NULL || strm->state == NULL || + (strm->next_in == NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state *)strm->state; + + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); + if ( + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + strm->adler = state->check = zlib_adler32(0L, NULL, 0); + state->mode = hold & 0x200 ? DICTID : TYPE; + INITBITS(); + break; + case DICTID: + NEEDBITS(32); + strm->adler = state->check = REVERSE(hold); + INITBITS(); + state->mode = DICT; + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = zlib_adler32(0L, NULL, 0); + state->mode = TYPE; + case TYPE: + if (flush == Z_BLOCK) goto inf_leave; + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + state->mode = STORED; + break; + case 1: /* fixed block */ + zlib_fixedtables(state); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + INITBITS(); + state->mode = COPY; + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + memcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + state->have = 0; + state->mode = LENLENS; + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const *)(state->next); + state->lenbits = 7; + ret = zlib_inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + state->have = 0; + state->mode = CODELENS; + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.val < 16) { + NEEDBITS(this.bits); + DROPBITS(this.bits); + state->lens[state->have++] = this.val; + } + else { + if (this.val == 16) { + NEEDBITS(this.bits + 2); + DROPBITS(this.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (this.val == 17) { + NEEDBITS(this.bits + 3); + DROPBITS(this.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(this.bits + 7); + DROPBITS(this.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* build code tables */ + state->next = state->codes; + state->lencode = (code const *)(state->next); + state->lenbits = 9; + ret = zlib_inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const *)(state->next); + state->distbits = 6; + ret = zlib_inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + state->mode = LEN; + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + break; + } + for (;;) { + this = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if (this.op && (this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + state->length = (unsigned)this.val; + if ((int)(this.op) == 0) { + state->mode = LIT; + break; + } + if (this.op & 32) { + state->mode = TYPE; + break; + } + if (this.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(this.op) & 15; + state->mode = LENEXT; + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + state->mode = DIST; + case DIST: + for (;;) { + this = state->distcode[BITS(state->distbits)]; + if ((unsigned)(this.bits) <= bits) break; + PULLBYTE(); + } + if ((this.op & 0xf0) == 0) { + last = this; + for (;;) { + this = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + this.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(this.bits); + if (this.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)this.val; + state->extra = (unsigned)(this.op) & 15; + state->mode = DISTEXT; + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + if (state->offset > state->whave + out - left) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + state->mode = MATCH; + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->write) { + copy -= state->write; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->write - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if (out) + strm->adler = state->check = + UPDATE(state->check, put - out, out); + out = left; + if (( + REVERSE(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + } + state->mode = DONE; + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call zlib_updatewindow() to create and/or update the window state. + */ + inf_leave: + RESTORE(); + if (state->wsize || (state->mode < CHECK && out != strm->avail_out)) + zlib_updatewindow(strm, out); + + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if (state->wrap && out) + strm->adler = state->check = + UPDATE(state->check, strm->next_out - out, out); + + strm->data_type = state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0); + + if (flush == Z_PACKET_FLUSH && ret == Z_OK && + strm->avail_out != 0 && strm->avail_in == 0) + return zlib_inflateSyncPacket(strm); + + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + + return ret; +} + +int zlib_inflateEnd(z_streamp strm) +{ + if (strm == NULL || strm->state == NULL) + return Z_STREAM_ERROR; + return Z_OK; +} + +#if 0 +int zlib_inflateSetDictionary(z_streamp strm, const Byte *dictionary, + uInt dictLength) +{ + struct inflate_state *state; + unsigned long id; + + /* check state */ + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + state = (struct inflate_state *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary id */ + if (state->mode == DICT) { + id = zlib_adler32(0L, NULL, 0); + id = zlib_adler32(id, dictionary, dictLength); + if (id != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window */ + zlib_updatewindow(strm, strm->avail_out); + + if (dictLength > state->wsize) { + memcpy(state->window, dictionary + dictLength - state->wsize, + state->wsize); + state->whave = state->wsize; + } + else { + memcpy(state->window + state->wsize - dictLength, dictionary, + dictLength); + state->whave = dictLength; + } + state->havedict = 1; + return Z_OK; +} +#endif + +#if 0 +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, zlib_syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +static unsigned zlib_syncsearch(unsigned *have, unsigned char *buf, + unsigned len) +{ + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} +#endif + +#if 0 +int zlib_inflateSync(z_streamp strm) +{ + unsigned len; /* number of bytes to look at or looked at */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state *state; + + /* check parameters */ + if (strm == NULL || strm->state == NULL) return Z_STREAM_ERROR; + state = (struct inflate_state *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold <<= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + zlib_syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = zlib_syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + in = strm->total_in; out = strm->total_out; + zlib_inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->mode = TYPE; + return Z_OK; +} +#endif + +/* + * This subroutine adds the data at next_in/avail_in to the output history + * without performing any output. The output buffer must be "caught up"; + * i.e. no pending output but this should always be the case. The state must + * be waiting on the start of a block (i.e. mode == TYPE or HEAD). On exit, + * the output will also be caught up, and the checksum will have been updated + * if need be. + */ +int zlib_inflateIncomp(z_stream *z) +{ + struct inflate_state *state = (struct inflate_state *)z->state; + Byte *saved_no = z->next_out; + uInt saved_ao = z->avail_out; + + if (state->mode != TYPE && state->mode != HEAD) + return Z_DATA_ERROR; + + /* Setup some variables to allow misuse of updateWindow */ + z->avail_out = 0; + z->next_out = (unsigned char*)z->next_in + z->avail_in; + + zlib_updatewindow(z, z->avail_in); + + /* Restore saved variables */ + z->avail_out = saved_ao; + z->next_out = saved_no; + + z->adler = state->check = + UPDATE(state->check, z->next_in, z->avail_in); + + z->total_out += z->avail_in; + z->total_in += z->avail_in; + z->next_in += z->avail_in; + state->total += z->avail_in; + z->avail_in = 0; + + return Z_OK; +} diff --git a/lib/zlib_inflate/inflate.h b/lib/zlib_inflate/inflate.h new file mode 100644 index 00000000..3d17b3d1 --- /dev/null +++ b/lib/zlib_inflate/inflate.h @@ -0,0 +1,111 @@ +#ifndef INFLATE_H +#define INFLATE_H + +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN, /* i: waiting for length/lit code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to the BAD or MEM mode -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME + NAME -> COMMENT -> HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + Read deflate blocks: + TYPE -> STORED or TABLE or LEN or CHECK + STORED -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN + Read deflate codes: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* state maintained between inflate() calls. Approximately 7K bytes. */ +struct inflate_state { + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags (0 if zlib) */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + /* gz_headerp head; */ /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned write; /* window write index */ + unsigned char *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const *lencode; /* starting table for length/literal codes */ + code const *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ +}; +#endif diff --git a/lib/zlib_inflate/inflate_syms.c b/lib/zlib_inflate/inflate_syms.c new file mode 100644 index 00000000..67329fe9 --- /dev/null +++ b/lib/zlib_inflate/inflate_syms.c @@ -0,0 +1,20 @@ +/* + * linux/lib/zlib_inflate/inflate_syms.c + * + * Exported symbols for the inflate functionality. + * + */ + +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/zlib.h> + +EXPORT_SYMBOL(zlib_inflate_workspacesize); +EXPORT_SYMBOL(zlib_inflate); +EXPORT_SYMBOL(zlib_inflateInit2); +EXPORT_SYMBOL(zlib_inflateEnd); +EXPORT_SYMBOL(zlib_inflateReset); +EXPORT_SYMBOL(zlib_inflateIncomp); +EXPORT_SYMBOL(zlib_inflate_blob); +MODULE_LICENSE("GPL"); diff --git a/lib/zlib_inflate/inftrees.c b/lib/zlib_inflate/inftrees.c new file mode 100644 index 00000000..3fe6ce5b --- /dev/null +++ b/lib/zlib_inflate/inftrees.c @@ -0,0 +1,315 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include <linux/zutil.h> +#include "inftrees.h" + +#define MAXBITS 15 + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int zlib_inflate_table(codetype type, unsigned short *lens, unsigned codes, + code **table, unsigned *bits, unsigned short *work) +{ + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code this; /* table entry for duplication */ + code *next; /* next available space in table */ + const unsigned short *base; /* base value table to use */ + const unsigned short *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 201, 196}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)1; + this.val = (unsigned short)0; + *(*table)++ = this; /* make a table to force an error */ + *(*table)++ = this; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min <= MAXBITS; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked when a LENS table is being made + against the space in *table, ENOUGH, minus the maximum space needed by + the worst case distance code, MAXD. This should never happen, but the + sufficiency of ENOUGH has not been proven exhaustively, hence the check. + This assumes that when type == LENS, bits == 9. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + this.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + this.op = (unsigned char)0; + this.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + this.op = (unsigned char)(extra[work[sym]]); + this.val = base[work[sym]]; + } + else { + this.op = (unsigned char)(32 + 64); /* end of block */ + this.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = this; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if (type == LENS && used >= ENOUGH - MAXD) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* + Fill in rest of table for incomplete codes. This loop is similar to the + loop above in incrementing huff for table indices. It is assumed that + len is equal to curr + drop, so there is no loop needed to increment + through high index bits. When the current sub-table is filled, the loop + drops back to the root table to fill in any remaining entries there. + */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)(len - drop); + this.val = (unsigned short)0; + while (huff != 0) { + /* when done with sub-table, drop back to root table */ + if (drop != 0 && (huff & mask) != low) { + drop = 0; + len = root; + next = *table; + this.bits = (unsigned char)len; + } + + /* put invalid code marker in table */ + next[huff >> drop] = this; + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/lib/zlib_inflate/inftrees.h b/lib/zlib_inflate/inftrees.h new file mode 100644 index 00000000..b70b4731 --- /dev/null +++ b/lib/zlib_inflate/inftrees.h @@ -0,0 +1,59 @@ +#ifndef INFTREES_H +#define INFTREES_H + +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of dynamic tree. The maximum found in a long but non- + exhaustive search was 1444 code structures (852 for length/literals + and 592 for distances, the latter actually the result of an + exhaustive search). The true maximum is not known, but the value + below is more than safe. */ +#define ENOUGH 2048 +#define MAXD 592 + +/* Type of code to build for inftable() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +extern int zlib_inflate_table (codetype type, unsigned short *lens, + unsigned codes, code **table, + unsigned *bits, unsigned short *work); +#endif diff --git a/lib/zlib_inflate/infutil.c b/lib/zlib_inflate/infutil.c new file mode 100644 index 00000000..4824c2cc --- /dev/null +++ b/lib/zlib_inflate/infutil.c @@ -0,0 +1,49 @@ +#include <linux/zutil.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +/* Utility function: initialize zlib, unpack binary blob, clean up zlib, + * return len or negative error code. + */ +int zlib_inflate_blob(void *gunzip_buf, unsigned int sz, + const void *buf, unsigned int len) +{ + const u8 *zbuf = buf; + struct z_stream_s *strm; + int rc; + + rc = -ENOMEM; + strm = kmalloc(sizeof(*strm), GFP_KERNEL); + if (strm == NULL) + goto gunzip_nomem1; + strm->workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL); + if (strm->workspace == NULL) + goto gunzip_nomem2; + + /* gzip header (1f,8b,08... 10 bytes total + possible asciz filename) + * expected to be stripped from input + */ + strm->next_in = zbuf; + strm->avail_in = len; + strm->next_out = gunzip_buf; + strm->avail_out = sz; + + rc = zlib_inflateInit2(strm, -MAX_WBITS); + if (rc == Z_OK) { + rc = zlib_inflate(strm, Z_FINISH); + /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ + if (rc == Z_STREAM_END) + rc = sz - strm->avail_out; + else + rc = -EINVAL; + zlib_inflateEnd(strm); + } else + rc = -EINVAL; + + kfree(strm->workspace); +gunzip_nomem2: + kfree(strm); +gunzip_nomem1: + return rc; /* returns Z_OK (0) if successful */ +} diff --git a/lib/zlib_inflate/infutil.h b/lib/zlib_inflate/infutil.h new file mode 100644 index 00000000..eb1a9007 --- /dev/null +++ b/lib/zlib_inflate/infutil.h @@ -0,0 +1,25 @@ +/* infutil.h -- types and macros common to blocks and codes + * Copyright (C) 1995-1998 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +#ifndef _INFUTIL_H +#define _INFUTIL_H + +#include <linux/zlib.h> + +/* memory allocation for inflation */ + +struct inflate_workspace { + struct inflate_state inflate_state; + unsigned char working_window[1 << MAX_WBITS]; +}; + +#define WS(z) ((struct inflate_workspace *)(z->workspace)) + +#endif |