summary | refs | log | tree | commit | diff
path: root/src/tools
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools')
-rw-r--r-- src/tools/Makefile.am 27
-rw-r--r-- src/tools/Makefile.in 755
-rwxr-xr-x src/tools/affixcompress 192
-rw-r--r-- src/tools/analyze.cxx 79
-rw-r--r-- src/tools/chmorph.cxx 86
-rw-r--r-- src/tools/example.cxx 65
-rw-r--r-- src/tools/hunspell.cxx 1785
-rw-r--r-- src/tools/hunzip.cxx 22
-rw-r--r-- src/tools/hzip.c 325
-rw-r--r-- src/tools/ispellaff2myspell 472
-rwxr-xr-x src/tools/makealias 115
-rw-r--r-- src/tools/munch.c 832
-rw-r--r-- src/tools/munch.h 121
-rw-r--r-- src/tools/unmunch.c 514
-rw-r--r-- src/tools/unmunch.h 78
-rwxr-xr-x src/tools/wordforms 35
-rw-r--r-- src/tools/wordlist2hunspell 38
17 files changed, 5541 insertions, 0 deletions
diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am
new file mode 100644
index 0000000..8074eea
--- /dev/null
+++ b/src/tools/Makefile.am
@@ -0,0 +1,27 @@
+# Automake input for the Hunspell command-line tools in src/tools.
+
+# Programs installed into $(bindir); example is built but not installed.
+bin_PROGRAMS = analyze chmorph hunspell munch unmunch hzip hunzip
+noinst_PROGRAMS = example
+
+# Helper scripts that are distributed and installed into $(bindir) as-is.
+dist_bin_SCRIPTS = makealias affixcompress wordforms ispellaff2myspell wordlist2hunspell
+
+# Header search path shared by every program below.
+INCLUDES = -I$(top_srcdir)/src/hunspell -I$(top_srcdir)/src/parsers
+
+# --- tools linked against the Hunspell library only ---
+analyze_SOURCES = analyze.cxx
+analyze_LDADD = ../hunspell/libhunspell-1.3.la
+
+example_SOURCES = example.cxx
+example_LDADD = ../hunspell/libhunspell-1.3.la
+
+hunzip_SOURCES = hunzip.cxx
+hunzip_LDADD = ../hunspell/libhunspell-1.3.la
+
+# --- tools that additionally need the text parsers ---
+chmorph_SOURCES = chmorph.cxx
+chmorph_LDADD = ../parsers/libparsers.a ../hunspell/libhunspell-1.3.la
+
+# The interactive front end also links the optional libraries chosen by
+# configure (gettext, iconv, curses, readline).
+hunspell_SOURCES = hunspell.cxx
+hunspell_LDADD = @LIBINTL@ @LIBICONV@ ../parsers/libparsers.a \
+ ../hunspell/libhunspell-1.3.la @CURSESLIB@ @READLINELIB@
+
+# --- stand-alone C tools with no project libraries ---
+hzip_SOURCES = hzip.c
+munch_SOURCES = munch.c munch.h
+unmunch_SOURCES = unmunch.c unmunch.h
diff --git a/src/tools/Makefile.in b/src/tools/Makefile.in
new file mode 100644
index 0000000..075f0b7
--- /dev/null
+++ b/src/tools/Makefile.in
@@ -0,0 +1,755 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+bin_PROGRAMS = analyze$(EXEEXT) chmorph$(EXEEXT) hunspell$(EXEEXT) \
+ munch$(EXEEXT) unmunch$(EXEEXT) hzip$(EXEEXT) hunzip$(EXEEXT)
+noinst_PROGRAMS = example$(EXEEXT)
+subdir = src/tools
+DIST_COMMON = $(dist_bin_SCRIPTS) $(srcdir)/Makefile.am \
+ $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
+ $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \
+ $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \
+ $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \
+ $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \
+ $(top_srcdir)/m4/inttypes-pri.m4 \
+ $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \
+ $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
+ $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \
+ $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \
+ $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \
+ $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \
+ $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"
+PROGRAMS = $(bin_PROGRAMS) $(noinst_PROGRAMS)
+am_analyze_OBJECTS = analyze.$(OBJEXT)
+analyze_OBJECTS = $(am_analyze_OBJECTS)
+analyze_DEPENDENCIES = ../hunspell/libhunspell-1.3.la
+am_chmorph_OBJECTS = chmorph.$(OBJEXT)
+chmorph_OBJECTS = $(am_chmorph_OBJECTS)
+chmorph_DEPENDENCIES = ../parsers/libparsers.a \
+ ../hunspell/libhunspell-1.3.la
+am_example_OBJECTS = example.$(OBJEXT)
+example_OBJECTS = $(am_example_OBJECTS)
+example_DEPENDENCIES = ../hunspell/libhunspell-1.3.la
+am_hunspell_OBJECTS = hunspell.$(OBJEXT)
+hunspell_OBJECTS = $(am_hunspell_OBJECTS)
+hunspell_DEPENDENCIES = ../parsers/libparsers.a \
+ ../hunspell/libhunspell-1.3.la
+am_hunzip_OBJECTS = hunzip.$(OBJEXT)
+hunzip_OBJECTS = $(am_hunzip_OBJECTS)
+hunzip_DEPENDENCIES = ../hunspell/libhunspell-1.3.la
+am_hzip_OBJECTS = hzip.$(OBJEXT)
+hzip_OBJECTS = $(am_hzip_OBJECTS)
+hzip_LDADD = $(LDADD)
+am_munch_OBJECTS = munch.$(OBJEXT)
+munch_OBJECTS = $(am_munch_OBJECTS)
+munch_LDADD = $(LDADD)
+am_unmunch_OBJECTS = unmunch.$(OBJEXT)
+unmunch_OBJECTS = $(am_unmunch_OBJECTS)
+unmunch_LDADD = $(LDADD)
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+SCRIPTS = $(dist_bin_SCRIPTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(analyze_SOURCES) $(chmorph_SOURCES) $(example_SOURCES) \
+ $(hunspell_SOURCES) $(hunzip_SOURCES) $(hzip_SOURCES) \
+ $(munch_SOURCES) $(unmunch_SOURCES)
+DIST_SOURCES = $(analyze_SOURCES) $(chmorph_SOURCES) \
+ $(example_SOURCES) $(hunspell_SOURCES) $(hunzip_SOURCES) \
+ $(hzip_SOURCES) $(munch_SOURCES) $(unmunch_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
+CATOBJEXT = @CATOBJEXT@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAG_VISIBILITY = @CFLAG_VISIBILITY@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CURSESLIB = @CURSESLIB@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATADIRNAME = @DATADIRNAME@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GENCAT = @GENCAT@
+GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
+GLIBC2 = @GLIBC2@
+GLIBC21 = @GLIBC21@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GREP = @GREP@
+HAVE_ASPRINTF = @HAVE_ASPRINTF@
+HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
+HAVE_SNPRINTF = @HAVE_SNPRINTF@
+HAVE_VISIBILITY = @HAVE_VISIBILITY@
+HAVE_WPRINTF = @HAVE_WPRINTF@
+HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@
+HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INSTOBJEXT = @INSTOBJEXT@
+INTLBISON = @INTLBISON@
+INTLLIBS = @INTLLIBS@
+INTLOBJS = @INTLOBJS@
+INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBMULTITHREAD = @LIBMULTITHREAD@
+LIBOBJS = @LIBOBJS@
+LIBPTH = @LIBPTH@
+LIBPTH_PREFIX = @LIBPTH_PREFIX@
+LIBS = @LIBS@
+LIBTHREAD = @LIBTHREAD@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBC = @LTLIBC@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
+LTLIBOBJS = @LTLIBOBJS@
+LTLIBPTH = @LTLIBPTH@
+LTLIBTHREAD = @LTLIBTHREAD@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POSUB = @POSUB@
+PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
+RANLIB = @RANLIB@
+READLINELIB = @READLINELIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+WINDRES = @WINDRES@
+WOE32 = @WOE32@
+WOE32DLL = @WOE32DLL@
+XFAILED = @XFAILED@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -I${top_srcdir}/src/hunspell -I${top_srcdir}/src/parsers
+hzip_SOURCES = hzip.c
+hunzip_SOURCES = hunzip.cxx
+hunzip_LDADD = ../hunspell/libhunspell-1.3.la
+munch_SOURCES = munch.c munch.h
+unmunch_SOURCES = unmunch.c unmunch.h
+example_SOURCES = example.cxx
+example_LDADD = ../hunspell/libhunspell-1.3.la
+hunspell_SOURCES = hunspell.cxx
+hunspell_LDADD = @LIBINTL@ @LIBICONV@ ../parsers/libparsers.a \
+ ../hunspell/libhunspell-1.3.la @CURSESLIB@ @READLINELIB@
+
+analyze_SOURCES = analyze.cxx
+analyze_LDADD = ../hunspell/libhunspell-1.3.la
+chmorph_SOURCES = chmorph.cxx
+chmorph_LDADD = ../parsers/libparsers.a ../hunspell/libhunspell-1.3.la
+dist_bin_SCRIPTS = makealias affixcompress wordforms ispellaff2myspell wordlist2hunspell
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .cxx .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/tools/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/tools/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+analyze$(EXEEXT): $(analyze_OBJECTS) $(analyze_DEPENDENCIES)
+ @rm -f analyze$(EXEEXT)
+ $(CXXLINK) $(analyze_OBJECTS) $(analyze_LDADD) $(LIBS)
+chmorph$(EXEEXT): $(chmorph_OBJECTS) $(chmorph_DEPENDENCIES)
+ @rm -f chmorph$(EXEEXT)
+ $(CXXLINK) $(chmorph_OBJECTS) $(chmorph_LDADD) $(LIBS)
+example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES)
+ @rm -f example$(EXEEXT)
+ $(CXXLINK) $(example_OBJECTS) $(example_LDADD) $(LIBS)
+hunspell$(EXEEXT): $(hunspell_OBJECTS) $(hunspell_DEPENDENCIES)
+ @rm -f hunspell$(EXEEXT)
+ $(CXXLINK) $(hunspell_OBJECTS) $(hunspell_LDADD) $(LIBS)
+hunzip$(EXEEXT): $(hunzip_OBJECTS) $(hunzip_DEPENDENCIES)
+ @rm -f hunzip$(EXEEXT)
+ $(CXXLINK) $(hunzip_OBJECTS) $(hunzip_LDADD) $(LIBS)
+hzip$(EXEEXT): $(hzip_OBJECTS) $(hzip_DEPENDENCIES)
+ @rm -f hzip$(EXEEXT)
+ $(LINK) $(hzip_OBJECTS) $(hzip_LDADD) $(LIBS)
+munch$(EXEEXT): $(munch_OBJECTS) $(munch_DEPENDENCIES)
+ @rm -f munch$(EXEEXT)
+ $(LINK) $(munch_OBJECTS) $(munch_LDADD) $(LIBS)
+unmunch$(EXEEXT): $(unmunch_OBJECTS) $(unmunch_DEPENDENCIES)
+ @rm -f unmunch$(EXEEXT)
+ $(LINK) $(unmunch_OBJECTS) $(unmunch_LDADD) $(LIBS)
+install-dist_binSCRIPTS: $(dist_bin_SCRIPTS)
+ @$(NORMAL_INSTALL)
+ test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)"
+ @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || list=; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n' \
+ -e 'h;s|.*|.|' \
+ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) { files[d] = files[d] " " $$1; \
+ if (++n[d] == $(am__install_max)) { \
+ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
+ else { print "f", d "/" $$4, $$1 } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-dist_binSCRIPTS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(dist_bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 's,.*/,,;$(transform)'`; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/analyze.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chmorph.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/example.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunspell.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunzip.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hzip.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/munch.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unmunch.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $<
+
+.cxx.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cxx.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cxx.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(SCRIPTS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool \
+ clean-noinstPROGRAMS mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS install-dist_binSCRIPTS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-dist_binSCRIPTS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libtool clean-noinstPROGRAMS ctags \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-binPROGRAMS \
+ install-data install-data-am install-dist_binSCRIPTS \
+ install-dvi install-dvi-am install-exec install-exec-am \
+ install-html install-html-am install-info install-info-am \
+ install-man install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+ uninstall-am uninstall-binPROGRAMS uninstall-dist_binSCRIPTS
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/tools/affixcompress b/src/tools/affixcompress
new file mode 100755
index 0000000..9fc2989
--- /dev/null
+++ b/src/tools/affixcompress
@@ -0,0 +1,192 @@
+#!/bin/sh
+# affix compressor utility for Hunspell
+# 2008 (c) László Németh, version 0.3
+# usage: affixcompress sorted_word_list_file [max_affix_rules]
+#
+# Reads a sorted word list and writes two files next to it:
+#   <word_list>.aff  numbered suffix rules (FLAG num)
+#   <word_list>.dic  word count followed by the roots and their rule numbers
+#
+# Stages:
+#   1. count candidate suffixes over runs of words sharing their first
+#      two characters
+#   2. keep the max_affix_rules most frequent candidates
+#   3. pick a root for every word and write the .aff file
+#   4. merge the rule numbers of each root and write the .dic file
+#
+# Requires gawk (the awk programs use whole-array "delete").
+case $# in
+0) echo \
+"affixcompress - compress a huge sorted word list to Hunspell format
+Usage:
+
+LC_ALL=C sort word_list >sorted_word_list
+affixcompress sorted_word_list [max_affix_rules]
+
+Default value of max_affix_rules = 5000
+
+Note: output may need manually added affix parameters (SET character_encoding,
+TRY suggestion_characters etc., see man(4) hunspell)"
+    exit 0;;
+esac
+
+WORDLIST=$1
+MAXAFFIX=${2:-5000}
+
+# Fail early with a clear message instead of producing empty output files.
+if [ ! -r "$WORDLIST" ]; then
+    echo "affixcompress: cannot read word list: $WORDLIST" >&2
+    exit 1
+fi
+
+# head -n needs a plain non-negative integer.
+case $MAXAFFIX in
+''|*[!0-9]*)
+    echo "affixcompress: max_affix_rules must be a non-negative integer: $MAXAFFIX" >&2
+    exit 1;;
+esac
+
+# profiling
+#AWK="pgawk --profile"
+# $AWK is expanded unquoted on purpose so that it may carry options.
+AWK="gawk"
+
+# Private scratch directory, removed on exit, so that parallel runs do not
+# overwrite each other and nothing is left behind in the current directory.
+workdir=$(mktemp -d "${TMPDIR:-/tmp}/affixcompress.XXXXXX") || exit 1
+trap 'rm -rf "$workdir"' EXIT
+trap 'exit 1' HUP INT TERM
+
+rm -f "$WORDLIST.aff" "$WORDLIST.dic"
+
+# Stage 1: count candidate suffixes.
+# Output lines: "suffix 0 count" for plain suffixes and
+# "suffix letter count" for suffixes that replace the last letter of a stem
+# (tr turns the awk SUBSEP character of the two-part key into a space).
+$AWK '
+{
+    # calculate frequent suffixes
+    A[$1] = 1
+    len = length($1)
+    if (len > 2) {
+        B[substr($1, 1, len - 1)] = substr($1, len, 1);
+    }
+    for(i = 2; i < len; i++) {
+        r = substr($1, 1, i)
+        if (i == 2) {
+            # a new two-character prefix starts a new group of words
+            if (prev != r) {
+                delete A
+                delete B
+                print "Deleted roots: ", prev > "/dev/stderr"
+                A[$1] = 1
+            }
+            prev = r
+        }
+        if (A[r]) {
+            sfx[substr($1, i + 1, len - i + 1)]++
+        } else if (B[r] && B[r] != substr($1, i + 1, 1)) {
+            r2 = substr($1, i + 1, len - i + 1)
+            sfy[r2,B[r]]++
+        }
+    }
+}
+END {
+    for (i in sfx) print i, 0, sfx[i]
+    for (i in sfy) print i, sfy[i]
+}
+' <"$WORDLIST" | tr '\034' ' ' >"$workdir/suffixes"
+
+# Stage 2: keep the most frequent candidates.
+sort -rnk 3 "$workdir/suffixes" | $AWK '$3 >= 1{print $0}' |
+head -n "$MAXAFFIX" >"$workdir/frequent"
+
+# Stage 3: read the frequent suffixes from standard input ("-"), then the
+# word list itself; print "root [rule_number]" lines and write the .aff file.
+$AWK '
+function potential_roots() {
+    # potential roots with most frequent suffixes
+    for(word in W) if (W[word]==1) {
+        len = length(word);
+        for(i = 2; i < len; i++) {
+            root = substr(word, 1, i)
+            suff = substr(word, i + 1, len - i + 1)
+            if ((W[root]!="") && (sfxfr[suff] > 100)) C[root]++
+            if (sfz[suff]) {
+                l = split(sfz[suff], a)
+                # NOTE(review): sfyfr is filled with (suffix, letter) keys
+                # but is read here with a root-plus-letter key, so this
+                # frequency test looks like it can never pass - confirm.
+                for (k=1; k <= l; k++) if ((W[root a[k]]!="") && (sfyfr[root a[k]] > 100)) {
+                    C[root a[k]]++
+                }
+            }
+        }
+    }
+
+    # calculate roots
+    for(word in W) if (W[word]==1) {
+        len = length(word);
+        z = 0
+        # choose most frequent root (maybe the original word)
+        max = C[word]
+        maxword = word
+        maxsuff = 0
+        for(i = 2; i < len; i++) {
+            root = substr(word, 1, i)
+            suff = substr(word, i + 1, len - i + 1)
+            if ((sfx[suff] != "") && (C[root] > max)) {
+                max = C[root]
+                maxword = root
+                maxsuff = sfx[suff]
+            }
+            if (sfz[suff] != "") {
+                l = split(sfz[suff], a)
+                for (k=1; k <= l; k++) if (C[root a[k]] > max) {
+                    max = C[root a[k]]
+                    maxword = root a[k]
+                    maxsuff = sfy[suff,a[k]]
+                }
+            }
+        }
+        if (max > 0) {
+            if (maxsuff > 0) print maxword, maxsuff; else print maxword
+            A[maxword]++
+            z=1
+        } else {
+            for(i = 2; i < len; i++) {
+                root = substr(word, 1, i)
+                suff = substr(word, i + 1, len - i + 1)
+                if ((A[root] > 0) && sfx[suff]!="") {
+                    print root, sfx[suff]
+                    z = 1
+                    break
+                }
+                if (sfz[suff]) {
+                    l = split(sfz[suff], a)
+                    for (k=1; k <= l; k++) if (A[root a[k]]!="") {
+                        print root a[k], sfy[suff,a[k]]
+                        z = 1
+                        break
+                    }
+                }
+            }
+        }
+        if (z == 0) {
+            print word
+            A[word]++
+        }
+    }
+    delete A
+    delete C
+}
+FILENAME == "-" {
+    # suffix table from stage 2; the record number becomes the rule number
+    if ($2 == 0) {
+        sfx[$1] = NR
+        sfxfr[$1] = $3
+    } else {
+        sfy[$1,$2] = NR
+        sfyfr[$1,$2] = $3
+        sfz[$1] = sfz[$1] " " $2
+    }
+    maxsuf = NR
+    next
+}
+{
+    # words are processed in classes sharing their first three characters
+    cap = substr($1, 1, 3)
+    if (cap != prev) {
+        potential_roots()
+        delete W
+        print "Deleted class:", prev > "/dev/stderr"
+    }
+    prev = cap
+    W[$1] = 1
+}
+END {
+    potential_roots()
+    # write out frequent suffixes
+    out=FILENAME ".aff"
+    print "FLAG num" >out
+    for (i in sfx) if (sfx[i] > 0) {
+        print "SFX", sfx[i], "Y 1" >out
+        print "SFX", sfx[i], "0", i, "." >out
+    }
+    for (i in sfy) if (sfy[i] > 0) {
+        print "SFX", sfy[i], "Y 1" >out
+        split(i, c, "\034");
+        print "SFX", sfy[i], c[2], c[1], c[2] >out
+    }
+}
+' - "$WORDLIST" <"$workdir/frequent" >"$workdir/roots"
+
+# Stage 4: merge the rule numbers of identical roots and write the .dic file.
+# awk writes the count line itself and closes the file before emitting any
+# word, so the sorted words appended by the shell always follow the count.
+sort -nk 2 "$workdir/roots" | $AWK -v out="$WORDLIST.dic" '
+{
+    if (A[$1]=="") A[$1]=$2;
+    else if ($2!="") A[$1] = A[$1] "," $2
+}
+END {
+    for (i in A) n++
+    print n + 0 >out
+    close(out)
+    for (i in A) {
+        if (A[i]=="") print i
+        else print i "/" A[i]
+    }
+}
+' | sort >>"$WORDLIST.dic"
diff --git a/src/tools/analyze.cxx b/src/tools/analyze.cxx
new file mode 100644
index 0000000..03434fa
--- /dev/null
+++ b/src/tools/analyze.cxx
@@ -0,0 +1,79 @@
+
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include "hunspell.hxx"
+
+#ifndef WIN32
+using namespace std;
+#endif
+
+
+
+// Demo driver: reads the word file line by line. A line with two
+// space-separated words is passed to Hunspell::generate(); a single word is
+// spell-checked and, when accepted, analyzed and stemmed.
+// Usage: analyze affix_file dictionary_file file_of_words_to_check
+int main(int argc, char **argv)
+{
+    /* first parse the command line options */
+
+    // all three file arguments are required (argv[3] is opened below)
+    if (argc < 4) {
+        fprintf(stderr, "correct syntax is:\nanalyze affix_file");
+        fprintf(stderr, " dictionary_file file_of_words_to_check\n");
+        fprintf(stderr, "use two words per line for morphological generation\n");
+        exit(1);
+    }
+
+    /* open the words to check list */
+
+    FILE *wtclst = fopen(argv[3], "r");
+    if (!wtclst) {
+        fprintf(stderr, "Error - could not open file to check\n");
+        exit(1);
+    }
+
+    Hunspell *pMS = new Hunspell(argv[1], argv[2]);
+    char buf[101];
+    while (fgets(buf, 100, wtclst)) {
+        // strip the line terminator only when it is really there, so an
+        // unterminated last line keeps its final character
+        size_t len = strlen(buf);
+        if (len > 0 && buf[len - 1] == '\n') buf[len - 1] = '\0';
+        if (*buf == '\0') continue;
+        // morphgen demo
+        char * s = strchr(buf, ' ');
+        if (s) {
+            *s = '\0';
+            char ** result;
+            int n = pMS->generate(&result, buf, s+1);
+            for (int i = 0; i < n; i++) {
+                fprintf(stdout, "generate(%s, %s) = %s\n", buf, s+1, result[i]);
+            }
+            pMS->free_list(&result, n);
+            if (n == 0) fprintf(stdout, "generate(%s, %s) = NO DATA\n", buf, s+1);
+        } else {
+            int dp = pMS->spell(buf);
+            fprintf(stdout, "> %s\n", buf);
+            if (dp) {
+                char ** result;
+                int n = pMS->analyze(&result, buf);
+                for (int i = 0; i < n; i++) {
+                    fprintf(stdout, "analyze(%s) = %s\n", buf, result[i]);
+                }
+                pMS->free_list(&result, n);
+                n = pMS->stem(&result, buf);
+                for (int i = 0; i < n; i++) {
+                    fprintf(stdout, "stem(%s) = %s\n", buf, result[i]);
+                }
+                pMS->free_list(&result, n);
+            } else {
+                fprintf(stdout, "Unknown word.\n");
+            }
+        }
+    }
+    delete pMS;
+    fclose(wtclst);
+    return 0;
+}
diff --git a/src/tools/chmorph.cxx b/src/tools/chmorph.cxx
new file mode 100644
index 0000000..0faa8f0
--- /dev/null
+++ b/src/tools/chmorph.cxx
@@ -0,0 +1,86 @@
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include "hunspell.hxx"
+#include "textparser.hxx"
+
+#ifndef W32
+using namespace std;
+#endif
+
+// chmorph: for every token of the input file, take its morphological
+// analyses, replace STRING1 (argv[4]) by STRING2 (argv[5]) in them and, when
+// a word can be generated from the changed analyses, substitute it in the
+// line. Each processed line is printed to standard output.
+int
+main(int argc, char** argv)
+{
+    /* first parse the command line options */
+
+    if (argc < 6) {
+        fprintf(stderr,
+        "chmorph - change affixes by morphological analysis and generation\n"
+        "correct syntax is:\nchmorph affix_file "
+        "dictionary_file file_to_convert STRING1 STRING2\n"
+        "STRINGS may be arbitrary parts of the morphological descriptions\n"
+        "example: chmorph hu.aff hu.dic hu.txt SG_2 SG_3 "
+        " (convert informal Hungarian second person texts to formal third person texts)\n");
+        exit(1);
+    }
+
+    /* open the words to check list */
+
+    FILE * f = fopen(argv[3], "r");
+    if (!f) {
+        fprintf(stderr, "Error - could not open file to check\n");
+        exit(1);
+    }
+
+    Hunspell *pMS = new Hunspell(argv[1], argv[2]);
+    TextParser * p = new TextParser("qwertzuiopasdfghjklyxcvbnméáúõûóüöíQWERTZUIOPASDFGHJKLYXCVBNMÍÉÁÕÚÖÜÓÛ");
+
+    const size_t fromlen = strlen(argv[4]);
+    const size_t tolen = strlen(argv[5]);
+    char buf[MAXLNLEN];
+    char * next;
+
+    while (fgets(buf, MAXLNLEN, f)) {
+        p->put_line(buf);
+        while ((next = p->next_token())) {
+            char ** pl;
+            int pln = pMS->analyze(&pl, next);
+            if (pln) {
+                int gen = 0;
+                for (int i = 0; i < pln; i++) {
+                    char *pos = strstr(pl[i], argv[4]);
+                    if (!pos) continue;
+                    // rebuild the analysis as head + STRING2 + tail
+                    size_t headlen = pos - pl[i];
+                    size_t taillen = strlen(pos + fromlen);
+                    char * r = (char *) malloc(headlen + tolen + taillen + 1);
+                    if (!r) {
+                        fprintf(stderr, "Error - out of memory\n");
+                        exit(1);
+                    }
+                    memcpy(r, pl[i], headlen);
+                    memcpy(r + headlen, argv[5], tolen);
+                    memcpy(r + headlen + tolen, pos + fromlen, taillen + 1);
+                    free(pl[i]);
+                    pl[i] = r;
+                    gen = 1;
+                }
+                if (gen) {
+                    char **pl2;
+                    int pl2n = pMS->generate(&pl2, next, pl, pln);
+                    if (pl2n) {
+                        p->change_token(pl2[0]);
+                        pMS->free_list(&pl2, pl2n);
+                        // jump over the (possibly un)modified word
+                        free(next);
+                        next = p->next_token();
+                    }
+                }
+                pMS->free_list(&pl, pln);
+            }
+            free(next);
+        }
+        fprintf(stdout, "%s\n", p->get_line());
+    }
+
+    delete p;
+    delete pMS;   // was leaked before
+    fclose(f);
+    return 0;
+}
diff --git a/src/tools/example.cxx b/src/tools/example.cxx
new file mode 100644
index 0000000..093a038
--- /dev/null
+++ b/src/tools/example.cxx
@@ -0,0 +1,65 @@
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include "hunspell.hxx"
+
+extern char * mystrdup(const char * s);
+
+using namespace std;
+
+// Example client of the Hunspell class: checks every line of the word file
+// and prints either an "okay" message or the list of suggestions.
+// Usage: example affix_file dictionary_file(s) file_of_words_to_check
+int
+main(int argc, char** argv)
+{
+    /* first parse the command line options */
+
+    if (argc < 4) {
+        fprintf(stderr,"example (now it works with more dictionary files):\n");
+        fprintf(stderr,"example affix_file dictionary_file(s) file_of_words_to_check\n");
+        exit(1);
+    }
+
+    /* open the words to check list */
+    FILE* wtclst = fopen(argv[argc - 1],"r");
+    if (!wtclst) {
+        fprintf(stderr,"Error - could not open file of words to check\n");
+        exit(1);
+    }
+
+    char buf[101];
+
+    Hunspell * pMS= new Hunspell(argv[1], argv[2]);
+
+    // load extra dictionaries (everything between the first dictionary and
+    // the word file)
+    for (int k = 3; k < argc - 1; k++) pMS->add_dic(argv[k]);
+
+    while(fgets(buf, 100, wtclst)) {
+        // remove the newline only when present: the old unconditional chop
+        // ate the last letter of an unterminated final line and wrote to
+        // buf[-1] for an empty buffer
+        size_t len = strlen(buf);
+        if (len > 0 && buf[len - 1] == '\n') buf[len - 1] = '\0';
+        int dp = pMS->spell(buf);
+        if (dp) {
+            fprintf(stdout,"\"%s\" is okay\n",buf);
+            fprintf(stdout,"\n");
+        } else {
+            fprintf(stdout,"\"%s\" is incorrect!\n",buf);
+            fprintf(stdout," suggestions:\n");
+            char ** wlst;
+            int ns = pMS->suggest(&wlst,buf);
+            for (int i=0; i < ns; i++) {
+                fprintf(stdout," ...\"%s\"\n",wlst[i]);
+            }
+            pMS->free_list(&wlst, ns);
+            fprintf(stdout,"\n");
+        }
+    }
+
+    delete pMS;
+    fclose(wtclst);
+    return 0;
+}
+
diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx
new file mode 100644
index 0000000..07ad6bb
--- /dev/null
+++ b/src/tools/hunspell.cxx
@@ -0,0 +1,1785 @@
+// glibc < 3.0 (for mkstemp)
+#ifndef __USE_MISC
+#define __USE_MISC
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "config.h"
+#include "hunspell.hxx"
+#include "csutil.hxx"
+
+#ifndef HUNSPELL_EXTRA
+#define suggest_auto suggest
+#endif
+
+#define HUNSPELL_VERSION VERSION
+#define INPUTLEN 50
+
+// Ispell-style version banner. The blanks around VERSION are required:
+// since C++11 a string literal directly followed by an identifier is parsed
+// as a user-defined literal suffix instead of a macro to be expanded.
+#define HUNSPELL_PIPE_HEADING "@(#) International Ispell Version 3.2.06 (but really Hunspell " VERSION ")\n"
+#define HUNSPELL_HEADING "Hunspell "
+
+//for debugging only
+//#define LOG
+
+#define DEFAULTDICNAME "default"
+
+#ifdef WIN32
+
+#define LIBDIR "C:\\Hunspell\\"
+#define USEROOODIR "Application Data\\OpenOffice.org 2\\user\\wordbook"
+#define OOODIR \
+ "C:\\Program files\\OpenOffice.org 2.4\\share\\dict\\ooo\\;" \
+ "C:\\Program files\\OpenOffice.org 2.3\\share\\dict\\ooo\\;" \
+ "C:\\Program files\\OpenOffice.org 2.2\\share\\dict\\ooo\\;" \
+ "C:\\Program files\\OpenOffice.org 2.1\\share\\dict\\ooo\\;" \
+ "C:\\Program files\\OpenOffice.org 2.0\\share\\dict\\ooo\\"
+#define HOME "%USERPROFILE%\\"
+#define DICBASENAME "hunspell_"
+#define LOGFILE "C:\\Hunspell\\log"
+#define DIRSEPCH '\\'
+#define DIRSEP "\\"
+#define PATHSEP ";"
+
+#include "textparser.hxx"
+#include "htmlparser.hxx"
+#include "latexparser.hxx"
+#include "manparser.hxx"
+#include "firstparser.hxx"
+
+#else
+
+// Not Windows
+#include <sys/types.h>
+#include <dirent.h>
+#include <unistd.h>
+#include "textparser.hxx"
+#include "htmlparser.hxx"
+#include "latexparser.hxx"
+#include "manparser.hxx"
+#include "firstparser.hxx"
+
+#define LIBDIR \
+ "/usr/share/hunspell:" \
+ "/usr/share/myspell:" \
+ "/usr/share/myspell/dicts:" \
+ "/Library/Spelling"
+#define USEROOODIR \
+ ".openoffice.org/3/user/wordbook:" \
+ ".openoffice.org2/user/wordbook:" \
+ ".openoffice.org2.0/user/wordbook:" \
+ "Library/Spelling"
+#define OOODIR \
+ "/opt/openoffice.org/basis3.0/share/dict/ooo:" \
+ "/usr/lib/openoffice.org/basis3.0/share/dict/ooo:" \
+ "/opt/openoffice.org2.4/share/dict/ooo:" \
+ "/usr/lib/openoffice.org2.4/share/dict/ooo:" \
+ "/opt/openoffice.org2.3/share/dict/ooo:" \
+ "/usr/lib/openoffice.org2.3/share/dict/ooo:" \
+ "/opt/openoffice.org2.2/share/dict/ooo:" \
+ "/usr/lib/openoffice.org2.2/share/dict/ooo:" \
+ "/opt/openoffice.org2.1/share/dict/ooo:" \
+ "/usr/lib/openoffice.org2.1/share/dict/ooo:" \
+ "/opt/openoffice.org2.0/share/dict/ooo:" \
+ "/usr/lib/openoffice.org2.0/share/dict/ooo"
+#define HOME getenv("HOME")
+#define DICBASENAME ".hunspell_"
+#define LOGFILE "/tmp/hunspell.log"
+#define DIRSEPCH '/'
+#define DIRSEP "/"
+#define PATHSEP ":"
+#endif
+
+#ifdef HAVE_ICONV
+#include <iconv.h>
+char text_conv[MAXLNLEN];
+#endif
+
+#if ENABLE_NLS
+# ifdef HAVE_LOCALE_H
+# include <locale.h>
+# ifdef HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+# endif
+# endif
+# ifdef HAVE_LIBINTL_H
+# include <libintl.h>
+# else
+# include <../../intl/libintl.h>
+# endif
+#else
+# define gettext
+# undef HAVE_LOCALE_H
+# undef HAVE_LIBINTL_H
+#endif
+
+#ifdef HAVE_CURSES_H
+#ifdef HAVE_NCURSESW_H
+#include <ncurses.h>
+#else
+#include <curses.h>
+#endif
+#endif
+
+#ifdef HAVE_READLINE
+#include <readline/readline.h>
+#else
+#define readline scanline
+#endif
+
+#define TEMPNAME "hunSPELL.bak"
+
+extern char * mystrdup(const char * s);
+
+// file formats:
+
+enum { FMT_TEXT, FMT_LATEX, FMT_HTML, FMT_MAN, FMT_FIRST };
+
+// Node of a singly linked list of words waiting to be written to the
+// private dictionary. New nodes are put in front of the list; save_privdic()
+// releases both the word and the node with free().
+struct wordlist {
+ char * word; // heap-allocated word text
+ wordlist * next; // following node, NULL at the end of the list
+};
+
+// global variables
+
+char * wordchars = NULL;
+char * dicpath = NULL;
+int wordchars_len;
+unsigned short * wordchars_utf16 = NULL;
+int wordchars_utf16_free = 0;
+int wordchars_utf16_len;
+char * dicname = NULL;
+char * privdicname = NULL;
+const char * currentfilename = NULL;
+
+int modified; // modified file sign
+enum { NORMAL,
+ BADWORD, // print only bad words
+ WORDFILTER, // print only bad words from 1 word/line input
+ BADLINE, // print only lines with bad words
+ STEM, // stem input words
+ ANALYZE, // analyze input words
+ PIPE, // print only stars for LyX compatibility
+ AUTO0, // search typical error (based on SuggestMgr::suggest_auto())
+ AUTO, // automatic spelling to standard output
+ AUTO2, // automatic spelling to standard output with sed log
+ AUTO3 }; // automatic spelling to standard output with gcc error format
+int filter_mode = NORMAL;
+int printgood = 0; // print only good words and lines
+int showpath = 0; // show detected path of the dictionary
+int checkurl = 0; // check URLs and mail addresses
+int warn = 0; // warn potential mistakes (dictionary words with WARN flags)
+const char * ui_enc = NULL; // locale character encoding (default for I/O)
+const char * io_enc = NULL; // I/O character encoding
+
+#define DMAX 10 // maximal count of loaded dictionaries
+
+const char * dic_enc[DMAX]; // dictionary encoding
+char * path = NULL;
+int dmax = 0; // dictionary count
+
+// functions
+
+#ifdef HAVE_ICONV
+// Translate the encoding name "TIS620-2533" to "TIS620" before it is handed
+// to iconv_open(); every other name is returned unchanged.
+static const char* fix_encoding_name(const char *enc)
+{
+    return (strcmp(enc, "TIS620-2533") == 0) ? "TIS620" : enc;
+}
+#endif
+
+/* change character encoding
+ *
+ * Converts st from encoding enc1 to encoding enc2 with iconv.
+ * Returns st itself when no conversion is needed or possible (iconv not
+ * compiled in, a NULL argument, identical encoding names, iconv failure);
+ * otherwise returns the shared static buffer text_conv, which is
+ * overwritten by the next successful conversion.
+ */
+char * chenc(char * st, const char * enc1, const char * enc2) {
+    char * out = st;
+#ifdef HAVE_ICONV
+    if (st && enc1 && enc2 && strcmp(enc1, enc2) != 0) {
+
+        // the terminating NUL is converted too, so the result is terminated
+        size_t c1 = strlen(st) + 1;
+        size_t c2 = MAXLNLEN;
+        char * source = st;
+        char * dest = text_conv;
+        iconv_t conv = iconv_open(fix_encoding_name(enc2), fix_encoding_name(enc1));
+        if (conv == (iconv_t) -1) {
+            // report the encodings in the direction of the conversion
+            fprintf(stderr, gettext("error - iconv_open: %s -> %s\n"), enc1, enc2);
+        } else {
+            size_t res = iconv(conv, (ICONV_CONST char **) &source, &c1, &dest, &c2);
+            iconv_close(conv);
+            if (res != (size_t) -1) out = text_conv;
+        }
+    }
+#endif
+    return out;
+}
+
+// Create the tokenizer for the input.
+// format: one of the FMT_* constants; FMT_TEXT has no case below, so for it
+// the parser type is chosen from `extension` (may be NULL) and
+// falls back to a plain TextParser.
+// pMS: dictionary whose encoding and WORDCHARS are used.
+// Side effects: sets the globals io_enc, wordchars, wordchars_utf16,
+// wordchars_utf16_len and wordchars_utf16_free.
+// Returns a parser allocated with new; callers in this file delete it.
+// NOTE(review): memory allocated for wordchars/wordchars_utf16 by an earlier
+// call does not appear to be released when the function is called again.
+TextParser * get_parser(int format, char * extension, Hunspell * pMS) {
+ TextParser * p = NULL;
+ int io_utf8 = 0;
+ char * denc = pMS->get_dic_encoding();
+#ifdef HAVE_ICONV
+ initialize_utf_tbl(); // also need for 8-bit tokenization
+ // choose the I/O encoding: explicit io_enc, else locale (ui_enc), else the
+ // dictionary encoding
+ if (io_enc) {
+ if ((strcmp(io_enc, "UTF-8") == 0) ||
+ (strcmp(io_enc, "utf-8") == 0) ||
+ (strcmp(io_enc, "UTF8") == 0) ||
+ (strcmp(io_enc, "utf8") == 0)) {
+ io_utf8 = 1;
+ io_enc = "UTF-8";
+ }
+ } else if (ui_enc) {
+ io_enc = ui_enc;
+ if (strcmp(ui_enc, "UTF-8") == 0) io_utf8 = 1;
+ } else {
+ io_enc = denc;
+ if (strcmp(denc, "UTF-8") == 0) io_utf8 = 1;
+ }
+
+ if (io_utf8) {
+ wordchars_utf16 = pMS->get_wordchars_utf16(&wordchars_utf16_len);
+ // UTF-8 I/O with an 8-bit dictionary: convert the 8-bit WORDCHARS to a
+ // sorted UTF-16 table
+ if ((strcmp(denc, "UTF-8") != 0) && pMS->get_wordchars()) {
+ char * wchars = (char *) pMS->get_wordchars();
+ int wlen = strlen(wchars);
+ size_t c1 = wlen;
+ size_t c2 = MAXLNLEN;
+ char * dest = text_conv;
+ iconv_t conv = iconv_open("UTF-8", fix_encoding_name(denc));
+ if (conv == (iconv_t) -1) {
+ // NOTE(review): the message reads "UTF-8 -> denc" although the
+ // conversion opened above goes from denc to UTF-8
+ fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s\n"), denc);
+ wordchars_utf16 = NULL;
+ wordchars_utf16_len = 0;
+ } else {
+ // NOTE(review): c1 excludes the terminating NUL, so text_conv is only
+ // terminated if it already held a NUL at that position - confirm
+ iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2);
+ iconv_close(conv);
+ wordchars_utf16 = (unsigned short *) malloc(sizeof(unsigned short) * wlen);
+ int n = u8_u16((w_char *) wordchars_utf16, wlen, text_conv);
+ if (n > 0) flag_qsort(wordchars_utf16, 0, n);
+ wordchars_utf16_len = n;
+ wordchars_utf16_free = 1;
+ }
+ }
+ } else {
+ // 8-bit input encoding
+ // detect letters by unicodeisalpha() for tokenization
+ char letters[MAXLNLEN];
+ char * pletters = letters;
+ char ch[2];
+ char u8[10];
+ *pletters = '\0';
+ iconv_t conv = iconv_open("UTF-8", fix_encoding_name(io_enc));
+ if (conv == (iconv_t) -1) {
+ fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s\n"), io_enc);
+ } else {
+ // convert each byte value 32..255 to UTF-8 and keep the ones that are
+ // alphabetic according to unicodeisalpha()
+ for (int i = 32; i < 256; i++) {
+ size_t c1 = 1;
+ size_t c2 = 10;
+ char * dest = u8;
+ u8[0] = '\0';
+ char * ch8bit = ch;
+ ch[0] = (char) i;
+ ch[1] = '\0';
+ size_t res = iconv(conv, (ICONV_CONST char **) &ch8bit, &c1, &dest, &c2);
+ if (res != (size_t) -1) {
+ unsigned short idx;
+ w_char w;
+ w.l = 0;
+ w.h = 0;
+ u8_u16(&w, 1, u8);
+ idx = (w.h << 8) + w.l;
+ if (unicodeisalpha(idx)) {
+ *pletters = (char) i;
+ pletters++;
+ }
+ }
+ }
+ iconv_close(conv);
+ }
+ *pletters = '\0';
+
+ // UTF-8 wordchars -> 8 bit wordchars
+ int len = 0;
+ char * wchars = (char *) pMS->get_wordchars();
+ if (wchars) {
+ // NOTE(review): for a UTF-8 dictionary len is the count returned by
+ // get_wordchars_utf16(), which need not equal the byte length of wchars
+ // used as the iconv input size below - confirm
+ if ((strcmp(denc, "UTF-8")==0)) {
+ pMS->get_wordchars_utf16(&len);
+ } else {
+ len = strlen(wchars);
+ }
+ char * dest = letters + strlen(letters); // append wordchars
+ size_t c1 = len + 1;
+ size_t c2 = len + 1;
+ iconv_t conv = iconv_open(fix_encoding_name(io_enc), fix_encoding_name(denc));
+ if (conv == (iconv_t) -1) {
+ fprintf(stderr, gettext("error - iconv_open: %s -> %s\n"), io_enc, denc);
+ } else {
+ iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2);
+ iconv_close(conv);
+ *dest = '\0';
+ }
+ }
+ if (*letters) wordchars = mystrdup(letters);
+ }
+#else
+ // without iconv the I/O encoding is always the dictionary encoding
+ if (strcmp(denc, "UTF-8") == 0) {
+ wordchars_utf16 = pMS->get_wordchars_utf16(&wordchars_utf16_len);
+ io_utf8 = 1;
+ } else {
+ // 8-bit dictionary: word characters are the result of get_casechars()
+ // followed by the WORDCHARS of the dictionary
+ char * casechars = get_casechars(denc);
+ wordchars = (char *) pMS->get_wordchars();
+ if (casechars && wordchars) {
+ casechars = (char *) realloc(casechars, strlen(casechars) + strlen(wordchars) + 1);
+ strcat(casechars, wordchars);
+ }
+ wordchars = casechars;
+ }
+ io_enc = denc;
+#endif
+
+ // parser selected explicitly by format (no case for FMT_TEXT: p stays NULL)
+ if (io_utf8) {
+ switch (format) {
+ case FMT_LATEX: p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len); break;
+ case FMT_HTML: p = new HTMLParser(wordchars_utf16, wordchars_utf16_len); break;
+ case FMT_MAN: p = new ManParser(wordchars_utf16, wordchars_utf16_len); break;
+ case FMT_FIRST: p = new FirstParser(wordchars);
+ }
+ } else {
+ switch (format) {
+ case FMT_LATEX: p = new LaTeXParser(wordchars); break;
+ case FMT_HTML: p = new HTMLParser(wordchars); break;
+ case FMT_MAN: p = new ManParser(wordchars); break;
+ case FMT_FIRST: p = new FirstParser(wordchars);
+ }
+ }
+
+ // otherwise guess from the file name extension: html/htm/xml -> HTML,
+ // first character '1'..'9' -> man page, tex -> LaTeX
+ if ((!p) && (extension)) {
+ if ((strcmp(extension, "html") == 0) ||
+ (strcmp(extension, "htm") == 0) ||
+ (strcmp(extension, "xml") == 0)) {
+ if (io_utf8) {
+ p = new HTMLParser(wordchars_utf16, wordchars_utf16_len);
+ } else {
+ p = new HTMLParser(wordchars);
+ }
+ } else if (((extension[0] > '0') && (extension[0] <= '9'))) {
+ if (io_utf8) {
+ p = new ManParser(wordchars_utf16, wordchars_utf16_len);
+ } else {
+ p = new ManParser(wordchars);
+ }
+ } else if ((strcmp(extension, "tex") == 0)) {
+ if (io_utf8) {
+ p = new LaTeXParser(wordchars_utf16, wordchars_utf16_len);
+ } else {
+ p = new LaTeXParser(wordchars);
+ }
+ }
+ }
+ // default: plain text
+ if (!p) {
+ if (io_utf8) {
+ p = new TextParser(wordchars_utf16, wordchars_utf16_len);
+ } else {
+ p = new TextParser(wordchars);
+ }
+ }
+ p->set_url_checking(checkurl);
+ return p;
+}
+
+
+#ifdef LOG
+// Debug helper: append `message` and a newline to LOGFILE. When the file
+// cannot be opened a short notice is written to stderr instead.
+void log(char * message)
+{
+    FILE *logfile = fopen(LOGFILE,"a");
+    if (!logfile) {
+        fprintf(stderr,"Logfile...");
+        return;
+    }
+    fprintf(logfile,"%s\n",message);
+    fclose(logfile);
+}
+#endif
+
+// Apply one private dictionary entry to the run-time dictionary pMS.
+// Accepted forms (after conversion from ui_enc to the encoding of the first
+// dictionary):
+//   word          add the word
+//   *word         remove the word
+//   word/pattern  add the word with the affixes of the pattern word
+//   word//pattern same, older notation
+// Returns the result of the Hunspell call; an empty entry is ignored and
+// yields 0.
+int putdic(char * word, Hunspell * pMS)
+{
+    char * w;
+
+    word = chenc(word, ui_enc, dic_enc[0]);
+
+    // nothing to store; also keeps the search below, which starts at
+    // word + 1, from running past the terminator
+    if (*word == '\0') return 0;
+
+    // the slash is searched from the second character on, so a leading
+    // slash is treated as part of the word
+    if (((w = strstr(word + 1, "/")) == NULL)) {
+        if (*word == '*') return pMS->remove(word + 1);
+        else return pMS->add(word);
+    } else {
+        char c;
+        int ret;
+        // cut the entry at the slash for the call, then restore it
+        c = *w;
+        *w = '\0';
+        if (*(w+1) == '/') {
+            ret = pMS->add_with_affix(word, w + 2); // word//pattern (back comp.)
+        } else {
+            ret = pMS->add_with_affix(word, w + 1); // word/pattern
+        }
+        *w = c;
+        return ret;
+    }
+}
+
+// Read the private dictionary `filename` and pass every non-empty line to
+// putdic(). A file that cannot be opened is silently ignored.
+void load_privdic(char * filename, Hunspell * pMS)
+{
+    char buf[MAXLNLEN];
+    FILE *dic = fopen(filename,"r");
+    if (dic) {
+        while(fgets(buf,MAXLNLEN,dic)) {
+            // drop the trailing newline; the length is checked first so an
+            // empty buffer never leads to an access before buf[0]
+            size_t len = strlen(buf);
+            if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0';
+            // blank lines carry no entry
+            if (len == 0) continue;
+            putdic(buf,pMS);
+        }
+        fclose(dic);
+    }
+}
+
+// Return 1 when `filename` can be opened for reading, 0 otherwise.
+int exist(char * filename)
+{
+    FILE *probe = fopen(filename,"r");
+    if (!probe) return 0;
+    fclose(probe);
+    return 1;
+}
+
+// Append the words of list `w` to a private dictionary file and release the
+// list. `filename` is used when it can already be opened for reading,
+// otherwise the words go to `filename2`.
+// Returns 1 on success; returns 0, leaving the list untouched, when the
+// chosen file cannot be opened for appending.
+int save_privdic(char * filename, char * filename2, wordlist * w)
+{
+    FILE *dic = fopen(filename,"r");
+    char * target = filename2;
+    if (dic) {
+        fclose(dic);
+        target = filename;
+    }
+    dic = fopen(target,"a");
+    if (! dic) return 0;
+    while (w != NULL) {
+        wordlist * following = w->next;
+        // words are written in the locale encoding
+        char *converted = chenc(w->word, io_enc, ui_enc);
+        fprintf(dic,"%s\n",converted);
+#ifdef LOG
+        log(converted);log("\n");
+#endif
+        free(w->word);
+        free(w);
+        w = following;
+    }
+    fclose(dic);
+    return 1;
+}
+
+// Return a pointer to the part of s that follows the last occurrence of
+// the separator c, or s itself when c does not occur in it.
+char * basename(char * s, char c) {
+    char * cursor = s + strlen(s);
+    // walk backwards from the terminator to the last separator
+    for (; (cursor != s) && (*cursor != c); --cursor) {}
+    return (*cursor == c) ? cursor + 1 : cursor;
+}
+
+#ifdef HAVE_CURSES_H
+// Curses replacement for readline(): show `message`, read one echoed line
+// from the terminal and return a copy of it made with mystrdup().
+char * scanline(char * message) {
+    char input[INPUTLEN];
+    input[0] = '\0';              // defined result even if reading fails
+    printw("%s", message);        // the prompt is data, not a format string
+    echo();
+    getnstr(input, INPUTLEN - 1); // keep one byte for the terminator
+    noecho();
+    return mystrdup(input);
+}
+#endif
+
+// check words in the dictionaries (and set first checked dictionary)
+//
+// Tries the dmax loaded dictionaries in turn, starting with pMS[*d] and
+// wrapping around. Returns 1 as soon as one of them accepts the token
+// (leaving *d at that dictionary), 0 when none does. With the global `warn`
+// set, a word accepted with the SPELL_WARN flag counts as rejected.
+// info and root may be NULL.
+int check(Hunspell ** pMS, int * d, char * token, int * info, char ** root) {
+    // several callers pass NULL for info; the warn test below still needs
+    // the flags, so collect them in a local in that case
+    int local_info = 0;
+    if (!info) info = &local_info;
+    for (int i = 0; i < dmax; i++) {
+        if (pMS[*d]->spell(chenc(token, io_enc, dic_enc[*d]), info, root) && !(warn && (*info & SPELL_WARN))) {
+            return 1;
+        }
+        if (++(*d) == dmax) *d = 0;
+    }
+    return 0;
+}
+
+// Non-interactive interface: reads `fileid` line by line, tokenizes each
+// line with the parser for `format` and reports according to the global
+// filter_mode (bad words, bad lines, stems, analyses, automatic correction,
+// or the Ispell-like NORMAL/PIPE output). In PIPE mode the first character
+// of a line may be a command:
+//   %  leave terse and verbose mode     !  terse mode      `  verbose mode
+//   +  switch to the LaTeX parser       -  back to the parser for `format`
+//   @  accept word for this session     *  accept word and queue it for saving
+//   #  save the queued words to the private dictionary
+//   ^  check the rest of the line
+// Any other line is checked as a whole.
+void pipe_interface(Hunspell ** pMS, int format, FILE * fileid) {
+    char buf[MAXLNLEN];
+    char * buf2;
+    wordlist * dicwords = NULL;
+    char * token;
+    int pos;
+    int bad;
+    int lineno = 0;
+    int terse_mode = 0;
+    int verbose_mode = 0;
+    int d = 0;
+
+    TextParser * parser = get_parser(format, NULL, pMS[0]);
+
+    if ((filter_mode == NORMAL)) {
+        fprintf(stdout,gettext(HUNSPELL_HEADING));
+        fprintf(stdout,HUNSPELL_VERSION);
+        if (pMS[0]->get_version()) fprintf(stdout," - %s", pMS[0]->get_version());
+        fprintf(stdout,"\n");
+        fflush(stdout);
+    }
+
+nextline: while(fgets(buf, MAXLNLEN, fileid)) {
+        // strip the newline; the length check avoids touching buf[-1]
+        size_t buflen = strlen(buf);
+        if (buflen > 0 && buf[buflen - 1] == '\n') buf[buflen - 1] = '\0';
+        lineno++;
+#ifdef LOG
+        log(buf);
+#endif
+        bad = 0;
+        pos = 0;
+
+        // execute commands
+        if (filter_mode == PIPE) {
+            pos = -1;   // command lines are not spell checked
+            switch (buf[0]) {
+            case '%': { verbose_mode = terse_mode = 0; break; }
+            case '!': { terse_mode = 1; break; }
+            case '`': { verbose_mode = 1; break; }
+            case '+': {
+                delete parser;
+                parser = get_parser(FMT_LATEX, NULL, pMS[0]);
+                parser->set_url_checking(checkurl);
+                break;
+            }
+            case '-': {
+                delete parser;
+                parser = get_parser(format, NULL, pMS[0]);
+                break;
+            }
+            case '@': { putdic(buf+1, pMS[d]); break; }
+            case '*': {
+                struct wordlist* i =
+                    (struct wordlist *) malloc (sizeof(struct wordlist));
+                if (i) {
+                    i->word = mystrdup(buf+1);
+                    i->next = dicwords;
+                    dicwords = i;
+                }
+                putdic(buf+1, pMS[d]);
+                break;
+            }
+            case '#': {
+                // buf is reused to build the path of the private dictionary
+                if (HOME) strcpy(buf,HOME); else {
+                    fprintf(stderr, gettext("error - missing HOME variable\n"));
+                    continue;
+                }
+#ifndef WIN32
+                strcat(buf,"/");
+#endif
+                // buf2: file name without the home directory part
+                buf2 = buf+strlen(buf);
+                if (!privdicname) {
+                    strcat(buf,DICBASENAME);
+                    strcat(buf,basename(dicname,DIRSEPCH));
+                } else {
+                    strcat(buf,privdicname);
+                }
+                if (save_privdic(buf2, buf, dicwords)) {
+                    dicwords=NULL;
+                }
+                break;
+            }
+            case '^': {
+                // check the text after the command character; without the
+                // break this fell through to default and pos became 0
+                pos = 1;
+                break;
+            }
+
+            default: {
+                pos = 0;
+                break;
+            }
+
+            } // end switch
+        } // end filter_mode == PIPE
+
+        if (pos >= 0) {
+            parser->put_line(buf + pos);
+            while ((token = parser->next_token())) {
+                switch (filter_mode) {
+
+                case BADWORD: {
+                    if (!check(pMS, &d, token, NULL, NULL)) {
+                        bad = 1;
+                        if (! printgood) fprintf(stdout,"%s\n", token);
+                    } else {
+                        if (printgood) fprintf(stdout,"%s\n", token);
+                    }
+                    free(token);
+                    continue;
+                }
+
+                case WORDFILTER: {
+                    // one word per line: only the first token decides
+                    if (!check(pMS, &d, token, NULL, NULL)) {
+                        bad = 1;
+                        if (! printgood) fprintf(stdout,"%s\n", buf);
+                    } else {
+                        if (printgood) fprintf(stdout,"%s\n", buf);
+                    }
+                    free(token);
+                    goto nextline;
+                }
+
+                case BADLINE: {
+                    if (!check(pMS, &d, token, NULL, NULL)) {
+                        bad = 1;
+                    }
+                    free(token);
+                    continue;
+                }
+
+                case AUTO0:
+                case AUTO:
+                case AUTO2:
+                case AUTO3: {
+                    FILE * f = (filter_mode == AUTO) ? stderr : stdout;
+                    if (!check(pMS, &d, token, NULL, NULL)) {
+                        char ** wlst = NULL;
+                        bad = 1;
+                        int ns = pMS[d]->suggest_auto(&wlst, chenc(token, io_enc, dic_enc[d]));
+                        if (ns > 0) {
+                            parser->change_token(chenc(wlst[0], dic_enc[d], io_enc));
+                            if (filter_mode == AUTO3) {
+                                fprintf(f,"%s:%d: Locate: %s | Try: %s\n",
+                                    currentfilename, lineno,
+                                    token, chenc(wlst[0], dic_enc[d], io_enc));
+                            } else if (filter_mode == AUTO2) {
+                                fprintf(f,"%ds/%s/%s/g; # %s\n", lineno,
+                                    token, chenc(wlst[0], dic_enc[d], io_enc), buf);
+                            } else {
+                                fprintf(f,gettext("Line %d: %s -> "), lineno,
+                                    chenc(token, io_enc, ui_enc));
+                                fprintf(f, "%s\n",
+                                    chenc(wlst[0], dic_enc[d], ui_enc));
+                            }
+                        }
+                        pMS[d]->free_list(&wlst, ns);
+                    }
+                    free(token);
+                    continue;
+                }
+
+                case STEM: {
+                    char ** result;
+                    int n = pMS[d]->stem(&result, chenc(token, io_enc, dic_enc[d]));
+                    for (int i = 0; i < n; i++) {
+                        fprintf(stdout, "%s %s\n", token, chenc(result[i], dic_enc[d], ui_enc));
+                    }
+                    pMS[d]->free_list(&result, n);
+                    // retry without a trailing dot; the token is converted
+                    // to the dictionary encoding like in the first attempt
+                    if (n == 0 && token[strlen(token) - 1] == '.') {
+                        token[strlen(token) - 1] = '\0';
+                        n = pMS[d]->stem(&result, chenc(token, io_enc, dic_enc[d]));
+                        for (int i = 0; i < n; i++) {
+                            fprintf(stdout, "%s %s\n", token, chenc(result[i], dic_enc[d], ui_enc));
+                        }
+                        pMS[d]->free_list(&result, n);
+                    }
+                    if (n == 0) fprintf(stdout, "%s\n", chenc(token, dic_enc[d], ui_enc));
+                    fprintf(stdout, "\n");
+                    free(token);
+                    continue;
+                }
+
+                case ANALYZE: {
+                    char ** result;
+                    int n = pMS[d]->analyze(&result, chenc(token, io_enc, dic_enc[d]));
+                    for (int i = 0; i < n; i++) {
+                        fprintf(stdout, "%s %s\n", token, chenc(result[i], dic_enc[d], ui_enc));
+                    }
+                    pMS[d]->free_list(&result, n);
+                    // retry without a trailing dot, converted like above
+                    if (n == 0 && token[strlen(token) - 1] == '.') {
+                        token[strlen(token) - 1] = '\0';
+                        n = pMS[d]->analyze(&result, chenc(token, io_enc, dic_enc[d]));
+                        for (int i = 0; i < n; i++) {
+                            fprintf(stdout, "%s %s\n", token, chenc(result[i], dic_enc[d], ui_enc));
+                        }
+                        pMS[d]->free_list(&result, n);
+                    }
+                    if (n == 0) fprintf(stdout, "%s\n", chenc(token, dic_enc[d], ui_enc));
+                    fprintf(stdout, "\n");
+                    free(token);
+                    continue;
+                }
+
+                case PIPE: {
+                    int info;
+                    char * root = NULL;
+                    if (check(pMS, &d, token, &info, &root)) {
+                        if (!terse_mode) {
+                            if (verbose_mode) fprintf(stdout,"* %s\n", token);
+                            else fprintf(stdout,"*\n");
+                        }
+                        fflush(stdout);
+                    } else {
+                        char ** wlst = NULL;
+                        // the dictionary expects its own encoding, as in
+                        // NORMAL mode below
+                        int ns = pMS[d]->suggest(&wlst, chenc(token, io_enc, dic_enc[d]));
+                        if (ns == 0) {
+                            fprintf(stdout,"# %s %d", token,
+                                parser->get_tokenpos() + pos);
+                        } else {
+                            fprintf(stdout,"& %s %d %d: ", token, ns,
+                                parser->get_tokenpos() + pos);
+                            fprintf(stdout,"%s", chenc(wlst[0], dic_enc[d], io_enc));
+                        }
+                        for (int j = 1; j < ns; j++) {
+                            fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], io_enc));
+                        }
+                        pMS[d]->free_list(&wlst, ns);
+                        fprintf(stdout, "\n");
+                        fflush(stdout);
+                    }
+                    if (root) free(root);
+                    free(token);
+                    continue;
+                }
+                case NORMAL: {
+                    int info;
+                    char * root = NULL;
+                    if (check(pMS, &d, token, &info, &root)) {
+                        if (info & SPELL_COMPOUND) {
+                            fprintf(stdout,"-\n");
+                        } else if (root) {
+                            fprintf(stdout,"+ %s\n", chenc(root, dic_enc[d], ui_enc));
+                        } else {
+                            fprintf(stdout,"*\n");
+                        }
+                        fflush(stdout);
+                        if (root) free(root);
+                    } else {
+                        char ** wlst = NULL;
+                        int ns = pMS[d]->suggest(&wlst, chenc(token, io_enc, dic_enc[d]));
+                        if (ns == 0) {
+                            fprintf(stdout,"# %s %d", chenc(token, io_enc, ui_enc),
+                                parser->get_tokenpos() + pos);
+                        } else {
+                            fprintf(stdout,"& %s %d %d: ", chenc(token, io_enc, ui_enc), ns,
+                                parser->get_tokenpos() + pos);
+                            fprintf(stdout,"%s", chenc(wlst[0], dic_enc[d], ui_enc));
+                        }
+                        for (int j = 1; j < ns; j++) {
+                            fprintf(stdout, ", %s", chenc(wlst[j], dic_enc[d], ui_enc));
+                        }
+                        pMS[d]->free_list(&wlst, ns);
+                        fprintf(stdout, "\n");
+                        fflush(stdout);
+                    }
+                    free(token);
+                }
+                }
+            }
+
+            // per-line output after all tokens were processed
+            switch (filter_mode) {
+            case AUTO: {
+                fprintf(stdout,"%s\n", parser->get_line());
+                break;
+            }
+
+            case BADLINE: {
+                if (((printgood) && (!bad)) ||
+                    (!printgood && (bad))) fprintf(stdout,"%s\n",buf);
+                break;
+            }
+
+            case PIPE:
+            case NORMAL: {
+                fprintf(stdout,"\n");
+                fflush(stdout);
+                break;
+            }
+
+            }
+        } // if
+    } // while
+
+    if (parser) delete(parser);
+
+} // pipe_interface
+
+#ifdef HAVE_READLINE
+
+#ifdef HAVE_CURSES_H
+static const char * rltext;
+
+// Readline startup hook: insert the pending text `rltext` into the input
+// line once, then clear both the pending text and the hook itself.
+static int set_rltext ()
+{
+    if (!rltext)
+        return 0;
+    rl_insert_text (rltext);
+    rltext = NULL;
+    rl_startup_hook = (rl_hook_func_t *)NULL;
+    return 0;
+}
+
+#endif
+
+// Readline key handler for escape: throw away everything typed so far
+// (positions 0..rl_end) and mark the input as finished.
+// The two arguments are not used.
+static int rl_escape (int count, int key)
+{
+    const int whole_line_end = rl_end;
+    rl_delete_text(0, whole_line_end);
+    rl_done = 1;
+    return 0;
+}
+#endif
+
+#ifdef HAVE_CURSES_H
+// Copy src to dest with every tab replaced by blanks up to the next
+// multiple of 8 columns. Copying stops at the end of src, at a carriage
+// return, or when dest is full.
+// limit: size of dest in bytes, terminator included; nothing is written
+// when it is not positive.
+// Returns the number of columns produced; with a UTF-8 locale encoding
+// (ui_enc) continuation bytes do not count as columns.
+int expand_tab(char * dest, char * src, int limit) {
+    if (limit <= 0) return 0;
+    int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
+    // last usable index: one byte stays free for the terminator, so neither
+    // a tab expansion nor the final '\0' can be written past dest[limit - 1]
+    const int last = limit - 1;
+    int i = 0;       // write position in dest
+    int chpos = 0;   // current column
+    for(int j = 0; (i < last) && (src[j] != '\0') && (src[j] != '\r'); j++) {
+        if (src[j] == '\t') {
+            int end = 8 - (chpos % 8);
+            for(int k = 0; (k < end) && (i < last); k++) {
+                dest[i] = ' ';
+                i++;
+                chpos++;
+            }
+        } else {
+            dest[i] = src[j];
+            i++;
+            if (!u8 || (src[j] & 0xc0) != 0x80) chpos++;
+        }
+    }
+    dest[i] = '\0';
+    return chpos;
+}
+
+// UTF-8-aware version of strncpy (but output is always null terminated):
+// copies at most n characters of src, starting with character number
+// `begin` (counted from 0), into dest.
+// What we should deal in is cursor position cells in a terminal emulator,
+// i.e. the number of visual columns occupied, like wcwidth/wcswidth does.
+// What we are really doing is to deal in the number of characters,
+// like mbstowcs, which isn't quite correct, but close enough for western
+// text in UTF-8.
+// With a UTF-8 locale encoding (ui_enc) a character is a byte together
+// with the continuation bytes (10xxxxxx) following it, so sequences are
+// skipped and copied as a whole; otherwise every byte is a character.
+// dest must have room for the copied bytes plus the terminator.
+void strncpyu8(char * dest, const char * src, int begin, int n) {
+    int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
+    // skip the first `begin` characters, never going past the terminator
+    for (int skipped = 0; (skipped < begin) && *src; skipped++) {
+        ++src;
+        while (u8 && ((*src & 0xc0) == 0x80)) ++src;
+    }
+    // copy up to n complete characters
+    for (int copied = 0; (copied < n) && *src; copied++) {
+        *dest++ = *src++;
+        while (u8 && ((*src & 0xc0) == 0x80)) *dest++ = *src++;
+    }
+    *dest = '\0';
+}
+
+// Length of src in characters rather than bytes: with a UTF-8 locale
+// encoding (ui_enc) continuation bytes (10xxxxxx) are not counted,
+// otherwise every byte counts. This is a character count, not the number
+// of terminal columns.
+int strlenu8(const char * src) {
+    const int u8 = ((ui_enc != NULL) && (strcmp(ui_enc, "UTF-8") == 0)) ? 1 : 0;
+    int count = 0;
+    for (const char * p = src; *p; ++p) {
+        if (!u8 || (*p & 0xc0) != 0x80)
+            count++;
+    }
+    return count;
+}
+
+// Redraw the interactive correction screen: a status line with the token
+// and the file name, the previous input lines with the token highlighted,
+// the numbered suggestion list and the key help line.
+// forbidden: info flags of the word (SPELL_FORBIDDEN / SPELL_WARN are tested)
+// wlst, ns: suggestion list and its length
+void dialogscreen(TextParser * parser, char * token,
+ char * filename, int forbidden, char ** wlst, int ns) {
+ int x, y;
+ char line[MAXLNLEN];
+ char line2[MAXLNLEN];
+ // y, x: size of the curses screen
+ getmaxyx(stdscr,y,x);
+ clear();
+
+ if (forbidden & SPELL_FORBIDDEN) printw(gettext("FORBIDDEN!")); else
+ if (forbidden & SPELL_WARN) printw(gettext("Spelling mistake?"));
+ printw(gettext("\t%s\t\tFile: %s\n\n"), chenc(token, io_enc, ui_enc), filename);
+
+ // handle long lines and tabulators
+
+ char lines[MAXPREVLINE][MAXLNLEN];
+
+ // tab-expanded copies of the previous lines in the locale encoding
+ for (int i = 0; i < MAXPREVLINE; i++) {
+ expand_tab(lines[i], chenc(parser->get_prevline(i), io_enc, ui_enc), MAXLNLEN);
+ }
+
+ int prevline = 0;
+
+ // column of the token start: expand the text before the token
+ strncpy(line, parser->get_prevline(0), parser->get_tokenpos());
+ line[parser->get_tokenpos()] = '\0';
+ int tokenbeg = expand_tab(line2, chenc(line, io_enc, ui_enc), MAXLNLEN);
+
+ // column of the token end: expand the text up to the end of the token
+ strncpy(line, parser->get_prevline(0), parser->get_tokenpos() + strlen(token));
+ line[parser->get_tokenpos() + strlen(token)] = '\0';
+ int tokenend = expand_tab(line2, chenc(line, io_enc, ui_enc), MAXLNLEN);
+
+ // screen rows (of width x) that hold the token end and the token start
+ // NOTE(review): divides by x; assumes the screen width is never 0
+ int rowindex = tokenend / x;
+ int beginrow = rowindex - tokenbeg / x;
+ if (beginrow >= MAXPREVLINE) beginrow = MAXPREVLINE - 1;
+
+ // print MAXPREVLINE screen rows from the bottom up, wrapping long lines
+ for (int i = 0; i < MAXPREVLINE; i++) {
+ strncpyu8(line, lines[prevline], x * rowindex, x);
+ mvprintw(MAXPREVLINE + 1 - i, 0, "%s", line);
+ rowindex--;
+ if (rowindex == -1) {
+ prevline++;
+ // NOTE(review): prevline can reach MAXPREVLINE here, so this may read
+ // lines[MAXPREVLINE], one past the array - confirm
+ rowindex = strlenu8(lines[prevline]) / x;
+ }
+ }
+
+ // reprint the part of the row before the token, then the token reversed
+ int linestartpos = tokenbeg - (tokenbeg % x);
+ strncpyu8(line, lines[0], x * rowindex + linestartpos, tokenbeg % x);
+ mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", line);
+ attron(A_REVERSE);
+ printw("%s", chenc(token, io_enc, ui_enc));
+ attroff(A_REVERSE);
+
+ // numbered suggestions; with more than ten, the first ten get a leading 0
+ // NOTE(review): suggestions are converted from io_enc here, while
+ // pipe_interface converts them from the dictionary encoding - confirm
+ mvprintw(MAXPREVLINE + 2, 0, "\n");
+ for (int i = 0; i < ns; i++) {
+ if ((ns > 10) && (i < 10)) {
+ printw(" 0%d: %s\n", i, chenc(wlst[i], io_enc, ui_enc));
+ } else {
+ printw(" %d: %s\n", i, chenc(wlst[i], io_enc, ui_enc));
+ }
+ }
+
+/* TRANSLATORS: the capital letters are shortcuts, mark one letter similarly
+ in your translation and translate the standalone letter accordingly later */
+ mvprintw(y-3, 0, "%s\n",
+ gettext("\n[SPACE] R)epl A)ccept I)nsert U)ncap S)tem Q)uit e(X)it or ? for help\n"));
+}
+
+// Return a newly allocated copy of token (in encoding io_enc) whose first
+// character is converted to lower case by unicodetolower() for language
+// langnum. The token is returned as a plain copy when it is empty or cannot
+// be converted to UTF-16.
+char * lower_first_char(char *token, const char *io_enc, int langnum)
+{
+    const char *utf8str = chenc(token, io_enc, "UTF-8");
+    int max = strlen(utf8str);
+    // nothing to lower-case; also avoids reading u[0] of an empty array
+    if (max == 0) return mystrdup(token);
+    w_char *u = new w_char[max];
+    int len = u8_u16(u, max, utf8str);
+    if (len <= 0) {
+        // u[0] was not filled in
+        delete[] u;
+        return mystrdup(token);
+    }
+    unsigned short idx = (u[0].h << 8) + u[0].l;
+    idx = unicodetolower(idx, langnum);
+    u[0].h = (unsigned char) (idx >> 8);
+    u[0].l = (unsigned char) (idx & 0x00FF);
+    // a few spare bytes because the lower-case form may take more bytes
+    char *scratch = (char*)malloc(max + 1 + 4);
+    u16_u8(scratch, max+4, u, len);
+    delete[] u;
+    char *result = chenc(scratch, "UTF-8", io_enc);
+    // chenc() answers with its static buffer after a conversion: copy it
+    // so the caller always owns the result
+    if (result != scratch)
+    {
+        free (scratch);
+        result = mystrdup(result);
+    }
+    return result;
+}
+
+ // for terminal interface
+int dialog(TextParser * parser, Hunspell * pMS, char * token, char * filename,
+ char ** wlst, int ns, int forbidden) {
+ char buf[MAXLNLEN];
+ char * buf2;
+ wordlist * dicwords = NULL;
+ int c;
+
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+
+ char firstletter='\0';
+
+ while ((c=getch())) {
+ switch (c) {
+ case '0':
+ case '1': if ((firstletter=='\0') && (ns>10)) {
+ firstletter=c;
+ break;
+ }
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ modified=1;
+ if ((firstletter!='\0') && (firstletter=='1')) {
+ c += 10;
+ }
+ c -= '0';
+ if (c>=ns) break;
+ parser->change_token(wlst[c]);
+ goto ki;
+ }
+ case ' ': {
+ goto ki;
+ }
+ case '?': {
+ clear();
+printw(gettext("Whenever a word is found that is not in the dictionary\n"
+ "it is printed on the first line of the screen. If the dictionary\n"
+ "contains any similar words, they are listed with a number\n"
+ "next to each one. You have the option of replacing the word\n"
+ "completely, or choosing one of the suggested words.\n"));
+printw(gettext("\nCommands are:\n\n"));
+printw(gettext("R Replace the misspelled word completely.\n"));
+printw(gettext("Space Accept the word this time only.\n"));
+printw(gettext("A Accept the word for the rest of this session.\n"));
+printw(gettext("I Accept the word, and put it in your private dictionary.\n"));
+printw(gettext("U Accept and add lowercase version to private dictionary.\n"));
+printw(gettext(
+"S\tAsk a stem and a model word and store them in the private dictionary.\n"
+"\tThe stem will be accepted also with the affixes of the model word.\n"
+));
+printw(gettext("0-n Replace with one of the suggested words.\n"));
+printw(gettext("X Write the rest of this file, ignoring misspellings, and start next file.\n"));
+printw(gettext("Q Quit immediately. Asks for confirmation. Leaves file unchanged.\n"));
+printw(gettext("^Z Suspend program. Restart with fg command.\n"));
+printw(gettext("? Show this help screen.\n"));
+printw(gettext("\n-- Type space to continue -- \n"));
+ while (getch()!=' ');
+ }
+ case 12: {
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ break;
+ }
+ default: {
+/* TRANSLATORS: translate this letter according to the shortcut letter used
+ previously in the translation of "R)epl" before */
+ if (c==(gettext("r"))[0]) {
+ char i[MAXLNLEN];
+ char *temp;
+
+ modified=1;
+
+
+#ifdef HAVE_READLINE
+ endwin();
+ rltext = "";
+ if (rltext && *rltext) rl_startup_hook = set_rltext;
+#endif
+ temp = readline(gettext("Replace with: "));
+#ifdef HAVE_READLINE
+ initscr();
+ cbreak();
+#endif
+
+ if ((!temp) || (temp[0] == '\0')) {
+ free(temp);
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ break;
+ }
+
+ strncpy(i, temp, MAXLNLEN);
+ free(temp);
+
+ parser->change_token(i);
+
+ return 2; // replace
+ }
+/* TRANSLATORS: translate these letters according to the shortcut letter used
+ previously in the translation of "U)ncap" and I)nsert before */
+ int u_key = gettext("u")[0];
+ int i_key = gettext("i")[0];
+
+ if (c==u_key || c==i_key) {
+ struct wordlist* i = (struct wordlist *) malloc (sizeof(struct wordlist));
+ i->word = (c==i_key) ? mystrdup(token) : lower_first_char(token, io_enc, pMS->get_langnum());
+ i->next = dicwords;
+ dicwords = i;
+ // save
+ if (HOME) strcpy(buf,HOME); else {
+ fprintf(stderr, gettext("error - missing HOME variable\n"));
+ break;
+ }
+#ifndef WIN32
+ strcat(buf,"/");
+#endif
+ buf2 = buf+strlen(buf);
+ if (!privdicname) {
+ strcat(buf,DICBASENAME);
+ strcat(buf,basename(dicname,DIRSEPCH));
+ } else {
+ strcat(buf,privdicname);
+ }
+ if (save_privdic(buf2, buf, dicwords)) {
+ dicwords=NULL;
+ } else {
+ fprintf(stderr,gettext("Cannot update personal dictionary."));
+ break;
+ }
+ } // no break
+/* TRANSLATORS: translate this letter according to the shortcut letter used
+ previously in the translation of "U)ncap" and I)nsert before */
+ if ((c==(gettext("u"))[0]) || (c==(gettext("i"))[0]) || (c==(gettext("a"))[0])) {
+ modified=1;
+ putdic(token, pMS);
+ goto ki;
+ }
+/* TRANSLATORS: translate this letter according to the shortcut letter used
+ previously in the translation of "S)tem" before */
+ if (c==(gettext("s"))[0]) {
+ modified=1;
+
+ char w[MAXLNLEN], w2[MAXLNLEN], w3[MAXLNLEN];
+ char *temp;
+
+ strncpy(w, token, MAXLNLEN);
+ temp = basename(w, '-');
+ if (w < temp) {
+ *(temp-1) = '\0';
+ } else {
+ char ** poslst = NULL;
+#ifdef HUNSPELL_EXPERIMENTAL
+ int ps = pMS->suggest_pos_stems(&poslst, token);
+#else
+ int ps = 0;
+#endif
+ if (ps > 0) {
+ strcpy(buf, poslst[0]);
+ for (int i = 0; i < ps; i++) {
+ if (strlen(poslst[i]) <= strlen(buf)) strcpy(buf, poslst[i]);
+ free(poslst[i]);
+ }
+ strcpy(w, buf);
+ }
+ if (poslst) free(poslst);
+ }
+
+#ifdef HAVE_READLINE
+ endwin();
+ rltext = w;
+ if (rltext && *rltext) rl_startup_hook = set_rltext;
+#endif
+ temp = readline(gettext("New word (stem): "));
+
+ if ((!temp) || (temp[0] == '\0')) {
+ free(temp);
+#ifdef HAVE_READLINE
+ initscr();
+ cbreak();
+#endif
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ break;
+ }
+
+ strncpy(w, temp, MAXLNLEN);
+ free(temp);
+
+#ifdef HAVE_READLINE
+ initscr();
+ cbreak();
+#endif
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ refresh();
+
+#ifdef HAVE_READLINE
+ endwin();
+ rltext = "";
+ if (rltext && *rltext) rl_startup_hook = set_rltext;
+#endif
+ temp = readline(gettext("Model word (a similar dictionary word): "));
+
+#ifdef HAVE_READLINE
+ initscr();
+ cbreak();
+#endif
+
+ if ((!temp) || (temp[0] == '\0')) {
+ free(temp);
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ break;
+ }
+
+ strncpy(w2, temp, MAXLNLEN);
+ free(temp);
+
+ if (strlen(w) + strlen(w2) + 2 < MAXLNLEN) {
+ sprintf(w3, "%s/%s", w, w2);
+ } else break;
+
+ if (!putdic(w3, pMS)) {
+
+ struct wordlist* i =
+ (struct wordlist *) malloc (sizeof(struct wordlist));
+ i->word = mystrdup(w3);
+ i->next = dicwords;
+ dicwords = i;
+
+ if (strlen(w) + strlen(w2) + 4 < MAXLNLEN) {
+ sprintf(w3, "%s-/%s-", w, w2);
+ if (putdic(w3, pMS)) {
+ struct wordlist* i =
+ (struct wordlist *) malloc (sizeof(struct wordlist));
+ i->word = mystrdup(w3);
+ i->next = dicwords;
+ dicwords = i;
+ }
+ }
+ // save
+
+ if (HOME) strcpy(buf,HOME); else {
+ fprintf(stderr, gettext("error - missing HOME variable\n"));
+ continue;
+ }
+#ifndef WIN32
+ strcat(buf,"/");
+#endif
+ buf2 = buf + strlen(buf);
+ if (!privdicname) {
+ strcat(buf,DICBASENAME);
+ strcat(buf,basename(dicname,DIRSEPCH));
+ } else {
+ strcat(buf,privdicname);
+ }
+ if (save_privdic(buf2, buf, dicwords)) {
+ dicwords = NULL;
+ } else {
+ fprintf(stderr, gettext("Cannot update personal dictionary."));
+ break;
+ }
+
+ } else {
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ printw(gettext("Model word must be in the dictionary. Press any key!"));
+ getch();
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ break;
+ }
+ goto ki;
+ }
+/* TRANSLATORS: translate this letter according to the shortcut letter used
+ previously in the translation of "e(X)it" before */
+ if (c==(gettext("x"))[0]) {
+ return 1;
+ }
+/* TRANSLATORS: translate this letter according to the shortcut letter used
+ previously in the translation of "Q)uit" before */
+ if (c==(gettext("q"))[0]) {
+ if (modified) {
+ printw(gettext("Are you sure you want to throw away your changes? "));
+/* TRANSLATORS: translate this letter according to the shortcut letter y)es */
+ if (getch()==(gettext("y"))[0]) return -1;
+ dialogscreen(parser, token, filename, forbidden, wlst, ns);
+ break;
+ } else {
+ return -1;
+ }
+ }
+ }
+ }
+ }
+ ki: return 0;
+}
+
+/* Spell check the line currently held by the parser in interactive mode.
+ *
+ * Every token rejected by check() is shown to the user together with the
+ * suggestions of the dictionary that check() selected (index d), converted
+ * from the dictionary encoding to the I/O encoding. The line, as it stands
+ * in the parser afterwards, is written to tempfile followed by a newline.
+ *
+ * Returns the result of the last dialog() call (0 if no dialog was shown);
+ * the values -1 and 1 stop the processing of the remaining tokens.
+ */
+int interactive_line(TextParser * parser, Hunspell ** pMS, char * filename, FILE * tempfile)
+{
+    int dialogexit = 0;
+    int info;
+    int d = 0;
+    char * token;
+
+    while ((token = parser->next_token()) != NULL) {
+        if (!check(pMS, &d, token, &info, NULL)) {
+            // preview of the misspelled word before the suggestions are computed
+            dialogscreen(parser, token, filename, info, NULL, 0);
+            refresh();
+
+            char ** wlst = NULL;
+            int ns = pMS[d]->suggest(&wlst, chenc(token, io_enc, dic_enc[d]));
+
+            // convert the suggestions to the I/O encoding (no-op when ns <= 0)
+            for (int k = 0; k < ns; k++) {
+                char d2io[MAXLNLEN];
+                strcpy(d2io, chenc(wlst[k], dic_enc[d], io_enc));
+                wlst[k] = (char *) realloc(wlst[k], strlen(d2io) + 1);
+                strcpy(wlst[k], d2io);
+            }
+
+            dialogexit = dialog(parser, pMS[d], token, filename, wlst, ns, info);
+
+            for (int k = 0; k < ns; k++) free(wlst[k]);
+            free(wlst);
+        }
+        free(token);
+        if (dialogexit == -1 || dialogexit == 1) break;
+    }
+
+    char * line = parser->get_line();
+    fprintf(tempfile,"%s\n",line);
+    free(line);
+    return dialogexit;
+}
+
+/* Interactive (curses) spell checking of one file.
+ *
+ * The file is read line by line; each line is handed to interactive_line(),
+ * which writes the (possibly corrected) line to a temporary file created next
+ * to the input file. When interactive_line() returns 1 the rest of the file
+ * is copied without checking; when it returns -1 the temporary file is
+ * removed and the program exits. At the end the temporary file replaces the
+ * original if the global `modified` flag is set, otherwise it is removed.
+ *
+ * pMS      - array of loaded dictionaries (pMS[0] is used to pick the parser)
+ * filename - file to check
+ * format   - input format passed to get_parser()
+ */
+void interactive_interface(Hunspell ** pMS, char * filename, int format)
+{
+    char buf[MAXLNLEN];
+
+    FILE *text = fopen(filename, "r");
+    if (!text) {
+        // the file may have become unreadable since the caller tested it
+        fprintf(stderr, gettext("Can't open %s.\n"), filename);
+        endwin();
+        exit(1);
+    }
+
+    int dialogexit;
+    int check=1;
+
+    TextParser * parser;
+    char * extension = basename(filename, '.');
+    parser = get_parser(format, extension, pMS[0]);
+
+    // temporary file: same directory as the input file, base name TEMPNAME
+    char * tempname = (char *) malloc(strlen(filename) + strlen(TEMPNAME) + 1);
+    strcpy(tempname, filename);
+    strcpy(basename(tempname, DIRSEPCH), TEMPNAME);
+
+    FILE *tempfile;
+
+    if (!(tempfile = fopen(tempname, "w"))) {
+        fprintf(stderr, gettext("Can't create tempfile %s.\n"), tempname);
+        endwin();
+        exit(1);
+    }
+
+    while(fgets(buf,MAXLNLEN,text)) {
+        if (check) {
+            // strip the trailing newline; guard against an empty string
+            size_t linelen = strlen(buf);
+            if (linelen > 0 && buf[linelen - 1] == '\n') buf[linelen - 1] = '\0';
+            parser->put_line(buf);
+            dialogexit = interactive_line(parser,pMS,filename,tempfile);
+            switch (dialogexit) {
+                case -1: {
+                    // quit: throw away the temporary file and leave the input untouched
+                    clear();
+                    refresh();
+                    unlink(tempname);
+                    endwin();
+                    exit(0);
+                }
+                case 1: {
+                    // copy the rest of the file without checking
+                    check = 0;
+                }
+            }
+        } else {
+            fprintf(tempfile,"%s",buf);
+        }
+    }
+    fclose(text);
+    fclose(tempfile);
+    delete parser;
+
+    if (! modified) {
+        unlink(tempname);
+    } else {
+        rename(tempname, filename);
+    }
+    free(tempname);
+}
+
+#endif
+
+/* Append st to the heap string dest and return the resulting string.
+ * A NULL dest yields a fresh copy of st. The returned pointer replaces dest
+ * (dest is passed to realloc and must not be used afterwards).
+ */
+char * add(char * dest, const char * st) {
+    if (dest == NULL) return mystrdup(st);
+
+    char * grown = (char *) realloc(dest, strlen(dest) + strlen(st) + 1);
+    strcat(grown, st);
+    return grown;
+}
+
+/* Build the path "<dir[0..len)><DIRSEP><name><ext>" and test whether it, or
+ * the same path with HZIP_EXTENSION appended, exists.
+ *
+ * dir  - start of a directory name (need not be NUL terminated at len)
+ * len  - number of characters of dir to use; 0 means "no directory", in
+ *        which case no separator is inserted either
+ * name - file name without extension
+ * ext  - extension, including the leading dot
+ *
+ * Returns a newly allocated copy of the path WITHOUT the hzip extension
+ * (also when only the compressed file exists), or NULL if neither file
+ * exists or the path would not fit into the MAXLNLEN work buffer.
+ */
+char * exist2(char * dir, int len, const char * name, const char * ext) {
+    char buf[MAXLNLEN];
+    const char * sep = (len == 0) ? "": DIRSEP;
+
+    // dir and name come from the environment and the command line:
+    // refuse paths that would overflow buf instead of writing past it
+    if (len < 0) return NULL;
+    size_t needed = (size_t) len + strlen(sep) + strlen(name) + strlen(ext) +
+        strlen(HZIP_EXTENSION) + 1;
+    if (needed > sizeof(buf)) return NULL;
+
+    strncpy(buf, dir, len);
+    strcpy(buf + len, sep);
+    strcat(buf, name);
+    strcat(buf, ext);
+    if (exist(buf)) return mystrdup(buf);
+
+    // try the hzip compressed variant, but report the uncompressed name
+    strcat(buf, HZIP_EXTENSION);
+    if (exist(buf)) {
+        buf[strlen(buf) - strlen(HZIP_EXTENSION)] = '\0';
+        return mystrdup(buf);
+    }
+    return NULL;
+}
+
+#ifndef WIN32
+/* Print to stderr the dictionaries found in the directory dir[0..len).
+ *
+ * Every directory entry ending in ".dic" or ".dic.hz" is printed as
+ * "<dir><DIRSEP><entry without that ending>", one per line.
+ *
+ * Returns 1 if the directory could be opened, 0 otherwise (including the
+ * case where the directory name does not fit into the work buffer).
+ */
+int listdicpath(char * dir, int len) {
+    char buf[MAXLNLEN];
+    const char * sep = (len == 0) ? "": DIRSEP;
+
+    // the directory comes from the search path (environment): keep it in bounds
+    if (len < 0 || (size_t) len + strlen(sep) + 1 > sizeof(buf)) return 0;
+
+    strncpy(buf, dir, len);
+    strcpy(buf + len, sep);
+    DIR *d = opendir(buf);
+    if (!d) return 0;
+    struct dirent * de;
+    while ((de = readdir(d))) {
+        size_t namelen = strlen(de->d_name);
+        size_t cut = 0; // length of the ending to hide, 0 = not a dictionary
+        if (namelen > 7 && strcmp(de->d_name + namelen - 7, ".dic.hz") == 0) {
+            cut = 7;
+        } else if (namelen > 4 && strcmp(de->d_name + namelen - 4, ".dic") == 0) {
+            cut = 4;
+        }
+        if (cut) {
+            // print the name without its ending; no temporary copy needed
+            fprintf(stderr, "%s%.*s\n", buf, (int) (namelen - cut), de->d_name);
+        }
+    }
+    closedir(d);
+    return 1;
+}
+#endif
+
+// Walk the PATHSEP separated directory list starting at `begin`.
+// With a non-NULL `name`: return the first existing "dir + name + ext"
+// (as found by exist2), or NULL if no directory contains it.
+// With a NULL `name`: list the dictionaries of every directory
+// (not on WIN32) and return NULL.
+char * search(char * begin, char * name, const char * ext) {
+    char * end = begin;
+    for (;;) {
+        // advance to the end of the current path element
+        while (*end != *PATHSEP && *end != '\0') end++;
+
+        char * res = NULL;
+        if (name != NULL) {
+            res = exist2(begin, end - begin, name, ext);
+        }
+#ifndef WIN32
+        else {
+            listdicpath(begin, end - begin);
+        }
+#endif
+        if (res != NULL || *end == '\0') return res;
+
+        // step over the separator to the next element
+        begin = ++end;
+    }
+}
+
+/* Entry point of the hunspell command line tool.
+ *
+ * 1. Parses the options (see the --help text below); the first argument
+ *    not starting with '-' marks the beginning of the file list.
+ * 2. Chooses the dictionary name: -d, else $DICTIONARY, else the first
+ *    non-empty value of LC_ALL, LC_MESSAGES, LANG (without the parts after
+ *    '.' and '@'; "C" and "POSIX" map to en_US), else DEFAULTDICNAME.
+ * 3. Builds the search path (".", the empty entry, $DICPATH, LIBDIR,
+ *    $HOME/USEROOODIR, OOODIR) and loads the comma separated dictionaries.
+ * 4. Loads the private dictionaries and runs the pipe/filter interface or
+ *    the interactive curses interface on the input.
+ *
+ * Returns 0 on success; exits with status 1 if a file or the dictionary
+ * cannot be opened.
+ */
+int main(int argc, char** argv)
+{
+ char buf[MAXLNLEN];
+ Hunspell * pMS[DMAX];
+ char * key = NULL;
+ int arg_files = -1; // position of the first file name argument in argv
+ int format = FMT_TEXT;
+ int argstate = 0; // nonzero: next argument is the value of -d (1), -p (2), -i (3) or -P (4)
+
+#ifdef ENABLE_NLS
+# ifdef HAVE_LOCALE_H
+ setlocale(LC_ALL, "");
+ textdomain("hunspell");
+# ifdef HAVE_LANGINFO_CODESET
+ ui_enc = nl_langinfo(CODESET);
+# endif
+# endif
+#endif
+
+#ifdef HAVE_READLINE
+ rl_set_key("", rl_escape, rl_get_keymap());
+ rl_bind_key('\t', rl_insert);
+#endif
+
+#ifdef LOG
+ log("START");
+#endif
+
+ for(int i=1; i<argc; i++) {
+#ifdef LOG
+ log(argv[i]);
+#endif
+
+ if (argstate == 1) {
+ if (dicname) free(dicname);
+ dicname = mystrdup(argv[i]);
+ argstate = 0;
+ } else if (argstate == 2) {
+ if (privdicname) free(privdicname);
+ privdicname = mystrdup(argv[i]);
+ argstate = 0;
+ } else if (argstate == 3) {
+ io_enc = argv[i];
+ argstate = 0;
+ } else if (argstate == 4) {
+ key = argv[i];
+ argstate = 0;
+ } else if (strcmp(argv[i],"-d")==0) argstate=1;
+ else if (strcmp(argv[i],"-p")==0) argstate=2;
+ else if (strcmp(argv[i],"-i")==0) argstate=3;
+ else if (strcmp(argv[i],"-P")==0) argstate=4;
+ else if ((strcmp(argv[i],"-h") == 0) || (strcmp(argv[i],"--help") == 0)) {
+ fprintf(stderr,gettext("Usage: hunspell [OPTION]... [FILE]...\n"));
+ fprintf(stderr,gettext("Check spelling of each FILE. Without FILE, check standard input.\n\n"));
+ fprintf(stderr,gettext(" -1\t\tcheck only first field in lines (delimiter = tabulator)\n"));
+ fprintf(stderr,gettext(" -a\t\tIspell's pipe interface\n"));
+ fprintf(stderr,gettext(" --check-url\tCheck URLs, e-mail addresses and directory paths\n"));
+ fprintf(stderr,gettext(" -d d[,d2,...]\tuse d (d2 etc.) dictionaries\n"));
+ fprintf(stderr,gettext(" -D\t\tshow available dictionaries\n"));
+ fprintf(stderr,gettext(" -G\t\tprint only correct words or lines\n"));
+ fprintf(stderr,gettext(" -h, --help\tdisplay this help and exit\n"));
+ fprintf(stderr,gettext(" -H\t\tHTML input file format\n"));
+ fprintf(stderr,gettext(" -i enc\tinput encoding\n"));
+ fprintf(stderr,gettext(" -l\t\tprint misspelled words\n"));
+ fprintf(stderr,gettext(" -L\t\tprint lines with misspelled words\n"));
+ fprintf(stderr,gettext(" -m \t\tanalyze the words of the input text\n"));
+ fprintf(stderr,gettext(" -n\t\tnroff/troff input file format\n"));
+ fprintf(stderr,gettext(" -p dict\tset dict custom dictionary\n"));
+ fprintf(stderr,gettext(" -r\t\twarn of the potential mistakes (rare words)\n"));
+ fprintf(stderr,gettext(" -P password\tset password for encrypted dictionaries\n"));
+ fprintf(stderr,gettext(" -s \t\tstem the words of the input text\n"));
+ fprintf(stderr,gettext(" -t\t\tTeX/LaTeX input file format\n"));
+// experimental functions: missing Unicode support
+// fprintf(stderr,gettext(" -u\t\tshow typical misspellings\n"));
+// fprintf(stderr,gettext(" -u2\t\tprint typical misspellings in sed format\n"));
+// fprintf(stderr,gettext(" -u3\t\tprint typical misspellings in gcc error format\n"));
+// fprintf(stderr,gettext(" -U\t\tautomatic correction of typical misspellings to stdout\n"));
+ fprintf(stderr,gettext(" -v, --version\tprint version number\n"));
+ fprintf(stderr,gettext(" -vv\t\tprint Ispell compatible version number\n"));
+ fprintf(stderr,gettext(" -w\t\tprint misspelled words (= lines) from one word/line input.\n\n"));
+ fprintf(stderr,gettext("Example: hunspell -d en_US file.txt # interactive spelling\n"
+ " hunspell -l file.txt # print misspelled words\n"
+ " hunspell -i utf-8 file.txt # check UTF-8 encoded file\n\n"));
+ fprintf(stderr,gettext("Bug reports: http://hunspell.sourceforge.net\n"));
+ exit(0);
+ } else if ((strcmp(argv[i],"-vv")==0) || (strcmp(argv[i],"-v")==0) || (strcmp(argv[i],"--version")==0)) {
+ fprintf(stdout,gettext(HUNSPELL_PIPE_HEADING));
+ fprintf(stdout,"\n");
+ if (strcmp(argv[i],"-vv")!=0) {
+ fprintf(stdout,gettext("\nCopyright (C) 2002-2008 L\303\241szl\303\263 N\303\251meth. License: MPL/GPL/LGPL.\n\n"
+ "Based on OpenOffice.org's Myspell library.\n"
+ "Myspell's copyright (C) Kevin Hendricks, 2001-2002, License: BSD.\n\n"));
+ fprintf(stdout,gettext("This is free software; see the source for copying conditions. There is NO\n"
+ "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE,\n"
+ "to the extent permitted by law.\n"));
+ }
+ exit(0);
+ } else if ((strcmp(argv[i],"-a")==0)) {
+ filter_mode = PIPE;
+ fprintf(stdout,gettext(HUNSPELL_PIPE_HEADING));
+ fflush(stdout);
+ } else if ((strcmp(argv[i],"-m")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: Make possible root/affix combinations that aren't in the dictionary.
+ hunspell: Analyze the words of the input text
+ */
+ if (filter_mode != PIPE)
+ filter_mode = ANALYZE;
+ } else if ((strcmp(argv[i],"-s")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: Stop itself with a SIGTSTP signal after each line of input.
+ hunspell: Stem the words of the input text
+ */
+ if (filter_mode != PIPE)
+ filter_mode = STEM;
+ } else if ((strcmp(argv[i],"-t")==0)) {
+ format = FMT_LATEX;
+ } else if ((strcmp(argv[i],"-n")==0)) {
+ format = FMT_MAN;
+ } else if ((strcmp(argv[i],"-H")==0)) {
+ format = FMT_HTML;
+ } else if ((strcmp(argv[i],"-l")==0)) {
+ filter_mode = BADWORD;
+ } else if ((strcmp(argv[i],"-w")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: Specify additional characters that can be part of a word.
+ hunspell: Print misspelled words (= lines) from one word/line input
+ */
+ if (filter_mode != PIPE)
+ filter_mode = WORDFILTER;
+ } else if ((strcmp(argv[i],"-L")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: Number of lines of context to be shown at the bottom of the screen
+ hunspell: Print lines with misspelled words
+ */
+ if (filter_mode != PIPE)
+ filter_mode = BADLINE;
+ } else if ((strcmp(argv[i],"-u")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: None
+ hunspell: Show typical misspellings
+ */
+ if (filter_mode != PIPE)
+ filter_mode = AUTO0;
+ } else if ((strcmp(argv[i],"-U")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: None
+ hunspell: Automatic correction of typical misspellings to stdout
+ */
+ if (filter_mode != PIPE)
+ filter_mode = AUTO;
+ } else if ((strcmp(argv[i],"-u2")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: None
+ hunspell: Print typical misspellings in sed format
+ */
+ if (filter_mode != PIPE)
+ filter_mode = AUTO2;
+ } else if ((strcmp(argv[i],"-u3")==0)) {
+ /*
+ if -a was used, don't override, i.e. keep ispell compatibility
+ ispell: None
+ hunspell: Print typical misspellings in gcc error format
+ */
+ if (filter_mode != PIPE)
+ filter_mode = AUTO3;
+ } else if ((strcmp(argv[i],"-G")==0)) {
+ printgood = 1;
+ } else if ((strcmp(argv[i],"-1")==0)) {
+ format = FMT_FIRST;
+ } else if ((strcmp(argv[i],"-D")==0)) {
+ showpath = 1;
+ } else if ((strcmp(argv[i],"-r")==0)) {
+ warn = 1;
+ } else if ((strcmp(argv[i],"--check-url")==0)) {
+ checkurl = 1;
+ } else if ((arg_files==-1) && ((argv[i][0] != '-') && (argv[i][0] != '\0'))) {
+ arg_files = i;
+ if (! exist(argv[i])) { // first check (before time-consuming dic. load)
+ fprintf(stderr,gettext("Can't open %s.\n"),argv[i]);
+#ifdef HAVE_CURSES_H
+ endwin();
+#endif
+ exit(1);
+ }
+ }
+ }
+
+ if (printgood && (filter_mode == NORMAL)) filter_mode = BADWORD;
+
+ if (! dicname) {
+ // dicname is free()d at the end and modified below, so it must always
+ // point to heap memory, never directly into the environment
+ const char * envdic = getenv("DICTIONARY");
+ if (envdic) {
+ dicname = mystrdup(envdic);
+ } else {
+ /*
+ * Search in order of LC_ALL, LC_MESSAGES &
+ * LANG
+ */
+ const char *tests[] = { "LC_ALL", "LC_MESSAGES", "LANG" };
+ for (size_t i = 0; i < sizeof(tests) / sizeof(const char*); ++i) {
+ const char * loc = getenv(tests[i]);
+ if (loc && strcmp(loc, "") != 0) {
+ dicname = mystrdup(loc);
+ // keep only the language_TERRITORY part of the locale name
+ char * dot = strchr(dicname, '.');
+ if (dot) *dot = '\0';
+ char * at = strchr(dicname, '@');
+ if (at) *at = '\0';
+ break;
+ }
+ }
+
+ if (dicname && ((strcmp(dicname, "C") == 0) || (strcmp(dicname, "POSIX") == 0))) {
+ free(dicname);
+ dicname=mystrdup("en_US");
+ }
+
+ if (! dicname) {
+ dicname=mystrdup(DEFAULTDICNAME);
+ }
+ }
+ }
+ path = add(mystrdup("."), PATHSEP); // <- check path in local directory
+ path = add(path, PATHSEP); // <- check path in root directory
+ if (getenv("DICPATH")) path = add(add(path, getenv("DICPATH")), PATHSEP);
+ path = add(add(path, LIBDIR), PATHSEP);
+ if (HOME) path = add(add(add(add(path, HOME), DIRSEP), USEROOODIR), PATHSEP);
+ path = add(path, OOODIR);
+
+ if (showpath) {
+ fprintf(stderr, gettext("SEARCH PATH:\n%s\n"), path);
+ fprintf(stderr, gettext("AVAILABLE DICTIONARIES (path is not mandatory for -d option):\n"));
+ search(path, NULL, NULL);
+ }
+
+ if (!privdicname) privdicname = mystrdup(getenv("WORDLIST"));
+
+ // dicname may be a comma separated list: load the first dictionary, then
+ // the rest one by one (dicplus points to the comma before the next name)
+ char * dicplus = strchr(dicname, ',');
+ if (dicplus) *dicplus = '\0';
+ char * aff = search(path, dicname, ".aff");
+ char * dic = search(path, dicname, ".dic");
+ if (aff && dic) {
+ if (showpath) {
+ fprintf(stderr, gettext("LOADED DICTIONARY:\n%s\n%s\n"), aff, dic);
+ }
+ pMS[0] = new Hunspell(aff, dic, key);
+ dic_enc[0] = pMS[0]->get_dic_encoding();
+ dmax = 1;
+ if (pMS[0] && dicplus) while (dicplus) {
+ char * dicname2 = dicplus + 1;
+ dicplus = strchr(dicname2, ',');
+ if (dicplus) *dicplus = '\0';
+ free(aff);
+ free(dic);
+ aff = search(path, dicname2, ".aff");
+ dic = search(path, dicname2, ".dic");
+ if (aff && dic) {
+ if (dmax < DMAX) {
+ pMS[dmax] = new Hunspell(aff, dic, key);
+ dic_enc[dmax] = pMS[dmax]->get_dic_encoding();
+ dmax++;
+ } else fprintf(stderr, gettext("error - %s exceeds dictionary limit.\n"), dicname2);
+ } else if (dic) pMS[dmax-1]->add_dic(dic); // no affix file: extend the previous dictionary
+ }
+ } else {
+ fprintf(stderr,gettext("Can't open affix or dictionary files for dictionary named \"%s\".\n"), dicname);
+ exit(1);
+ }
+
+ /* open the private dictionaries */
+ if (HOME) {
+ strcpy(buf,HOME);
+#ifndef WIN32
+ strcat(buf,"/");
+#endif
+ if (!privdicname) {
+ strcat(buf,DICBASENAME);
+ strcat(buf,basename(dicname,DIRSEPCH));
+ load_privdic(buf, pMS[0]);
+ strcpy(buf,DICBASENAME);
+ strcat(buf,basename(dicname,DIRSEPCH));
+ load_privdic(buf, pMS[0]);
+ } else {
+ strcat(buf,privdicname);
+ load_privdic(buf, pMS[0]);
+ strcpy(buf,privdicname);
+ load_privdic(buf, pMS[0]);
+ }
+ }
+
+ if (arg_files==-1) {
+ pipe_interface(pMS, format, stdin);
+ } else if (filter_mode != NORMAL) {
+ for (int i = arg_files; i < argc; i++) {
+ // never hand a NULL stream to pipe_interface
+ FILE * f = exist(argv[i]) ? fopen(argv[i], "r") : NULL;
+ if (f) {
+ modified = 0;
+ currentfilename = argv[i];
+ pipe_interface(pMS, format, f);
+ fclose(f);
+ } else {
+ fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
+ exit(1);
+ }
+ }
+ } else if (filter_mode == NORMAL) {
+#ifdef HAVE_CURSES_H
+ initscr();
+ cbreak();
+ noecho();
+ nonl();
+ intrflush(stdscr,FALSE);
+
+ for (int i = arg_files; i < argc; i++) {
+ if (exist(argv[i])) {
+ modified = 0;
+ interactive_interface(pMS, argv[i], format);
+ } else {
+ fprintf(stderr, gettext("Can't open %s.\n"), argv[i]);
+ endwin();
+ exit(1);
+ }
+ }
+
+ clear();
+ refresh();
+ endwin();
+#else
+ fprintf(stderr, gettext("Hunspell has been compiled without Ncurses user interface.\n"));
+#endif
+ }
+
+ if (dicname) free(dicname);
+ if (privdicname) free(privdicname);
+ if (path) free(path);
+ if (aff) free(aff);
+ if (dic) free(dic);
+ if (wordchars) free(wordchars);
+ if (wordchars_utf16_free) free(wordchars_utf16);
+#ifdef HAVE_ICONV
+ free_utf_tbl();
+#endif
+ for (int i = 0; i < dmax; i++) delete pMS[i];
+ return 0;
+}
diff --git a/src/tools/hunzip.cxx b/src/tools/hunzip.cxx
new file mode 100644
index 0000000..5d1581d
--- /dev/null
+++ b/src/tools/hunzip.cxx
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hunzip.hxx"
+
+#define DESC "hunzip - decompress a hzip file to the standard output\n" \
+"Usage: hunzip file.hz [password]\n"
+
+/* Print the message `err` (a printf format with at most one %s, filled
+ * from `par`) to stderr and return 1, the failure exit status. */
+int fail(const char * err, const char * par) {
+    const int failure = 1;
+    fprintf(stderr, err, par);
+    return failure;
+}
+
+/* Decompress the hzip file argv[1] to the standard output, using the
+ * optional password argv[2]. Without arguments, or with -h, the usage
+ * text is printed and 1 is returned; otherwise 0 is returned. */
+int main(int argc, char** argv) {
+    Hunzip * h;
+    const char * s;
+    // argc < 2 also covers an empty argument vector
+    if (argc < 2 || strcmp(argv[1], "-h") == 0) return fail(DESC, NULL);
+    h = new Hunzip(argv[1], (argc > 2) ? argv[2] : NULL);
+    while ((s = h->getline())) printf("%s", s);
+    delete h; // release the decompressor and the file it holds
+    return 0;
+}
diff --git a/src/tools/hzip.c b/src/tools/hzip.c
new file mode 100644
index 0000000..cf760e8
--- /dev/null
+++ b/src/tools/hzip.c
@@ -0,0 +1,325 @@
+/* hzip: file compression for sorted dictionaries with optional encryption,
+ * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define CODELEN 65536
+#define BUFSIZE 65536
+#define EXTENSION ".hz"
+
+#define ESCAPE 31
+#define MAGIC "hz0"
+#define MAGIC_ENCRYPTED "hz1"
+
+#define DESC "hzip - dictionary compression utility\n" \
+"Usage: hzip [-h | -P password ] [file1 file2 ..]\n" \
+" -P password encrypted compression\n" \
+" -h display this help and exit\n"
+
+/* Kinds of Huffman tree items: a leaf for a 16-bit word of the input,
+ * the leaf of the terminal code, and an inner node with children. */
+enum { code_LEAF, code_TERM, code_NODE};
+
+/* One item of the Huffman tree built by get_freqdata/get_codetable. */
+struct item {
+ /* 16-bit input word of a leaf (0 for other items, see newitem) */
+ unsigned short word;
+ /* frequency of the word; for a node the sum of its children's counts */
+ int count;
+ /* one of code_LEAF, code_TERM, code_NODE */
+ char type;
+ /* children of a node, NULL for leaves */
+ struct item * left;
+ struct item * right;
+};
+
+int fail(const char * err, const char * par) {
+ fprintf(stderr, err, par);
+ return 1;
+}
+
+/* Walk the Huffman tree and store the code of every leaf in `table` as a
+ * newly allocated string of '1' (left branch) and '0' (right branch)
+ * characters. Leaves are stored at table[word], the terminal leaf at
+ * table[CODELEN].
+ *
+ * code - scratch buffer holding the path from the root; pass NULL on the
+ *        outermost call, which then allocates and frees it
+ * deep - depth of `tree`, i.e. the length of the path stored in `code`
+ */
+void code2table(struct item * tree, char **table, char * code, int deep) {
+    const int owns_code = (code == NULL);
+    if (owns_code) code = malloc(CODELEN);
+
+    /* left subtree: path extended by '1' */
+    code[deep] = '1';
+    if (tree->left != NULL) code2table(tree->left, table, code, deep + 1);
+
+    if (tree->type != code_NODE) {
+        /* the terminal code has its own slot after the 16-bit words */
+        const int slot = (tree->type == code_TERM) ? CODELEN : tree->word;
+        code[deep] = '\0';
+        table[slot] = malloc(deep + 1);
+        strcpy(table[slot], code);
+    }
+
+    /* right subtree: path extended by '0' */
+    code[deep] = '0';
+    if (tree->right != NULL) code2table(tree->right, table, code, deep + 1);
+
+    if (owns_code) free(code);
+}
+
+/* Allocate a tree item of type `t` with count `c` and the children `l`
+ * and `r`; its word is set to 0. */
+struct item * newitem(int c, struct item * l, struct item * r, int t) {
+    struct item * node = (struct item *) malloc(sizeof(struct item));
+    node->word = 0;
+    node->count = c;
+    node->type = t;
+    node->left = l;
+    node->right = r;
+    return node;
+}
+
+/* Count the 16-bit words (byte pairs) of the stream `f` and create one
+ * leaf item for every word that occurs, plus a final terminal item.
+ *
+ * dest     - receives the newly allocated array of items
+ * termword - receives the terminal word: bytes (1, last byte) if the
+ *            stream has an odd trailing byte, (0, 0) otherwise
+ *
+ * Returns the number of items stored in *dest, or -1 if the array could
+ * not be allocated. */
+int get_freqdata(struct item *** dest, FILE * f, unsigned short * termword) {
+    int freq[CODELEN];
+    int first, second, sym;
+    int used = 0;
+    struct item ** list;
+    union {
+        char c[2];
+        unsigned short word;
+    } pair;
+
+    memset(freq, 0, sizeof(freq));
+
+    /* count complete byte pairs */
+    for (;;) {
+        first = getc(f);
+        if (first == -1) break;
+        second = getc(f);
+        if (second == -1) break;
+        pair.c[0] = first;
+        pair.c[1] = second;
+        freq[pair.word]++;
+    }
+
+    /* `first` still holds a byte only if the stream ended in mid pair */
+    pair.c[0] = (first != -1) ? 1 : 0;
+    pair.c[1] = (first != -1) ? first : 0;
+
+    list = (struct item **) malloc((CODELEN + 1) * sizeof(struct item *));
+    *dest = list;
+    if (list == NULL) return -1;
+
+    for (sym = 0; sym < CODELEN; sym++) {
+        if (freq[sym] == 0) continue;
+        list[used] = newitem(freq[sym], NULL, NULL, code_LEAF);
+        list[used]->word = sym;
+        used++;
+    }
+    /* terminal sequence (also contains the last odd byte of the file) */
+    list[used] = newitem(1, NULL, NULL, code_TERM);
+    *termword = pair.word;
+    return used + 1;
+}
+
+/* Build the Huffman tree from the `n` items of `l` and fill `table` with
+ * the resulting codes (see code2table).
+ *
+ * In every round the two items with the smallest counts are merged into a
+ * new node, which replaces the smallest one; the other one is removed
+ * from the array. The array `l` is modified: l[0] ends up as the root. */
+void get_codetable(struct item **l, int n, char ** table) {
+    for (; n > 1; n--) {
+        int lowest = 0;
+        int second = 1;
+        int k;
+        for (k = 1; k < n; k++) {
+            if (l[k]->count < l[lowest]->count) {
+                second = lowest;
+                lowest = k;
+            } else if (l[k]->count < l[second]->count) {
+                second = k;
+            }
+        }
+        l[lowest] = newitem(l[lowest]->count + l[second]->count,
+            l[lowest], l[second], code_NODE);
+        /* close the gap left by the merged item */
+        memmove(&l[second], &l[second + 1], (n - second - 1) * sizeof(l[0]));
+    }
+    code2table(l[0], table, NULL, 0);
+}
+
+/* Append the bits of `code` (a string of '0' and '1' characters) to the
+ * bit buffer, most significant bit of each byte first.
+ *
+ * bits - in/out: number of bits already stored in bitbuf
+ *
+ * Whenever the buffer is full (BUFSIZE bytes) it is written to `f` and
+ * the bit count restarts at 0. Returns 0 on success, 1 on a write error. */
+int write_bits(FILE *f, char * bitbuf, int *bits, char * code) {
+    for (; *code != '\0'; code++) {
+        const int pos = *bits;
+        const int bit = (*code) - '0';
+        const int offset = pos % 8;
+
+        if (offset == 0) {
+            /* first bit of a byte: overwrite whatever the byte held */
+            bitbuf[pos / 8] = bit << 7;
+        } else {
+            bitbuf[pos / 8] |= (bit << (7 - offset));
+        }
+        *bits = pos + 1;
+
+        if (*bits == BUFSIZE * 8) {
+            if (fwrite(bitbuf, 1, BUFSIZE, f) != BUFSIZE) return 1;
+            *bits = 0;
+        }
+    }
+    return 0;
+}
+
+/* Write the compressed file: header, code table and the encoded data.
+ *
+ * table - Huffman codes as '0'/'1' strings, indexed by 16-bit word;
+ *         table[CODELEN] is the terminal code
+ * n     - number of codes (record count written to the header)
+ * f     - input stream, read as byte pairs
+ * f2    - output stream
+ * tw    - terminal word, written as the code id of the terminal code
+ * key   - password or NULL; when set, the header fields and the code
+ *         table are XORed with the bytes of the key, used cyclically
+ *
+ * Returns 0 on success, 1 if writing a code or the data fails. */
+int encode_file(char ** table, int n, FILE *f, FILE *f2, unsigned short tw, char * key) {
+ char bitbuf[BUFSIZE];
+ int i, bits = 0;
+ unsigned char cl, ch;
+ int cx[2];
+ union {
+ char c[2];
+ unsigned short word;
+ } u;
+ /* enc walks through the key and wraps around at its end */
+ char * enc = key;
+
+ /* header and codes */
+ fprintf(f2, "%s", (key ? MAGIC_ENCRYPTED : MAGIC)); /* 3-byte HEADER */
+ cl = (unsigned char) (n & 0x00ff);
+ ch = (unsigned char) (n >> 8);
+ if (key) {
+ unsigned char cs;
+ /* check sum: XOR of all bytes of the key */
+ for (cs = 0; *enc; enc++) cs ^= *enc;
+ fprintf(f2, "%c", cs); /* 1-byte check sum */
+ enc = key;
+ ch ^= *enc;
+ if ((*(++enc)) == '\0') enc = key;
+ cl ^= *enc;
+ }
+ fprintf(f2, "%c%c", ch, cl); /* upper and lower byte of record count */
+ for (i = 0; i < BUFSIZE; i++) bitbuf[i] = '\0';
+ /* one record per existing code: code id, code length in bits, code bits */
+ for (i = 0; i < CODELEN + 1; i++) if (table[i]) {
+ int nmemb;
+ u.word = (unsigned short) i;
+ if (i == CODELEN) u.word = tw;
+ if (key) {
+ if (*(++enc) == '\0') enc = key;
+ u.c[0] ^= *enc;
+ if (*(++enc) == '\0') enc = key;
+ u.c[1] ^= *enc;
+ }
+ fprintf(f2, "%c%c", u.c[0], u.c[1]); /* 2-character code id */
+ /* pack the code into the start of bitbuf */
+ bits = 0;
+ if (write_bits(f2, bitbuf, &bits, table[i]) != 0)
+ return 1;
+ if (key) {
+ if (*(++enc) == '\0') enc = key;
+ fprintf(f2, "%c", ((unsigned char) bits) ^ *enc);
+ /* cl is reused here as the index of the code byte to encrypt */
+ for (cl = 0; cl <= bits/8; cl++) {
+ if (*(++enc) == '\0') enc = key;
+ bitbuf[cl] ^= *enc;
+ }
+ } else
+ fprintf(f2, "%c", (unsigned char) bits); /* 1-byte code length */
+ nmemb = bits/8 + 1;
+ if (fwrite(bitbuf, 1, bits/8 + 1, f2) != nmemb) /* x-byte code */
+ return 1;
+ }
+
+ /* file encoding */
+ bits = 0;
+ /* replace every complete byte pair of the input by its code */
+ while((cx[0] = getc(f)) != -1 && (cx[1] = getc(f)) != -1) {
+ u.c[0] = cx[0];
+ u.c[1] = cx[1];
+ if (write_bits(f2, bitbuf, &bits, table[u.word]) != 0)
+ return 1;
+ }
+ /* terminal suffixes */
+ if (write_bits(f2, bitbuf, &bits, table[CODELEN]) != 0)
+ return 1;
+ /* flush the bits that have not filled the buffer yet */
+ if (bits > 0)
+ {
+ int nmemb = bits/8 + 1;
+ if (fwrite(bitbuf, 1, nmemb, f2) != nmemb)
+ return 1;
+ }
+ return 0;
+}
+
+/* Prefix-suffix encode the lines of `f` into `tempfile`.
+ *
+ * For every newline terminated line the following is written:
+ *  - the part of the line that is neither a prefix (at most 29 bytes) nor
+ *    a suffix (2..15 bytes) shared with the previous line; bytes below 47
+ *    other than tab and space are preceded by ESCAPE,
+ *  - if a suffix is shared, one byte holding its length + 31,
+ *  - one byte holding the length of the shared prefix (9, the code of the
+ *    tab character, is stored as 30), which also ends the record.
+ * A line without trailing newline is written without prefix/suffix
+ * compression and without the final length byte.
+ *
+ * Returns 0 on success, 1 on a write error. */
+int prefixcompress(FILE *f, FILE *tempfile) {
+    char buf[BUFSIZE];
+    char buf2[BUFSIZE * 2];
+    char prev[BUFSIZE];
+    int prevlen = 0;
+    while(fgets(buf,BUFSIZE,f)) {
+        int i, j, k, m, c=0;
+        int pfx = prevlen;
+        char * p = buf2;
+        m = j = 0;
+        /* j: length of the prefix shared with the previous line */
+        for (i = 0; buf[i]; i++) {
+            if ((pfx > 0) && (buf[i] == prev[i])) {
+                j++;
+            } else pfx = 0;
+        }
+        if (i > 0 && buf[i - 1] == '\n') {
+            if (j == i) j--; /* line duplicate */
+            if (j > 29) j = 29;
+            c = j;
+            if (c == '\t') c = 30;
+            /* common suffix: test the limits first, so that prev[] is never
+             * read before its start (first line, short previous line) */
+            for (; m < 15 && m < i - j - 1 && m + 2 <= prevlen &&
+                buf[i - m - 2] == prev[prevlen - m - 2]; m++);
+            if (m == 1) m = 0;
+        } else {
+            j = 0;
+            m = -1;
+        }
+        /* copy the part between the shared prefix and the shared suffix */
+        for (k = j; k < i - m - 1; k++, p++) {
+            if (((unsigned char) buf[k]) < 47 && buf[k] != '\t' && buf[k] != ' ') {
+                *p = ESCAPE;
+                p++;
+            }
+            *p = buf[k];
+        }
+        if (m > 0) {
+            *p = m + 31; /* 33-46 */
+            p++;
+        }
+        if (i > 0 && buf[i - 1] == '\n') {
+            size_t nmemb = p - buf2 + 1;
+            *p = c;
+            if (fwrite(buf2, 1, nmemb, tempfile) != nmemb)
+                return 1;
+        } else {
+            size_t nmemb = p - buf2;
+            if (fwrite(buf2, 1, nmemb, tempfile) != nmemb)
+                return 1;
+        }
+        memcpy(prev, buf, i);
+        prevlen = i;
+    }
+    return 0;
+}
+
+/* Free a Huffman tree built by get_codetable (NULL is accepted). */
+static void free_tree(struct item * node) {
+    if (!node) return;
+    free_tree(node->left);
+    free_tree(node->right);
+    free(node);
+}
+
+/* Compress `filename` into "<filename>" + EXTENSION.
+ *
+ * The input is prefix-suffix encoded into a temporary file, which is then
+ * Huffman encoded into the output file. A non-NULL `key` selects the
+ * encrypted format.
+ *
+ * Returns 0 on success and 1 (after printing a message to stderr) on
+ * failure. */
+int hzip(const char * filename, char * key) {
+    struct item ** list;
+    char * table[CODELEN + 1];
+    int n, i;
+    char out[BUFSIZE];
+    FILE *f, *f2, *tempfile;
+    unsigned short termword;
+    /* the output name must fit into out[] */
+    if (strlen(filename) + strlen(EXTENSION) >= sizeof(out))
+        return fail("hzip: %s: file name too long\n", filename);
+    strcpy(out, filename);
+    strcat(out, EXTENSION);
+    f = fopen(filename, "r");
+    if (!f) return fail("hzip: %s: Permission denied\n", filename);
+    tempfile = tmpfile();
+    if (!tempfile) {
+        fclose(f);
+        return fail("hzip: cannot create temporary file\n", NULL);
+    }
+    f2 = fopen(out, "wb");
+    if (!f2) {
+        fclose(tempfile);
+        fclose(f);
+        return fail("hzip: %s: Permission denied\n", out);
+    }
+    /* CODELEN + 1 entries: the last one belongs to the terminal code */
+    for (n = 0; n <= CODELEN; n++) table[n] = NULL;
+    if (prefixcompress(f, tempfile) != 0) {
+        fclose(f2);
+        fclose(tempfile);
+        fclose(f);
+        return fail("hzip: cannot write file\n", NULL);
+    }
+    rewind(tempfile);
+    n = get_freqdata(&list, tempfile, &termword);
+    if (n < 0) {
+        /* the item array could not be allocated */
+        fclose(f2);
+        fclose(tempfile);
+        fclose(f);
+        return fail("hzip: out of memory\n", NULL);
+    }
+    get_codetable(list, n, table);
+    rewind(tempfile);
+    n = encode_file(table, n, tempfile, f2, termword, key);
+    /* get_codetable leaves the root of the tree in list[0] */
+    free_tree(list[0]);
+    free(list);
+    for (i = 0; i <= CODELEN; i++) free(table[i]);
+    /* buffered output is written at close: a failure is a write error too */
+    if (fclose(f2) != 0) n = 1;
+    fclose(tempfile);
+    fclose(f);
+    if (n != 0) return fail("hzip: cannot write file\n", NULL);
+    return n;
+}
+
+/* hzip command line: compress every file argument in turn.
+ *
+ * An argument starting with "-h" prints the usage text, one starting with
+ * "-P" takes the next argument as the password for the files that follow;
+ * any other option is an error. Returns 0 if at least one file was given
+ * and all files were compressed, 1 otherwise. */
+int main(int argc, char** argv) {
+
+    int pos;
+    int compressed = 0; /* set once a file has been processed */
+    char * key = NULL;
+
+    for (pos = 1; pos < argc; pos++) {
+        char * arg = argv[pos];
+
+        if (arg[0] != '-') {
+            if (hzip(arg, key) != 0) return 1;
+            compressed = 1;
+            continue;
+        }
+
+        switch (arg[1]) {
+        case 'h':
+            return fail(DESC, NULL);
+        case 'P':
+            if (pos + 1 == argc)
+                return fail("hzip: missing password\n", NULL);
+            key = argv[++pos];
+            break;
+        default:
+            return fail("hzip: no such option: %s\n", arg);
+        }
+    }
+
+    if (!compressed) return fail("hzip: need a filename parameter\n", NULL);
+    return 0;
+}
diff --git a/src/tools/ispellaff2myspell b/src/tools/ispellaff2myspell
new file mode 100644
index 0000000..5d60c09
--- /dev/null
+++ b/src/tools/ispellaff2myspell
@@ -0,0 +1,472 @@
+#!/usr/bin/perl -w
+# -*- coding: iso-8859-1 -*-
+# $Id: ispellaff2myspell,v 1.2 2010/02/23 12:05:51 caolan Exp $
+#
+# (C) 2002-2005 Agustin Martin Domingo <agustin.martin@hispalinux.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+# Print the command line help text on STDOUT and terminate the program.
+# Called when option parsing fails or when no affix file was given.
+# Note that the plain `exit` below ends the script with status 0.
+sub usage {
+ print "ispellaff2myspell: A program to convert ispell affix tables to myspell format
+(C) 2002-2005 Agustin Martin Domingo <agustin.martin\@hispalinux.es> License: GPL
+
+Usage:
+ ispellaff2myspell [options] <affixfile>
+
+ Options:
+ --affixfile=s Affix file
+ --bylocale Use current locale setup for upper/lowercase
+ conversion
+ --charset=s Use specified charset for upper/lowercase
+ conversion (defaults to latin1)
+ --debug Print debugging info
+ --extraflags Allow some non alphabetic flags
+ --lowercase=s Lowercase string
+ --myheader=s Header file
+ --printcomments Print commented lines in output
+ --replacements=s Replacements file
+ --split=i Split flags with more that i entries
+ --uppercase=s Uppercase string
+ --wordlist=s Still unused
+
+ Currently allowed valued for charset are: latin1, latin2, latin3
+
+This script does not create the dict file. Something like
+
+( echo `cat mydict.words+ | wc -l`; cat mydict.words+ ) > mydict.dict
+
+should do the work, with mydict.words+ being the ispell munched wordlist
+
+";
+ exit;
+}
+
+# Write the arguments (joined the way "@_" interpolates them) to STDERR,
+# but only when the global $debug flag is set.
+sub debugprint {
+    print STDERR "@_" if $debug;
+}
+
+# Print the affix entries collected in the global @flag_array for the
+# current flag, then reset the per-flag state (@flag_array, $flagname,
+# $flagcombine).
+#
+# Each printed group starts with a header line
+#   "$myaffix $flagname $flagcombine <number of entries>"
+# followed by the entries and an empty line. Nothing is printed when no
+# entries were collected.
+sub shipoutflag{
+ my $flag_entries=scalar @flag_array;
+
+ if ( $flag_entries != 0 ){
+ if ( $split ){
+ # --split given: emit the entries in chunks of at most $split lines
+ while ( @flag_array ){
+ my @flag_subarray=splice(@flag_array,0,$split);
+ my $subflag_entries=scalar @flag_subarray;
+ # every chunk except the last one gets a trailing " S" on its header
+ if ( scalar @flag_array ){
+ print "$myaffix $flagname $flagcombine $subflag_entries S\n";
+ } else {
+ print "$myaffix $flagname $flagcombine $subflag_entries\n";
+ }
+ print join("\n",@flag_subarray);
+ print "\n\n";
+ }
+ } else {
+ print "$myaffix $flagname $flagcombine $flag_entries\n";
+ print join("\n",@flag_array);
+ print "\n\n";
+ }
+ }
+ @flag_array=();
+ $flagname='';
+ $flagcombine='';
+}
+
+# Return a lowercased copy of the string passed as first argument.
+#
+# With $bylocale set, Perl's lc is used under `use locale`. Otherwise the
+# conversion is a tr/// from $uppercase to $lowercase; for the charsets
+# known here (latin0..latin3) those two globals are overwritten with the
+# built-in tables, for any other charset the values given by the user are
+# used and the script dies when both are empty.
+sub mylc{
+ my $inputstring=shift;
+ my $outputstring;
+
+ if ( $bylocale ){
+ {
+ use locale;
+ $outputstring = lc $inputstring;
+ }
+ } else {
+ if ( $charset eq "latin0" ){
+ $lowercase='a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ½¨¸';
+ $uppercase='A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ¼¦´';
+ } elsif ( $charset eq "latin1" ){
+ $lowercase='a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ';
+ $uppercase='A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ';
+ } elsif ( $charset eq "latin2" ){
+ $lowercase='a-z±³µ¶¹º»¼¾¿àáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ';
+ $uppercase='A-Z¡£¥¦©ª«¬®¯ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ';
+ } elsif ( $charset eq "latin3" ){
+ $lowercase='a-z±¶¹º»¼¿àáâäåæçèéêëìíîïñòóôõö÷øùúûüýþ';
+ $uppercase='A-Z¡¦©ª«¬¯ÀÁÂÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖ×ØÙÚÛÜÝÞ';
+# } elsif ( $charset eq "other_charset" ){
+# die "latin2 still unimplemented";
+ } else {
+ if ( not $lowercase and not $uppercase ){
+ die "Unsupported charset [$charset]
+
+Explicitly use --lowercase=string and --uppercase=string
+options. Remember that both string must match exactly, but
+case changed.
+";
+ }
+ }
+ $outputstring=$inputstring;
+ # tr/// does not interpolate variables, hence the string eval
+ eval "\$outputstring=~tr/$uppercase/$lowercase/";
+ }
+ return $outputstring;
+}
+
+# Decide whether an ispell flag name may be written to the myspell output.
+#
+# Argument: the flag name as found in the ispell affix file.
+# Returns:  the flag itself when it contains at least one ASCII letter;
+#           otherwise, when extra flags are enabled ($hasextraflags) and the
+#           flag starts with one of the prefixes stored as keys of
+#           %theextraflags, the flag with that prefix removed;
+#           otherwise the empty string (flag rejected).
+#
+# The sub is declared without a prototype: it takes one argument, so an
+# empty prototype "()" would make any call not written as &validate_flag(...)
+# a compile-time error.
+sub validate_flag {
+    my $flag = shift;
+    if ($flag=~m/[a-zA-Z]+/){
+        return $flag;
+    } elsif ( $hasextraflags ){
+        # each key is used as a regular expression anchored at the start
+        foreach ( keys %theextraflags ){
+            if ($flag =~ m/^$_/){
+                $flag =~ s/^$_//;
+                return $flag;
+            }
+        }
+    }
+    return '';
+}
+
+# Copy the replacement table found in the given file to STDOUT.
+#
+# Argument: name of a file containing "REP ..." lines.
+# Only lines starting with REP are kept; a line of the form "REP <number>"
+# (an existing table header) is dropped, and a fresh "REP <count>" header
+# with the number of kept lines is printed before them. CR and CRLF line
+# endings are removed. Dies when the file cannot be opened.
+sub process_replacements{
+    my $file = shift;
+    my @replaces = ();
+
+    # three-argument open on a lexical handle: the file name is never
+    # interpreted as a mode and no global bareword handle is left behind
+    open (my $replace_fh, '<', $file) ||
+        die "Error: Could not open replacements file: $file\n";
+    while (<$replace_fh>){
+        next unless m/^REP[\s\t]*\D.*/;
+        next if m/^REP\s+[0-9]+/;
+        s/\015\012//;
+        s/\015//;
+        chomp;
+        push @replaces, $_;
+    }
+    close $replace_fh;
+    my $number = scalar @replaces;
+    print "REP $number\n";
+    foreach ( @replaces ){
+        print $_ . "\n";
+    }
+}
+
+# -----------------------------------------------------------
+# Now the program starts, after the functions are defined
+# -----------------------------------------------------------
+
+# Command line handling: give every global a defined start value, let
+# Getopt::Long fill in the options, then check option combinations.
+use Getopt::Long;
+
+# Initializing option values
+$affixfile = '';
+$bylocale = '';
+$charset = '';
+$debug = '';
+$lowercase = '';
+$myheader = '';
+$printcomments = '';
+$replacements = '';
+$split = '';
+$uppercase = '';
+$wordlist = '';
+$hasextraflags = '';
+@flag_array = ();
+%theextraflags = ();
+# Initializing root values
+$rootremove = "0";
+$rootname = '';
+$addtoroot = '';
+$comment = '';
+# Initializing flag values
+$flagname = '';
+$flagcombine = '';
+$inflags = '';
+
+# --extraflags takes an optional value and may be repeated: every non-empty
+# value is remembered as a key of %theextraflags
+GetOptions ('affixfile=s' => \$affixfile,
+ 'bylocale' => \$bylocale,
+ 'charset=s' => \$charset,
+ 'debug' => \$debug,
+ 'extraflags:s' => sub {
+ $hasextraflags = 1;
+ shift;
+ $theflag = shift;
+ $theextraflags{$theflag}++ if $theflag},
+ 'lowercase=s' => \$lowercase,
+ 'myheader=s' => \$myheader,
+ 'printcomments' => \$printcomments,
+ 'replacements=s'=> \$replacements,
+ 'split=i' => \$split,
+ 'uppercase=s' => \$uppercase,
+ 'wordlist=s' => \$wordlist) or usage;
+
+# the affix file may also be given as first non-option argument
+if ( not $affixfile ){
+ $affixfile=shift or usage;
+}
+
+if ( $charset and ( $lowercase or $uppercase )){
+ die "Error: charset and lowercase/uppercase options
+are incompatible. Use either charset or lowercase/uppercase options to
+specify the patterns
+"
+} elsif ( not $lowercase and not $uppercase and not $charset ){
+ $charset="latin1";
+}
+
+# --extraflags without any value: fall back to a single prefix, the
+# two-character string \\ (a regular expression matching one backslash)
+if ( scalar(keys %theextraflags) == 0 && $hasextraflags ){
+ $theextraflags{"\\\\"}++;
+}
+
+debugprint "$affixfile $charset";
+
+# three-argument open: the file name is taken literally, never as a mode
+open (AFFIXFILE, '<', $affixfile) ||
+    die "Error: Could not open affix file: $affixfile";
+
+# Copy the user supplied myspell header, followed by an empty line, in
+# front of the generated affix table. The file is read by Perl itself so
+# that its name never passes through a shell. An unreadable header is
+# reported on STDERR and treated as empty; the conversion goes on.
+if ( $myheader ){
+    my $myspell_header = '';
+    if ( open (my $header_fh, '<', $myheader) ){
+        local $/;                       # read the whole file at once
+        $myspell_header = <$header_fh>;
+        $myspell_header = '' unless defined $myspell_header;
+        close $header_fh;
+    } else {
+        print STDERR "Error: Could not read header file: $myheader: $!\n";
+    }
+    print $myspell_header . "\n";
+}
+
+# Main conversion loop: read the ispell affix file line by line.
+# "prefixes"/"suffixes" lines select PFX or SFX, a "flag" line starts a new
+# flag (after shipping out the previous one), and every other line seen
+# while inside a flag is converted to one myspell affix entry and queued
+# in @flag_array.
+while (<AFFIXFILE>){
+ chomp;
+ if (/^\s*\#.*/){
+ debugprint "Ignoring line $.\n";
+ print "$_\n" if $printcomments;
+ } elsif (/^\s*$/){
+ debugprint "Ignoring line $.\n";
+ } elsif (/^\s*prefixes/){
+ debugprint "Prefixes starting in line $.\n";
+ $affix="PFX";
+ } elsif (/^\s*suffixes/){
+ debugprint "Suffixes starting in line $.\n";
+ $affix="SFX";
+ } elsif (/^[\s\t]*flag.*/){
+ next if not $affix; # In case we are still in the preamble
+ shipoutflag if $inflags;
+ $inflags="yes";
+ # reduce the line to the bare flag name: drop "flag" and the ":" part
+ s/^[\s\t]*flag[\s\t]*//;
+ s/[\s\t]*:.*$//;
+ debugprint "Found flag $_ in line $.\n";
+
+ # a "*" in the flag name marks it as combinable (Y), else N
+ if (/\*/){
+ s/[\*\s]//g;
+ $flagcombine="Y";
+ debugprint "Flag renamed to $_ with combine=$flagcombine\n";
+ } else {
+ $flagcombine="N";
+ }
+
+ # entries of a rejected flag are still written, but commented out
+ if ( $flagname = &validate_flag($_) ){
+ $myaffix = $affix;
+ } else {
+ $myaffix = "\# $affix";
+ $flagname = $_;
+ print STDERR "Ignoring invalid flag $flagname in line $.\n";
+ }
+ } elsif ( $affix and $inflags ) {
+ # everything after the first '#' is kept as comment text
+ ($rootname,@comments) = split('#',$_);
+ $comment = '# ' . join('#',@comments);
+
+ $rootname =~ s/\s*//g;
+ $rootname = mylc $rootname;
+ # left of '>' is the condition, right of it the change to apply
+ ($rootname,$addtoroot) = split('>',$rootname);
+
+ # a leading '-' means "-strip,append"
+ if ( $addtoroot =~ s/^\-//g ){
+ ($rootremove,$addtoroot) = split(',',$addtoroot);
+ $addtoroot = "0" unless $addtoroot;
+ $addtoroot = "0" if ( $addtoroot eq "-");
+ } else {
+ $rootremove = "0";
+ }
+ $addtoroot =~ s/\\\-/\-/g; # prefix ANTI\- to anti-
+
+ # with the wildcard condition '.', the stripped string is used as condition
+ if ( $rootname eq '.' && $rootremove ne "0" ){
+ $rootname = $rootremove;
+ }
+
+ debugprint "$rootname, $addtoroot, $rootremove\n";
+ if ( $printcomments ){
+ $affix_line=sprintf("%s %s %-5s %-11s %-24s %s",
+ $myaffix, $flagname, $rootremove,
+ $addtoroot, $rootname, $comment);
+ } else {
+ $affix_line=sprintf("%s %s %-5s %-11s %s",
+ $myaffix, $flagname, $rootremove,
+ $addtoroot, $rootname);
+ }
+ $rootremove = "0";
+ $rootname = '';
+ $addtoroot = '';
+ $comment = '';
+ @comments = ();
+ push @flag_array,$affix_line;
+ debugprint "$affix_line\n";
+ } else {
+ #
+ }
+}
+# flush the entries of the last flag
+shipoutflag;
+
+close AFFIXFILE;
+
+if ( $replacements ){
+ &process_replacements($replacements);
+}
+
+__END__
+
+=head1 NAME
+
+B<ispellaff2myspell> - A program to convert ispell affix tables to myspell format.
+
+=head1 SYNOPSIS
+
+ ispellaff2myspell [options] <affixfile> --myheader your_header
+
+ Options:
+
+ --affixfile=s Affix file
+ --bylocale Use current locale setup for upper/lowercase
+ conversion
+ --charset=s Use specified charset for upper/lowercase
+ conversion (defaults to latin1)
+ --debug Print debugging info
+ --extraflags=s Allow some non alphabetic flags
+ --lowercase=s Lowercase string
+ --myheader=s Header file
+ --printcomments Print commented lines in output
+ --replacements=s Replacements file
+ --split=i Split flags with more than i entries
+ --uppercase=s Uppercase string
+
+=head1 DESCRIPTION
+
+B<ispellaff2myspell> is a script that will convert ispell affix tables
+to myspell format in a more or less successful way.
+
+This script does not create the dict file. Something like
+
+( echo `cat mydict.words+ | wc -l`; cat mydict.words+ ) > mydict.dict
+
+should do the work, with mydict.words+ being the munched wordlist
+
+=head1 OPTIONS
+
+=over 8
+
+=item B<--affixfile=s>
+
+Affix file. You can put it directly in the command line.
+
+=item B<--bylocale>
+
+Use current locale setup for upper/lowercase conversion. Make sure
+that the selected locale matches the dictionary one, or you might get
+into trouble.
+
+=item B<--charset=s>
+
+Use specified charset for upper/lowercase conversion (defaults to latin1).
+Currently allowed values for charset are: latin0, latin1, latin2, latin3.
+
+=item B<--debug>
+
+Print some debugging info.
+
+=item B<--extraflags:s>
+
+Allows some non alphabetic flags.
+
+When invoked with no value the supported flags are currently those
+corresponding to chars represented with the escape char B<\> as
+first char. B<\> will be stripped.
+
+When given with the flag prefix will allow that flag and strip the
+given prefix. Be careful when giving the prefix to properly escape chars,
+e.g. you will need B<-e "\\\\"> or B<-e '\\'> for flags like B<\[> to be stripped to
+B<[>. Otherwise you might even get errors. Use B<-e "^"> to allow all
+flags and pass them unmodified.
+
+You will need a call to -e for each flag type, e.g.,
+B<-e "\\\\" -e "~\\\\"> (or B<-e '\\' -e '~\\'>).
+
+When a prefix is explicitly set, the default value (anything starting with B<\>)
+is disabled and you need to enable it explicitly as in the previous example.
+
+=item B<--lowercase=s>
+
+Lowercase string. Manually set the string of lowercase chars. This
+requires B<--uppercase> having exactly that string but uppercase.
+
+=item B<--myheader=s>
+
+Header file. The myspell aff header. You need to write it
+manually. This can contain everything you want to be before the affix table
+
+=item B<--printcomments>
+
+Print commented lines in output.
+
+=item B<--replacements=file>
+
+Add a pre-defined replacements table taken from 'file' to the .aff file.
+Will skip lines not beginning with REP, and set the replacements number
+appropriately.
+
+=item B<--split=i>
+
+Split flags with more than i entries. This can be of interest for flags
+having a lot of entries. Will split the flag in chunks containing B<i>
+entries.
+
+=item B<--uppercase=s>
+
+Uppercase string. Manually set the string of uppercase chars. This
+requires B<--lowercase> having exactly that string but lowercase.
+
+=back
+
+If your encoding is currently unsupported you can send me a file with
+the two strings of lower and uppercase chars. Note that they must match
+exactly but case changed. It will look something like
+
+ $lowercase='a-zàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþ';
+ $uppercase='A-ZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ';
+
+=head1 SEE ALSO
+
+The OpenOffice.org Lingucomponent Project home page
+
+L<http://lingucomponent.openoffice.org/index.html>
+
+and the document
+
+L<http://lingucomponent.openoffice.org/affix.readme>
+
+that provides information about the basics of the myspell affix file format.
+
+You can also take a look at
+
+ /usr/share/doc/libmyspell-dev/affix.readme.gz
+ /usr/share/doc/libmyspell-dev/README.compoundwords
+ /usr/share/doc/libmyspell-dev/README.replacetable
+
+in your Debian system.
+
+=head1 AUTHORS
+
+Agustin Martin <agustin.martin@hispalinux.es>
+
+=cut
diff --git a/src/tools/makealias b/src/tools/makealias
new file mode 100755
index 0000000..60d93b6
--- /dev/null
+++ b/src/tools/makealias
@@ -0,0 +1,115 @@
+#!/bin/sh
+# makealias: make alias compressed dic and aff files
+# Usage: makealias file.dic file.aff (not makealias file.aff file.dic!)
+# Version: 2007-10-26
+#
+# Writes <dic>_alias.dic and <aff>_alias.aff into the current directory.
+# Flag sets and the remaining fields of dictionary/affix lines are replaced
+# by numbers; the numbered AF and AM tables are written to the new aff file.
+# All file names are quoted so that paths with blanks work.
+
+case $# in
+0|1)
+echo 'makealias: make alias compressed dic and aff files
+Usage: makealias file.dic file.aff (not makealias file.aff file.dic!)' >&2
+# wrong invocation: report failure to the caller
+exit 1;;
+esac
+
+DIC=`basename "$1" .dic`
+AFF=`basename "$2" .aff`
+
+# FLAG type definition must be before alias definitions
+grep '^FLAG' "$2" >"${AFF}_alias.aff"
+
+# The awk program prints the new dictionary on stdout and the rewritten
+# affix lines on stderr (collected in a temporary file below); the AF/AM
+# tables are appended directly to the file named by the variable afffile.
+awk -v afffile="${AFF}_alias.aff" 'BEGIN{n=1;m=1}
+function cutslash(st) {
+ if (split(st,t,"/") > 1) return t[1]
+ return st
+}
+function ltrim(st) {
+ sub(/^ +/,"",st)
+ return st
+}
+FILENAME ~ /.dic$/ && $1 ~ "/[^ \t]" {
+ split($1,t,"/")
+ if(!a[t[2]]){
+ a[t[2]]=n
+ b[n]=t[2]
+ n++
+ }
+ if (NF > 1) {
+ $1 = ""
+ if(!a2[$0]){
+ a2[$0]=m
+ c[m]=$0
+ m++
+ }
+ print t[1]"/"a[t[2]] "\t" a2[$0]
+ } else {
+ print t[1]"/"a[t[2]]
+ }
+ next
+}
+FILENAME ~ /.dic$/ && NF > 1 {
+ x = $1
+ $1 = ""
+ if(!a2[$0]){
+ a2[$0]=m
+ c[m]=$0
+ m++
+ }
+ print cutslash(x) "\t" a2[$0]
+ next
+}
+FILENAME ~ /.dic$/ { print cutslash($1) }
+FILENAME ~ /.aff$/ && /^[PS]FX/ && ($4 ~ /\/[^ ]/) && NF > 4 {
+ split($4,t,"/")
+ if(!a[t[2]]){
+ a[t[2]]=n
+ b[n]=t[2]
+ n++
+ }
+ begin = $1 " " $2 " " $3 " " (t[1]"/"a[t[2]]) " " $5
+ if ($6!="") ok = 1; else ok = 0;
+ $1 = ""
+ $2 = ""
+ $3 = ""
+ $4 = ""
+ $5 = ""
+ if(ok){
+ if(!a2[$0]){
+ a2[$0]=m
+ c[m]=$0
+ m++
+ }
+ print begin " " a2[$0] >>"/dev/stderr"
+ } else print begin >>"/dev/stderr"
+ next
+}
+FILENAME ~ /.aff$/ && /^[PS]FX/ && NF > 4 {
+ begin = $1 " " $2 " " $3 " " cutslash($4) " " $5
+ if ($6!="") ok = 1; else ok = 0;
+ $1 = ""
+ $2 = ""
+ $3 = ""
+ $4 = ""
+ $5 = ""
+ if(ok) {
+ if (!a2[$0]){
+ a2[$0]=m
+ c[m]=$0
+ m++
+ }
+ print begin " " a2[$0] >>"/dev/stderr"
+ } else print begin >>"/dev/stderr"
+ next
+}
+FILENAME ~ /.aff$/ { print $0 >>"/dev/stderr" }
+END{
+ if (n>1) {
+ print "AF", n-1 >>afffile
+ for(i=1;i<n;i++) print "AF", b[i],"#",i >>afffile
+ }
+ if (m>1) {
+ print "AM", m-1 >>afffile
+ for(i=1;i<m;i++) print "AM " ltrim(c[i]) >>afffile
+ }
+}' "$1" "$2" >"${DIC}_alias.dic" 2>"${AFF}_alias.$$"
+# the FLAG line was already written first, so it is filtered out here
+grep -v '^FLAG' "${AFF}_alias.$$" >>"${AFF}_alias.aff"
+echo "output: ${DIC}_alias.dic, ${AFF}_alias.aff"
+rm "${AFF}_alias.$$"
diff --git a/src/tools/munch.c b/src/tools/munch.c
new file mode 100644
index 0000000..2087efa
--- /dev/null
+++ b/src/tools/munch.c
@@ -0,0 +1,832 @@
+/* Munch a word list and generate a smaller root word list with affixes*/
+
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef __linux__
+#include <error.h>
+#include <errno.h>
+#include <sys/mman.h>
+#endif
+
+#include "munch.h"
+
+/*
+ * munch word_list_file affix_file
+ *
+ * Reads the affix file and the word list, then for every word looks for
+ * root words (plus prefix/suffix) already present in the list that can
+ * generate it. Words that can be generated get dropped; the roots are
+ * kept and receive the affix flag. The reduced list is written to stdout,
+ * preceded by the number of kept words. Progress goes to stderr.
+ * Exits with status 1 on usage or file errors.
+ */
+int main(int argc, char** argv)
+{
+
+ int i, j, k, n;
+ int rl, p , nwl;
+ int al;
+
+ FILE * wrdlst;
+ FILE * afflst;
+
+ char *nword, *wf, *af;
+ char as[(MAX_PREFIXES + MAX_SUFFIXES)];
+ char * ap;
+
+ struct hentry * ep;
+ struct hentry * ep1;
+ struct affent * pfxp;
+ struct affent * sfxp;
+
+ /* first parse the command line options */
+ /* arg1 - wordlist, arg2 - affix file */
+
+ if (argv[1]) {
+ wf = mystrdup(argv[1]);
+ } else {
+ fprintf(stderr,"correct syntax is:\n");
+ fprintf(stderr,"munch word_list_file affix_file\n");
+ exit(1);
+ }
+ if (argv[2]) {
+ af = mystrdup(argv[2]);
+ } else {
+ fprintf(stderr,"correct syntax is:\n");
+ fprintf(stderr,"munch word_list_file affix_file\n");
+ exit(1);
+ }
+
+ /* open the affix file */
+ afflst = fopen(af,"r");
+ if (!afflst) {
+ fprintf(stderr,"Error - could not open affix description file\n");
+ exit(1);
+ }
+
+ /* step one is to parse the affix file building up the internal
+ affix data structures */
+
+ numpfx = 0;
+ numsfx = 0;
+
+ if (parse_aff_file(afflst)) {
+ fprintf(stderr,"Error - in affix file loading\n");
+ exit(1);
+ }
+ fclose(afflst);
+
+ fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx);
+
+ /* affix file is now parsed so create hash table of wordlist on the fly */
+
+ /* open the wordlist */
+ wrdlst = fopen(wf,"r");
+ if (!wrdlst) {
+ fprintf(stderr,"Error - could not open word list file\n");
+ exit(1);
+ }
+
+ if (load_tables(wrdlst)) {
+ fprintf(stderr,"Error building hash tables\n");
+ exit(1);
+ }
+ fclose(wrdlst);
+
+ /* walk every bucket and every chained entry of the hash table */
+ for (i=0; i< tablesize; i++) {
+ ep = &tableptr[i];
+ if (ep->word == NULL) continue;
+ for ( ; ep != NULL; ep = ep->next) {
+ /* aff_chk fills roots[]/numroots with the candidate roots of this word */
+ numroots = 0;
+ aff_chk(ep->word,strlen(ep->word));
+ if (numroots) {
+ /* now there might be a number of combinations */
+ /* of prefixes and suffixes that might match this */
+ /* word. So how to choose? As a first shot look */
+ /* for the shortest remaining root word to */
+ /* to maximize the combinatorial power */
+
+ /* but be careful, do not REQUIRE a specific combination */
+ /* of a prefix and a suffix to generate the word since */
+ /* that violates the rule that the root word with just */
+ /* the prefix or just the suffix must also exist in the */
+ /* wordlist as well */
+
+ /* in fact because of the cross product issue, this not a */
+ /* simple choice since some combinations of previous */
+ /* prefixes and new suffixes may not be valid. */
+ /* The only way to know is to simply try them all */
+
+ /* rl: length of the best root so far, p: its index (-1 = none) */
+ rl = 1000;
+ p = -1;
+
+ for (j = 0; j < numroots; j++){
+
+ /* first collect the root word info and build up */
+ /* the potential new affix string */
+ nword = (roots[j].hashent)->word;
+ nwl = strlen(nword);
+ *as = '\0';
+ al = 0;
+ ap = as;
+ if (roots[j].prefix) *ap++ = (roots[j].prefix)->achar;
+ if (roots[j].suffix) *ap++ = (roots[j].suffix)->achar;
+ if ((roots[j].hashent)->affstr) {
+ strcpy(ap,(roots[j].hashent)->affstr);
+ } else {
+ *ap = '\0';
+ }
+ al =strlen(as);
+
+ /* now expand the potential affix string to generate */
+ /* all legal words and make sure they all exist in the */
+ /* word list */
+ numwords = 0;
+ wlist[numwords].word = mystrdup(nword);
+ wlist[numwords].pallow = 0;
+ numwords++;
+ n = 0;
+ if (al)
+ expand_rootword(nword,nwl,as,al);
+ /* n counts the generated words found in the list; wlist is emptied again */
+ for (k=0; k<numwords; k++) {
+ if (lookup(wlist[k].word)) n++;
+ free(wlist[k].word);
+ wlist[k].word = NULL;
+ wlist[k].pallow = 0;
+ }
+
+ /* if all exist in word list then okay */
+ if (n == numwords) {
+ if (nwl < rl) {
+ rl = nwl;
+ p = j;
+ }
+ }
+ }
+ /* keep the chosen root and give it the affix flags; */
+ /* without a usable root the word itself is kept */
+ if (p != -1) {
+ ep1 = roots[p].hashent;
+ pfxp = roots[p].prefix;
+ sfxp = roots[p].suffix;
+ ep1->keep = 1;
+ if (pfxp != NULL) add_affix_char(ep1,pfxp->achar);
+ if (sfxp != NULL) add_affix_char(ep1,sfxp->achar);
+ } else {
+ ep->keep = 1;
+ }
+ } else {
+ ep->keep = 1;
+ }
+ }
+ }
+
+ /* now output only the words to keep along with affixes info */
+ /* first count how many words that is */
+ k = 0;
+ for (i=0; i< tablesize; i++) {
+ ep = &tableptr[i];
+ if (ep->word == NULL) continue;
+ for ( ; ep != NULL; ep = ep->next) {
+ if (ep->keep > 0) k++;
+ }
+ }
+ fprintf(stdout,"%d\n",k);
+
+ /* one line per kept word: "word/flags" or just "word" */
+ for (i=0; i< tablesize; i++) {
+ ep = &tableptr[i];
+ if (ep->word == NULL) continue;
+ for ( ; ep != NULL; ep = ep->next) {
+ if (ep->keep > 0) {
+ if (ep->affstr != NULL) {
+ fprintf(stdout,"%s/%s\n",ep->word,ep->affstr);
+ } else {
+ fprintf(stdout,"%s\n",ep->word);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+
+/*
+ * Read the affix file and fill the global prefix/suffix tables
+ * (ptable/numpfx and stable/numsfx).
+ *
+ * A class starts with a header line whose blank separated fields are
+ *   PFX|SFX  flag-char  Y|N (cross product)  number-of-entries
+ * and is followed by that many entry lines, of which the third field is
+ * the string to strip ("0" = nothing), the fourth the string to append
+ * ("0" = nothing) and the fifth the condition passed to encodeit().
+ * Every line read is echoed on stderr.
+ *
+ * Returns 0 on success and 1 on error: out of memory, a header without an
+ * entry count, more classes than the tables can hold, or a file that ends
+ * before all announced entries were read.
+ */
+int parse_aff_file(FILE * afflst)
+{
+    int i, j;
+    int numents = 0;
+    char achar = '\0';
+    short ff=0;
+    char ft;
+    struct affent * ptr= NULL;
+    struct affent * nptr= NULL;
+    char * line = malloc(MAX_LN_LEN);
+
+    if (!line) return 1;
+
+    while (fgets(line,MAX_LN_LEN,afflst)) {
+       mychomp(line);
+       ft = ' ';
+       fprintf(stderr,"parsing line: %s\n",line);
+       if (strncmp(line,"PFX",3) == 0) ft = 'P';
+       if (strncmp(line,"SFX",3) == 0) ft = 'S';
+       if (ft != ' ') {
+          char * tp = line;
+          char * piece;
+
+          /* the tables have a fixed size, never write behind their end */
+          if ((ft == 'P' && numpfx >= MAX_PREFIXES) ||
+              (ft == 'S' && numsfx >= MAX_SUFFIXES)) {
+             fprintf(stderr,"Error - too many affix classes\n");
+             free(line);
+             return 1;
+          }
+
+          i = 0;
+          ff = 0;
+          while ((piece=mystrsep(&tp,' '))) {
+             if (*piece != '\0') {
+                 switch(i) {
+                    case 0: break;
+                    case 1: { achar = *piece; break; }
+                    case 2: { if (*piece == 'Y') ff = XPRODUCT; break; }
+                    case 3: { numents = atoi(piece);
+                              if (numents < 0) numents = 0;
+                              /* zeroed storage, and at least one entry so  */
+                              /* that the class flag can always be stored   */
+                              ptr = calloc(numents > 0 ? numents : 1,
+                                           sizeof(struct affent));
+                              if (ptr) {
+                                 ptr->achar = achar;
+                                 ptr->xpflg = ff;
+                              }
+                              fprintf(stderr,"parsing %c entries %d\n",achar,numents);
+                              break;
+                            }
+                    default: break;
+                 }
+                 i++;
+             }
+             free(piece);
+          }
+          /* no entry count on the header line, or allocation failed */
+          if (!ptr) {
+             fprintf(stderr,"Error - bad affix header or out of memory\n");
+             free(line);
+             return 1;
+          }
+          /* now parse all of the sub entries*/
+          nptr = ptr;
+          for (j=0; j < numents; j++) {
+             if (!fgets(line,MAX_LN_LEN,afflst)) {
+                free(ptr);
+                free(line);
+                return 1;
+             }
+             mychomp(line);
+             tp = line;
+             i = 0;
+             while ((piece=mystrsep(&tp,' '))) {
+                if (*piece != '\0') {
+                    switch(i) {
+                       case 0: { /* every entry carries the class flag */
+                                 if (nptr != ptr) {
+                                    nptr->achar = ptr->achar;
+                                    nptr->xpflg = ptr->xpflg;
+                                 }
+                                 break;
+                               }
+                       case 1: break;
+                       case 2: { nptr->strip = mystrdup(piece);
+                                 nptr->stripl = strlen(nptr->strip);
+                                 if (strcmp(nptr->strip,"0") == 0) {
+                                    free(nptr->strip);
+                                    nptr->strip=mystrdup("");
+                                    nptr->stripl = 0;
+                                 }
+                                 break;
+                               }
+                       case 3: { nptr->appnd = mystrdup(piece);
+                                 nptr->appndl = strlen(nptr->appnd);
+                                 if (strcmp(nptr->appnd,"0") == 0) {
+                                    free(nptr->appnd);
+                                    nptr->appnd=mystrdup("");
+                                    nptr->appndl = 0;
+                                 }
+                                 break;
+                               }
+                       case 4: { encodeit(nptr,piece);}
+                                 fprintf(stderr, " affix: %s %d, strip: %s %d\n",nptr->appnd,
+                                         nptr->appndl,nptr->strip,nptr->stripl);
+                                 /* falls through to default */
+                       default: break;
+                    }
+                    i++;
+                }
+                free(piece);
+             }
+             nptr++;
+          }
+          if (ft == 'P') {
+             ptable[numpfx].aep = ptr;
+             ptable[numpfx].num = numents;
+             fprintf(stderr,"ptable %d num is %d\n",numpfx,ptable[numpfx].num);
+             numpfx++;
+          } else {
+             stable[numsfx].aep = ptr;
+             stable[numsfx].num = numents;
+             fprintf(stderr,"stable %d num is %d\n",numsfx,stable[numsfx].num);
+             numsfx++;
+          }
+          ptr = NULL;
+          nptr = NULL;
+          numents = 0;
+          achar='\0';
+       }
+    }
+    free(line);
+    return 0;
+}
+
+
+/*
+ * Translate the condition string `cs` of an affix entry into the
+ * conds[]/numconds fields of `ptr`.
+ *
+ * The string is a sequence of single characters, '.' (any character) and
+ * groups "[abc]" or negated groups "[^abc]"; each of them is one condition
+ * position. For position n, bit n of conds[c] is set for every character
+ * c that is accepted at that position. The string "." alone means
+ * "no condition" (numconds = 0).
+ *
+ * NOTE(review): the bit is stored in an element of conds[]; positions
+ * beyond the bit width of that element type cannot be represented --
+ * confirm the intended maximum number of conditions.
+ */
+void encodeit(struct affent * ptr, char * cs)
+{
+ int nc;
+ int neg;
+ int grp;
+ unsigned char c;
+ int n;
+ int ec;
+ int nm;
+ int i, j, k;
+ unsigned char mbr[MAX_WD_LEN];
+
+ /* now clear the conditions array */
+ for (i=0;i<SET_SIZE;i++) ptr->conds[i] = (unsigned char) 0;
+
+ /* now parse the string to create the conds array */
+ nc = strlen(cs);
+ neg = 0; /* complement indicator */
+ grp = 0; /* group indicator */
+ n = 0; /* number of conditions */
+ ec = 0; /* end condition indicator */
+ nm = 0; /* number of member in group */
+ i = 0;
+ if (strcmp(cs,".")==0) {
+ ptr->numconds = 0;
+ return;
+ }
+ while (i < nc) {
+ c = *((unsigned char *)(cs + i));
+ /* c is set to 0 once the character has been consumed as syntax */
+ if (c == '[') {
+ grp = 1;
+ c = 0;
+ }
+ if ((grp == 1) && (c == '^')) {
+ neg = 1;
+ c = 0;
+ }
+ if (c == ']') {
+ ec = 1;
+ c = 0;
+ }
+ /* inside a group: just remember the member character */
+ if ((grp == 1) && (c != 0)) {
+ *(mbr + nm) = c;
+ nm++;
+ c = 0;
+ }
+ /* a character outside of any group is a complete condition */
+ if (c != 0) {
+ ec = 1;
+ }
+ if (ec) {
+ if (grp == 1) {
+ if (neg == 0) {
+ for (j=0;j<nm;j++) {
+ k = (unsigned int) mbr[j];
+ ptr->conds[k] = ptr->conds[k] | (1 << n);
+ }
+ } else {
+ /* negated group: accept everything, then remove the members */
+ for (j=0;j<SET_SIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
+ for (j=0;j<nm;j++) {
+ k = (unsigned int) mbr[j];
+ ptr->conds[k] = ptr->conds[k] & ~(1 << n);
+ }
+ }
+ neg = 0;
+ grp = 0;
+ nm = 0;
+ } else {
+ /* not a group so just set the proper bit for this char */
+ /* but first handle special case of . inside condition */
+ if (c == '.') {
+ /* wild card character so set them all */
+ for (j=0;j<SET_SIZE;j++) ptr->conds[j] = ptr->conds[j] | (1 << n);
+ } else {
+ ptr->conds[(unsigned int) c] = ptr->conds[(unsigned int)c] | (1 << n);
+ }
+ }
+ n++;
+ ec = 0;
+ }
+ i++;
+ }
+ ptr->numconds = n;
+ return;
+}
+
+
+
+/*
+ * search for a prefix
+ *
+ * For each of the `num` entries starting at `ep`: when `word` (length
+ * `len`) begins with the entry's append string, undo the prefix (remove
+ * the append string, put back the strip string), check the entry's
+ * conditions against the start of the result and look the result up in
+ * the word list. Every hit is added to roots[] (up to MAX_ROOTS) with this
+ * entry as prefix and no suffix.
+ */
+void pfx_chk (const char * word, int len, struct affent* ep, int num)
+{
+ struct affent * aent;
+ int cond;
+ int tlen;
+ struct hentry * hent;
+ unsigned char * cp;
+ int i;
+ char tword[MAX_WD_LEN];
+
+ for (aent = ep, i = num; i > 0; aent++, i--) {
+
+ tlen = len - aent->appndl;
+
+ /* something must remain after removing the prefix, and the candidate */
+ /* root must be long enough for all conditions */
+ if (tlen > 0 && (aent->appndl == 0 ||
+ strncmp(aent->appnd, word, aent->appndl) == 0)
+ && tlen + aent->stripl >= aent->numconds) {
+
+ /* tword = strip string + word without the prefix */
+ if (aent->stripl) strcpy (tword, aent->strip);
+ strcpy((tword + aent->stripl), (word + aent->appndl));
+
+ /* now go through the conds and make sure they all match */
+ cp = (unsigned char *) tword;
+ for (cond = 0; cond < aent->numconds; cond++) {
+ if ((aent->conds[*cp++] & (1 << cond)) == 0)
+ break;
+ }
+
+ if (cond >= aent->numconds) {
+ tlen += aent->stripl;
+ if ((hent = lookup(tword)) != NULL) {
+ if (numroots < MAX_ROOTS) {
+ roots[numroots].hashent = hent;
+ roots[numroots].prefix = aent;
+ roots[numroots].suffix = NULL;
+ numroots++;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+
+/*
+ * Search for a suffix.
+ *
+ * For each of the `num` entries starting at `ep`: when `word` (length
+ * `len`) ends with the entry's append string, undo the suffix (cut the
+ * append string, put back the strip string), check the entry's conditions
+ * against the end of the result and look the result up in the word list.
+ * Every hit is added to roots[] (up to MAX_ROOTS) with `pfxent` as prefix
+ * and this entry as suffix.
+ *
+ * When `cpflag` has XPRODUCT set, entries without the XPRODUCT flag are
+ * skipped.
+ */
+void suf_chk (const char * word, int len, struct affent * ep,
+ int num, struct affent * pfxent, int cpflag)
+{
+ struct affent * aent;
+ int tlen;
+ int cond;
+ struct hentry * hent;
+ unsigned char * cp;
+ int i;
+ char tword[MAX_WD_LEN];
+
+ for (aent = ep, i = num; i > 0; aent++, i--) {
+
+ if ((cpflag & XPRODUCT) != 0 && (aent->xpflg & XPRODUCT) == 0)
+ continue;
+
+ tlen = len - aent->appndl;
+ if (tlen > 0 && (aent->appndl == 0 ||
+ strcmp(aent->appnd, (word + tlen)) == 0)
+ && tlen + aent->stripl >= aent->numconds) {
+
+ /* tword = word without the suffix + strip string; cp ends up */
+ /* pointing at the terminating NUL of tword */
+ strcpy (tword, word);
+ cp = (unsigned char *) (tword + tlen);
+ if (aent->stripl) {
+ strcpy ((char *)cp, aent->strip);
+ tlen += aent->stripl;
+ cp = (unsigned char *)(tword + tlen);
+ } else *cp = '\0';
+
+ /* conditions are checked backwards from the last character */
+ for (cond = aent->numconds; --cond >= 0; ) {
+ if ((aent->conds[*--cp] & (1 << cond)) == 0) break;
+ }
+ if (cond < 0) {
+ if ((hent = lookup(tword)) != NULL) {
+ if (numroots < MAX_ROOTS) {
+ roots[numroots].hashent = hent;
+ roots[numroots].prefix = pfxent;
+ roots[numroots].suffix = aent;
+ numroots++;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+
+/*
+ * Collect in roots[] every root word that can produce `word` (of length
+ * `len`): first through a prefix alone, then through a cross-product
+ * prefix followed by a suffix, finally through a suffix alone.
+ * Words shorter than four characters are not examined.
+ */
+void aff_chk (const char * word, int len)
+{
+    int t;            /* index into the prefix / suffix tables */
+    int r;            /* index into roots[] */
+    int pfxroots;     /* number of roots found through prefixes only */
+    char * base;
+    int baselen;
+
+    if (len < 4) return;
+
+    for (t = 0; t < numpfx; t++)
+        pfx_chk(word, len, ptable[t].aep, ptable[t].num);
+
+    /* only the roots known at this point are combined with suffixes, */
+    /* suf_chk() may append further roots while this loop is running  */
+    pfxroots = numroots;
+
+    for (r = 0; r < pfxroots; r++) {
+        if (!(roots[r].prefix->xpflg & XPRODUCT)) continue;
+        base = mystrdup((roots[r].hashent)->word);
+        baselen = strlen(base);
+        for (t = 0; t < numsfx; t++)
+            suf_chk(base, baselen, stable[t].aep, stable[t].num,
+                    roots[r].prefix, XPRODUCT);
+        free(base);
+    }
+
+    for (t = 0; t < numsfx; t++)
+        suf_chk(word, len, stable[t].aep, stable[t].num, NULL, 0);
+}
+
+
+
+/* find a word in the hash table: returns its entry, or NULL if absent */
+
+struct hentry * lookup(const char *word)
+{
+    struct hentry * entry = &tableptr[hash(word)];
+
+    /* a bucket head without a word marks an unused bucket */
+    if (entry->word == NULL) return NULL;
+
+    while (entry != NULL) {
+        if (strcmp(word,entry->word) == 0) return entry;
+        entry = entry->next;
+    }
+    return NULL;
+}
+
+
+
+/*
+ * add a word to the hash table
+ *
+ * `word` is stored as is (not copied), so it must stay allocated for as
+ * long as the table is used. The first word of a bucket lives in the
+ * table slot itself; further words are appended to the bucket's chain.
+ *
+ * Returns 0 on success, 1 when no memory is left for a chain node (the
+ * table is unchanged in that case).
+ */
+
+int add_word(char * word)
+{
+    struct hentry * dp = &tableptr[hash(word)];
+    struct hentry * hp;
+
+    /* unused bucket: fill the slot in place, no allocation needed */
+    if (dp->word == NULL) {
+        dp->word = word;
+        dp->affstr = NULL;
+        dp->keep = 0;
+        dp->next = NULL;
+        return 0;
+    }
+
+    hp = (struct hentry *) malloc (sizeof(struct hentry));
+    if (hp == NULL) return 1;
+
+    hp->word = word;
+    hp->affstr = NULL;
+    hp->keep = 0;
+    hp->next = NULL;
+
+    /* append at the end of the chain */
+    while (dp->next != NULL) dp=dp->next;
+    dp->next = hp;
+    return 0;
+}
+
+
+
+/*
+ * load a word list and build a hash table on the fly
+ *
+ * The first line of the list holds the number of words; it is used
+ * (plus a small margin, made odd) as size of the hash table. Every further
+ * line is one word and is added to the table.
+ *
+ * Returns 0 on success, 2 when the first line is missing or gives an
+ * unusable size, 3 when memory runs out.
+ */
+
+int load_tables(FILE * wdlst)
+{
+    char * ap;
+    char ts[MAX_LN_LEN];
+
+    /* first read the first line of file to get hash table size */
+    if (! fgets(ts, MAX_LN_LEN-1,wdlst)) return 2;
+    mychomp(ts);
+    tablesize = atoi(ts);
+    tablesize = tablesize + 5;
+    if ((tablesize %2) == 0) tablesize++;
+    /* a negative count in the file must not reach calloc() or hash() */
+    if (tablesize <= 0) return 2;
+
+    /* allocate the hash table */
+    tableptr = (struct hentry *) calloc(tablesize, sizeof(struct hentry));
+    if (! tableptr) return 3;
+
+    /* loop through all words of the munch list and add each of them
+     * to the hash table
+     */
+
+    while (fgets(ts,MAX_LN_LEN-1,wdlst)) {
+        mychomp(ts);
+        ap = mystrdup(ts);
+        if (! ap) return 3;
+        if (add_word(ap) != 0) {
+            free(ap);
+            return 3;
+        }
+    }
+    return 0;
+}
+
+
+/* hash function: the first (up to) four characters are packed into the
+ * value byte by byte, every further character is mixed in with a rotate
+ * followed by an exclusive or; the result is reduced modulo tablesize
+ */
+
+int hash(const char * word)
+{
+    long hv = 0;
+    int packed = 0;
+
+    while (packed < 4 && *word != 0) {
+        hv = (hv << 8) | (*word++);
+        packed++;
+    }
+    for ( ; *word != 0; word++) {
+        ROTATE(hv,ROTATE_LEN);
+        hv ^= (*word);
+    }
+    return (unsigned long) hv % tablesize;
+}
+
+
+/*
+ * Add the affix flag `ac` to the flag string of entry `ep`, unless the
+ * string contains it already. The flag string is (re)allocated as needed.
+ */
+void add_affix_char(struct hentry * ep, char ac)
+{
+    int oldlen;
+    int pos;
+    char * grown;
+
+    /* first flag of this entry: create a one character string */
+    if (ep->affstr == NULL) {
+        ep->affstr = (char *) malloc(2);
+        ep->affstr[0] = ac;
+        ep->affstr[1] = '\0';
+        return;
+    }
+
+    oldlen = strlen(ep->affstr);
+    for (pos = 0; pos < oldlen; pos++) {
+        if (ep->affstr[pos] == ac) return;   /* flag already present */
+    }
+
+    /* copy the old flags (with their terminator), then append the new one */
+    grown = calloc(oldlen+2,1);
+    memcpy(grown,ep->affstr,(oldlen+1));
+    grown[oldlen] = ac;
+    grown[oldlen+1] = '\0';
+    free(ep->affstr);
+    ep->affstr = grown;
+}
+
+
+/*
+ * add a prefix to word
+ *
+ * For each of the `num` entries starting at `ep` whose conditions match
+ * the beginning of `word` (length `len`): build the prefixed word
+ * (append string + word without its first stripl characters) and store
+ * a copy in wlist[] (up to MAX_WORDS) with pallow = 0.
+ */
+void pfx_add (const char * word, int len, struct affent* ep, int num)
+{
+ struct affent * aent;
+ int cond;
+ int tlen;
+ unsigned char * cp;
+ int i;
+ char * pp;
+ char tword[MAX_WD_LEN];
+
+
+ for (aent = ep, i = num; i > 0; aent++, i--) {
+
+ /* now make sure all conditions match */
+ if ((len > aent->stripl) && (len >= aent->numconds)) {
+
+ cp = (unsigned char *) word;
+ for (cond = 0; cond < aent->numconds; cond++) {
+ if ((aent->conds[*cp++] & (1 << cond)) == 0)
+ break;
+ }
+ if (cond >= aent->numconds) {
+
+ /* we have a match so add prefix */
+ tlen = 0;
+ if (aent->appndl) {
+ strcpy(tword,aent->appnd);
+ tlen += aent->appndl;
+ }
+ pp = tword + tlen;
+ strcpy(pp, (word + aent->stripl));
+ /* NOTE(review): tlen is not read again after this assignment */
+ tlen = tlen + len - aent->stripl;
+
+ if (numwords < MAX_WORDS) {
+ wlist[numwords].word = mystrdup(tword);
+ wlist[numwords].pallow = 0;
+ numwords++;
+ }
+ }
+ }
+ }
+}
+
+
+/*
+ * add a suffix to a word
+ *
+ * For each of the `num` entries starting at `ep` whose conditions match
+ * the end of `word` (length `len`): build the suffixed word (word without
+ * its last stripl characters + append string) and store a copy in wlist[]
+ * (up to MAX_WORDS). pallow records whether the entry has the XPRODUCT
+ * flag.
+ */
+void suf_add (const char * word, int len, struct affent * ep, int num)
+{
+ struct affent * aent;
+ int tlen;
+ int cond;
+ unsigned char * cp;
+ int i;
+ char tword[MAX_WD_LEN];
+ char * pp;
+
+ for (aent = ep, i = num; i > 0; aent++, i--) {
+
+ /* if conditions hold on root word
+ * then strip off strip string and add suffix
+ */
+
+ if ((len > aent->stripl) && (len >= aent->numconds)) {
+ /* conditions are checked backwards from the last character */
+ cp = (unsigned char *) (word + len);
+ for (cond = aent->numconds; --cond >= 0; ) {
+ if ((aent->conds[*--cp] & (1 << cond)) == 0) break;
+ }
+ if (cond < 0) {
+ /* we have a matching condition */
+ strcpy(tword,word);
+ tlen = len;
+ if (aent->stripl) {
+ tlen -= aent->stripl;
+ }
+ pp = (tword + tlen);
+ if (aent->appndl) {
+ strcpy (pp, aent->appnd);
+ /* NOTE(review): adds stripl, not appndl; tlen is not read afterwards */
+ tlen += aent->stripl;
+ } else *pp = '\0';
+
+ if (numwords < MAX_WORDS) {
+ wlist[numwords].word = mystrdup(tword);
+ wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
+ numwords++;
+ }
+ }
+ }
+ }
+}
+
+
+
+/*
+ * Generate into wlist[] all words that root word `ts` (length `wl`)
+ * produces with the affix flags in string `ap`:
+ *   1. the root with every allowed suffix,
+ *   2. every suffixed word that permits it (pallow) with every allowed
+ *      cross-product prefix,
+ *   3. the root with every allowed prefix.
+ * wlist[0] is expected to hold the root word already (the loop of step 2
+ * starts at index 1). `al` is not used. Always returns 0.
+ */
+int expand_rootword(const char * ts, int wl, const char * ap, int al)
+{
+ int i;
+ int j;
+ int nh=0;
+ int nwl;
+
+ for (i=0; i < numsfx; i++) {
+ if (strchr(ap,(stable[i].aep)->achar)) {
+ suf_add(ts, wl, stable[i].aep, stable[i].num);
+ }
+ }
+
+ /* remember how many words exist now: pfx_add() below appends to wlist */
+ nh = numwords;
+
+ if (nh > 1) {
+ for (j=1;j<nh;j++){
+ if (wlist[j].pallow) {
+ for (i=0; i < numpfx; i++) {
+ if (strchr(ap,(ptable[i].aep)->achar)) {
+ if ((ptable[i].aep)->xpflg & XPRODUCT) {
+ nwl = strlen(wlist[j].word);
+ pfx_add(wlist[j].word, nwl, ptable[i].aep, ptable[i].num);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (i=0; i < numpfx; i++) {
+ if (strchr(ap,(ptable[i].aep)->achar)) {
+ pfx_add(ts, wl, ptable[i].aep, ptable[i].num);
+ }
+ }
+ return 0;
+}
+
+
+
+/* cut the next token off *stringp, tokens being separated by the single
+ * character delim (like strsep(), but with one delimiter character).
+ * The token is returned in freshly allocated memory and *stringp is moved
+ * behind it; NULL is returned when nothing is left or memory runs out.
+ */
+char * mystrsep(char ** stringp, const char delim)
+{
+    char * start = *stringp;
+    int remaining = strlen(start);
+    char * hit;
+    char * token;
+    int toklen;
+
+    if (remaining <= 0) return NULL;
+
+    hit = (char *)memchr(start,(int)((unsigned char)delim),remaining);
+    if (hit != NULL) {
+        /* token ends before the delimiter; the input always advances */
+        toklen = (int)(hit - start);
+        *stringp = hit+1;
+        token = (char *) malloc(toklen+1);
+        if (token == NULL) return NULL;
+    } else {
+        /* last token: the rest of the string; advance only on success */
+        toklen = remaining;
+        token = (char *) malloc(toklen+1);
+        if (token == NULL) return NULL;
+        *stringp = start + remaining;
+    }
+    memcpy(token,start,toklen);
+    token[toklen] = '\0';
+    return token;
+}
+
+
+/* return a malloc'ed copy of s; NULL when s is NULL or memory runs out */
+char * mystrdup(const char * s)
+{
+    char * copy;
+    int bytes;
+
+    if (s == NULL) return NULL;
+
+    bytes = strlen(s)+1;          /* include the terminating NUL */
+    copy = (char *) malloc(bytes);
+    if (copy != NULL) memcpy(copy,s,bytes);
+    return copy;
+}
+
+
+/*
+ * Remove the line ending ("\n", "\r\n" or a lone trailing "\r") from the
+ * end of s, in place. A string without line ending is left untouched, so
+ * the last line of a file that lacks a final newline keeps its last
+ * character.
+ */
+void mychomp(char * s)
+{
+    size_t k = strlen(s);
+    if ((k > 0) && (*(s+k-1) == '\n')) *(s+(--k)) = '\0';
+    if ((k > 0) && (*(s+k-1) == '\r')) *(s+(--k)) = '\0';
+}
+
diff --git a/src/tools/munch.h b/src/tools/munch.h
new file mode 100644
index 0000000..ee75878
--- /dev/null
+++ b/src/tools/munch.h
@@ -0,0 +1,121 @@
+/* munch header file */
+
+#ifndef MUNCH_H_
+#define MUNCH_H_
+
+#include <stdio.h>  /* FILE, used by the prototypes below */
+
+/* fixed limits of the tool */
+#define MAX_LN_LEN 200
+#define MAX_WD_LEN 200
+#define MAX_PREFIXES 256
+#define MAX_SUFFIXES 256
+#define MAX_ROOTS 20
+#define MAX_WORDS 5000
+
+#define ROTATE_LEN 5
+
+/* Rotate v left by q bits, treating it as a 32-bit quantity.
+ * Every use of q is parenthesized so that an expression argument such
+ * as ROTATE(v, a + b) is evaluated as intended.  The trailing
+ * semicolon is part of the macro and is kept for existing callers.
+ */
+#define ROTATE(v,q) \
+ (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));
+
+#define SET_SIZE 256
+
+/* bit stored in affent.xpflg */
+#define XPRODUCT (1 << 0)
+
+/* the affix table entry */
+
+struct affent
+{
+    char * appnd;           /* string to add */
+    char * strip;           /* string to strip */
+    short appndl;           /* length of appnd */
+    short stripl;           /* length of strip */
+    char achar;             /* affix flag character */
+    char xpflg;             /* XPRODUCT bit */
+    short numconds;         /* number of conditions */
+    char conds[SET_SIZE];   /* one bit mask per character value */
+};
+
+
+/* one affix group: pointer to its first entry and the entry count */
+struct affixptr
+{
+    struct affent * aep;
+    int num;
+};
+
+/* NOTE: the objects below are defined (not declared extern) in this
+ * header, so including it from more than one translation unit would
+ * define them more than once.
+ */
+
+/* the prefix and suffix table */
+int numpfx; /* Number of prefixes in table */
+int numsfx; /* Number of suffixes in table */
+
+/* the prefix table */
+struct affixptr ptable[MAX_PREFIXES];
+
+/* the suffix table */
+struct affixptr stable[MAX_SUFFIXES];
+
+
+/* data structure to store results of lookups */
+struct matches
+{
+    struct hentry * hashent; /* hash table entry */
+    struct affent * prefix; /* Prefix used, or NULL */
+    struct affent * suffix; /* Suffix used, or NULL */
+};
+
+int numroots; /* number of root words found */
+struct matches roots[MAX_ROOTS]; /* list of root words found */
+
+/* hashing stuff */
+
+struct hentry
+{
+    char * word;
+    char * affstr;
+    struct hentry * next;
+    int keep;
+};
+
+
+int tablesize;
+struct hentry * tableptr;
+
+/* unmunch stuff */
+
+int numwords; /* number of words found */
+struct dwords
+{
+    char * word;    /* generated word form */
+    int pallow;     /* non-zero when a cross-product prefix may be added */
+};
+
+struct dwords wlist[MAX_WORDS]; /* list words found */
+
+
+/* the routines */
+
+int parse_aff_file(FILE* afflst);
+
+void encodeit(struct affent * ptr, char * cs);
+
+int load_tables(FILE * wrdlst);
+
+int hash(const char *);
+
+int add_word(char *);
+
+struct hentry * lookup(const char *);
+
+void aff_chk (const char * word, int len);
+
+void pfx_chk (const char * word, int len, struct affent* ep, int num);
+
+void suf_chk (const char * word, int len, struct affent * ep, int num,
+              struct affent * pfxent, int cpflag);
+
+void add_affix_char(struct hentry * hent, char ac);
+
+int expand_rootword(const char *, int, const char*, int);
+
+void pfx_add (const char * word, int len, struct affent* ep, int num);
+
+void suf_add (const char * word, int len, struct affent * ep, int num);
+
+char * mystrsep(char ** stringp, const char delim);
+
+char * mystrdup(const char * s);
+
+void mychomp(char * s);
+
+#endif /* MUNCH_H_ */
diff --git a/src/tools/unmunch.c b/src/tools/unmunch.c
new file mode 100644
index 0000000..6bbd09c
--- /dev/null
+++ b/src/tools/unmunch.c
@@ -0,0 +1,514 @@
+/* Un-munch a root word list with affix tags
+ * to recreate the original word list
+ */
+
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef __linux__
+#include <error.h>
+#include <errno.h>
+#include <sys/mman.h>
+#endif
+
+#include "unmunch.h"
+
+
+/* unmunch dic_file affix_file
+ *
+ * Parses the affix file, then reads the munched word list line by line
+ * (the first line, the word count, is skipped) and prints every root
+ * word followed by all forms generated from its affix flags, one per
+ * line, on stdout.  Progress and error messages go to stderr.
+ *
+ * Exit status: 0 on success, 1 on a usage, open, parse or allocation
+ * error, 2 when the word list is empty.
+ */
+int main(int argc, char** argv)
+{
+
+    int i;
+    int al, wl;
+
+    FILE * wrdlst;
+    FILE * afflst;
+
+    const char * wf;
+    const char * af;
+    char * ap;
+    char ts[MAX_LN_LEN];
+
+    /* first parse the command line options */
+    /* arg1 - munched wordlist, arg2 - affix file */
+
+    if (argc < 3) {
+        fprintf(stderr,"correct syntax is:\n");
+        fprintf(stderr,"unmunch dic_file affix_file\n");
+        exit(1);
+    }
+    /* argv stays valid for the whole run, so no copies are needed */
+    wf = argv[1];
+    af = argv[2];
+
+    /* open the affix file */
+    afflst = fopen(af,"r");
+    if (!afflst) {
+        fprintf(stderr,"Error - could not open affix description file\n");
+        exit(1);
+    }
+
+    /* step one is to parse the affix file building up the internal
+       affix data structures */
+
+    numpfx = 0;
+    numsfx = 0;
+    fullstrip = 0;
+
+    if (parse_aff_file(afflst)) {
+        fprintf(stderr,"Error - in affix file loading\n");
+        exit(1);
+    }
+
+    fclose(afflst);
+
+    fprintf(stderr,"parsed in %d prefixes and %d suffixes\n",numpfx,numsfx);
+
+    /* affix file is now parsed, so expand the word list on the fly */
+
+    /* open the wordlist */
+    wrdlst = fopen(wf,"r");
+    if (!wrdlst) {
+        fprintf(stderr,"Error - could not open word list file\n");
+        exit(1);
+    }
+
+    /* skip over the hash table size */
+    if (! fgets(ts, MAX_LN_LEN-1,wrdlst)) {
+        fclose(wrdlst);
+        return 2;
+    }
+    mychomp(ts);
+
+    while (fgets(ts,MAX_LN_LEN-1,wrdlst)) {
+        mychomp(ts);
+        /* split each line into word and affix char strings */
+        ap = strchr(ts,'/');
+        if (ap) {
+            *ap = '\0';
+            ap++;
+            al = strlen(ap);
+        } else {
+            al = 0;
+            ap = NULL;
+        }
+
+        wl = strlen(ts);
+
+        /* slot 0 of wlist always holds the root word itself */
+        numwords = 0;
+        wlist[numwords].word = mystrdup(ts);
+        if (!wlist[numwords].word) {
+            fprintf(stderr,"Error - out of memory\n");
+            fclose(wrdlst);
+            exit(1);
+        }
+        wlist[numwords].pallow = 0;
+        numwords++;
+
+        if (al)
+            expand_rootword(ts,wl,ap,al);
+
+        /* print and release everything generated for this root */
+        for (i=0; i < numwords; i++) {
+            /* a NULL word means an allocation failed while expanding */
+            if (wlist[i].word) fprintf(stdout,"%s\n",wlist[i].word);
+            free(wlist[i].word);
+            wlist[i].word = NULL;
+            wlist[i].pallow = 0;
+        }
+
+    }
+
+    fclose(wrdlst);
+    return 0;
+}
+
+
+
+
+/* Read the affix file and fill the prefix/suffix tables.
+ *
+ * A line starting with FULLSTRIP sets the global fullstrip.  A line
+ * starting with PFX or SFX is a group header
+ *
+ *     PFX|SFX flag cross_product(Y/N) entry_count
+ *
+ * and is followed by entry_count entry lines
+ *
+ *     PFX|SFX flag strip append condition
+ *
+ * where a strip or append field of "0" stands for the empty string.
+ * Each group is stored as one array of struct affent and registered
+ * in ptable[] or stable[].
+ *
+ * Entries are zero-initialised, so an entry line with missing fields
+ * yields empty strip/append strings and no conditions instead of
+ * uninitialised data.  A header without a positive entry count is
+ * skipped with a warning.
+ *
+ * Returns 0 on success and 1 on error (out of memory, a truncated
+ * group, or more groups than the tables can hold).
+ */
+int parse_aff_file(FILE * afflst)
+{
+    int i, j;
+    int numents=0;
+    char achar='\0';
+    short ff=0;
+    char ft;
+    char * tp;
+    char * piece;
+    struct affent * ptr= NULL;
+    struct affent * nptr= NULL;
+    char * line = malloc(MAX_LN_LEN);
+
+    if (!line) return 1;
+
+    while (fgets(line,MAX_LN_LEN,afflst)) {
+        mychomp(line);
+        ft = ' ';
+        fprintf(stderr,"parsing line: %s\n",line);
+        if (strncmp(line,"FULLSTRIP",9) == 0) fullstrip = 1;
+        if (strncmp(line,"PFX",3) == 0) ft = 'P';
+        if (strncmp(line,"SFX",3) == 0) ft = 'S';
+        if (ft == ' ') continue;
+
+        /* parse the group header; empty pieces come from runs of
+           blanks and do not count as fields */
+        ff = 0;
+        numents = 0;
+        achar = '\0';
+        i = 0;
+        tp = line;
+        while ((piece=mystrsep(&tp,' '))) {
+            if (*piece != '\0') {
+                switch(i) {
+                    case 0: break;
+                    case 1: { achar = *piece; break; }
+                    case 2: { if (*piece == 'Y') ff = XPRODUCT; break; }
+                    case 3: { numents = atoi(piece);
+                              fprintf(stderr,"parsing %c entries %d\n",achar,numents);
+                              break;
+                            }
+                    default: break;
+                }
+                i++;
+            }
+            free(piece);
+        }
+
+        /* no usable entry count: there is nothing to allocate or read */
+        if (numents <= 0) {
+            fprintf(stderr,"warning - affix group without entries ignored: %s\n",line);
+            continue;
+        }
+
+        /* refuse to write past the end of the fixed-size tables */
+        if ((ft == 'P') ? (numpfx >= MAX_PREFIXES) : (numsfx >= MAX_SUFFIXES)) {
+            fprintf(stderr,"Error - too many affix groups\n");
+            free(line);
+            return 1;
+        }
+
+        ptr = calloc((size_t) numents, sizeof(struct affent));
+        if (!ptr) {
+            free(line);
+            return 1;
+        }
+        ptr->achar = achar;
+        ptr->xpflg = ff;
+
+        /* now parse all of the sub entries*/
+        for (j=0, nptr=ptr; j < numents; j++, nptr++) {
+            if (!fgets(line,MAX_LN_LEN,afflst)) {
+                free(line);
+                return 1;
+            }
+            mychomp(line);
+
+            /* every entry carries the flag and cross-product bit of its group */
+            nptr->achar = ptr->achar;
+            nptr->xpflg = ptr->xpflg;
+
+            tp = line;
+            i = 0;
+            while ((piece=mystrsep(&tp,' '))) {
+                if (*piece != '\0') {
+                    switch(i) {
+                        case 0: break;
+                        case 1: break;
+                        case 2: { nptr->strip = mystrdup((strcmp(piece,"0") == 0) ? "" : piece);
+                                  if (!nptr->strip) {
+                                      free(piece);
+                                      free(line);
+                                      return 1;
+                                  }
+                                  nptr->stripl = strlen(nptr->strip);
+                                  break;
+                                }
+                        case 3: { nptr->appnd = mystrdup((strcmp(piece,"0") == 0) ? "" : piece);
+                                  if (!nptr->appnd) {
+                                      free(piece);
+                                      free(line);
+                                      return 1;
+                                  }
+                                  nptr->appndl = strlen(nptr->appnd);
+                                  break;
+                                }
+                        case 4: { encodeit(nptr,piece);
+                                  fprintf(stderr, " affix: %s %d, strip: %s %d\n",nptr->appnd,
+                                          nptr->appndl,nptr->strip,nptr->stripl);
+                                  break;
+                                }
+                        default: break;
+                    }
+                    i++;
+                }
+                free(piece);
+            }
+        }
+
+        /* register the finished group */
+        if (ft == 'P') {
+            ptable[numpfx].aep = ptr;
+            ptable[numpfx].num = numents;
+            fprintf(stderr,"ptable %d num is %d flag %c\n",numpfx,ptable[numpfx].num,ptr->achar);
+            numpfx++;
+        } else {
+            stable[numsfx].aep = ptr;
+            stable[numsfx].num = numents;
+            fprintf(stderr,"stable %d num is %d flag %c\n",numsfx,stable[numsfx].num,ptr->achar);
+            numsfx++;
+        }
+        ptr = NULL;
+        nptr = NULL;
+    }
+    free(line);
+    return 0;
+}
+
+
+/* Compile the condition string cs of an affix entry into ptr->conds.
+ *
+ * The string is a sequence of conditions, each one being a single
+ * character, '.' (any character), or a bracketed set "[abc]" /
+ * "[^abc]".  For condition number n, bit n is set in conds[ch] for
+ * every character value ch that satisfies it.  ptr->numconds receives
+ * the number of conditions; the string "." alone means none.
+ *
+ * NOTE: the elements of conds are of type char, so only bit numbers
+ * that fit into a char survive the store.
+ */
+void encodeit(struct affent * ptr, char * cs)
+{
+    unsigned char members[MAX_WD_LEN];  /* characters collected inside [...] */
+    int nmembers = 0;                   /* how many of them */
+    int in_group = 0;                   /* currently between '[' and ']' */
+    int negate = 0;                     /* a '^' was seen in this group */
+    int ncond = 0;                      /* conditions finished so far */
+    int len;
+    int pos;
+    int k;
+
+    /* start from an empty condition table */
+    memset(ptr->conds, 0, SET_SIZE);
+
+    len = strlen(cs);
+    if (strcmp(cs,".")==0) {
+        ptr->numconds = 0;
+        return;
+    }
+
+    for (pos = 0; pos < len; pos++) {
+        unsigned char ch = *((unsigned char *)(cs + pos));
+        int complete = 0;   /* this character finishes a condition */
+
+        /* ch is set to 0 once the character has been consumed */
+        if (ch == '[') {
+            in_group = 1;
+            ch = 0;
+        }
+        if (in_group && (ch == '^')) {
+            negate = 1;
+            ch = 0;
+        }
+        if (ch == ']') {
+            complete = 1;
+            ch = 0;
+        }
+        if (in_group && (ch != 0)) {
+            members[nmembers++] = ch;
+            ch = 0;
+        }
+        if (ch != 0) complete = 1;
+
+        if (!complete) continue;
+
+        if (in_group) {
+            if (negate) {
+                /* everything matches, except the listed members */
+                for (k = 0; k < SET_SIZE; k++)
+                    ptr->conds[k] |= (1 << ncond);
+                for (k = 0; k < nmembers; k++)
+                    ptr->conds[members[k]] &= ~(1 << ncond);
+            } else {
+                for (k = 0; k < nmembers; k++)
+                    ptr->conds[members[k]] |= (1 << ncond);
+            }
+            negate = 0;
+            in_group = 0;
+            nmembers = 0;
+        } else if (ch == '.') {
+            /* wild card character so set them all */
+            for (k = 0; k < SET_SIZE; k++)
+                ptr->conds[k] |= (1 << ncond);
+        } else {
+            /* plain character: only that character satisfies the condition */
+            ptr->conds[(unsigned int) ch] |= (1 << ncond);
+        }
+        ncond++;
+    }
+    ptr->numconds = ncond;
+    return;
+}
+
+
+
+/* add a prefix to word
+ *
+ *   word, len - the base word and its length
+ *   ep, num   - first entry of a prefix group and its number of entries
+ *
+ * Every entry whose strip string and conditions match the start of
+ * word yields one new form (strip string removed, append string put
+ * in front), stored in wlist[] with pallow = 0 as long as numwords is
+ * below MAX_WORDS.  A form that would not fit into the MAX_WD_LEN work
+ * buffer is skipped with a warning, and a failed allocation is dropped
+ * instead of leaving a NULL word in wlist[].
+ */
+void pfx_add (const char * word, int len, struct affent* ep, int num)
+{
+    struct affent * aent;
+    const unsigned char * cp;
+    int cond;
+    int rest;       /* characters of word kept after stripping */
+    int i;
+    char * copy;
+    char tword[MAX_WD_LEN];
+
+    for (aent = ep, i = num; i > 0; aent++, i--) {
+
+        /* the word must be long enough and start with the strip string */
+        if (!((len + fullstrip > aent->stripl) && (len >= aent->numconds) &&
+              ((aent->stripl == 0) ||
+               (strncmp(aent->strip, word, aent->stripl) == 0))))
+            continue;
+
+        /* now make sure all conditions match: condition n is tested
+           against the n-th character of the word */
+        cp = (const unsigned char *) word;
+        for (cond = 0; cond < aent->numconds; cond++) {
+            if ((aent->conds[*cp++] & (1 << cond)) == 0)
+                break;
+        }
+        if (cond < aent->numconds) continue;
+
+        /* we have a match so add prefix, unless it cannot fit */
+        rest = len - aent->stripl;
+        if ((rest < 0) || (aent->appndl + rest >= MAX_WD_LEN)) {
+            fprintf(stderr,"warning - prefixed form of %s is too long, skipped\n",word);
+            continue;
+        }
+        if (aent->appndl) memcpy(tword, aent->appnd, aent->appndl);
+        memcpy(tword + aent->appndl, word + aent->stripl, rest);
+        tword[aent->appndl + rest] = '\0';
+
+        if (numwords < MAX_WORDS) {
+            copy = mystrdup(tword);
+            if (copy) {
+                wlist[numwords].word = copy;
+                wlist[numwords].pallow = 0;
+                numwords++;
+            }
+        }
+    }
+}
+
+
+/* add a suffix to a word
+ *
+ *   word, len - the base word and its length
+ *   ep, num   - first entry of a suffix group and its number of entries
+ *
+ * Every entry whose strip string and conditions match the end of word
+ * yields one new form (strip string removed, append string added),
+ * stored in wlist[] as long as numwords is below MAX_WORDS.  pallow is
+ * set from the entry's XPRODUCT bit so that expand_rootword() knows
+ * whether prefixes may be combined with the new form.  A form that
+ * would not fit into the MAX_WD_LEN work buffer is skipped with a
+ * warning, and a failed allocation is dropped instead of leaving a
+ * NULL word in wlist[].
+ */
+void suf_add (const char * word, int len, struct affent * ep, int num)
+{
+    struct affent * aent;
+    const unsigned char * cp;
+    int cond;
+    int stem;       /* characters of word kept after stripping */
+    int i;
+    char * copy;
+    char tword[MAX_WD_LEN];
+
+    for (aent = ep, i = num; i > 0; aent++, i--) {
+
+        /* if conditions hold on root word
+         * then strip off strip string and add suffix
+         */
+        if (!((len + fullstrip > aent->stripl) && (len >= aent->numconds) &&
+              ((aent->stripl == 0) ||
+               (strcmp(aent->strip, word + len - aent->stripl) == 0))))
+            continue;
+
+        /* conditions are checked backwards from the last character:
+           the highest numbered condition applies to the final one */
+        cp = (const unsigned char *) (word + len);
+        for (cond = aent->numconds; --cond >= 0; ) {
+            if ((aent->conds[*--cp] & (1 << cond)) == 0) break;
+        }
+        if (cond >= 0) continue;
+
+        /* we have a matching condition, unless the result cannot fit */
+        stem = len - aent->stripl;
+        if ((stem < 0) || (stem + aent->appndl >= MAX_WD_LEN)) {
+            fprintf(stderr,"warning - suffixed form of %s is too long, skipped\n",word);
+            continue;
+        }
+        memcpy(tword, word, stem);
+        if (aent->appndl) memcpy(tword + stem, aent->appnd, aent->appndl);
+        tword[stem + aent->appndl] = '\0';
+
+        if (numwords < MAX_WORDS) {
+            copy = mystrdup(tword);
+            if (copy) {
+                wlist[numwords].word = copy;
+                wlist[numwords].pallow = (aent->xpflg & XPRODUCT);
+                numwords++;
+            }
+        }
+    }
+}
+
+
+
+/* Generate all affixed forms of one dictionary entry.
+ *
+ *   ts, wl - the root word and its length
+ *   ap     - the affix flag characters of the entry
+ *   al     - length of ap (not used by this routine)
+ *
+ * The forms are appended to wlist[] (slot 0 already holds the root,
+ * see main()).  Always returns 0.
+ */
+int expand_rootword(const char * ts, int wl, const char * ap, int al)
+{
+    struct affixptr * grp;
+    int g;
+    int w;
+    int after_suffixes;
+
+    /* 1. suffixed forms of the root */
+    for (g = 0, grp = stable; g < numsfx; g++, grp++) {
+        if (strchr(ap, grp->aep->achar))
+            suf_add(ts, wl, grp->aep, grp->num);
+    }
+
+    /* 2. cross products: prefix every suffixed form that permits it.
+     *    Only the entries that existed before this step are walked,
+     *    so forms added here are not expanded again. */
+    after_suffixes = numwords;
+    for (w = 1; w < after_suffixes; w++) {
+        if (wlist[w].pallow == 0) continue;
+        for (g = 0, grp = ptable; g < numpfx; g++, grp++) {
+            if (strchr(ap, grp->aep->achar) &&
+                (grp->aep->xpflg & XPRODUCT)) {
+                int nwl = strlen(wlist[w].word);
+                pfx_add(wlist[w].word, nwl, grp->aep, grp->num);
+            }
+        }
+    }
+
+    /* 3. prefixed forms of the root */
+    for (g = 0, grp = ptable; g < numpfx; g++, grp++) {
+        if (strchr(ap, grp->aep->achar))
+            pfx_add(ts, wl, grp->aep, grp->num);
+    }
+    return 0;
+}
+
+
+/* Return the next token of *stringp as a newly malloc'ed string.
+ *
+ * Similar to strsep(), but the delimiter is a single character, the
+ * source string is left unchanged, and the caller must free() the
+ * returned token.  *stringp is advanced behind the delimiter, or to
+ * the end of the string after the last token.  Returns NULL when the
+ * string is exhausted or memory cannot be allocated.
+ */
+char * mystrsep(char ** stringp, const char delim)
+{
+    char * from = *stringp;
+    size_t left = strlen(from);
+    char * sep;
+    char * tok;
+    size_t ntok;
+
+    if (left == 0) return NULL;
+
+    sep = (char *) memchr(from, (int) ((unsigned char) delim), left);
+    if (sep == NULL) {
+        /* final token: the cursor is moved only if the copy succeeds */
+        tok = (char *) malloc(left + 1);
+        if (tok == NULL) return NULL;
+        memcpy(tok, from, left);
+        tok[left] = '\0';
+        *stringp = from + left;
+        return tok;
+    }
+
+    /* delimiter found: the cursor is moved before allocating */
+    *stringp = sep + 1;
+    ntok = (size_t) (sep - from);
+    tok = (char *) malloc(ntok + 1);
+    if (tok == NULL) return NULL;
+    memcpy(tok, from, ntok);
+    tok[ntok] = '\0';
+    return tok;
+}
+
+
+/* Duplicate s into newly allocated memory.
+ * Returns NULL for a NULL argument or when malloc() fails; otherwise
+ * the caller is responsible for freeing the result.
+ */
+char * mystrdup(const char * s)
+{
+    char * copy = NULL;
+
+    if (s != NULL) {
+        size_t total = strlen(s) + 1;   /* characters plus the NUL */
+        copy = (char *) malloc(total);
+        if (copy != NULL)
+            memcpy(copy, s, total);
+    }
+    return copy;
+}
+
+
+/* Remove one trailing line terminator ("\n", "\r\n" or a lone "\r")
+ * from s, in place.
+ *
+ * The carriage return is looked for at the end of the string as it is
+ * after the newline has been removed, so a '\r' in front of the last
+ * character of an unterminated line no longer truncates the line.
+ */
+void mychomp(char * s)
+{
+    size_t k = strlen(s);
+    if ((k > 0) && (s[k-1] == '\n')) s[--k] = '\0';
+    if ((k > 0) && (s[k-1] == '\r')) s[--k] = '\0';
+}
+
diff --git a/src/tools/unmunch.h b/src/tools/unmunch.h
new file mode 100644
index 0000000..0c8a6bc
--- /dev/null
+++ b/src/tools/unmunch.h
@@ -0,0 +1,78 @@
+/* unmunch header file */
+
+#ifndef UNMUNCH_H_
+#define UNMUNCH_H_
+
+#include <stdio.h>  /* FILE, used by parse_aff_file() below */
+
+#define MAX_LN_LEN 200      /* size of the line buffers */
+#define MAX_WD_LEN 200      /* size of the word work buffers */
+#define MAX_PREFIXES 256    /* capacity of ptable[] */
+#define MAX_SUFFIXES 256    /* capacity of stable[] */
+#define MAX_WORDS 500000    /* capacity of wlist[] */
+
+#define ROTATE_LEN 5
+
+/* Rotate v left by q bits, treating it as a 32-bit quantity.
+ * Every use of q is parenthesized so that an expression argument such
+ * as ROTATE(v, a + b) is evaluated as intended.  The trailing
+ * semicolon is part of the macro and is kept for existing callers.
+ */
+#define ROTATE(v,q) \
+ (v) = ((v) << (q)) | (((v) >> (32 - (q))) & ((1 << (q))-1));
+
+#define SET_SIZE 256
+
+/* bit stored in affent.xpflg */
+#define XPRODUCT (1 << 0)
+
+/* the affix table entry */
+
+struct affent
+{
+    char * appnd;           /* string to add */
+    char * strip;           /* string to strip */
+    short appndl;           /* length of appnd */
+    short stripl;           /* length of strip */
+    char achar;             /* affix flag character */
+    char xpflg;             /* XPRODUCT bit */
+    short numconds;         /* number of conditions */
+    char conds[SET_SIZE];   /* one bit mask per character value */
+};
+
+
+/* one affix group: pointer to its first entry and the entry count */
+struct affixptr
+{
+    struct affent * aep;
+    int num;
+};
+
+/* NOTE: the objects below are defined (not declared extern) in this
+ * header, so including it from more than one translation unit would
+ * define them more than once.
+ */
+
+/* the prefix and suffix table */
+int numpfx; /* Number of prefixes in table */
+int numsfx; /* Number of suffixes in table */
+
+/* the prefix table */
+struct affixptr ptable[MAX_PREFIXES];
+
+/* the suffix table */
+struct affixptr stable[MAX_SUFFIXES];
+
+/* set to 1 when the affix file contains a FULLSTRIP line; it is added
+   to the word length in the length tests of pfx_add() and suf_add() */
+int fullstrip;
+
+
+int numwords; /* number of words found */
+struct dwords
+{
+    char * word;    /* generated word form */
+    int pallow;     /* non-zero when a cross-product prefix may be added */
+};
+
+struct dwords wlist[MAX_WORDS]; /* list words found */
+
+
+/* the routines */
+
+int parse_aff_file(FILE* afflst);
+
+void encodeit(struct affent * ptr, char * cs);
+
+int expand_rootword(const char *, int, const char*, int);
+
+void pfx_add (const char * word, int len, struct affent* ep, int num);
+
+void suf_add (const char * word, int len, struct affent * ep, int num);
+
+char * mystrsep(char ** stringp, const char delim);
+
+char * mystrdup(const char * s);
+
+void mychomp(char * s);
+
+#endif /* UNMUNCH_H_ */
diff --git a/src/tools/wordforms b/src/tools/wordforms
new file mode 100755
index 0000000..dabc346
--- /dev/null
+++ b/src/tools/wordforms
@@ -0,0 +1,35 @@
+#!/bin/sh
+# wordforms: print the forms of a word that hunspell accepts.
+#
+# Candidate forms are built by awk from the PFX/SFX entries of the
+# affix file and then filtered through hunspell, which checks them
+# against a one-word dictionary holding only the queried word.
+fx=0
+case $1 in
+-s) fx=1; shift;;
+-p) fx=2; shift;;
+esac
+# the argument count is checked after the option has been removed, so
+# that "wordforms -s file.aff file.dic" is rejected as well
+case $# in
+0|1|2) echo "Usage: wordforms [-s | -p] dictionary.aff dictionary.dic word
+-s: print only suffixed forms
+-p: print only prefixed forms
+"; exit 1;;
+esac
+# work in a private directory instead of fixed, predictable /tmp names
+tmpdir=`mktemp -d "${TMPDIR:-/tmp}/wordforms.XXXXXX"` || exit 1
+trap 'rm -rf "$tmpdir"' EXIT
+trap 'exit 1' HUP INT TERM
+# hunspell -d needs the .aff and the .dic under one base name, hence
+# the link; an absolute .aff path must not be prefixed with $PWD
+case $1 in
+/*) aff=$1;;
+*) aff=$PWD/$1;;
+esac
+ln -s "$aff" "$tmpdir/wordforms.aff"
+# prepared dic only with the query word
+echo 1 >"$tmpdir/wordforms.dic"
+grep "^$3/" "$2" >>"$tmpdir/wordforms.dic"
+printf '%s\n' "$3" | awk -v "fx=$fx" '
+fx!=2 && FILENAME!="-" && /^SFX/ && NF > 4{split($4,a,"/");clen=($3=="0") ? 0 : length($3);sfx[a[1],clen]=a[1];sfxc[a[1],clen]=clen;next}
+fx!=1 && FILENAME!="-" && /^PFX/ && NF > 4{split($4,a,"/");clen=($3=="0") ? 0 : length($3);pfx[a[1],clen]=a[1];pfxc[a[1],clen]=clen;next}
+FILENAME=="-"{
+wlen=length($1)
+if (fx==0 || fx==2) {
+ for (j in pfx) {if (wlen<=pfxc[j]) continue; print (pfx[j]=="0" ? "" : pfx[j]) substr($1, pfxc[j]+1)}
+}
+if (fx==0 || fx==1) {
+ for(i in sfx){clen=sfxc[i];if (wlen<=clen) continue; print substr($1, 1, wlen-clen) (sfx[i]=="0" ? "": sfx[i]) }
+}
+if (fx==0) {
+for (j in pfx) {if (wlen<=pfxc[j]) continue;
+ for(i in sfx){clen=sfxc[i];if (wlen<=clen || wlen <= (clen + pfxc[j]))continue;
+ print (pfx[j]=="0" ? "" : pfx[j]) substr($1, pfxc[j]+1, wlen-clen-pfxc[j]) (sfx[i]=="0" ? "": sfx[i]) }}
+}
+}
+' "$tmpdir/wordforms.aff" - | hunspell -d "$tmpdir/wordforms" -G -l
diff --git a/src/tools/wordlist2hunspell b/src/tools/wordlist2hunspell
new file mode 100644
index 0000000..09a2bb2
--- /dev/null
+++ b/src/tools/wordlist2hunspell
@@ -0,0 +1,38 @@
+#!/bin/sh
+#
+# (C) 2008 Caolán McNamara <caolanm@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# This creates a LANG_TERRITORY .aff & .dic from a wordlist.
+# It is only a simple wordlist spellchecking dictionary output, no
+# knowledge of language rules can be extrapolated to shrink the
+# wordlist or provide .aff rules for extending wordstems
+
+if [ $# -lt 2 ]; then
+ echo "Usage: wordlist2hunspell wordlist_file locale"
+ echo "e.g. wordlist2hunspell breton.words br_FR to create br_FR.dic and br_FR.aff in cwd"
+ exit 1
+fi
+
+export LANG=$2.utf8
+echo "# A basic .aff for a raw wordlist, created through wordlist2hunspell" > $2.aff
+echo SET UTF-8 >> $2.aff
+#see https://bugzilla.redhat.com/show_bug.cgi?id=462184 for the "C" hacks
+echo TRY `sed 's/./&\n/g' $1 | sed '/^$/d' | LC_ALL=C sort -n | LC_ALL=C uniq -c | LC_ALL=C sort -rn | tr -s ' ' | cut -d ' ' -f 3 | tr -d '\n'` >> $2.aff
+cat $1 | sed '/^$/d' | wc -l > $2.dic
+LC_ALL=C sort $1 | sed '/^$/d' >> $2.dic
+
+echo Basic $2.dic and $2.aff created