summaryrefslogtreecommitdiff
path: root/src/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'src/parsers')
-rw-r--r--src/parsers/Makefile.am10
-rw-r--r--src/parsers/Makefile.in640
-rw-r--r--src/parsers/firstparser.cxx33
-rw-r--r--src/parsers/firstparser.hxx34
-rw-r--r--src/parsers/htmlparser.cxx151
-rw-r--r--src/parsers/htmlparser.hxx44
-rw-r--r--src/parsers/latexparser.cxx223
-rw-r--r--src/parsers/latexparser.hxx44
-rw-r--r--src/parsers/manparser.cxx71
-rw-r--r--src/parsers/manparser.hxx38
-rw-r--r--src/parsers/testparser.cxx53
-rw-r--r--src/parsers/textparser.cxx291
-rw-r--r--src/parsers/textparser.hxx69
13 files changed, 1701 insertions, 0 deletions
diff --git a/src/parsers/Makefile.am b/src/parsers/Makefile.am
new file mode 100644
index 0000000..02b0886
--- /dev/null
+++ b/src/parsers/Makefile.am
@@ -0,0 +1,10 @@
+# Static library with the input-format parsers; the lib_ prefix installs it into $(libdir).
+lib_LIBRARIES = libparsers.a
+libparsers_a_SOURCES = firstparser.cxx htmlparser.cxx latexparser.cxx \
+ manparser.cxx textparser.cxx
+
+# Uninstalled test driver built from the parser sources plus testparser.cxx.
+noinst_PROGRAMS = testparser
+testparser_SOURCES = firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx
+# need mystrdup(), hence the link against libhunspell
+LDADD = ../hunspell/libhunspell-1.3.la
diff --git a/src/parsers/Makefile.in b/src/parsers/Makefile.in
new file mode 100644
index 0000000..bdeea73
--- /dev/null
+++ b/src/parsers/Makefile.in
@@ -0,0 +1,640 @@
+# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
+# Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+
+VPATH = @srcdir@
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+noinst_PROGRAMS = testparser$(EXEEXT)
+subdir = src/parsers
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \
+ $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \
+ $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \
+ $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \
+ $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \
+ $(top_srcdir)/m4/inttypes-pri.m4 \
+ $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \
+ $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \
+ $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \
+ $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \
+ $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \
+ $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \
+ $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \
+ $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__installdirs = "$(DESTDIR)$(libdir)"
+LIBRARIES = $(lib_LIBRARIES)
+ARFLAGS = cru
+libparsers_a_AR = $(AR) $(ARFLAGS)
+libparsers_a_LIBADD =
+am_libparsers_a_OBJECTS = firstparser.$(OBJEXT) htmlparser.$(OBJEXT) \
+ latexparser.$(OBJEXT) manparser.$(OBJEXT) textparser.$(OBJEXT)
+libparsers_a_OBJECTS = $(am_libparsers_a_OBJECTS)
+PROGRAMS = $(noinst_PROGRAMS)
+am_testparser_OBJECTS = firstparser.$(OBJEXT) htmlparser.$(OBJEXT) \
+ latexparser.$(OBJEXT) manparser.$(OBJEXT) testparser.$(OBJEXT) \
+ textparser.$(OBJEXT)
+testparser_OBJECTS = $(am_testparser_OBJECTS)
+testparser_LDADD = $(LDADD)
+testparser_DEPENDENCIES = ../hunspell/libhunspell-1.3.la
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(libparsers_a_SOURCES) $(testparser_SOURCES)
+DIST_SOURCES = $(libparsers_a_SOURCES) $(testparser_SOURCES)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALLOCA = @ALLOCA@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
+CATOBJEXT = @CATOBJEXT@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CFLAG_VISIBILITY = @CFLAG_VISIBILITY@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CURSESLIB = @CURSESLIB@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DATADIRNAME = @DATADIRNAME@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GENCAT = @GENCAT@
+GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@
+GLIBC2 = @GLIBC2@
+GLIBC21 = @GLIBC21@
+GMSGFMT = @GMSGFMT@
+GMSGFMT_015 = @GMSGFMT_015@
+GREP = @GREP@
+HAVE_ASPRINTF = @HAVE_ASPRINTF@
+HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@
+HAVE_SNPRINTF = @HAVE_SNPRINTF@
+HAVE_VISIBILITY = @HAVE_VISIBILITY@
+HAVE_WPRINTF = @HAVE_WPRINTF@
+HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@
+HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INSTOBJEXT = @INSTOBJEXT@
+INTLBISON = @INTLBISON@
+INTLLIBS = @INTLLIBS@
+INTLOBJS = @INTLOBJS@
+INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
+INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBICONV = @LIBICONV@
+LIBINTL = @LIBINTL@
+LIBMULTITHREAD = @LIBMULTITHREAD@
+LIBOBJS = @LIBOBJS@
+LIBPTH = @LIBPTH@
+LIBPTH_PREFIX = @LIBPTH_PREFIX@
+LIBS = @LIBS@
+LIBTHREAD = @LIBTHREAD@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBC = @LTLIBC@
+LTLIBICONV = @LTLIBICONV@
+LTLIBINTL = @LTLIBINTL@
+LTLIBMULTITHREAD = @LTLIBMULTITHREAD@
+LTLIBOBJS = @LTLIBOBJS@
+LTLIBPTH = @LTLIBPTH@
+LTLIBTHREAD = @LTLIBTHREAD@
+MAKEINFO = @MAKEINFO@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+MSGFMT_015 = @MSGFMT_015@
+MSGMERGE = @MSGMERGE@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+POSUB = @POSUB@
+PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@
+RANLIB = @RANLIB@
+READLINELIB = @READLINELIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
+USE_NLS = @USE_NLS@
+VERSION = @VERSION@
+WINDRES = @WINDRES@
+WOE32 = @WOE32@
+WOE32DLL = @WOE32DLL@
+XFAILED = @XFAILED@
+XGETTEXT = @XGETTEXT@
+XGETTEXT_015 = @XGETTEXT_015@
+XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+lt_ECHO = @lt_ECHO@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+lib_LIBRARIES = libparsers.a
+libparsers_a_SOURCES = firstparser.cxx htmlparser.cxx \
+ latexparser.cxx manparser.cxx \
+ textparser.cxx
+
+testparser_SOURCES = firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx
+
+# need mystrdup()
+LDADD = ../hunspell/libhunspell-1.3.la
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cxx .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/parsers/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu src/parsers/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-libLIBRARIES: $(lib_LIBRARIES)
+ @$(NORMAL_INSTALL)
+ test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
+ @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+ list2=; for p in $$list; do \
+ if test -f $$p; then \
+ list2="$$list2 $$p"; \
+ else :; fi; \
+ done; \
+ test -z "$$list2" || { \
+ echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \
+ $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; }
+ @$(POST_INSTALL)
+ @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+ for p in $$list; do \
+ if test -f $$p; then \
+ $(am__strip_dir) \
+ echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \
+ ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \
+ else :; fi; \
+ done
+
+uninstall-libLIBRARIES:
+ @$(NORMAL_UNINSTALL)
+ @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ test -n "$$files" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(libdir)' && rm -f "$$files" )"; \
+ cd "$(DESTDIR)$(libdir)" && rm -f $$files
+
+clean-libLIBRARIES:
+ -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES)
+libparsers.a: $(libparsers_a_OBJECTS) $(libparsers_a_DEPENDENCIES)
+ -rm -f libparsers.a
+ $(libparsers_a_AR) libparsers.a $(libparsers_a_OBJECTS) $(libparsers_a_LIBADD)
+ $(RANLIB) libparsers.a
+
+clean-noinstPROGRAMS:
+ @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+testparser$(EXEEXT): $(testparser_OBJECTS) $(testparser_DEPENDENCIES)
+ @rm -f testparser$(EXEEXT)
+ $(CXXLINK) $(testparser_OBJECTS) $(testparser_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/firstparser.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htmlparser.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/latexparser.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/manparser.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testparser.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textparser.Po@am__quote@
+
+.cxx.o:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $<
+
+.cxx.obj:
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cxx.lo:
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LIBRARIES) $(PROGRAMS)
+installdirs:
+ for dir in "$(DESTDIR)$(libdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ `test -z '$(STRIP)' || \
+ echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libLIBRARIES clean-libtool \
+ clean-noinstPROGRAMS mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-libLIBRARIES
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-libLIBRARIES
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libLIBRARIES clean-libtool clean-noinstPROGRAMS ctags \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-libLIBRARIES install-man install-pdf \
+ install-pdf-am install-ps install-ps-am install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am uninstall-libLIBRARIES
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/src/parsers/firstparser.cxx b/src/parsers/firstparser.cxx
new file mode 100644
index 0000000..786ecea
--- /dev/null
+++ b/src/parsers/firstparser.cxx
@@ -0,0 +1,33 @@
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <ctype.h>
+
+#include "../hunspell/csutil.hxx"
+#include "firstparser.hxx"
+
+#ifndef W32
+using namespace std;
+#endif
+
+// Build a parser for the given word characters; all setup is delegated to init().
+FirstParser::FirstParser(const char * wordchars)
+{
+  this->init(wordchars);
+}
+
+// Nothing to release here.
+FirstParser::~FirstParser() {}
+
+/*
+ * Return the text that precedes the first tab of line[actual].
+ *
+ * A token is produced only when the line contains a tab whose offset is
+ * greater than `token`; `token` is then set to the offset just past that tab.
+ * The result is a NUL-terminated copy obtained from malloc(), so the caller
+ * releases it with free().  NULL is returned when there is no such tab, or
+ * when the allocation fails (a message is printed to stderr in that case).
+ */
+char * FirstParser::next_token()
+{
+  char * tabpos = strchr(line[actual],'\t');
+  if (!tabpos || !(tabpos - line[actual] > token)) return NULL;
+
+  size_t len = tabpos - line[actual];
+  char * t = (char *) malloc(len + 1);
+  token = tabpos - line[actual] + 1;
+  // Check the allocation before touching the buffer: the terminator used to
+  // be stored first, which dereferenced NULL whenever malloc() failed and
+  // made the error message below unreachable.
+  if (!t) {
+    fprintf(stderr,"Error - Insufficient Memory\n");
+    return NULL;
+  }
+  strncpy(t, line[actual], len);
+  t[len] = '\0';
+  return t;
+}
diff --git a/src/parsers/firstparser.hxx b/src/parsers/firstparser.hxx
new file mode 100644
index 0000000..1f79289
--- /dev/null
+++ b/src/parsers/firstparser.hxx
@@ -0,0 +1,34 @@
+/*
+ * parser classes of HunTools
+ *
+ * implemented: text, HTML, TeX, first word
+ *
+ * Copyright (C) 2003, Laszlo Nemeth
+ *
+ */
+
+// Include guard.  The former name began with an underscore followed by an
+// upper-case letter, a spelling reserved for the C++ implementation.
+#ifndef FIRSTPARSER_HXX_
+#define FIRSTPARSER_HXX_
+
+#include "textparser.hxx"
+
+/*
+ * Check first word of the input line
+ *
+ * TextParser variant whose next_token() yields only the text that precedes
+ * the first tab character of the current line.
+ */
+
+class FirstParser : public TextParser
+{
+
+public:
+
+  // wc: word characters, handed to init() by the constructor.
+  FirstParser(const char * wc);
+  virtual ~FirstParser();
+
+  // Returns a malloc()ed, NUL-terminated copy of the text before the first
+  // tab, or NULL when there is nothing (more) to return.
+  virtual char * next_token();
+
+};
+
+#endif
+
diff --git a/src/parsers/htmlparser.cxx b/src/parsers/htmlparser.cxx
new file mode 100644
index 0000000..341be4e
--- /dev/null
+++ b/src/parsers/htmlparser.cxx
@@ -0,0 +1,151 @@
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <ctype.h>
+
+#include "../hunspell/csutil.hxx"
+#include "htmlparser.hxx"
+
+
+#ifndef W32
+using namespace std;
+#endif
+
+// States of the HTMLParser::next_token() state machine.
+// ST_OTHER_TAG is not referenced by the code in this file.
+enum { ST_NON_WORD, ST_WORD, ST_TAG, ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB };
+
+/*
+ * Regions whose content is not tokenized: { opening text, closing text }.
+ * look_pattern() lower-cases the input before comparing, so every entry must
+ * be written in lower case.  The first matching row wins, which is why the
+ * generic "<" ... ">" row has to stay last.
+ */
+static const char * PATTERN[][2] = {
+  { "<script", "</script>" },
+  { "<style", "</style>" },
+  { "<code", "</code>" },
+  { "<samp", "</samp>" },
+  { "<kbd", "</kbd>" },
+  { "<var", "</var>" },
+  { "<listing", "</listing>" },
+  { "<address", "</address>" },
+  { "<pre", "</pre>" },
+  { "<!--", "-->" },          // comment
+  // CDATA section.  The opening text used to be "<[cdata[", which lacks the
+  // '!' and therefore could never match "<![CDATA[".
+  { "<![cdata[", "]]>" },
+  { "<", ">" }                // any other tag
+};
+
+// Number of rows in PATTERN.
+#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2))
+
+/*
+ * Tags with an attribute whose value is tokenized although it sits inside a
+ * tag: { tag opening, attribute name followed by '=' }.
+ */
+static const char * PATTERN2[][2] = {
+  { "<img", "alt=" },         // ALT and TITLE attrib handled spec.
+  { "<img", "title=" },
+  { "<a ", "title=" }
+};
+
+// Number of rows in PATTERN2.
+#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char *) * 2))
+
+// Construct from a string of word characters; setup is delegated to init().
+HTMLParser::HTMLParser(const char * wordchars)
+{
+  this->init(wordchars);
+}
+
+// Construct from an array of `len` unsigned short word characters; setup is
+// delegated to init().
+HTMLParser::HTMLParser(unsigned short * wordchars, int len)
+{
+  this->init(wordchars, len);
+}
+
+// Nothing to release here.
+HTMLParser::~HTMLParser() {}
+
+
+/*
+ * Test whether the text at line[actual] + head starts with one of the
+ * strings p[0..len-1][column].  The input is lower-cased before comparison;
+ * the table entries are compared as they are, so they must be lower case.
+ *
+ * Returns the index of the first matching row, or -1 when no row matches.
+ */
+int HTMLParser::look_pattern(const char * p[][2], unsigned int len, int column)
+{
+  const char * start = line[actual] + head;
+  for (unsigned int i = 0; i < len; i++) {
+    const char * j = start;
+    const char * k = p[i][column];
+    // tolower() is only defined for values representable as unsigned char
+    // (or EOF).  A plain char holding a byte >= 0x80 can be negative, so
+    // both sides are converted to unsigned char before comparing.
+    while ((*k != '\0') &&
+           (tolower((unsigned char) *j) == (unsigned char) *k)) {
+      j++;
+      k++;
+    }
+    // Reaching the end of the pattern means every character matched.
+    if (*k == '\0') return i;
+  }
+  return -1;
+}
+
+/*
+ * HTML parser
+ *
+ * Walks line[actual] starting at `head` and returns the next word as built by
+ * alloc_token().  Returns NULL as soon as next_char() yields a non-zero
+ * result (NOTE(review): presumably end of line -- confirm in TextParser).
+ *
+ * `state` selects the branch taken for the current character:
+ *   ST_NON_WORD     text outside tags, between words
+ *   ST_WORD         inside a word that started at offset `token`
+ *   ST_TAG          inside a region opened by a PATTERN row
+ *   ST_ATTRIB       inside a quoted attribute value of a generic tag
+ *   ST_CHAR_ENTITY  after '&', waiting for the closing ';'
+ * `prevstate` records whether words and entities were entered from
+ * ST_NON_WORD or ST_ATTRIB, so that the parser can go back there.
+ * `checkattr` is 0 for ordinary tags, 1 for a tag listed in PATTERN2 and 2
+ * once one of that tag's PATTERN2 attribute names has been seen.
+ */
+
+
+char * HTMLParser::next_token()
+{
+ const char * latin1;
+
+ for (;;) {
+ // Each pass handles the character at line[actual][head] in the current
+ // state; next_char() at the bottom of the loop then moves head forward.
+ switch (state)
+ {
+ case ST_NON_WORD: // text outside tags, between words
+ prevstate = ST_NON_WORD;
+ if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) {
+ checkattr = 0;
+ if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) {
+ checkattr = 1; // tag is listed in PATTERN2: watch for its attribute
+ }
+ state = ST_TAG;
+ } else if (is_wordchar(line[actual] + head)) {
+ state = ST_WORD;
+ token = head; // remember where the word starts
+ } else if ((latin1 = get_latin1(line[actual] + head))) { // NOTE(review): get_latin1() presumably recognises a Latin-1 character entity -- confirm
+ state = ST_WORD;
+ token = head;
+ head += strlen(latin1); // skip by the length of the returned string
+ } else if (line[actual][head] == '&') {
+ state = ST_CHAR_ENTITY;
+ }
+ break;
+ case ST_WORD: // inside a word
+ if ((latin1 = get_latin1(line[actual] + head))) {
+ head += strlen(latin1);
+ } else if (! is_wordchar(line[actual] + head)) {
+ state = prevstate; // word ended: go back to where it was entered from
+ char * t = alloc_token(token, &head);
+ if (t) return t;
+ }
+ break;
+ case ST_TAG: // inside a region opened by a PATTERN row (tag, comment, ...)
+ int i;
+ if ((checkattr == 1) && ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1)
+ && (strcmp(PATTERN2[i][0],PATTERN2[pattern2_num][0]) == 0)) {
+ checkattr = 2; // attribute name belongs to the tag that was opened
+ } else if ((checkattr > 0) && (line[actual][head] == '>')) {
+ state = ST_NON_WORD; // end of a PATTERN2 tag
+ } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) &&
+ (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) {
+ state = ST_NON_WORD; // closing text of the region that was opened
+ head += strlen(PATTERN[pattern_num][1]) - 1; // next_char() below steps over the last character
+ } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) &&
+ ((line[actual][head] == '"') || (line[actual][head] == '\''))) {
+ quotmark = line[actual][head]; // only this quote character closes the value
+ state = ST_ATTRIB;
+ }
+ break;
+ case ST_ATTRIB: // inside a quoted attribute value
+ prevstate = ST_ATTRIB;
+ if (line[actual][head] == quotmark) {
+ state = ST_TAG;
+ if (checkattr == 2) checkattr = 1;
+ // words are tokenized only in a PATTERN2 attribute (e.g. IMG ALT)
+ } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) {
+ state = ST_WORD;
+ token = head;
+ } else if (line[actual][head] == '&') {
+ state = ST_CHAR_ENTITY;
+ }
+ break;
+ case ST_CHAR_ENTITY: // character entity: skip until ';'
+ if ((tolower(line[actual][head]) == ';')) {
+ state = prevstate;
+ head--; // step back one position before next_char() below advances again
+ }
+ }
+ if (next_char(line[actual], &head)) return NULL;
+ }
+}
diff --git a/src/parsers/htmlparser.hxx b/src/parsers/htmlparser.hxx
new file mode 100644
index 0000000..9a0da7a
--- /dev/null
+++ b/src/parsers/htmlparser.hxx
@@ -0,0 +1,44 @@
+/*
+ * HTML parser class for MySpell
+ *
+ * implemented: text, HTML, TeX
+ *
+ * Copyright (C) 2002, Laszlo Nemeth
+ *
+ */
+
+// Include guard.  The former name began with an underscore followed by an
+// upper-case letter, a spelling reserved for the C++ implementation.
+#ifndef HTMLPARSER_HXX_
+#define HTMLPARSER_HXX_
+
+
+#include "textparser.hxx"
+
+/*
+ * HTML Parser
+ *
+ * TextParser variant whose next_token() does not tokenize the inside of
+ * tags, comments and the other regions listed in htmlparser.cxx, except
+ * for the ALT/TITLE attribute values it handles specially.
+ */
+
+class HTMLParser : public TextParser
+{
+public:
+
+  // wc: word characters, handed to init() by the constructor.
+  HTMLParser(const char * wc);
+  // wordchars/len: array of word characters and its length, handed to init().
+  HTMLParser(unsigned short * wordchars, int len);
+  virtual ~HTMLParser();
+
+  // Returns the next word of the current line, or NULL when the scan stops.
+  virtual char * next_token();
+
+private:
+
+  // Returns the index of the first row of p whose `column` entry matches the
+  // text at the current position, or -1 when none does.
+  int look_pattern(const char * p[][2], unsigned int len, int column);
+  int pattern_num;   // row of the opening pattern that started the current region
+  int pattern2_num;  // row of the specially handled tag (img, a), or -1
+  int prevstate;     // state to return to after a word or character entity
+  int checkattr;     // 0: ordinary tag, 1: special tag, 2: its attribute name was seen
+  char quotmark;     // quote character that opened the current attribute value
+
+};
+
+
+#endif
+
diff --git a/src/parsers/latexparser.cxx b/src/parsers/latexparser.cxx
new file mode 100644
index 0000000..5ffe3fd
--- /dev/null
+++ b/src/parsers/latexparser.cxx
@@ -0,0 +1,223 @@
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <ctype.h>
+
+#include "../hunspell/csutil.hxx"
+#include "latexparser.hxx"
+
+#ifndef W32
+using namespace std;
+#endif
+
+// Table of LaTeX constructs whose content is skipped by the tokenizer.
+//
+// pat[0] is the opening text looked for at the current position.
+// pat[1] != NULL: everything up to the closing text pat[1] is skipped.
+// pat[1] == NULL: the entry is a command; `arg` is the number of
+//                 brace-delimited arguments that are skipped after it.
+//
+// look_pattern() returns the FIRST row that matches as a prefix, so the order
+// of the rows matters when one pattern is a prefix of another. The input is
+// lower-cased before the comparison, so patterns must be written in lower case.
+static struct {
+ const char * pat[2];
+ int arg;
+} PATTERN[] = {
+ { { "\\(", "\\)" } , 0 },
+ { { "$$", "$$" } , 0 },
+ { { "$", "$" } , 0 },
+ { { "\\begin{math}", "\\end{math}" } , 0 },
+ { { "\\[", "\\]" } , 0 },
+ { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 },
+ { { "\\begin{equation}", "\\end{equation}" } , 0 },
+ { { "\\begin{equation*}", "\\end{equation*}" } , 0 },
+ { { "\\cite", NULL } , 1 },
+ { { "\\nocite", NULL } , 1 },
+ { { "\\index", NULL } , 1 },
+ { { "\\label", NULL } , 1 },
+ { { "\\ref", NULL } , 1 },
+ { { "\\pageref", NULL } , 1 },
+ { { "\\parbox", NULL } , 1 },
+ { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 },
+ { { "\\verb+", "+" } , 0 },
+ { { "\\verb|", "|" } , 0 },
+ { { "\\verb#", "#" } , 0 },
+ { { "\\verb*", "*" } , 0 },
+ { { "\\documentstyle", "\\begin{document}" } , 0 },
+ { { "\\documentclass", "\\begin{document}" } , 0 },
+// { { "\\documentclass", NULL } , 1 },
+ { { "\\usepackage", NULL } , 1 },
+ { { "\\includeonly", NULL } , 1 },
+ { { "\\include", NULL } , 1 },
+ { { "\\input", NULL } , 1 },
+ { { "\\vspace", NULL } , 1 },
+ { { "\\setlength", NULL } , 2 },
+ { { "\\addtolength", NULL } , 2 },
+ { { "\\settowidth", NULL } , 2 },
+ { { "\\rule", NULL } , 2 },
+ { { "\\hspace", NULL } , 1 } ,
+ { { "\\vspace", NULL } , 1 } ,
+ { { "\\\\[", "]" } , 0 },
+ { { "\\pagebreak[", "]" } , 0 } ,
+ { { "\\nopagebreak[", "]" } , 0 } ,
+ { { "\\enlargethispage", NULL } , 1 } ,
+ { { "\\begin{tabular}", NULL } , 1 } ,
+ { { "\\addcontentsline", NULL } , 2 } ,
+ { { "\\begin{thebibliography}", NULL } , 1 } ,
+ { { "\\bibliography", NULL } , 1 } ,
+ { { "\\bibliographystyle", NULL } , 1 } ,
+ { { "\\bibitem", NULL } , 1 } ,
+ { { "\\begin", NULL } , 1 } ,
+ { { "\\end", NULL } , 1 } ,
+ { { "\\pagestyle", NULL } , 1 } ,
+ { { "\\pagenumbering", NULL } , 1 } ,
+ { { "\\thispagestyle", NULL } , 1 } ,
+ { { "\\newtheorem", NULL } , 2 },
+ { { "\\newcommand", NULL } , 2 },
+ { { "\\renewcommand", NULL } , 2 },
+ { { "\\setcounter", NULL } , 2 },
+ { { "\\addtocounter", NULL } , 1 },
+ { { "\\stepcounter", NULL } , 1 },
+ { { "\\selectlanguage", NULL } , 1 },
+ { { "\\inputencoding", NULL } , 1 },
+ { { "\\hyphenation", NULL } , 1 },
+ { { "\\definecolor", NULL } , 3 },
+ { { "\\color", NULL } , 1 },
+ { { "\\textcolor", NULL } , 1 },
+ { { "\\pagecolor", NULL } , 1 },
+ { { "\\colorbox", NULL } , 2 },
+ { { "\\fcolorbox", NULL } , 2 },
+ { { "\\declaregraphicsextensions", NULL } , 1 },
+ { { "\\psfig", NULL } , 1 },
+ { { "\\url", NULL } , 1 },
+ { { "\\eqref", NULL } , 1 },
+ { { "\\vskip", NULL } , 1 },
+ { { "\\vglue", NULL } , 1 },
+ { { "\'\'", NULL } , 1 }
+};
+
+// number of rows in PATTERN
+#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
+
+// Builds a LaTeX tokenizer whose word characters are given as a byte string.
+// The automaton bookkeeping members are zeroed so that they never hold
+// indeterminate values before next_token() assigns them.
+LaTeXParser::LaTeXParser(const char * wordchars)
+    : pattern_num(0), depth(0), arg(0), opt(0)
+{
+    init(wordchars);
+}
+
+// Builds a LaTeX tokenizer whose word characters are given as an array of
+// `len` 16-bit values (this selects the UTF-8 mode of TextParser::init).
+LaTeXParser::LaTeXParser(unsigned short * wordchars, int len)
+    : pattern_num(0), depth(0), arg(0), opt(0)
+{
+    init(wordchars, len);
+}
+
+// Nothing to release: the parser owns no dynamically allocated data.
+LaTeXParser::~LaTeXParser()
+{
+}
+
+/*
+ * Finds the first PATTERN row whose column `col` text is a prefix of the
+ * input at the current head position.
+ *
+ * col: 0 selects the opening patterns, 1 the closing patterns; rows whose
+ *      entry in that column is NULL are ignored.
+ *
+ * The input is lower-cased before it is compared with the pattern.
+ * Returns the row index, or -1 when nothing matches.
+ */
+int LaTeXParser::look_pattern(int col)
+{
+    const char * const text = line[actual] + head;
+    for (unsigned int i = 0; i < PATTERN_LEN; i++) {
+        const char * pat = PATTERN[i].pat[col];
+        if (! pat) continue;
+        const char * in = text;
+        // tolower() is only defined for unsigned char values (or EOF), so
+        // bytes >= 0x80 must not be passed as negative plain chars.
+        // The comparison stops at the end of the input by itself: the
+        // terminating '\0' never equals a pattern character.
+        while ((*pat != '\0') &&
+               (tolower((unsigned char) *in) == (unsigned char) *pat)) {
+            in++;
+            pat++;
+        }
+        if (*pat == '\0') return static_cast<int>(i);
+    }
+    return -1;
+}
+
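+/*
+ * LaTeXParser::next_token() automaton
+ *
+ * state 0: outside of a word (non-word characters)
+ * state 1: inside a word
+ * state 2: inside a skipped region that ends with a closing pattern
+ *          (math, verbatim, preamble, ...)
+ * state 3: inside the name of a command that is not in the pattern table
+ * state 4: skipping the arguments of a command from the pattern table
+ * state 5: inside a % comment (lasts until the end of the line)
+ *
+ */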
+
+
+/*
+ * Returns the next word of the current line as a malloc()ed string that the
+ * caller must free(), or NULL when the end of the line has been reached.
+ *
+ * Text matched by the PATTERN table (math, verbatim blocks, arguments of
+ * listed commands), command names and % comments are skipped. The automaton
+ * state is kept in members, so a skipped region may continue on the next
+ * line; only the % comment state is reset at the end of the line.
+ */
+char * LaTeXParser::next_token()
+{
+    int i;
+    int slash = 0;      // set when the previous character was a backslash (state 4)
+    int apostrophe;
+    for (;;) {
+        switch (state)
+        {
+        case 0: // non word chars
+            if ((pattern_num = look_pattern(0)) != -1) {
+                if (PATTERN[pattern_num].pat[1]) {
+                    // region delimited by a closing pattern
+                    state = 2;
+                } else {
+                    // command followed by arguments to skip
+                    state = 4;
+                    depth = 0;
+                    arg = 0;
+                    opt = 1;
+                }
+                // -1: next_char() below steps over the last pattern character
+                head += strlen(PATTERN[pattern_num].pat[0]) - 1;
+            } else if (line[actual][head] == '%') {
+                // an escaped percent sign never gets here: the backslash
+                // branch below steps over the character that follows it
+                state = 5;
+            } else if (is_wordchar(line[actual] + head)) {
+                state = 1;
+                token = head;
+            } else if (line[actual][head] == '\\') {
+                if ((line[actual][head + 1] == '\\') || // \\ (linebreak)
+                    (line[actual][head + 1] == '$') ||  // \$ (dollar sign)
+                    (line[actual][head + 1] == '%')) {  // \% (percent)
+                    head++;
+                    break;
+                }
+                state = 3;
+            }
+            break;
+        case 1: // wordchar
+            apostrophe = 0;
+            // a word ends at the first non-word character or at a '' pair
+            if (! is_wordchar(line[actual] + head) ||
+                (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) {
+                state = 0;
+                char * t = alloc_token(token, &head);
+                if (apostrophe) head += 2;
+                if (t) return t;
+            }
+            break;
+        case 2: // comment, labels, etc
+            // leave the region only at the closing text of the opening pattern
+            if (((i = look_pattern(1)) != -1) &&
+                (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
+                state = 0;
+                head += strlen(PATTERN[pattern_num].pat[1]) - 1;
+            }
+            break;
+        case 3: // command
+            {
+                // tolower() requires an unsigned char value
+                const int c = tolower((unsigned char) line[actual][head]);
+                if ((c < 'a') || (c > 'z')) {
+                    state = 0;
+                    head--;     // re-examine this character in state 0
+                }
+            }
+            break;
+        case 4: // command with arguments
+            if (slash && (line[actual][head] != '\0')) {
+                // NOTE(review): head is advanced here and again by next_char()
+                // below, so two characters are skipped after a backslash -
+                // confirm that this is intended
+                slash = 0;
+                head++;
+                break;
+            } else if (line[actual][head] == '\\') {
+                slash = 1;
+            } else if ((line[actual][head] == '{') ||
+                       ((opt) && (line[actual][head] == '['))) {
+                depth++;
+                opt = 0;
+            } else if (line[actual][head] == '}') {
+                depth--;
+                if (depth == 0) {
+                    opt = 1;
+                    arg++;
+                }
+                if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
+                    (depth < 0) ) {
+                    state = 0; // XXX not handles the last optional arg.
+                }
+            } else if (line[actual][head] == ']') depth--;
+            break;
+        // state 5 (% comment): nothing to do, just advance to the end of line
+        } // case
+        if (next_char(line[actual], &head)) {
+            if (state == 5) state = 0;
+            return NULL;
+        }
+    }
+}
diff --git a/src/parsers/latexparser.hxx b/src/parsers/latexparser.hxx
new file mode 100644
index 0000000..851ecb9
--- /dev/null
+++ b/src/parsers/latexparser.hxx
@@ -0,0 +1,44 @@
+/*
+ * parser classes for MySpell
+ *
+ * implemented: text, HTML, TeX
+ *
+ * Copyright (C) 2002, Laszlo Nemeth
+ *
+ */
+
+#ifndef _LATEXPARSER_HXX_
+#define _LATEXPARSER_HXX_
+
+
+#include "textparser.hxx"
+
+/*
+ * LaTeX Parser
+ *
+ */
+
+// Tokenizer for LaTeX input, derived from TextParser. The pattern table and
+// the scanning automaton are implemented in latexparser.cxx.
+class LaTeXParser : public TextParser
+{
+ int pattern_num; // row of the pattern table matched last
+ int depth; // nesting depth of { } / [ ] while arguments are skipped
+ int arg; // number of arguments skipped so far
+ int opt; // non-zero while a '[' may open an optional argument
+
+public:
+
+ LaTeXParser(const char * wc);
+ LaTeXParser(unsigned short * wordchars, int len);
+ virtual ~LaTeXParser();
+
+ // Overrides TextParser::next_token().
+ virtual char * next_token();
+
+private:
+
+ // Returns the first pattern table row whose column `col` matches the input
+ // at the current position, or -1.
+ int look_pattern(int col);
+
+};
+
+
+#endif
+
diff --git a/src/parsers/manparser.cxx b/src/parsers/manparser.cxx
new file mode 100644
index 0000000..25858da
--- /dev/null
+++ b/src/parsers/manparser.cxx
@@ -0,0 +1,71 @@
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <ctype.h>
+
+#include "../hunspell/csutil.hxx"
+#include "manparser.hxx"
+
+
+#ifndef W32
+using namespace std;
+#endif
+
+// Default construction: the TextParser base constructor has already set up
+// the default word characters, nothing is added here.
+ManParser::ManParser()
+{
+}
+
+// Word characters given as a byte string.
+ManParser::ManParser(const char * wordchars)
+{
+    init(wordchars);
+}
+
+// Word characters given as an array of `len` 16-bit values.
+ManParser::ManParser(unsigned short * wordchars, int len)
+{
+    init(wordchars, len);
+}
+
+// The parser owns no resources of its own.
+ManParser::~ManParser()
+{
+}
+
+/*
+ * Returns the next word of the current manual page line as a malloc()ed
+ * string, or NULL at the end of the line.
+ *
+ * States:
+ *   0  start of line: decide whether the line begins with a dot
+ *   1  inside the request name of a dot line (skipped up to the first space)
+ *   2  between words
+ *   3  inside a word
+ */
+char * ManParser::next_token()
+{
+    for (;;) {
+        char * cur = line[actual] + head;
+        // set when this character has to be handled as "between words" text
+        bool between_words = false;
+
+        switch (state)
+        {
+        case 0:
+            if (line[actual][0] == '.') {
+                state = 1;
+            } else {
+                state = 2;
+                between_words = true;   // handle this character right away
+            }
+            break;
+        case 1:
+            if (*cur == ' ') state = 2;
+            break;
+        case 2:
+            between_words = true;
+            break;
+        case 3:
+            if (! is_wordchar(cur)) {
+                state = 2;
+                char * word = alloc_token(token, &head);
+                if (word) return word;
+            }
+            break;
+        }
+
+        if (between_words) {
+            if (is_wordchar(cur)) {
+                state = 3;
+                token = head;
+            } else if ((cur[0] == '\\') && (cur[1] == 'f') && (cur[2] != '\0')) {
+                // step over a "\fX" sequence; next_char() skips its last character
+                head += 2;
+            }
+        }
+
+        if (next_char(line[actual], &head)) {
+            state = 0;
+            return NULL;
+        }
+    }
+}
+
diff --git a/src/parsers/manparser.hxx b/src/parsers/manparser.hxx
new file mode 100644
index 0000000..6db37c5
--- /dev/null
+++ b/src/parsers/manparser.hxx
@@ -0,0 +1,38 @@
+/*
+ * parser classes for MySpell
+ *
+ * implemented: text, HTML, TeX
+ *
+ * Copyright (C) 2002, Laszlo Nemeth
+ *
+ */
+
+#ifndef _MANPARSER_HXX_
+#define _MANPARSER_HXX_
+
+#include "textparser.hxx"
+
+/*
+ * Manual page (man) parser
+ *
+ */
+
+// Tokenizer for manual page sources, derived from TextParser. It adds no data
+// members; only next_token() is overridden (see manparser.cxx).
+class ManParser : public TextParser
+{
+
+protected:
+
+
+public:
+
+ ManParser();
+ ManParser(const char * wc);
+ ManParser(unsigned short * wordchars, int len);
+ virtual ~ManParser();
+
+ // Overrides TextParser::next_token().
+ virtual char * next_token();
+
+};
+
+#endif
+
diff --git a/src/parsers/testparser.cxx b/src/parsers/testparser.cxx
new file mode 100644
index 0000000..b257f12
--- /dev/null
+++ b/src/parsers/testparser.cxx
@@ -0,0 +1,53 @@
+#include <cstring>
+#include <cstdlib>
+#include <cstdio>
+
+#include "textparser.hxx"
+#include "htmlparser.hxx"
+#include "latexparser.hxx"
+
+#ifndef W32
+using namespace std;
+#endif
+
+/*
+ * Test driver: reads the file named by the first argument line by line,
+ * runs every line through a LaTeXParser and prints the tokens it returns.
+ */
+int
+main(int argc, char** argv)
+{
+    /* the input file is the only, mandatory, argument */
+    if (argc < 2) {
+        fputs("correct syntax is:\n", stderr);
+        fputs("testparser file\n", stderr);
+        fputs("example: testparser /dev/stdin\n", stderr);
+        exit(1);
+    }
+
+    /* open the words to check list */
+    FILE * input = fopen(argv[1], "r");
+    if (input == NULL) {
+        fputs("Error - could not open file of words to check\n", stderr);
+        exit(1);
+    }
+
+    TextParser * parser = new LaTeXParser("qwertzuiopasdfghjklyxcvbnméáúõûóüöíQWERTZUIOPASDFGHJKLYXCVBNMÍÉÁÕÚÖÜÓÛ");
+
+    char linebuf[MAXLNLEN];
+
+    while (fgets(linebuf, MAXLNLEN, input) != NULL) {
+        printf("---------------------------------------\n");
+        parser->put_line(linebuf);
+        fprintf(stderr, "x:%s\n", linebuf);
+        parser->set_url_checking(1);
+        /* every token is allocated by the parser and released here */
+        for (char * tok = parser->next_token(); tok != NULL; tok = parser->next_token()) {
+            printf("token: %s\n", tok);
+            free(tok);
+        }
+    }
+
+    delete parser;
+    fclose(input);
+    return 0;
+}
+
diff --git a/src/parsers/textparser.cxx b/src/parsers/textparser.cxx
new file mode 100644
index 0000000..0338136
--- /dev/null
+++ b/src/parsers/textparser.cxx
@@ -0,0 +1,291 @@
+#include <cstdlib>
+#include <cstring>
+#include <cstdio>
+#include <ctype.h>
+
+#include "../hunspell/csutil.hxx"
+#include "textparser.hxx"
+
+#ifndef W32
+using namespace std;
+#endif
+
+// ISO-8859-1 HTML character entities
+//
+// Entity names that the plain text tokenizer accepts as part of a word.
+// get_latin1() compares each entry as a prefix of the input and returns the
+// first entry that matches.
+
+static const char * LATIN1[] = {
+ "&Agrave;",
+ "&Atilde;",
+ "&Aring;",
+ "&AElig;",
+ "&Egrave;",
+ "&Ecirc;",
+ "&Igrave;",
+ "&Iuml;",
+ "&ETH;",
+ "&Ntilde;",
+ "&Ograve;",
+ "&Oslash;",
+ "&Ugrave;",
+ "&THORN;",
+ "&agrave;",
+ "&atilde;",
+ "&aring;",
+ "&aelig;",
+ "&egrave;",
+ "&ecirc;",
+ "&igrave;",
+ "&iuml;",
+ "&eth;",
+ "&ntilde;",
+ "&ograve;",
+ "&oslash;",
+ "&ugrave;",
+ "&thorn;",
+ "&yuml;"
+};
+
+// number of entries in LATIN1
+#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *))
+
+// Default parser: a NULL word character string makes init() fall back to its
+// built-in alphabet.
+TextParser::TextParser()
+{
+    const char * const no_wordchars = NULL;
+    init(no_wordchars);
+}
+
+// Parser with the word characters given as a byte string.
+TextParser::TextParser(const char * wordchars)
+{
+    init(wordchars);
+}
+
+// Parser with the word characters given as an array of `len` 16-bit values.
+TextParser::TextParser(unsigned short * wordchars, int len)
+{
+    init(wordchars, len);
+}
+
+// The parser owns no dynamically allocated data.
+TextParser::~TextParser()
+{
+}
+
+/*
+ * Tells whether the character starting at `w` belongs to a word.
+ *
+ * The string terminator is never a word character. In byte mode the answer
+ * comes from the wordcharacters[] lookup table; in UTF-8 mode the character
+ * is decoded and accepted when unicodeisalpha() accepts it or when it is
+ * found in the extra word character list.
+ *
+ * Returns non-zero for a word character, 0 otherwise.
+ */
+int TextParser::is_wordchar(char * w)
+{
+    if (*w == '\0') return 0;
+
+    if (! utf8) {
+        // index the table with the byte value 0..255
+        return wordcharacters[(unsigned char) *w];
+    }
+
+    w_char wc;
+    // NOTE(review): the result of u8_u16() is not checked - confirm that wc
+    // is always filled in for malformed input
+    u8_u16(&wc, 1, w);
+    const unsigned short idx = (wc.h << 8) + wc.l;
+    if (unicodeisalpha(idx)) return 1;
+    if (! wordchars_utf16) return 0;
+    return flag_bsearch(wordchars_utf16, *((unsigned short *) &wc), wclen) ? 1 : 0;
+}
+
+/*
+ * Returns the LATIN1 table entry that `s` starts with, or NULL when `s` does
+ * not begin with one of the listed character entities.
+ */
+const char * TextParser::get_latin1(char * s)
+{
+    // every entity starts with an ampersand
+    if (s[0] != '&') return NULL;
+
+    for (unsigned int i = 0; i < LATIN1_LEN; i++) {
+        const char * entity = LATIN1[i];
+        if (strncmp(entity, s, strlen(entity)) == 0) return entity;
+    }
+    return NULL;
+}
+
+/*
+ * Resets the parser for byte-oriented (non UTF-8) input.
+ *
+ * wordchars: the characters that may form a word; NULL selects the built-in
+ *            ASCII alphabet.
+ *
+ * All stored lines are emptied, the automaton is reset and URL checking is
+ * switched off. The UTF-8 word character list is cleared as well, so that no
+ * member is left uninitialised after this call.
+ */
+void TextParser::init(const char * wordchars)
+{
+    for (int i = 0; i < MAXPREVLINE; i++) {
+        line[i][0] = '\0';
+    }
+    actual = 0;
+    head = 0;
+    token = 0;
+    state = 0;
+    utf8 = 0;
+    checkurl = 0;
+    wordchars_utf16 = NULL;
+    wclen = 0;
+    for (unsigned int j = 0; j < 256; j++) {
+        wordcharacters[j] = 0;
+    }
+    if (!wordchars) wordchars = "qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM";
+    // walk the string once instead of calling strlen() on every iteration
+    for (const char * p = wordchars; *p != '\0'; p++) {
+        // map the (possibly negative) char to the table index 0..255
+        wordcharacters[(*p + 256) % 256] = 1;
+    }
+}
+
+/*
+ * Resets the parser for UTF-8 input.
+ *
+ * wc, len: extra word characters as an array of `len` 16-bit values. Only the
+ *          pointer is stored, the array is not copied.
+ */
+void TextParser::init(unsigned short * wc, int len)
+{
+    wordchars_utf16 = wc;
+    wclen = len;
+    utf8 = 1;
+    checkurl = 0;
+
+    // automaton and position bookkeeping
+    state = 0;
+    token = 0;
+    head = 0;
+    actual = 0;
+
+    // forget every stored line
+    for (int n = MAXPREVLINE - 1; n >= 0; n--) {
+        line[n][0] = '\0';
+    }
+}
+
+/*
+ * Moves *pos to the next character of `line`.
+ *
+ * In UTF-8 mode a character whose first byte has the high bit set is stepped
+ * over together with the continuation bytes (10xxxxxx) that follow it.
+ *
+ * Returns 1 without moving when *pos is already at the string terminator,
+ * 0 otherwise.
+ */
+int TextParser::next_char(char * line, int * pos) {
+    const char first = line[*pos];
+    if (first == '\0') return 1;
+
+    (*pos)++;
+    if (utf8 && (first & 0x80)) {
+        // jump to next UTF-8 character
+        while ((line[*pos] & 0xc0) == 0x80) (*pos)++;
+    }
+    return 0;
+}
+
+/*
+ * Stores `word` as the new current line and rewinds the tokenizer to its
+ * beginning. The oldest stored line is overwritten (ring of MAXPREVLINE
+ * lines) and the URL mask of the new line is computed.
+ *
+ * A line that does not fit into the MAXLNLEN sized line buffer is truncated
+ * instead of being written past the end of the buffer.
+ */
+void TextParser::put_line(char * word)
+{
+    actual = (actual + 1) % MAXPREVLINE;
+    size_t len = strlen(word);
+    if (len >= MAXLNLEN) len = MAXLNLEN - 1;    // keep room for the terminator
+    memcpy(line[actual], word, len);
+    line[actual][len] = '\0';
+    token = 0;
+    head = 0;
+    check_urls();
+}
+
+/*
+ * Returns a copy (made with mystrdup) of the line stored `n` lines before the
+ * current one; n == 0 is the current line.
+ *
+ * Only MAXPREVLINE lines are kept, so `n` is taken modulo MAXPREVLINE. The
+ * index is normalised so that it can never become negative, whatever the
+ * value of `n` is.
+ */
+char * TextParser::get_prevline(int n)
+{
+    int idx = (actual - n) % MAXPREVLINE;
+    if (idx < 0) idx += MAXPREVLINE;    // % keeps the sign of the dividend
+    return mystrdup(line[idx]);
+}
+
+/*
+ * Returns a copy (made with mystrdup) of the current line.
+ */
+char * TextParser::get_line()
+{
+    // same as get_prevline(0): `actual` is the slot of the current line
+    return mystrdup(line[actual]);
+}
+
+/*
+ * Returns the next word of the current line as a malloc()ed string that the
+ * caller must free(), or NULL when the end of the line has been reached.
+ *
+ * A word is a run of word characters and of the character entities listed in
+ * LATIN1.
+ *
+ * state 0: outside of a word
+ * state 1: inside a word
+ */
+char * TextParser::next_token()
+{
+    const char * latin1;
+
+    for (;;) {
+        switch (state)
+        {
+        case 0: // non word chars
+            if (is_wordchar(line[actual] + head)) {
+                state = 1;
+                token = head;
+            } else if ((latin1 = get_latin1(line[actual] + head))) {
+                state = 1;
+                token = head;
+                // stop on the last character of the entity: next_char() below
+                // moves on to the character that follows it, which must be
+                // examined and not skipped
+                head += strlen(latin1) - 1;
+            }
+            break;
+        case 1: // wordchar
+            if ((latin1 = get_latin1(line[actual] + head))) {
+                // see above: leave the final step to next_char()
+                head += strlen(latin1) - 1;
+            } else if (! is_wordchar(line[actual] + head)) {
+                state = 0;
+                char * t = alloc_token(token, &head);
+                if (t) return t;
+            }
+            break;
+        }
+        if (next_char(line[actual], &head)) return NULL;
+    }
+}
+
+/*
+ * Returns the offset in the current line at which the last token begins.
+ */
+int TextParser::get_tokenpos()
+{
+    const int begin = token;
+    return begin;
+}
+
+/*
+ * Replaces the text between the token start and the head position of the
+ * current line with `word`, and moves the head back to the token start.
+ *
+ * Returns 1 on success. Returns 0, leaving the line untouched, when `word`
+ * is NULL or when the resulting line would not fit into the MAXLNLEN sized
+ * line buffer.
+ */
+int TextParser::change_token(const char * word)
+{
+    if (!word) return 0;
+
+    char * const buf = line[actual];
+    const size_t wordlen = strlen(word);
+    const size_t taillen = strlen(buf + head);   // text after the old token
+
+    // prefix + replacement + tail + terminator must fit into the buffer
+    if ((size_t) token + wordlen + taillen >= MAXLNLEN) return 0;
+
+    // move the tail (with its terminator) to its final place first; the
+    // regions may overlap, hence memmove
+    memmove(buf + token + wordlen, buf + head, taillen + 1);
+    memcpy(buf + token, word, wordlen);
+    head = token;
+    return 1;
+}
+
+/*
+ * Fills urlline[], the URL mask of the current line: the bytes of every run
+ * that looks like an e-mail address, a URL or a file path are set to 1, the
+ * examined positions outside of such runs are set to 0.
+ *
+ * A run starts at a word character or at '/', and goes on while word
+ * characters, digits or the punctuation listed below follow. It is marked
+ * when it starts with '/' or contains '@', ":\" or "://".
+ */
+void TextParser::check_urls()
+{
+    bool in_run = false;    // inside a candidate run
+    bool is_url = false;    // the current run has been recognised as a URL
+    int run_start = 0;
+    int pos = 0;
+
+    for (;;) {
+        char * cur = line[actual] + pos;
+        const char ch = *cur;
+
+        if (! in_run) {
+            if (is_wordchar(cur)) {
+                in_run = true;
+                run_start = pos;
+            } else if (ch == '/') {
+                // Unix path
+                in_run = true;
+                run_start = pos;
+                is_url = true;
+            }
+        } else if ((ch == '@') ||                           // e-mail address
+                   (strncmp(cur, ":\\", 2) == 0) ||         // MS-DOS, Windows path
+                   (strncmp(cur, "://", 3) == 0)) {         // URL
+            is_url = true;
+        } else {
+            bool belongs = (is_wordchar(cur) != 0);
+            if (! belongs) {
+                switch (ch) {
+                case '-': case '_': case '\\':
+                case '.': case ':': case '/':
+                case '~': case '%': case '*':
+                case '$': case '[': case ']':
+                case '?': case '!':
+                    belongs = true;
+                    break;
+                default:
+                    belongs = (ch >= '0') && (ch <= '9');
+                    break;
+                }
+            }
+            if (! belongs) {
+                // the run ends here; mark it when it was a URL
+                in_run = false;
+                if (is_url) {
+                    for (int k = run_start; k < pos; k++) {
+                        urlline[k] = 1;
+                    }
+                }
+                is_url = false;
+            }
+        }
+
+        // the current position itself is never part of a finished run
+        urlline[pos] = 0;
+        if (next_char(line[actual], &pos)) return;
+    }
+}
+
+/*
+ * Moves *head past the bytes that are marked in the URL mask, then reports
+ * whether the token starting at `token_pos` has to be dropped as a URL.
+ *
+ * Returns 0 when URL checking is enabled (URLs are then tokenized like any
+ * other text), otherwise the mask value at `token_pos`.
+ */
+int TextParser::get_url(int token_pos, int * head)
+{
+    int i = *head;
+    while (urlline[i] && line[actual][i]) {
+        i++;
+        (*head)++;
+    }
+    if (checkurl) return 0;
+    return urlline[token_pos];
+}
+
+/*
+ * Switches the checking of URLs on (non-zero) or off (0). While it is off,
+ * tokens that start inside a detected URL are not returned.
+ */
+void TextParser::set_url_checking(int check)
+{
+    this->checkurl = check;
+}
+
+
+/*
+ * Copies the text between `token` and *head of the current line into a new
+ * malloc()ed string, which the caller must free().
+ *
+ * *head may be moved forward first when it stands inside a detected URL.
+ * A trailing colon is removed from the copy.
+ *
+ * Returns NULL when the token is a URL that must not be checked, when the
+ * token is empty (also after the removal of the colon), or when no memory is
+ * available (an error message is printed in this case).
+ */
+char * TextParser::alloc_token(int token, int * head)
+{
+    if (get_url(token, head)) return NULL;
+
+    const int len = *head - token;
+    // an empty token would make the colon test below read in front of the buffer
+    if (len <= 0) return NULL;
+
+    char * t = (char *) malloc(len + 1);
+    if (!t) {
+        fprintf(stderr,"Error - Insufficient Memory\n");
+        return NULL;
+    }
+    t[len] = '\0';
+    strncpy(t, line[actual] + token, len);
+    // remove colon for Finnish and Swedish language
+    if (t[len - 1] == ':') {
+        t[len - 1] = '\0';
+        if (!t[0]) {
+            free(t);
+            return NULL;
+        }
+    }
+    return t;
+}
diff --git a/src/parsers/textparser.hxx b/src/parsers/textparser.hxx
new file mode 100644
index 0000000..a6f472a
--- /dev/null
+++ b/src/parsers/textparser.hxx
@@ -0,0 +1,69 @@
+/*
+ * parser classes for MySpell
+ *
+ * implemented: text, HTML, TeX
+ *
+ * Copyright (C) 2002, Laszlo Nemeth
+ *
+ */
+
+#ifndef _TEXTPARSER_HXX_
+#define _TEXTPARSER_HXX_
+
+// set sum of actual and previous lines
+#define MAXPREVLINE 4
+
+#ifndef MAXLNLEN
+#define MAXLNLEN 8192
+#endif
+
+/*
+ * Base Text Parser
+ *
+ */
+
+// Plain text tokenizer and base class of the format specific parsers.
+// It keeps the current line and a few previous ones, and returns the words of
+// the current line one by one through next_token().
+class TextParser
+{
+
+protected:
+ // (re)initialise for byte-oriented input; NULL selects a default alphabet
+ void init(const char *);
+ // (re)initialise for UTF-8 input with `len` extra word characters
+ void init(unsigned short * wordchars, int len);
+ int wordcharacters[256]; // for detection of the word boundaries (byte mode)
+ char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines (ring buffer)
+ char urlline[MAXLNLEN]; // mask for url detection (1 = byte belongs to a URL)
+ int checkurl; // non-zero: URLs are tokenized too
+ int actual; // index of the current line in line[]
+ int head; // head position in the current line
+ int token; // begin of token
+ int state; // state of automata
+ int utf8; // UTF-8 character encoding
+ // advances *pos by one character; returns 1 at the end of the string
+ int next_char(char * line, int * pos);
+ unsigned short * wordchars_utf16; // extra word characters (UTF-8 mode), not owned
+ int wclen; // number of elements in wordchars_utf16
+
+public:
+
+ TextParser();
+ TextParser(unsigned short * wordchars, int len);
+ TextParser(const char * wc);
+ virtual ~TextParser();
+
+ // stores a new current line and rewinds the tokenizer
+ void put_line(char * line);
+ // copy of the current line
+ char * get_line();
+ // copy of the line stored n lines before the current one
+ char * get_prevline(int n);
+ // next word of the current line (malloc()ed), or NULL at the end of line
+ virtual char * next_token();
+ // replaces the last token with `word`; returns 0 when word is NULL
+ int change_token(const char * word);
+ void set_url_checking(int check);
+
+ // offset of the last token in the current line
+ int get_tokenpos();
+ int is_wordchar(char * w);
+ // LATIN1 entity that s starts with, or NULL
+ const char * get_latin1(char * s);
+ // NOTE(review): next_char() without arguments and tokenize_urls() have no
+ // definition in textparser.cxx - confirm whether they are still needed
+ char * next_char();
+ int tokenize_urls();
+ // computes the URL mask of the current line
+ void check_urls();
+ int get_url(int token_pos, int * head);
+ char * alloc_token(int token, int * head);
+};
+
+#endif
+