diff options
Diffstat (limited to 'src/parsers')
-rw-r--r-- | src/parsers/Makefile.am | 10 | ||||
-rw-r--r-- | src/parsers/Makefile.in | 640 | ||||
-rw-r--r-- | src/parsers/firstparser.cxx | 33 | ||||
-rw-r--r-- | src/parsers/firstparser.hxx | 34 | ||||
-rw-r--r-- | src/parsers/htmlparser.cxx | 151 | ||||
-rw-r--r-- | src/parsers/htmlparser.hxx | 44 | ||||
-rw-r--r-- | src/parsers/latexparser.cxx | 223 | ||||
-rw-r--r-- | src/parsers/latexparser.hxx | 44 | ||||
-rw-r--r-- | src/parsers/manparser.cxx | 71 | ||||
-rw-r--r-- | src/parsers/manparser.hxx | 38 | ||||
-rw-r--r-- | src/parsers/testparser.cxx | 53 | ||||
-rw-r--r-- | src/parsers/textparser.cxx | 291 | ||||
-rw-r--r-- | src/parsers/textparser.hxx | 69 |
13 files changed, 1701 insertions, 0 deletions
diff --git a/src/parsers/Makefile.am b/src/parsers/Makefile.am new file mode 100644 index 0000000..02b0886 --- /dev/null +++ b/src/parsers/Makefile.am @@ -0,0 +1,10 @@ +lib_LIBRARIES=libparsers.a +libparsers_a_SOURCES=firstparser.cxx htmlparser.cxx \ + latexparser.cxx manparser.cxx \ + textparser.cxx + +noinst_PROGRAMS=testparser +testparser_SOURCES=firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx + +# need mystrdup() +LDADD = ../hunspell/libhunspell-1.3.la diff --git a/src/parsers/Makefile.in b/src/parsers/Makefile.in new file mode 100644 index 0000000..bdeea73 --- /dev/null +++ b/src/parsers/Makefile.in @@ -0,0 +1,640 @@ +# Makefile.in generated by automake 1.11.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, +# Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + + +VPATH = @srcdir@ +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +target_triplet = @target@ +noinst_PROGRAMS = testparser$(EXEEXT) +subdir = src/parsers +DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \ + $(top_srcdir)/m4/gettext.m4 $(top_srcdir)/m4/glibc2.m4 \ + $(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \ + $(top_srcdir)/m4/intdiv0.m4 $(top_srcdir)/m4/intl.m4 \ + $(top_srcdir)/m4/intlmacosx.m4 $(top_srcdir)/m4/intmax.m4 \ + $(top_srcdir)/m4/inttypes-pri.m4 \ + $(top_srcdir)/m4/inttypes_h.m4 $(top_srcdir)/m4/lcmessage.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/lock.m4 $(top_srcdir)/m4/longlong.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/nls.m4 $(top_srcdir)/m4/po.m4 \ + $(top_srcdir)/m4/printf-posix.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/m4/size_max.m4 $(top_srcdir)/m4/stdint_h.m4 \ + $(top_srcdir)/m4/uintmax_t.m4 $(top_srcdir)/m4/visibility.m4 \ + $(top_srcdir)/m4/wchar_t.m4 $(top_srcdir)/m4/wint_t.m4 \ + $(top_srcdir)/m4/xsize.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs 
+CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__installdirs = "$(DESTDIR)$(libdir)" +LIBRARIES = $(lib_LIBRARIES) +ARFLAGS = cru +libparsers_a_AR = $(AR) $(ARFLAGS) +libparsers_a_LIBADD = +am_libparsers_a_OBJECTS = firstparser.$(OBJEXT) htmlparser.$(OBJEXT) \ + latexparser.$(OBJEXT) manparser.$(OBJEXT) textparser.$(OBJEXT) +libparsers_a_OBJECTS = $(am_libparsers_a_OBJECTS) +PROGRAMS = $(noinst_PROGRAMS) +am_testparser_OBJECTS = firstparser.$(OBJEXT) htmlparser.$(OBJEXT) \ + latexparser.$(OBJEXT) manparser.$(OBJEXT) testparser.$(OBJEXT) \ + textparser.$(OBJEXT) +testparser_OBJECTS = $(am_testparser_OBJECTS) +testparser_LDADD = $(LDADD) +testparser_DEPENDENCIES = ../hunspell/libhunspell-1.3.la +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) --tag=CXX 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +CCLD = $(CC) +LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \ + $(LDFLAGS) -o $@ +SOURCES = $(libparsers_a_SOURCES) $(testparser_SOURCES) +DIST_SOURCES = $(libparsers_a_SOURCES) $(testparser_SOURCES) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +ALLOCA = @ALLOCA@ +AMTAR = @AMTAR@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@ +CATOBJEXT = @CATOBJEXT@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CURSESLIB = @CURSESLIB@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DATADIRNAME = @DATADIRNAME@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GENCAT = @GENCAT@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GLIBC2 = @GLIBC2@ +GLIBC21 = @GLIBC21@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_ASPRINTF = @HAVE_ASPRINTF@ +HAVE_POSIX_PRINTF = @HAVE_POSIX_PRINTF@ 
+HAVE_SNPRINTF = @HAVE_SNPRINTF@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +HAVE_WPRINTF = @HAVE_WPRINTF@ +HUNSPELL_VERSION_MAJOR = @HUNSPELL_VERSION_MAJOR@ +HUNSPELL_VERSION_MINOR = @HUNSPELL_VERSION_MINOR@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INSTOBJEXT = @INSTOBJEXT@ +INTLBISON = @INTLBISON@ +INTLLIBS = @INTLLIBS@ +INTLOBJS = @INTLOBJS@ +INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBMULTITHREAD = @LIBMULTITHREAD@ +LIBOBJS = @LIBOBJS@ +LIBPTH = @LIBPTH@ +LIBPTH_PREFIX = @LIBPTH_PREFIX@ +LIBS = @LIBS@ +LIBTHREAD = @LIBTHREAD@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBC = @LTLIBC@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBMULTITHREAD = @LTLIBMULTITHREAD@ +LTLIBOBJS = @LTLIBOBJS@ +LTLIBPTH = @LTLIBPTH@ +LTLIBTHREAD = @LTLIBTHREAD@ +MAKEINFO = @MAKEINFO@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +POSUB = @POSUB@ +PRI_MACROS_BROKEN = @PRI_MACROS_BROKEN@ +RANLIB = @RANLIB@ +READLINELIB = @READLINELIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +WINDRES = @WINDRES@ +WOE32 = @WOE32@ +WOE32DLL = @WOE32DLL@ +XFAILED = @XFAILED@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +abs_builddir = 
@abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +lt_ECHO = @lt_ECHO@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target = @target@ +target_alias = @target_alias@ +target_cpu = @target_cpu@ +target_os = @target_os@ +target_vendor = @target_vendor@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +lib_LIBRARIES = libparsers.a +libparsers_a_SOURCES = firstparser.cxx htmlparser.cxx \ + latexparser.cxx manparser.cxx \ + textparser.cxx + +testparser_SOURCES = firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx + +# need mystrdup() +LDADD = ../hunspell/libhunspell-1.3.la +all: all-am + +.SUFFIXES: +.SUFFIXES: .cxx .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; 
do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/parsers/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/parsers/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-libLIBRARIES: $(lib_LIBRARIES) + @$(NORMAL_INSTALL) + test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)" + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(INSTALL_DATA) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(INSTALL_DATA) $$list2 "$(DESTDIR)$(libdir)" || exit $$?; } + @$(POST_INSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + if test -f $$p; then \ + $(am__strip_dir) \ + echo " ( cd '$(DESTDIR)$(libdir)' && $(RANLIB) $$f )"; \ + ( cd "$(DESTDIR)$(libdir)" && $(RANLIB) $$f ) || exit $$?; \ + else :; fi; \ + done + +uninstall-libLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LIBRARIES)'; test -n "$(libdir)" || list=; \ + files=`for p in $$list; do echo $$p; 
done | sed -e 's|^.*/||'`; \ + test -n "$$files" || exit 0; \ + echo " ( cd '$(DESTDIR)$(libdir)' && rm -f "$$files" )"; \ + cd "$(DESTDIR)$(libdir)" && rm -f $$files + +clean-libLIBRARIES: + -test -z "$(lib_LIBRARIES)" || rm -f $(lib_LIBRARIES) +libparsers.a: $(libparsers_a_OBJECTS) $(libparsers_a_DEPENDENCIES) + -rm -f libparsers.a + $(libparsers_a_AR) libparsers.a $(libparsers_a_OBJECTS) $(libparsers_a_LIBADD) + $(RANLIB) libparsers.a + +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +testparser$(EXEEXT): $(testparser_OBJECTS) $(testparser_DEPENDENCIES) + @rm -f testparser$(EXEEXT) + $(CXXLINK) $(testparser_OBJECTS) $(testparser_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/firstparser.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/htmlparser.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/latexparser.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/manparser.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/testparser.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/textparser.Po@am__quote@ + +.cxx.o: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ $< + +.cxx.obj: +@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo 
$(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cxx.lo: +@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LTCXXCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo 
$$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LIBRARIES) $(PROGRAMS) +installdirs: + for dir in "$(DESTDIR)$(libdir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + `test -z '$(STRIP)' || \ + echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libLIBRARIES clean-libtool \ + clean-noinstPROGRAMS mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLIBRARIES + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ + clean-libLIBRARIES clean-libtool clean-noinstPROGRAMS ctags \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLIBRARIES install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags uninstall uninstall-am uninstall-libLIBRARIES + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. 
+# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/parsers/firstparser.cxx b/src/parsers/firstparser.cxx new file mode 100644 index 0000000..786ecea --- /dev/null +++ b/src/parsers/firstparser.cxx @@ -0,0 +1,33 @@ +#include <cstdlib> +#include <cstring> +#include <cstdio> +#include <ctype.h> + +#include "../hunspell/csutil.hxx" +#include "firstparser.hxx" + +#ifndef W32 +using namespace std; +#endif + +FirstParser::FirstParser(const char * wordchars) +{ + init(wordchars); +} + +FirstParser::~FirstParser() +{ +} + +char * FirstParser::next_token() +{ + char * tabpos = strchr(line[actual],'\t'); + if ((tabpos) && (tabpos - line[actual]>token)) { + char * t = (char *) malloc(tabpos - line[actual] + 1); + t[tabpos - line[actual]] = '\0'; + token = tabpos - line[actual] +1; + if (t) return strncpy(t, line[actual], tabpos - line[actual]); + fprintf(stderr,"Error - Insufficient Memory\n"); + } + return NULL; +} diff --git a/src/parsers/firstparser.hxx b/src/parsers/firstparser.hxx new file mode 100644 index 0000000..1f79289 --- /dev/null +++ b/src/parsers/firstparser.hxx @@ -0,0 +1,34 @@ +/* + * parser classes of HunTools + * + * implemented: text, HTML, TeX, first word + * + * Copyright (C) 2003, Laszlo Nemeth + * + */ + +#ifndef _FIRSTPARSER_HXX_ +#define _FIRSTPARSER_HXX_ + +#include "textparser.hxx" + +/* + * Check first word of the input line + * + */ + +class FirstParser : public TextParser +{ + +public: + + + FirstParser(const char * wc); + virtual ~FirstParser(); + + virtual char * next_token(); + +}; + +#endif + diff --git a/src/parsers/htmlparser.cxx b/src/parsers/htmlparser.cxx new file mode 100644 index 0000000..341be4e --- /dev/null +++ b/src/parsers/htmlparser.cxx @@ -0,0 +1,151 @@ +#include <cstdlib> +#include <cstring> +#include <cstdio> +#include <ctype.h> + +#include "../hunspell/csutil.hxx" +#include "htmlparser.hxx" + + +#ifndef W32 +using namespace std; +#endif + +enum { ST_NON_WORD, ST_WORD, ST_TAG, 
ST_CHAR_ENTITY, ST_OTHER_TAG, ST_ATTRIB }; + +static const char * PATTERN[][2] = { + { "<script", "</script>" }, + { "<style", "</style>" }, + { "<code", "</code>" }, + { "<samp", "</samp>" }, + { "<kbd", "</kbd>" }, + { "<var", "</var>" }, + { "<listing", "</listing>" }, + { "<address", "</address>" }, + { "<pre", "</pre>" }, + { "<!--", "-->" }, + { "<[cdata[", "]]>" }, // XML comment + { "<", ">" } +}; + +#define PATTERN_LEN (sizeof(PATTERN) / (sizeof(char *) * 2)) + +static const char * PATTERN2[][2] = { + { "<img", "alt=" }, // ALT and TITLE attrib handled spec. + { "<img", "title=" }, + { "<a ", "title=" } +}; + +#define PATTERN_LEN2 (sizeof(PATTERN2) / (sizeof(char *) * 2)) + +HTMLParser::HTMLParser(const char * wordchars) +{ + init(wordchars); +} + +HTMLParser::HTMLParser(unsigned short * wordchars, int len) +{ + init(wordchars, len); +} + +HTMLParser::~HTMLParser() +{ +} + + +int HTMLParser::look_pattern(const char * p[][2], unsigned int len, int column) +{ + for (unsigned int i = 0; i < len; i++) { + char * j = line[actual] + head; + const char * k = p[i][column]; + while ((*k != '\0') && (tolower(*j) == *k)) { + j++; + k++; + } + if (*k == '\0') return i; + } + return -1; +} + +/* + * HTML parser + * + */ + + +char * HTMLParser::next_token() +{ + const char * latin1; + + for (;;) { + //fprintf(stderr, "%d:%c:%s\n", state, line[actual][head], line[actual]); + //getch(); + switch (state) + { + case ST_NON_WORD: // non word chars + prevstate = ST_NON_WORD; + if ((pattern_num = look_pattern(PATTERN, PATTERN_LEN, 0)) != -1) { + checkattr = 0; + if ((pattern2_num = look_pattern(PATTERN2, PATTERN_LEN2, 0)) != -1) { + checkattr = 1; + } + state = ST_TAG; + } else if (is_wordchar(line[actual] + head)) { + state = ST_WORD; + token = head; + } else if ((latin1 = get_latin1(line[actual] + head))) { + state = ST_WORD; + token = head; + head += strlen(latin1); + } else if (line[actual][head] == '&') { + state = ST_CHAR_ENTITY; + } + break; + case ST_WORD: // wordchar 
+ if ((latin1 = get_latin1(line[actual] + head))) { + head += strlen(latin1); + } else if (! is_wordchar(line[actual] + head)) { + state = prevstate; + char * t = alloc_token(token, &head); + if (t) return t; + } + break; + case ST_TAG: // comment, labels, etc + int i; + if ((checkattr == 1) && ((i = look_pattern(PATTERN2, PATTERN_LEN2, 1)) != -1) + && (strcmp(PATTERN2[i][0],PATTERN2[pattern2_num][0]) == 0)) { + checkattr = 2; + } else if ((checkattr > 0) && (line[actual][head] == '>')) { + state = ST_NON_WORD; + } else if (((i = look_pattern(PATTERN, PATTERN_LEN, 1)) != -1) && + (strcmp(PATTERN[i][1],PATTERN[pattern_num][1]) == 0)) { + state = ST_NON_WORD; + head += strlen(PATTERN[pattern_num][1]) - 1; + } else if ( (strcmp(PATTERN[pattern_num][0], "<") == 0) && + ((line[actual][head] == '"') || (line[actual][head] == '\''))) { + quotmark = line[actual][head]; + state = ST_ATTRIB; + } + break; + case ST_ATTRIB: // non word chars + prevstate = ST_ATTRIB; + if (line[actual][head] == quotmark) { + state = ST_TAG; + if (checkattr == 2) checkattr = 1; + // for IMG ALT + } else if (is_wordchar(line[actual] + head) && (checkattr == 2)) { + state = ST_WORD; + token = head; + } else if (line[actual][head] == '&') { + state = ST_CHAR_ENTITY; + } + break; + case ST_CHAR_ENTITY: // SGML element + if ((tolower(line[actual][head]) == ';')) { + state = prevstate; + head--; + } + } + if (next_char(line[actual], &head)) return NULL; + } +} diff --git a/src/parsers/htmlparser.hxx b/src/parsers/htmlparser.hxx new file mode 100644 index 0000000..9a0da7a --- /dev/null +++ b/src/parsers/htmlparser.hxx @@ -0,0 +1,44 @@ +/* + * HTML parser class for MySpell + * + * implemented: text, HTML, TeX + * + * Copyright (C) 2002, Laszlo Nemeth + * + */ + +#ifndef _HTMLPARSER_HXX_ +#define _HTMLPARSER_HXX_ + + +#include "textparser.hxx" + +/* + * HTML Parser + * + */ + +class HTMLParser : public TextParser +{ +public: + + HTMLParser(const char * wc); + HTMLParser(unsigned short * wordchars, int 
len); + virtual ~HTMLParser(); + + virtual char * next_token(); + +private: + + int look_pattern(const char * p[][2], unsigned int len, int column); + int pattern_num; /* result of matching the opening patterns of PATTERN in next_token() */ + int pattern2_num; /* result of matching the tag names of PATTERN2 in next_token() */ + int prevstate; /* state restored after a word or a character entity */ + int checkattr; /* 0: no checked attribute, 1: tag is listed in PATTERN2, 2: a listed attribute name was seen, so its value is tokenized */ + char quotmark; /* quote character that opened the current attribute value */ + +}; + + +#endif + diff --git a/src/parsers/latexparser.cxx b/src/parsers/latexparser.cxx new file mode 100644 index 0000000..5ffe3fd --- /dev/null +++ b/src/parsers/latexparser.cxx @@ -0,0 +1,223 @@ +#include <cstdlib> +#include <cstring> +#include <cstdio> +#include <ctype.h> + +#include "../hunspell/csutil.hxx" +#include "latexparser.hxx" + +#ifndef W32 +using namespace std; +#endif + +static struct { + const char * pat[2]; /* pat[0]: text that starts the construct; pat[1]: text that ends it, or NULL for a command followed by arguments */ + int arg; /* number of brace groups next_token() skips after the command when pat[1] is NULL */ +} PATTERN[] = { /* rows are tried in order and the first match wins */ + { { "\\(", "\\)" } , 0 }, + { { "$$", "$$" } , 0 }, + { { "$", "$" } , 0 }, + { { "\\begin{math}", "\\end{math}" } , 0 }, + { { "\\[", "\\]" } , 0 }, + { { "\\begin{displaymath}", "\\end{displaymath}" } , 0 }, + { { "\\begin{equation}", "\\end{equation}" } , 0 }, + { { "\\begin{equation*}", "\\end{equation*}" } , 0 }, + { { "\\cite", NULL } , 1 }, + { { "\\nocite", NULL } , 1 }, + { { "\\index", NULL } , 1 }, + { { "\\label", NULL } , 1 }, + { { "\\ref", NULL } , 1 }, + { { "\\pageref", NULL } , 1 }, + { { "\\parbox", NULL } , 1 }, + { { "\\begin{verbatim}", "\\end{verbatim}" } , 0 }, + { { "\\verb+", "+" } , 0 }, + { { "\\verb|", "|" } , 0 }, + { { "\\verb#", "#" } , 0 }, + { { "\\verb*", "*" } , 0 }, + { { "\\documentstyle", "\\begin{document}" } , 0 }, + { { "\\documentclass", "\\begin{document}" } , 0 }, +// { { "\\documentclass", NULL } , 1 }, + { { "\\usepackage", NULL } , 1 }, + { { "\\includeonly", NULL } , 1 }, + { { "\\include", NULL } , 1 }, + { { "\\input", NULL } , 1 }, + { { "\\vspace", NULL } , 1 }, + { { "\\setlength", NULL } , 2 }, + { { "\\addtolength", NULL } , 2 }, + { { "\\settowidth", NULL } , 2 }, + { { "\\rule", NULL } , 2 }, + { { "\\hspace", NULL } , 1 } , + { { "\\vspace", NULL } , 1 } , /* duplicate of the "\\vspace" row above; never selected because the first match wins */ + { { "\\\\[", "]" } , 0 }, + { { "\\pagebreak[", "]" } , 0 } , 
+ { { "\\nopagebreak[", "]" } , 0 } , + { { "\\enlargethispage", NULL } , 1 } , + { { "\\begin{tabular}", NULL } , 1 } , + { { "\\addcontentsline", NULL } , 2 } , + { { "\\begin{thebibliography}", NULL } , 1 } , + { { "\\bibliography", NULL } , 1 } , + { { "\\bibliographystyle", NULL } , 1 } , /* shadowed by the "\\bibliography" row above, which matches first */ + { { "\\bibitem", NULL } , 1 } , + { { "\\begin", NULL } , 1 } , + { { "\\end", NULL } , 1 } , + { { "\\pagestyle", NULL } , 1 } , + { { "\\pagenumbering", NULL } , 1 } , + { { "\\thispagestyle", NULL } , 1 } , + { { "\\newtheorem", NULL } , 2 }, + { { "\\newcommand", NULL } , 2 }, + { { "\\renewcommand", NULL } , 2 }, + { { "\\setcounter", NULL } , 2 }, + { { "\\addtocounter", NULL } , 1 }, + { { "\\stepcounter", NULL } , 1 }, + { { "\\selectlanguage", NULL } , 1 }, + { { "\\inputencoding", NULL } , 1 }, + { { "\\hyphenation", NULL } , 1 }, + { { "\\definecolor", NULL } , 3 }, + { { "\\color", NULL } , 1 }, + { { "\\textcolor", NULL } , 1 }, + { { "\\pagecolor", NULL } , 1 }, + { { "\\colorbox", NULL } , 2 }, /* shadowed by the "\\color" row above, which matches first (with arg 1) */ + { { "\\fcolorbox", NULL } , 2 }, + { { "\\declaregraphicsextensions", NULL } , 1 }, + { { "\\psfig", NULL } , 1 }, + { { "\\url", NULL } , 1 }, + { { "\\eqref", NULL } , 1 }, + { { "\\vskip", NULL } , 1 }, + { { "\\vglue", NULL } , 1 }, + { { "\'\'", NULL } , 1 } +}; + +#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0])) /* number of rows in PATTERN */ + +LaTeXParser::LaTeXParser(const char * wordchars) /* forwards the word characters to init() */ +{ + init(wordchars); +} + +LaTeXParser::LaTeXParser(unsigned short * wordchars, int len) /* forwards the word characters and their count to init() */ +{ + init(wordchars, len); +} + +LaTeXParser::~LaTeXParser() +{ +} + +int LaTeXParser::look_pattern(int col) /* index of the first PATTERN row whose pat[col] matches the lower-cased text at line[actual] + head, or -1; rows with a NULL pat[col] are skipped */ +{ + for (unsigned int i = 0; i < PATTERN_LEN; i++) { + char * j = line[actual] + head; + const char * k = PATTERN[i].pat[col]; + if (! 
k) continue; + while ((*k != '\0') && (tolower(*j) == *k)) { + j++; + k++; + } + if (*k == '\0') return i; + } + return -1; +} + +/* + * LaTeXParser + * + * state 0: not wordchar + * state 1: wordchar + * state 2: comments + * state 3: commands + * state 4: commands with arguments + * state 5: % comment + * + */ + + +char * LaTeXParser::next_token() +{ + int i; + int slash = 0; + int apostrophe; + for (;;) { + // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token: %s\n",depth,state,arg,line[actual]+head); + + switch (state) + { + case 0: // non word chars + if ((pattern_num = look_pattern(0)) != -1) { + if (PATTERN[pattern_num].pat[1]) { + state = 2; + } else { + state = 4; + depth = 0; + arg = 0; + opt = 1; + } + head += strlen(PATTERN[pattern_num].pat[0]) - 1; + } else if ((line[actual][head] == '%')) { + state = 5; + } else if (is_wordchar(line[actual] + head)) { + state = 1; + token = head; + } else if (line[actual][head] == '\\') { + if (line[actual][head + 1] == '\\' || // \\ (linebreak) + (line[actual][head + 1] == '$') || // \$ (dollar sign) + (line[actual][head + 1] == '%')) { // \% (percent) + head++; + break; + } + state = 3; + } else if (line[actual][head] == '%') { + if ((head==0) || (line[actual][head - 1] != '\\')) state = 5; + } + break; + case 1: // wordchar + apostrophe = 0; + if (! 
is_wordchar(line[actual] + head) || + (line[actual][head] == '\'' && line[actual][head+1] == '\'' && ++apostrophe)) { + state = 0; + char * t = alloc_token(token, &head); + if (apostrophe) head += 2; + if (t) return t; + } + break; + case 2: // comment, labels, etc + if (((i = look_pattern(1)) != -1) && + (strcmp(PATTERN[i].pat[1],PATTERN[pattern_num].pat[1]) == 0)) { + state = 0; + head += strlen(PATTERN[pattern_num].pat[1]) - 1; + } + break; + case 3: // command + if ((tolower(line[actual][head]) < 'a') || (tolower(line[actual][head]) > 'z')) { + state = 0; + head--; + } + break; + case 4: // command with arguments + if (slash && (line[actual][head] != '\0')) { + slash = 0; + head++; + break; + } else if (line[actual][head]=='\\') { + slash = 1; + } else if ((line[actual][head] == '{') || + ((opt) && (line[actual][head] == '['))) { + depth++; + opt = 0; + } else if (line[actual][head] == '}') { + depth--; + if (depth == 0) { + opt = 1; + arg++; + } + if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) || + (depth < 0) ) { + state = 0; // XXX not handles the last optional arg. + } + } else if (line[actual][head] == ']') depth--; + } // case + if (next_char(line[actual], &head)) { + if (state == 5) state = 0; + return NULL; + } + } +} diff --git a/src/parsers/latexparser.hxx b/src/parsers/latexparser.hxx new file mode 100644 index 0000000..851ecb9 --- /dev/null +++ b/src/parsers/latexparser.hxx @@ -0,0 +1,44 @@ +/* + * parser classes for MySpell + * + * implemented: text, HTML, TeX + * + * Copyright (C) 2002, Laszlo Nemeth + * + */ + +#ifndef _LATEXPARSER_HXX_ +#define _LATEXPARSER_HXX_ + + +#include "textparser.hxx" + +/* + * HTML Parser + * + */ + +class LaTeXParser : public TextParser +{ + int pattern_num; // number of comment + int depth; // depth of blocks + int arg; // arguments's number + int opt; // optional argument attrib. 
+ +public: + + LaTeXParser(const char * wc); + LaTeXParser(unsigned short * wordchars, int len); + virtual ~LaTeXParser(); + + virtual char * next_token(); + +private: + + int look_pattern(int col); + +}; + + +#endif + diff --git a/src/parsers/manparser.cxx b/src/parsers/manparser.cxx new file mode 100644 index 0000000..25858da --- /dev/null +++ b/src/parsers/manparser.cxx @@ -0,0 +1,71 @@ +#include <cstdlib> +#include <cstring> +#include <cstdio> +#include <ctype.h> + +#include "../hunspell/csutil.hxx" +#include "manparser.hxx" + + +#ifndef W32 +using namespace std; +#endif + +ManParser::ManParser() { +} + +ManParser::ManParser(const char * wordchars) +{ + init(wordchars); +} + +ManParser::ManParser(unsigned short * wordchars, int len) +{ + init(wordchars, len); +} + +ManParser::~ManParser() +{ +} + +char * ManParser::next_token() +{ + for (;;) { + switch (state) + { + case 1: // command arguments + if (line[actual][head] == ' ') state = 2; + break; + case 0: // dot in begin of line + if (line[actual][0] == '.') { + state = 1; + break; + } else { + state = 2; + } + // no break + case 2: // non word chars + if (is_wordchar(line[actual] + head)) { + state = 3; + token = head; + } else if ((line[actual][head] == '\\') && + (line[actual][head + 1] == 'f') && + (line[actual][head + 2] != '\0')) { + head += 2; + } + break; + case 3: // wordchar + if (! 
is_wordchar(line[actual] + head)) { + state = 2; + char * t = alloc_token(token, &head); + if (t) return t; + } + break; + } + if (next_char(line[actual], &head)) { + state = 0; + return NULL; + } + } +} + diff --git a/src/parsers/manparser.hxx b/src/parsers/manparser.hxx new file mode 100644 index 0000000..6db37c5 --- /dev/null +++ b/src/parsers/manparser.hxx @@ -0,0 +1,38 @@ +/* + * parser classes for MySpell + * + * implemented: text, HTML, TeX + * + * Copyright (C) 2002, Laszlo Nemeth + * + */ + +#ifndef _MANPARSER_HXX_ +#define _MANPARSER_HXX_ + +#include "textparser.hxx" + +/* + * Manparse Parser + * + */ + +class ManParser : public TextParser +{ + +protected: + + +public: + + ManParser(); + ManParser(const char * wc); + ManParser(unsigned short * wordchars, int len); + virtual ~ManParser(); + + virtual char * next_token(); + +}; + +#endif + diff --git a/src/parsers/testparser.cxx b/src/parsers/testparser.cxx new file mode 100644 index 0000000..b257f12 --- /dev/null +++ b/src/parsers/testparser.cxx @@ -0,0 +1,53 @@ +#include <cstring> +#include <cstdlib> +#include <cstdio> + +#include "textparser.hxx" +#include "htmlparser.hxx" +#include "latexparser.hxx" + +#ifndef W32 +using namespace std; +#endif + +int +main(int argc, char** argv) +{ + FILE * f; + /* first parse the command line options */ + + if (argc < 2) { + fprintf(stderr,"correct syntax is:\n"); + fprintf(stderr,"testparser file\n"); + fprintf(stderr,"example: testparser /dev/stdin\n"); + exit(1); + } + + /* open the words to check list */ + f = fopen(argv[1],"r"); + if (!f) { + fprintf(stderr,"Error - could not open file of words to check\n"); + exit(1); + } + + TextParser * p = new LaTeXParser("qwertzuiopasdfghjklyxcvbnméáúõûóüöíQWERTZUIOPASDFGHJKLYXCVBNMÍÉÁÕÚÖÜÓÛ"); + + char buf[MAXLNLEN]; + char * next; + + while(fgets(buf,MAXLNLEN,f)) { + fprintf(stdout,"---------------------------------------\n"); + p->put_line(buf); + fprintf(stderr, "x:%s\n", buf); + p->set_url_checking(1); + while 
((next=p->next_token())) { + fprintf(stdout,"token: %s\n",next); + free(next); + } + } + + delete p; + fclose(f); + return 0; +} + diff --git a/src/parsers/textparser.cxx b/src/parsers/textparser.cxx new file mode 100644 index 0000000..0338136 --- /dev/null +++ b/src/parsers/textparser.cxx @@ -0,0 +1,291 @@ +#include <cstdlib> +#include <cstring> +#include <cstdio> +#include <ctype.h> + +#include "../hunspell/csutil.hxx" +#include "textparser.hxx" + +#ifndef W32 +using namespace std; +#endif + +// ISO-8859-1 HTML character entities + +static const char * LATIN1[] = { + "À", + "Ã", + "Å", + "Æ", + "È", + "Ê", + "Ì", + "Ï", + "Ð", + "Ñ", + "Ò", + "Ø", + "Ù", + "Þ", + "à", + "ã", + "å", + "æ", + "è", + "ê", + "ì", + "ï", + "ð", + "ñ", + "ò", + "ø", + "ù", + "þ", + "ÿ" +}; + +#define LATIN1_LEN (sizeof(LATIN1) / sizeof(char *)) + +TextParser::TextParser() { + init((char *) NULL); +} + +TextParser::TextParser(const char * wordchars) +{ + init(wordchars); +} + +TextParser::TextParser(unsigned short * wordchars, int len) +{ + init(wordchars, len); +} + +TextParser::~TextParser() +{ +} + +int TextParser::is_wordchar(char * w) +{ + if (*w == '\0') return 0; + if (utf8) { + w_char wc; + unsigned short idx; + u8_u16(&wc, 1, w); + idx = (wc.h << 8) + wc.l; + return (unicodeisalpha(idx) || (wordchars_utf16 && flag_bsearch(wordchars_utf16, *((unsigned short *) &wc), wclen))); + } else { + return wordcharacters[(*w + 256) % 256]; + } +} + +const char * TextParser::get_latin1(char * s) +{ + if (s[0] == '&') { + unsigned int i = 0; + while ((i < LATIN1_LEN) && + strncmp(LATIN1[i], s, strlen(LATIN1[i]))) i++; + if (i != LATIN1_LEN) return LATIN1[i]; + } + return NULL; +} + +void TextParser::init(const char * wordchars) +{ + for (int i = 0; i < MAXPREVLINE; i++) { + line[i][0] = '\0'; + } + actual = 0; + head = 0; + token = 0; + state = 0; + utf8 = 0; + checkurl = 0; + unsigned int j; + for (j = 0; j < 256; j++) { + wordcharacters[j] = 0; + } + if (!wordchars) wordchars = 
"qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM"; + for (j = 0; j < strlen(wordchars); j++) { + wordcharacters[(wordchars[j] + 256) % 256] = 1; + } +} + +void TextParser::init(unsigned short * wc, int len) +{ + for (int i = 0; i < MAXPREVLINE; i++) { + line[i][0] = '\0'; + } + actual = 0; + head = 0; + token = 0; + state = 0; + utf8 = 1; + checkurl = 0; + wordchars_utf16 = wc; + wclen = len; +} + +int TextParser::next_char(char * line, int * pos) { + if (*(line + *pos) == '\0') return 1; + if (utf8) { + if (*(line + *pos) >> 7) { + // jump to next UTF-8 character + for((*pos)++; (*(line + *pos) & 0xc0) == 0x80; (*pos)++); + } else { + (*pos)++; + } + } else (*pos)++; + return 0; +} + +void TextParser::put_line(char * word) +{ + actual = (actual + 1) % MAXPREVLINE; + strcpy(line[actual], word); + token = 0; + head = 0; + check_urls(); +} + +char * TextParser::get_prevline(int n) +{ + return mystrdup(line[(actual + MAXPREVLINE - n) % MAXPREVLINE]); +} + +char * TextParser::get_line() +{ + return get_prevline(0); +} + +char * TextParser::next_token() +{ + const char * latin1; + + for (;;) { + switch (state) + { + case 0: // non word chars + if (is_wordchar(line[actual] + head)) { + state = 1; + token = head; + } else if ((latin1 = get_latin1(line[actual] + head))) { + state = 1; + token = head; + head += strlen(latin1); + } + break; + case 1: // wordchar + if ((latin1 = get_latin1(line[actual] + head))) { + head += strlen(latin1); + } else if (! 
is_wordchar(line[actual] + head)) { + state = 0; + char * t = alloc_token(token, &head); + if (t) return t; + } + break; + } + if (next_char(line[actual], &head)) return NULL; + } +} + +int TextParser::get_tokenpos() +{ + return token; +} + +int TextParser::change_token(const char * word) +{ + if (word) { + char * r = mystrdup(line[actual] + head); + strcpy(line[actual] + token, word); + strcat(line[actual], r); + head = token; + free(r); + return 1; + } + return 0; +} + +void TextParser::check_urls() +{ + int url_state = 0; + int url_head = 0; + int url_token = 0; + int url = 0; + for (;;) { + switch (url_state) + { + case 0: // non word chars + if (is_wordchar(line[actual] + url_head)) { + url_state = 1; + url_token = url_head; + // Unix path + } else if (*(line[actual] + url_head) == '/') { + url_state = 1; + url_token = url_head; + url = 1; + } + break; + case 1: // wordchar + char ch = *(line[actual] + url_head); + // e-mail address + if ((ch == '@') || + // MS-DOS, Windows path + (strncmp(line[actual] + url_head, ":\\", 2) == 0) || + // URL + (strncmp(line[actual] + url_head, "://", 3) == 0)) { + url = 1; + } else if (! (is_wordchar(line[actual] + url_head) || + (ch == '-') || (ch == '_') || (ch == '\\') || + (ch == '.') || (ch == ':') || (ch == '/') || + (ch == '~') || (ch == '%') || (ch == '*') || + (ch == '$') || (ch == '[') || (ch == ']') || + (ch == '?') || (ch == '!') || + ((ch >= '0') && (ch <= '9')))) { + url_state = 0; + if (url == 1) { + for (int i = url_token; i < url_head; i++) { + *(urlline + i) = 1; + } + } + url = 0; + } + break; + } + *(urlline + url_head) = 0; + if (next_char(line[actual], &url_head)) return; + } +} + +int TextParser::get_url(int token_pos, int * head) +{ + for (int i = *head; urlline[i] && *(line[actual]+i); i++, (*head)++); + return checkurl ? 
0 : urlline[token_pos]; +} + +void TextParser::set_url_checking(int check) +{ + checkurl = check; +} + + +char * TextParser::alloc_token(int token, int * head) +{ + if (get_url(token, head)) return NULL; + char * t = (char *) malloc(*head - token + 1); + if (t) { + t[*head - token] = '\0'; + strncpy(t, line[actual] + token, *head - token); + // remove colon for Finnish and Swedish language + if (t[*head - token - 1] == ':') { + t[*head - token - 1] = '\0'; + if (!t[0]) { + free(t); + return NULL; + } + } + return t; + } + fprintf(stderr,"Error - Insufficient Memory\n"); + return NULL; +} diff --git a/src/parsers/textparser.hxx b/src/parsers/textparser.hxx new file mode 100644 index 0000000..a6f472a --- /dev/null +++ b/src/parsers/textparser.hxx @@ -0,0 +1,69 @@ +/* + * parser classes for MySpell + * + * implemented: text, HTML, TeX + * + * Copyright (C) 2002, Laszlo Nemeth + * + */ + +#ifndef _TEXTPARSER_HXX_ +#define _TEXTPARSER_HXX_ + +// set sum of actual and previous lines +#define MAXPREVLINE 4 + +#ifndef MAXLNLEN +#define MAXLNLEN 8192 +#endif + +/* + * Base Text Parser + * + */ + +class TextParser +{ + +protected: + void init(const char *); + void init(unsigned short * wordchars, int len); + int wordcharacters[256]; // for detection of the word boundaries + char line[MAXPREVLINE][MAXLNLEN]; // parsed and previous lines + char urlline[MAXLNLEN]; // mask for url detection + int checkurl; + int actual; // actual line + int head; // head position + int token; // begin of token + int state; // state of automata + int utf8; // UTF-8 character encoding + int next_char(char * line, int * pos); + unsigned short * wordchars_utf16; + int wclen; + +public: + + TextParser(); + TextParser(unsigned short * wordchars, int len); + TextParser(const char * wc); + virtual ~TextParser(); + + void put_line(char * line); + char * get_line(); + char * get_prevline(int n); + virtual char * next_token(); + int change_token(const char * word); + void set_url_checking(int check); + + 
int get_tokenpos(); + int is_wordchar(char * w); + const char * get_latin1(char * s); + char * next_char(); + int tokenize_urls(); + void check_urls(); + int get_url(int token_pos, int * head); + char * alloc_token(int token, int * head); +}; + +#endif + |