summaryrefslogtreecommitdiff
path: root/src/parsers/textparser.hxx
diff options
context:
space:
mode:
Diffstat (limited to 'src/parsers/textparser.hxx')
-rw-r--r--src/parsers/textparser.hxx69
1 files changed, 69 insertions, 0 deletions
diff --git a/src/parsers/textparser.hxx b/src/parsers/textparser.hxx
new file mode 100644
index 0000000..a6f472a
--- /dev/null
+++ b/src/parsers/textparser.hxx
@@ -0,0 +1,69 @@
+/*
+ * parser classes for MySpell
+ *
+ * implemented: text, HTML, TeX
+ *
+ * Copyright (C) 2002, Laszlo Nemeth
+ *
+ */
+
+#ifndef _TEXTPARSER_HXX_
+#define _TEXTPARSER_HXX_
+
+// number of stored line buffers: the actual (current) line plus the previous lines
+#define MAXPREVLINE 4
+
+#ifndef MAXLNLEN
+#define MAXLNLEN 8192
+#endif
+
+/*
+ * Base Text Parser
+ *
+ */
+
+class TextParser // base text parser; has a virtual destructor and a virtual next_token()
+{
+
+protected: // state and helpers visible to this class and to derived classes
+ void init(const char *); // NOTE(review): presumably takes the same string as TextParser(const char * wc) - confirm
+ void init(unsigned short * wordchars, int len); // NOTE(review): presumably shared setup for the (wordchars, len) constructor - confirm
+ int wordcharacters[256]; // used for detecting word boundaries; 256 entries (presumably one per byte value - confirm)
+ char line[MAXPREVLINE][MAXLNLEN]; // parsed line and previous lines: MAXPREVLINE buffers of MAXLNLEN chars each
+ char urlline[MAXLNLEN]; // mask for URL detection; same length as one line buffer
+ int checkurl; // NOTE(review): presumably the setting stored by set_url_checking() - confirm
+ int actual; // actual (current) line; presumably an index into line[] - confirm
+ int head; // head position
+ int token; // beginning of the current token
+ int state; // state of the parsing automaton
+ int utf8; // UTF-8 character encoding (presumably nonzero when enabled - confirm)
+ int next_char(char * line, int * pos); // overload taking an explicit line and position; the public next_char() takes none
+ unsigned short * wordchars_utf16; // word characters as unsigned short values (name suggests UTF-16 - confirm); ownership not visible here
+ int wclen; // NOTE(review): presumably the element count of wordchars_utf16 - confirm
+
+public:
+
+ TextParser(); // default constructor
+ TextParser(unsigned short * wordchars, int len); // construct from an array of unsigned short word characters and its length
+ TextParser(const char * wc); // construct from a char string (wc: presumably the word characters - confirm)
+ virtual ~TextParser(); // virtual, so deleting through a TextParser pointer runs the derived destructor
+
+ void put_line(char * line); // NOTE(review): presumably stores a new line for parsing - confirm
+ char * get_line(); // NOTE(review): presumably returns the actual (current) line - confirm
+ char * get_prevline(int n); // NOTE(review): presumably returns the n-th previous line - confirm indexing and bounds
+ virtual char * next_token(); // virtual: derived parsers can override tokenization; ownership of the result is not visible here
+ int change_token(const char * word); // NOTE(review): presumably replaces the current token with word - confirm return value
+ void set_url_checking(int check); // NOTE(review): presumably stores check in checkurl - confirm
+
+ int get_tokenpos(); // NOTE(review): presumably returns the position of the current token - confirm
+ int is_wordchar(char * w); // NOTE(review): presumably tests the character at w against the word characters - confirm
+ const char * get_latin1(char * s); // NOTE(review): semantics not visible here; the name suggests a Latin-1 conversion - confirm
+ char * next_char(); // parameterless overload; see also the protected next_char(char *, int *)
+ int tokenize_urls(); // NOTE(review): semantics not visible in this header - confirm in the implementation
+ void check_urls(); // NOTE(review): presumably fills urlline, the URL detection mask - confirm
+ int get_url(int token_pos, int * head); // takes a token position and a pointer to a head position; semantics not visible here
+ char * alloc_token(int token, int * head); // NOTE(review): name suggests the result is newly allocated - confirm who frees it
+};
+
+#endif
+