summary | refs | log | tree | commit | diff
path: root/libxml/xml.l
diff options
context:
space:
mode:
Diffstat (limited to 'libxml/xml.l')
-rw-r--r-- libxml/xml.l 68
1 files changed, 43 insertions, 25 deletions
diff --git a/libxml/xml.l b/libxml/xml.l
index 13ff219..d5d8237 100644
--- a/libxml/xml.l
+++ b/libxml/xml.l
@@ -31,7 +31,6 @@
#include <vector>
#include <stdio.h>
#include "xml.h"
-//#include "message.h"
#define YY_NEVER_INTERACTIVE 1
#define YY_NO_INPUT 1
@@ -42,7 +41,7 @@ struct xmlYY_state
std::string fileName;
int lineNr = 1;
const char * inputString = 0; //!< the code fragment as text
- yy_size_t inputPosition = 0; //!< read offset during parsing
+ int inputPosition = 0; //!< read offset during parsing
std::string name;
bool isEnd = false;
bool selfClose = false;
@@ -54,14 +53,16 @@ struct xmlYY_state
int cdataContext;
int commentContext;
char stringChar;
+ std::string encoding;
std::vector<std::string> xpath;
+ std::function<XMLParser::Transcode> transcodeFunc;
};
#if USE_STATE2STRING
static const char *stateToString(int state);
#endif
-static yy_size_t yyread(yyscan_t yyscanner,char *buf,yy_size_t max_size);
+static int yyread(yyscan_t yyscanner,char *buf,int max_size);
static void initElement(yyscan_t yyscanner);
static void addCharacters(yyscan_t yyscanner);
static void addElement(yyscan_t yyscanner);
@@ -192,10 +193,18 @@ ENDCDATA "]]>"
. { yyextra->data += yytext; }
}
<Prolog>{
+ "encoding"\s*=\s*\"[^\"]*\" {
+ std::string encoding=yytext;
+ size_t i=encoding.find('"');
+ encoding=encoding.substr(i+1,yyleng-i-2);
+ if (encoding!="UTF-8") // need to transcode to UTF-8
+ {
+ yyextra->encoding=encoding;
+ }
+ }
{CLOSESPECIAL} { countLines(yyscanner,yytext,yyleng);
BEGIN(Initial);
}
- [^?\n]+ { }
\n { yyextra->lineNr++; }
. { }
}
@@ -218,12 +227,12 @@ ENDCDATA "]]>"
//----------------------------------------------------------------------------------------
-static yy_size_t yyread(yyscan_t yyscanner,char *buf,size_t max_size)
+static int yyread(yyscan_t yyscanner,char *buf,int max_size)
{
struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- yy_size_t inputPosition = yyextra->inputPosition;
+ int inputPosition = yyextra->inputPosition;
const char *s = yyextra->inputString + inputPosition;
- yy_size_t c=0;
+ int c=0;
while( c < max_size && *s)
{
*buf++ = *s++;
@@ -321,6 +330,10 @@ static void addCharacters(yyscan_t yyscanner)
{
struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
std::string data = trimSpaces(yyextra->data);
+ if (!yyextra->encoding.empty() && !yyextra->transcodeFunc(data,yyextra->encoding.c_str()))
+ {
+ reportError(yyscanner,"failed to transcode string '"+data+"' from encoding '"+yyextra->encoding+"' to UTF-8");
+ }
if (yyextra->handlers.characters)
{
yyextra->handlers.characters(data);
@@ -337,7 +350,12 @@ static void addCharacters(yyscan_t yyscanner)
static void addAttribute(yyscan_t yyscanner)
{
struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
- yyextra->attrs.insert(std::make_pair(yyextra->attrName,yyextra->attrValue));
+ std::string val = yyextra->attrValue;
+ if (!yyextra->encoding.empty() && !yyextra->transcodeFunc(val,yyextra->encoding.c_str()))
+ {
+ reportError(yyscanner,"failed to transcode string '"+val+"' from encoding '"+yyextra->encoding+"' to UTF-8");
+ }
+ yyextra->attrs.insert(std::make_pair(yyextra->attrName,val));
}
static void reportError(yyscan_t yyscanner,const std::string &msg)
@@ -426,34 +444,37 @@ XMLParser::~XMLParser()
xmlYYlex_destroy(p->yyscanner);
}
-void XMLParser::parse(const char *fileName,const char *inputStr, bool debugEnabled)
+void XMLParser::parse(const char *fileName,
+ const char *inputStr,
+ bool debugEnabled,
+ std::function<void()> debugStart,
+ std::function<void()> debugEnd,
+ std::function<Transcode> transcodeFunc)
{
yyscan_t yyscanner = p->yyscanner;
struct yyguts_t *yyg = (struct yyguts_t*)yyscanner;
#ifdef FLEX_DEBUG
- xmlYYset_debug(1,p->yyscanner);
+ xmlYYset_debug(debugEnabled?1:0,p->yyscanner);
#endif
if (inputStr==nullptr || inputStr[0]=='\0') return; // empty input
- FILE *output = 0;
- const char *enter_txt = 0;
- const char *finished_txt = 0;
- const char *pre_txt = 0;
- if (yy_flex_debug) { output=stderr; pre_txt="--"; enter_txt="entering"; finished_txt="finished"; }
- else if (debugEnabled) { output=stdout; pre_txt=""; enter_txt="Entering"; finished_txt="Finished"; }
-
- if (output)
- {
- fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,enter_txt, __FILE__, fileName);
- }
+ debugStart();
BEGIN(Initial);
yyextra->fileName = fileName;
yyextra->lineNr = 1;
yyextra->inputString = inputStr;
yyextra->inputPosition = 0;
+ yyextra->transcodeFunc = transcodeFunc;
+
+ if (static_cast<unsigned char>(inputStr[0])==0xEF &&
+ static_cast<unsigned char>(inputStr[1])==0xBB &&
+ static_cast<unsigned char>(inputStr[2])==0xBF)
+ {
+ yyextra->inputPosition = 3; // remove UTF-8 BOM
+ }
xmlYYrestart( 0, yyscanner );
@@ -474,10 +495,7 @@ void XMLParser::parse(const char *fileName,const char *inputStr, bool debugEnabl
reportError(yyscanner,msg);
}
- if (output)
- {
- fprintf(output,"%s%s lexical analyzer: %s (for: %s)\n",pre_txt,finished_txt, __FILE__, fileName);
- }
+ debugEnd();
}
int XMLParser::lineNr() const