summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorAnas Nashif <anas.nashif@intel.com>2012-11-10 08:38:23 -0800
committerAnas Nashif <anas.nashif@intel.com>2012-11-10 08:38:23 -0800
commit65619a8a518ba0f513e57429e461d199264a9929 (patch)
treed87b5dd33429139a1900fb37928466df569cad3f /utils
downloadpoppler-65619a8a518ba0f513e57429e461d199264a9929.tar.gz
poppler-65619a8a518ba0f513e57429e461d199264a9929.tar.bz2
poppler-65619a8a518ba0f513e57429e461d199264a9929.zip
Imported Upstream version 0.20.4upstream/0.20.4
Diffstat (limited to 'utils')
-rw-r--r--utils/CMakeLists.txt133
-rw-r--r--utils/HtmlFonts.cc386
-rw-r--r--utils/HtmlFonts.h117
-rw-r--r--utils/HtmlLinks.cc143
-rw-r--r--utils/HtmlLinks.h73
-rw-r--r--utils/HtmlOutputDev.cc1886
-rw-r--r--utils/HtmlOutputDev.h354
-rw-r--r--utils/HtmlUtils.h51
-rw-r--r--utils/ImageOutputDev.cc425
-rw-r--r--utils/ImageOutputDev.h148
-rw-r--r--utils/Makefile.am137
-rw-r--r--utils/Makefile.in1052
-rw-r--r--utils/parseargs.cc220
-rw-r--r--utils/parseargs.h88
-rw-r--r--utils/pdf2xml.dtd49
-rw-r--r--utils/pdfdetach.1103
-rw-r--r--utils/pdfdetach.cc318
-rw-r--r--utils/pdffonts.1119
-rw-r--r--utils/pdffonts.cc224
-rw-r--r--utils/pdfimages.1189
-rw-r--r--utils/pdfimages.cc191
-rw-r--r--utils/pdfinfo.1146
-rw-r--r--utils/pdfinfo.cc464
-rw-r--r--utils/pdfseparate.149
-rw-r--r--utils/pdfseparate.cc122
-rw-r--r--utils/pdftocairo.1261
-rw-r--r--utils/pdftocairo.cc1015
-rw-r--r--utils/pdftohtml.1108
-rw-r--r--utils/pdftohtml.cc603
-rw-r--r--utils/pdftoppm.1158
-rw-r--r--utils/pdftoppm.cc440
-rw-r--r--utils/pdftops.1227
-rw-r--r--utils/pdftops.cc422
-rw-r--r--utils/pdftotext.1137
-rw-r--r--utils/pdftotext.cc486
-rw-r--r--utils/pdfunite.133
-rw-r--r--utils/pdfunite.cc182
-rw-r--r--utils/printencodings.cc34
-rw-r--r--utils/printencodings.h24
39 files changed, 11317 insertions, 0 deletions
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
new file mode 100644
index 00000000..4a29841b
--- /dev/null
+++ b/utils/CMakeLists.txt
@@ -0,0 +1,133 @@
+
+# Sources compiled into every utility in this directory.
+set(common_srcs parseargs.cc)
+
+# Libraries every utility links against; the fontconfig libraries are added
+# only when FONTCONFIG_FOUND is set.
+set(common_libs poppler)
+if(FONTCONFIG_FOUND)
+  list(APPEND common_libs ${FONTCONFIG_LIBRARIES})
+endif()
+
+# pdftoppm
+# Built and installed (together with its man page) only when ENABLE_SPLASH
+# is set.
+if(ENABLE_SPLASH)
+  set(pdftoppm_SOURCES ${common_srcs} pdftoppm.cc)
+  add_executable(pdftoppm ${pdftoppm_SOURCES})
+  target_link_libraries(pdftoppm ${common_libs})
+  install(TARGETS pdftoppm DESTINATION bin)
+  install(FILES pdftoppm.1 DESTINATION share/man/man1)
+endif()
+
+# pdftocairo: built only when HAVE_CAIRO is set.  The cairo output-device
+# sources in ../poppler are addressed relative to this directory rather than
+# through CMAKE_SOURCE_DIR, so the build keeps working when poppler is nested
+# inside another CMake project (CMAKE_SOURCE_DIR then names the outer project).
+if(HAVE_CAIRO)
+  set(pdftocairo_SOURCES ${common_srcs}
+    pdftocairo.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../poppler/CairoFontEngine.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../poppler/CairoOutputDev.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../poppler/CairoRescaleBox.cc
+  )
+  # Directory-scoped commands: their effect is not limited to pdftocairo.
+  include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${CAIRO_INCLUDE_DIRS})
+  add_definitions(${CAIRO_CFLAGS})
+  add_executable(pdftocairo ${pdftocairo_SOURCES})
+  target_link_libraries(pdftocairo ${CAIRO_LIBRARIES} ${FREETYPE_LIBRARIES} ${common_libs})
+  if(LCMS_FOUND)
+    target_link_libraries(pdftocairo ${LCMS_LIBRARIES})
+  endif()
+  if(LCMS2_FOUND)
+    target_link_libraries(pdftocairo ${LCMS2_LIBRARIES})
+  endif()
+  install(TARGETS pdftocairo DESTINATION bin)
+  install(FILES pdftocairo.1 DESTINATION share/man/man1)
+endif()
+
+# Utilities that are built unconditionally.  Every stanza follows one pattern:
+#   <tool>_SOURCES          shared sources plus the tool's own files
+#   add_executable          the program itself
+#   target_link_libraries   links it against ${common_libs}
+#   install                 binary into bin, <tool>.1 into share/man/man1
+# Install destinations are relative to the install prefix.
+
+# pdfdetach
+set(pdfdetach_SOURCES ${common_srcs} pdfdetach.cc)
+add_executable(pdfdetach ${pdfdetach_SOURCES})
+target_link_libraries(pdfdetach ${common_libs})
+
+install(TARGETS pdfdetach DESTINATION bin)
+install(FILES pdfdetach.1 DESTINATION share/man/man1)
+
+# pdffonts
+set(pdffonts_SOURCES ${common_srcs} pdffonts.cc)
+add_executable(pdffonts ${pdffonts_SOURCES})
+target_link_libraries(pdffonts ${common_libs})
+
+install(TARGETS pdffonts DESTINATION bin)
+install(FILES pdffonts.1 DESTINATION share/man/man1)
+
+# pdfimages
+set(pdfimages_SOURCES ${common_srcs}
+  pdfimages.cc
+  ImageOutputDev.cc ImageOutputDev.h
+)
+add_executable(pdfimages ${pdfimages_SOURCES})
+target_link_libraries(pdfimages ${common_libs})
+
+install(TARGETS pdfimages DESTINATION bin)
+install(FILES pdfimages.1 DESTINATION share/man/man1)
+
+# pdfinfo
+set(pdfinfo_SOURCES ${common_srcs} pdfinfo.cc printencodings.cc)
+add_executable(pdfinfo ${pdfinfo_SOURCES})
+target_link_libraries(pdfinfo ${common_libs})
+
+install(TARGETS pdfinfo DESTINATION bin)
+install(FILES pdfinfo.1 DESTINATION share/man/man1)
+
+# pdftops
+set(pdftops_SOURCES ${common_srcs} pdftops.cc)
+add_executable(pdftops ${pdftops_SOURCES})
+target_link_libraries(pdftops ${common_libs})
+
+install(TARGETS pdftops DESTINATION bin)
+install(FILES pdftops.1 DESTINATION share/man/man1)
+
+# pdftotext
+set(pdftotext_SOURCES ${common_srcs} pdftotext.cc printencodings.cc)
+add_executable(pdftotext ${pdftotext_SOURCES})
+target_link_libraries(pdftotext ${common_libs})
+
+install(TARGETS pdftotext DESTINATION bin)
+install(FILES pdftotext.1 DESTINATION share/man/man1)
+
+# pdftohtml
+# The HTML output device (Html*.cc) is compiled directly into the tool.
+set(pdftohtml_SOURCES
+  ${common_srcs} pdftohtml.cc
+  HtmlFonts.cc HtmlLinks.cc HtmlOutputDev.cc
+)
+add_executable(pdftohtml ${pdftohtml_SOURCES})
+target_link_libraries(pdftohtml ${common_libs})
+
+install(TARGETS pdftohtml DESTINATION bin)
+install(FILES pdftohtml.1 DESTINATION share/man/man1)
+
+# pdfseparate
+set(pdfseparate_SOURCES ${common_srcs} pdfseparate.cc)
+add_executable(pdfseparate ${pdfseparate_SOURCES})
+target_link_libraries(pdfseparate ${common_libs})
+
+install(TARGETS pdfseparate DESTINATION bin)
+install(FILES pdfseparate.1 DESTINATION share/man/man1)
+
+# pdfunite
+set(pdfunite_SOURCES ${common_srcs} pdfunite.cc)
+add_executable(pdfunite ${pdfunite_SOURCES})
+target_link_libraries(pdfunite ${common_libs})
+
+install(TARGETS pdfunite DESTINATION bin)
+install(FILES pdfunite.1 DESTINATION share/man/man1)
diff --git a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc
new file mode 100644
index 00000000..be02c5f3
--- /dev/null
+++ b/utils/HtmlFonts.cc
@@ -0,0 +1,386 @@
+//========================================================================
+//
+// This file comes from pdftohtml project
+// http://pdftohtml.sourceforge.net
+//
+// Copyright from:
+// Gueorgui Ovtcharov
+// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
+// Mikhail Kruk <meshko@cs.brandeis.edu>
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2007, 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
+// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
+// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
+// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
+// Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "HtmlFonts.h"
+#include "HtmlUtils.h"
+#include "GlobalParams.h"
+#include "UnicodeMap.h"
+#include "GfxFont.h"
+#include <stdio.h>
+
+ struct Fonts{ // one row of the font lookup table below
+ const char *Fontname; // full font name, compared against the font's name in the HtmlFont constructor
+ const char *name; // family name handed out by HtmlFont::getFontName()
+ };
+// Number of named entries in fonts[]; index font_num itself is the fallback entry.
+const int font_num=13;
+// Lookup table: font_num known font names followed by one fallback row.
+static Fonts fonts[font_num+1]={
+ {"Courier", "Courier" },
+ {"Courier-Bold", "Courier"},
+ {"Courier-BoldOblique", "Courier"},
+ {"Courier-Oblique", "Courier"},
+ {"Helvetica", "Helvetica"},
+ {"Helvetica-Bold", "Helvetica"},
+ {"Helvetica-BoldOblique", "Helvetica"},
+ {"Helvetica-Oblique", "Helvetica"},
+ {"Symbol", "Symbol" },
+ {"Times-Bold", "Times" },
+ {"Times-BoldItalic", "Times" },
+ {"Times-Italic", "Times" },
+ {"Times-Roman", "Times" },
+ {" " , "Times" }, // fallback row (index font_num)
+};
+// Round a non-negative value to the nearest int; the argument is parenthesized so compound expressions expand correctly.
+#define xoutRound(x) ((int)((x) + 0.5))
+extern GBool xml; // defined outside this file; when set, XML is produced instead of HTML/CSS (see HtmlFilter, CSStyle)
+// Family name used for fonts that map to the fallback row of fonts[]; replaceable via setDefaultFont().
+GooString* HtmlFont::DefaultFont=new GooString("Times"); // Arial,Helvetica,sans-serif
+// Convert a GfxRGB to 8-bit channels; a result outside 0..255 is reported on stderr (unless errors are quiet) and replaced by black.
+HtmlFontColor::HtmlFontColor(GfxRGB rgb){
+  r=static_cast<int>(rgb.r/65535.0*255.0);
+  g=static_cast<int>(rgb.g/65535.0*255.0);
+  b=static_cast<int>(rgb.b/65535.0*255.0);
+  const GBool valid=Ok(r)&&Ok(b)&&Ok(g);
+  if (valid) return;
+  if (!globalParams->getErrQuiet()) fprintf(stderr, "Error : Bad color (%d,%d,%d) reset to (0,0,0)\n", r, g, b);
+  r=g=b=0;
+}
+// Format xcol as two hex characters, high nibble first, using lowercase
+// letters; intended for values up to 255.  The caller owns the result.
+GooString *HtmlFontColor::convtoX(unsigned int xcol) const{
+  GooString *hex=new GooString();
+  const unsigned int nibbles[2]={xcol/16, xcol%16};
+  for (int n=0; n<2; ++n) {
+    const unsigned int v=nibbles[n];
+    // v is unsigned, so only the upper bound of the decimal range is tested
+    const char digit=(v<10) ? (char)('0'+v) : (char)('a'+v-10);
+    hex->append(digit);
+  }
+  return hex;
+}
+// Return the color as "#" followed by the two-character hex form of the red,
+// green and blue channels, in that order.  The caller owns the result.
+GooString *HtmlFontColor::toString() const{
+  GooString *text=new GooString("#");
+  const unsigned int channels[3]={r, g, b};
+
+  for (int c=0; c<3; ++c) {
+    // convtoX allocates; its result is only needed until it is appended
+    GooString *part=convtoX(channels[c]);
+    text->append(part);
+    delete part;
+  }
+  return text;
+}
+// Build the HTML font description for a PDF font.
+//   font  - source font; supplies the name and the bold/italic hints
+//   _size - font size; stored as _size-1
+//   rgb   - text color
+HtmlFont::HtmlFont(GfxFont *font, int _size, GfxRGB rgb){
+  color=HtmlFontColor(rgb);
+  size=(_size-1);
+  lineSize = -1;
+  rotOrSkewed = gFalse;
+
+  // Style flags reported by the font itself.
+  bold = (font->isBold() || font->getWeight() >= GfxFont::W700) ? gTrue : gFalse;
+  italic = font->isItalic() ? gTrue : gFalse;
+
+  // Unnamed fonts fall back to the current default font name.
+  GooString *ftname=font->getName();
+  if (!ftname) {
+    ftname = getDefaultFont();
+  }
+
+  if (!ftname) {
+    // No name at all: use the fallback slot of the fonts table.
+    FontName = NULL;
+    pos = font_num;
+  } else {
+    FontName=new GooString(ftname);
+
+    // The name itself may carry style hints the font flags did not report.
+    GooString *lowered = new GooString(ftname);
+    const char *lname = lowered->lowerCase()->getCString();
+    if (!bold && strstr(lname,"bold")) {
+      bold=gTrue;
+    }
+    if (!italic && (strstr(lname,"italic") || strstr(lname,"oblique"))) {
+      italic=gTrue;
+    }
+    delete lowered;
+
+    // Locate the exact name in the fonts table; pos ends up as font_num
+    // (the fallback entry) when there is no match.
+    int idx=0;
+    while (idx<font_num && strcmp(ftname->getCString(),fonts[idx].Fontname)) {
+      idx++;
+    }
+    pos=idx;
+  }
+
+  // Re-create the shared default name if it is currently unset.
+  if (!DefaultFont) {
+    DefaultFont=new GooString(fonts[font_num].name);
+  }
+}
+// Copy constructor: copies every field and deep-copies the font name so both objects own their own string.
+HtmlFont::HtmlFont(const HtmlFont& x){
+  size=x.size;
+  lineSize=x.lineSize;
+  italic=x.italic; bold=x.bold;
+  pos=x.pos;
+  color=x.color;
+  // FontName must always be assigned: the destructor deletes it, so it may not stay uninitialized for a nameless source.
+  FontName = x.FontName ? new GooString(x.FontName) : NULL;
+  rotOrSkewed = x.rotOrSkewed;
+  memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
+}
+
+// Release the owned font name (deleting a null pointer is a no-op).
+HtmlFont::~HtmlFont(){
+  delete FontName;
+}
+// Assignment: copies every field, including the rotation/skew state, and deep-copies the font name.
+HtmlFont& HtmlFont::operator=(const HtmlFont& x){
+  if (this==&x) return *this;
+  size=x.size; lineSize=x.lineSize; pos=x.pos;
+  italic=x.italic; bold=x.bold; color=x.color;
+  rotOrSkewed = x.rotOrSkewed;
+  memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat));
+  // Copy first, then release: FontName never dangles and becomes NULL when x has no name.
+  GooString *copy = x.FontName ? new GooString(x.FontName) : NULL;
+  delete FontName;
+  FontName = copy;
+  return *this;
+}
+// Free the shared default font name and mark it as unset.
+void HtmlFont::clear(){
+  delete DefaultFont;
+  DefaultFont = NULL;
+}
+
+/*
+  Identity test used when inserting into the list of all encountered fonts:
+  size, line size, table position, bold/italic, color and rotation/skew
+  state must all match; the matrices are compared only for rotated fonts.
+*/
+GBool HtmlFont::isEqual(const HtmlFont& x) const{
+  if (size!=x.size || lineSize!=x.lineSize) return gFalse;
+  if (pos!=x.pos || bold!=x.bold || italic!=x.italic) return gFalse;
+  if (!color.isEqual(x.getColor())) return gFalse;
+  if (isRotOrSkewed() != x.isRotOrSkewed()) return gFalse;
+  if (!isRotOrSkewed()) return gTrue;
+  return rot_matrices_equal(getRotMat(), x.getRotMat()) ? gTrue : gFalse;
+}
+/*
+  Looser test used to decide whether two pieces of text can be joined:
+  bold/italic are deliberately ignored; size, font family and color must agree.
+*/
+GBool HtmlFont::isEqualIgnoreBold(const HtmlFont& x) const{
+  const GBool sameSize = (size==x.size);
+  const GBool sameFamily = sameSize && !strcmp(fonts[pos].name, fonts[x.pos].name);
+  const GBool sameColor = sameFamily && color.isEqual(x.getColor());
+  return sameColor;
+}
+// Family name of this font: the fonts[] row at pos, or the default font for the fallback row. The caller owns the result.
+GooString* HtmlFont::getFontName(){
+  if (pos==font_num) return new GooString(DefaultFont);
+  return new GooString(fonts[pos].name);
+}
+// Full font name as recorded at construction, or the default font name when
+// none was recorded.  The caller owns the returned string.
+GooString* HtmlFont::getFullName(){
+  GooString *source = FontName ? FontName : DefaultFont;
+  return new GooString(source);
+}
+// Replace the shared default font name with a copy of defaultFont; the copy is made before the old value is freed, so passing getDefaultFont() is safe.
+void HtmlFont::setDefaultFont(GooString* defaultFont){
+  GooString *replacement=new GooString(defaultFont);
+  delete DefaultFont;
+  DefaultFont=replacement;
+}
+// Returns the shared default font name itself (not a copy); it is NULL after clear().
+GooString* HtmlFont::getDefaultFont(){
+ return DefaultFont;
+}
+// Escape a run of Unicode characters for HTML/XML output, converting everything
+// else with the current text encoding.  Original note: this method is plain wrong (todo).
+GooString* HtmlFont::HtmlFilter(Unicode* u, int uLen) {
+  GooString *out = new GooString();
+  // get the output encoding; without one the empty string is returned
+  UnicodeMap *uMap = globalParams->getTextEncoding();
+  if (!uMap) {
+    return out;
+  }
+
+  char buf[8];
+  for (int i = 0; i < uLen; ++i) {
+    const Unicode c = u[i];
+    if (c == '"') {
+      out->append("&#34;");
+    } else if (c == '&') {
+      out->append("&amp;");
+    } else if (c == '<') {
+      out->append("&lt;");
+    } else if (c == '>') {
+      out->append("&gt;");
+    } else if (c == ' ') {
+      // Outside XML mode a space that ends the run, starts the output or
+      // follows a plain space is written as a non-breaking space.
+      const GBool nbsp = !xml && ( i+1 >= uLen || !out->getLength() || out->getChar( out->getLength()-1 ) == ' ' );
+      out->append( nbsp ? "&#160;" : " " );
+    } else {
+      // convert unicode to string
+      const int n = uMap->mapUnicode(c, buf, sizeof(buf));
+      if (n > 0) out->append(buf, n);
+    }
+  }
+  uMap->decRefCnt();
+  return out;
+}
+// Returns the escaped form of content as produced by HtmlFilter; font is not used by the active code.
+GooString* HtmlFont::simple(HtmlFont* font, Unicode* content, int uLen){
+ GooString *cont=HtmlFilter (content, uLen);
+ // Disabled: wrapping the text in <b>/<i> tags according to the font style.
+ /*if (font.isBold()) {
+ cont->insert(0,"<b>",3);
+ cont->append("</b>",4);
+ }
+ if (font.isItalic()) {
+ cont->insert(0,"<i>",3);
+ cont->append("</i>",4);
+ } */
+
+ return cont;
+}
+// Create an empty font list.
+HtmlFontAccu::HtmlFontAccu(){
+  accu=new std::vector<HtmlFont>;
+}
+// Destroy the font list together with every HtmlFont stored in it.
+HtmlFontAccu::~HtmlFontAccu(){
+  delete accu;
+}
+// Return the index of an entry equal to font (HtmlFont::isEqual), appending
+// a copy of font first when no such entry exists.
+int HtmlFontAccu::AddFont(const HtmlFont& font){
+  const int count=(int)accu->size();
+  for (int idx=0; idx<count; ++idx) {
+    if (font.isEqual((*accu)[idx])) {
+      return idx;
+    }
+  }
+
+  // Not present yet: the new entry lands at the old size.
+  accu->push_back(font);
+  return count;
+}
+// get CSS font definition for font #i
+// HTML mode yields a ".ft<j><i>{...}" CSS rule, XML mode a <fontspec .../>
+// element.  The caller owns the returned string.
+GooString* HtmlFontAccu::CSStyle(int i, int j){
+  // Vendor-prefixed property names emitted for rotated or skewed text.
+  static const char *const transformProps[4]={
+    ";-moz-transform:", ";-webkit-transform:", ";-o-transform:", ";-ms-transform:"};
+  static const char *const originProps[4]={
+    ";-moz-transform-origin: left 75%", ";-webkit-transform-origin: left 75%",
+    ";-o-transform-origin: left 75%", ";-ms-transform-origin: left 75%"};
+
+  GooString *css=new GooString();
+  GooString *iStr=GooString::fromInt(i);
+  GooString *jStr=GooString::fromInt(j);
+  // Work on a copy of the requested font (i is not range-checked).
+  HtmlFont font=*(accu->begin()+i);
+  GooString *Size=GooString::fromInt(font.getSize());
+  GooString *colorStr=font.getColor().toString();
+  GooString *fontName=font.getFontName();
+
+  if (xml) {
+    css->append("<fontspec id=\"");
+    css->append(iStr);
+    css->append("\" size=\"");
+    css->append(Size);
+    css->append("\" family=\"");
+    css->append(fontName);
+    css->append("\" color=\"");
+    css->append(colorStr);
+    css->append("\"/>");
+  } else {
+    css->append(".ft");
+    css->append(jStr);
+    css->append(iStr);
+    css->append("{font-size:");
+    css->append(Size);
+    // line-height is written only for a line size other than -1 and 0
+    const int lineSize=font.getLineSize();
+    if (lineSize != -1 && lineSize != 0) {
+      GooString *lSize=GooString::fromInt(lineSize);
+      css->append("px;line-height:");
+      css->append(lSize);
+      delete lSize;
+    }
+    css->append("px;font-family:");
+    css->append(fontName);
+    css->append(";color:");
+    css->append(colorStr);
+    // if there is rotation or skew, include the matrix
+    if (font.isRotOrSkewed()) {
+      const double * const text_mat = font.getRotMat();
+      GooString matrix_str(" matrix(");
+      matrix_str.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)",
+                         text_mat[0], text_mat[1], text_mat[2], text_mat[3]);
+      for (int v=0; v<4; ++v) {
+        css->append(transformProps[v]);
+        css->append(&matrix_str);
+      }
+      // Todo: 75% is a wild guess that seems to work pretty well;
+      // We probably need to calculate the real percentage
+      // Based on the characteristic baseline and bounding box of current font
+      // PDF origin is at baseline
+      for (int v=0; v<4; ++v) {
+        css->append(originProps[v]);
+      }
+    }
+    css->append(";}");
+  }
+
+  delete fontName;
+  delete colorStr;
+  delete jStr;
+  delete iStr;
+  delete Size;
+  return css;
+}
+
+
diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h
new file mode 100644
index 00000000..22368b28
--- /dev/null
+++ b/utils/HtmlFonts.h
@@ -0,0 +1,117 @@
+//========================================================================
+//
+// This file comes from pdftohtml project
+// http://pdftohtml.sourceforge.net
+//
+// Copyright from:
+// Gueorgui Ovtcharov
+// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
+// Mikhail Kruk <meshko@cs.brandeis.edu>
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
+// Copyright (C) 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk>
+// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
+// Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef _HTML_FONTS_H
+#define _HTML_FONTS_H
+#include "goo/GooString.h"
+#include "GfxState.h"
+#include "CharTypes.h"
+#include <vector>
+// RGB text color with one unsigned value per channel and "#rrggbb"-style formatting.
+class HtmlFontColor{
+ private:
+  unsigned int r;
+  unsigned int g;
+  unsigned int b;
+  // true when xcol fits in 0..255 (the lower bound always holds for an unsigned value)
+  GBool Ok(unsigned int xcol){ return xcol<=255; }
+  GooString *convtoX(unsigned int xcol) const;
+ public:
+  HtmlFontColor():r(0),g(0),b(0){}
+  HtmlFontColor(GfxRGB rgb);
+  HtmlFontColor(const HtmlFontColor& x):r(x.r),g(x.g),b(x.b){}
+  HtmlFontColor& operator=(const HtmlFontColor &x){
+    r=x.r; g=x.g; b=x.b; return *this;
+  }
+  ~HtmlFontColor(){}
+  GooString* toString() const;
+  GBool isEqual(const HtmlFontColor& col) const{
+    return r==col.r && g==col.g && b==col.b;
+  }
+} ;
+// Font description used by the HTML/XML output: size, style flags, color,
+// position in the fonts table (HtmlFonts.cc) and optional rotation/skew matrix.
+class HtmlFont{
+ private:
+  unsigned int size;
+  int lineSize;
+  GBool italic;
+  GBool bold;
+  GBool rotOrSkewed;
+  int pos; // position of the font name in the fonts array
+  static GooString *DefaultFont;
+  GooString *FontName;
+  HtmlFontColor color;
+  double rotSkewMat[4]; // only four values needed for rotation and skew
+public:
+  // Every member gets a defined value so that copying or comparing a default-constructed font never reads indeterminate data.
+  HtmlFont():size(0),lineSize(-1),italic(gFalse),bold(gFalse),rotOrSkewed(gFalse),pos(0),FontName(NULL){ rotSkewMat[0]=rotSkewMat[1]=rotSkewMat[2]=rotSkewMat[3]=0.0; }
+  HtmlFont(GfxFont *font,int _size, GfxRGB rgb);
+  HtmlFont(const HtmlFont& x);
+  HtmlFont& operator=(const HtmlFont& x);
+  HtmlFontColor getColor() const {return color;}
+  ~HtmlFont();
+  static void clear();
+  GooString* getFullName();
+  GBool isItalic() const {return italic;}
+  GBool isBold() const {return bold;}
+  GBool isRotOrSkewed() const { return rotOrSkewed; }
+  unsigned int getSize() const {return size;}
+  int getLineSize() const {return lineSize;}
+  void setLineSize(int _lineSize) { lineSize = _lineSize; }
+  void setRotMat(const double * const mat)
+  { rotOrSkewed = gTrue; memcpy(rotSkewMat, mat, sizeof(rotSkewMat)); }
+  const double *getRotMat() const { return rotSkewMat; }
+  GooString* getFontName();
+  static GooString* getDefaultFont();
+  static void setDefaultFont(GooString* defaultFont);
+  static GooString* HtmlFilter(Unicode* u, int uLen); //char* s);
+  GBool isEqual(const HtmlFont& x) const;
+  GBool isEqualIgnoreBold(const HtmlFont& x) const;
+  static GooString* simple(HtmlFont *font, Unicode *content, int uLen);
+  void print() const {printf("font: %s %d %s%spos: %d\n", FontName ? FontName->getCString() : "(null)", size, bold ? "bold " : "", italic ? "italic " : "", pos);} // FontName may be NULL
+};
+// Growable list of the distinct HtmlFont objects seen so far; owns the vector it points to.
+class HtmlFontAccu{
+private:
+  std::vector<HtmlFont> *accu;
+
+public:
+  HtmlFontAccu();
+  ~HtmlFontAccu();
+  int AddFont(const HtmlFont& font);
+  HtmlFont *Get(int i){
+    std::vector<HtmlFont> &fonts=*accu;
+    return &fonts[i];
+  }
+  GooString* CSStyle(int i, int j = 0);
+  int size() const {return static_cast<int>(accu->size());}
+};
+#endif
diff --git a/utils/HtmlLinks.cc b/utils/HtmlLinks.cc
new file mode 100644
index 00000000..1d609f67
--- /dev/null
+++ b/utils/HtmlLinks.cc
@@ -0,0 +1,143 @@
+//========================================================================
+//
+// This file comes from pdftohtml project
+// http://pdftohtml.sourceforge.net
+//
+// Copyright from:
+// Gueorgui Ovtcharov
+// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
+// Mikhail Kruk <meshko@cs.brandeis.edu>
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
+// Copyright (C) 2010 Albert Astals Cid <aacid@kde.org>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "HtmlLinks.h"
+
+extern GBool xml;
+// Copy constructor: duplicates the rectangle and deep-copies the destination
+// string so that each link owns its own GooString.
+HtmlLink::HtmlLink(const HtmlLink& x)
+  : Xmin(x.Xmin), Ymin(x.Ymin),
+    Xmax(x.Xmax), Ymax(x.Ymax),
+    dest(new GooString(x.dest))
+{
+}
+// Build a link from a rectangle whose corner coordinates may be given in
+// either order; the smaller value of each pair becomes the minimum and the
+// other one the maximum.
+// _dest is copied; the caller keeps ownership of the string passed in.
+HtmlLink::HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString * _dest)
+{
+  // Horizontal extent: swap the arguments when they arrive reversed.
+  const GBool xOrdered = (xmin < xmax);
+  Xmin = xOrdered ? xmin : xmax;
+  Xmax = xOrdered ? xmax : xmin;
+
+  // Vertical extent, same treatment.
+  const GBool yOrdered = (ymin < ymax);
+  Ymin = yOrdered ? ymin : ymax;
+  Ymax = yOrdered ? ymax : ymin;
+
+  // Keep a private copy of the destination.
+  dest=new GooString(_dest);
+}
+// Frees the destination string owned by this link.
+HtmlLink::~HtmlLink(){
+ delete dest;
+}
+// True when both links have identical destination strings.
+GBool HtmlLink::isEqualDest(const HtmlLink& x) const{
+  return strcmp(dest->getCString(), x.dest->getCString()) == 0;
+}
+// True when the vertical midpoint of the given box lies above Ymin and not
+// above Ymax, and the box overlaps the link horizontally.
+GBool HtmlLink::inLink(double xmin,double ymin,double xmax,double ymax) const {
+  const double yMid=(ymin+ymax)/2;
+  return !(yMid>Ymax) && (yMid>Ymin) && (xmin<Xmax) && (xmax>Xmin);
+}
+// Return s with the characters " & < > replaced by &quot; &amp; &lt; &gt;.
+// When nothing needs escaping s itself is returned; otherwise a newly
+// allocated copy is returned, which the caller must delete.
+static GooString* EscapeSpecialChars( GooString* s )
+{
+  GooString *out = s; // stays s until the first replacement is needed
+  int outPos = 0;     // index in out that corresponds to index i in s
+  for( int i = 0; i < s->getLength(); ++i, ++outPos ){
+    const char *entity;
+    switch ( s->getChar(i) ){
+      case '"': entity = "&quot;"; break;
+      case '&': entity = "&amp;"; break;
+      case '<': entity = "&lt;"; break;
+      case '>': entity = "&gt;"; break;
+      default:  entity = NULL; break;
+    }
+    if( !entity ) continue;
+    if( out == s ) out = new GooString( s );
+    const int len = strlen( entity );
+    out->del( outPos, 1 );
+    out->insert( outPos, entity, len );
+    outPos += len - 1; // skip past the inserted entity
+  }
+  return out;
+}
+// Opening <a href="..."> tag for this link (destination escaped in XML mode only); the caller owns the result.
+GooString* HtmlLink::getLinkStart() {
+  GooString *target = xml ? EscapeSpecialChars(dest) : dest;
+  GooString *tag = new GooString("<a href=\"");
+  tag->append( target );
+  tag->append("\">");
+  if( target != dest ) delete target;
+  return tag;
+}
+
+/*GooString* HtmlLink::Link(GooString* content){
+ //GooString* _dest=new GooString(dest);
+ GooString *tmp=new GooString("<a href=\"");
+ tmp->append(dest);
+ tmp->append("\">");
+ tmp->append(content);
+ tmp->append("</a>");
+ //delete _dest;
+ return tmp;
+ }*/
+
+
+// Create an empty link list.
+HtmlLinks::HtmlLinks(){
+  accu=new std::vector<HtmlLink>;
+}
+// Destroys the link list together with all stored links, then clears the pointer.
+HtmlLinks::~HtmlLinks(){
+ delete accu;
+ accu=NULL;
+}
+// Find the first stored link that contains the given box (HtmlLink::inLink).
+// On success p receives its index and 1 is returned; otherwise p is left untouched and 0 is returned.
+GBool HtmlLinks::inLink(double xmin,double ymin,double xmax,double ymax,int& p)const {
+  const int count=(int)accu->size();
+  for (int idx=0; idx<count; ++idx) {
+    if (!(*accu)[idx].inLink(xmin,ymin,xmax,ymax)) continue;
+    p=idx;
+    return 1;
+  }
+  return 0;
+}
+// Returns a pointer to the i-th stored link; i is not range-checked.
+HtmlLink* HtmlLinks::getLink(int i) const{
+ return &(*accu)[i];
+}
+
diff --git a/utils/HtmlLinks.h b/utils/HtmlLinks.h
new file mode 100644
index 00000000..4a48dfa9
--- /dev/null
+++ b/utils/HtmlLinks.h
@@ -0,0 +1,73 @@
+//========================================================================
+//
+// This file comes from pdftohtml project
+// http://pdftohtml.sourceforge.net
+//
+// Copyright from:
+// Gueorgui Ovtcharov
+// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/>
+// Mikhail Kruk <meshko@cs.brandeis.edu>
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2010 Albert Astals Cid <aacid@kde.org>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef _HTML_LINKS
+#define _HTML_LINKS
+
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+#include "goo/GooString.h"
+// A rectangular link area plus its destination string, which the link owns.
+class HtmlLink{
+private:
+  double Xmin;
+  double Ymin;
+  double Xmax;
+  double Ymax;
+  GooString* dest;
+public:
+  HtmlLink(const HtmlLink& x);
+  HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString *_dest);
+  ~HtmlLink();
+  // Deep-copying assignment: the implicit one would share dest between two links and delete it twice.
+  HtmlLink& operator=(const HtmlLink& x){
+    GooString *copy=new GooString(x.dest); // copied before the old value is freed, so self-assignment is safe
+    delete dest; dest=copy;
+    Xmin=x.Xmin; Ymin=x.Ymin; Xmax=x.Xmax; Ymax=x.Ymax;
+    return *this;
+  }
+  GBool isEqualDest(const HtmlLink& x) const;
+  GooString *getDest(){return new GooString(dest);}
+  double getX1() const {return Xmin;}  double getX2() const {return Xmax;}
+  double getY1() const {return Ymin;}  double getY2() const {return Ymax;}
+  GBool inLink(double xmin,double ymin,double xmax,double ymax) const ;
+  GooString* getLinkStart();
+};
+class HtmlLinks{ // list of HtmlLink objects
+private:
+ std::vector<HtmlLink> *accu; // NOTE(review): no copy constructor/assignment is declared here; a copied HtmlLinks would share this pointer
+public:
+ HtmlLinks();
+ ~HtmlLinks();
+ void AddLink(const HtmlLink& x) {accu->push_back(x);} // stores a copy of x
+ GBool inLink(double xmin,double ymin,double xmax,double ymax,int& p) const; // p receives the index of the first matching link
+ HtmlLink* getLink(int i) const; // pointer to the i-th link, no range check
+
+};
+
+#endif
+
diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc
new file mode 100644
index 00000000..1d1b6285
--- /dev/null
+++ b/utils/HtmlOutputDev.cc
@@ -0,0 +1,1886 @@
+//========================================================================
+//
+// HtmlOutputDev.cc
+//
+// Copyright 1997-2002 Glyph & Cog, LLC
+//
+// Changed 1999-2000 by G.Ovtcharov
+//
+// Changed 2002 by Mikhail Kruk
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2005-2012 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org>
+// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru>
+// Copyright (C) 2008 Haruyuki Kawabe <Haruyuki.Kawabe@unisys.co.jp>
+// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com>
+// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org>
+// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
+// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com>
+// Copyright (C) 2010 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
+// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
+// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
+// Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com>
+// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifdef __GNUC__
+#pragma implementation
+#endif
+
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <ctype.h>
+#include <math.h>
+#include <iostream>
+#include "goo/GooString.h"
+#include "goo/GooList.h"
+#include "UnicodeMap.h"
+#include "goo/gmem.h"
+#include "Error.h"
+#include "GfxState.h"
+#include "Page.h"
+#include "Annot.h"
+#include "PNGWriter.h"
+#include "GlobalParams.h"
+#include "HtmlOutputDev.h"
+#include "HtmlFonts.h"
+#include "HtmlUtils.h"
+#include "Outline.h"
+#include "PDFDoc.h"
+
+#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: "
+
+// Records the file name of an extracted image and where it sits on the page.
+class HtmlImage
+{
+public:
+ // Stores _fName (deleted by the destructor, so ownership is taken) and
+ // transforms the points (0,0) and (1,1) through the graphics state to obtain
+ // the image extents. Note that (0,0) fills xMin/yMax and (1,1) fills xMax/yMin.
+ HtmlImage(GooString *_fName, GfxState *state)
+ : fName(_fName) {
+ state->transform(0, 0, &xMin, &yMax);
+ state->transform(1, 1, &xMax, &yMin);
+ }
+ ~HtmlImage() { delete fName; }
+
+ double xMin, xMax; // image x coordinates
+ double yMin, yMax; // image y coordinates
+ GooString *fName; // image file name
+};
+
+// Tells whether the distance from x to y is strictly smaller than the
+// distance from x to z.
+static inline bool IS_CLOSER(float x, float y, float z)
+{
+  const float gapToY = x - y;
+  const float gapToZ = x - z;
+  return fabs(gapToY) < fabs(gapToZ);
+}
+
+extern GBool complexMode;
+extern GBool singleHtml;
+extern GBool ignore;
+extern GBool printCommands;
+extern GBool printHtml;
+extern GBool noframes;
+extern GBool stout;
+extern GBool xml;
+extern GBool showHidden;
+extern GBool noMerge;
+
+extern double wordBreakThreshold;
+
+static GBool debug = gFalse;
+static GooString *gstr_buff0 = NULL; // a workspace in which I format strings
+
+// Returns a newly allocated string holding whatever follows the last SLASH in
+// str, or a copy of the whole string when it contains no SLASH.
+static GooString* basename(GooString* str){
+  const char *chars = str->getCString();
+  const int total = str->getLength();
+
+  // walk backwards until a separator is found or the string is exhausted
+  int cut = total - 1;
+  while (cut >= 0 && chars[cut] != SLASH)
+    --cut;
+
+  if (cut < 0)
+    return new GooString(str);
+  return new GooString(chars + cut + 1, total - cut - 1);
+}
+
+// Disabled counterpart of basename(): would return everything up to and
+// including the last SLASH, or an empty string when there is none.
+#if 0
+static GooString* Dirname(GooString* str){
+
+ char *p=str->getCString();
+ int len=str->getLength();
+ for (int i=len-1;i>=0;i--)
+ if (*(p+i)==SLASH)
+ return new GooString(p,i+1);
+ return new GooString();
+}
+#endif
+
+// Formats a matrix for debug output.
+// Reads six entries, mat[0] through mat[5], so the caller must supply at least
+// that many. The returned pointer refers to the shared workspace gstr_buff0 and
+// is invalidated by the next call that replaces that workspace.
+static const char *print_matrix(const double *mat) {
+ // release the previous workspace contents before formatting the new text
+ delete gstr_buff0;
+
+ gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]",
+ *mat, mat[1], mat[2], mat[3], mat[4], mat[5]);
+ return gstr_buff0->getCString();
+}
+
+// Renders a Unicode string as plain ASCII for debug output.
+//   u    - the code points to print
+//   uLen - number of entries in u
+// The first code point is printed as '?' when it is not below 0x7F; later code
+// points that are not below 0x7F are left out. Returns "" for an empty input,
+// otherwise a pointer into the shared workspace gstr_buff0, which stays valid
+// only until the workspace is replaced by a later call.
+static const char *print_uni_str(const Unicode *u, const unsigned uLen) {
+  // Release the previous workspace and clear the pointer, so that the early
+  // return below cannot leave a dangling pointer to be deleted a second time.
+  delete gstr_buff0;
+  gstr_buff0 = NULL;
+
+  if (!uLen) return "";
+  gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?'));
+  for (unsigned i = 1; i < uLen; i++) {
+    if (u[i] < 0x7F) {
+      // append() extends the string in place and returns the same object, so
+      // the result must not be treated as a fresh string replacing the old one
+      // (doing so deleted the live buffer).
+      gstr_buff0->append(static_cast<char>(u[i]));
+    }
+  }
+
+  return gstr_buff0->getCString();
+}
+
+//------------------------------------------------------------------------
+// HtmlString
+//------------------------------------------------------------------------
+
+// Creates an empty string anchored at the current text position of 'state'.
+//   state    - graphics state supplying position, font, fill colour and text matrix
+//   fontSize - font size used to derive the vertical extent of the string
+//   _fonts   - accumulator in which the font of this string is registered
+// The vertical extent (yMin/yMax) comes from the font's ascent and descent,
+// clamped to 1.05 and -0.4, or from fixed factors when no font is set.
+HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* _fonts) : fonts(_fonts) {
+  GfxFont *font;
+  double x, y;
+
+  state->transform(state->getCurX(), state->getCurY(), &x, &y);
+  if ((font = state->getFont())) {
+    double ascent = font->getAscent();
+    double descent = font->getDescent();
+    // clamp metrics that are outside the accepted range
+    if( ascent > 1.05 ){
+      ascent = 1.05;
+    }
+    if( descent < -0.4 ){
+      descent = -0.4;
+    }
+    yMin = y - ascent * fontSize;
+    yMax = y - descent * fontSize;
+    GfxRGB rgb;
+    state->getFillRGB(&rgb);
+    HtmlFont hfont=HtmlFont(font, static_cast<int>(fontSize-1), rgb);
+    if (isMatRotOrSkew(state->getTextMat())) {
+      // Hold all six entries of the text matrix: print_matrix() reads six
+      // values, so a four-entry copy made the debug output read past the end
+      // of the array. The rotation helpers are still given the same pointer.
+      double normalizedMatrix[6];
+      memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
+      // browser rotates the opposite way
+      // so flip the sign of the angle -> sin() components change sign
+      if (debug)
+        std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl;
+      normalizedMatrix[1] *= -1;
+      normalizedMatrix[2] *= -1;
+      if (debug)
+        std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl;
+      normalizeRotMat(normalizedMatrix);
+      if (debug)
+        std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl;
+      hfont.setRotMat(normalizedMatrix);
+    }
+    fontpos = fonts->AddFont(hfont);
+  } else {
+    // this means that the PDF file draws text without a current font,
+    // which should never happen
+    yMin = y - 0.95 * fontSize;
+    yMax = y + 0.35 * fontSize;
+    fontpos=0;
+  }
+  if (yMin == yMax) {
+    // this is a sanity check for a case that shouldn't happen -- but
+    // if it does happen, we want to avoid dividing by zero later
+    yMin = y;
+    yMax = y + 1;
+  }
+  col = 0;
+  text = NULL;
+  xRight = NULL;
+  link = NULL;
+  len = size = 0;
+  yxNext = NULL;
+  xyNext = NULL;
+  htext=new GooString();
+  dir = textDirUnknown;
+}
+
+
+// Releases the character and x-position arrays and the HTML text.
+HtmlString::~HtmlString() {
+  // the two arrays come from grealloc(), hence gfree()
+  gfree(xRight);
+  gfree(text);
+  delete htext;
+}
+
+// Appends one character to the string.
+//   x, dx - left edge and advance of the character; x + dx becomes the right
+//           edge stored for it and the new xMax of the string
+//   u     - the character itself
+// state, y and dy are accepted but not used here.
+void HtmlString::addChar(GfxState *state, double x, double y,
+                         double dx, double dy, Unicode u) {
+  // direction detection is not performed; an unknown direction becomes left-to-right
+  if (dir == textDirUnknown)
+    dir = textDirLeftRight;
+
+  // grow both arrays in steps of 16 entries once they are full
+  if (len == size) {
+    size += 16;
+    text = (Unicode *)grealloc(text, size * sizeof(Unicode));
+    xRight = (double *)grealloc(xRight, size * sizeof(double));
+  }
+
+  const double rightEdge = x + dx;
+  if (len == 0)
+    xMin = x;
+  text[len] = u;
+  xRight[len] = rightEdge;
+  xMax = rightEdge;
+  ++len;
+}
+
+// Finishes the string: right-to-left text is reversed in place, everything
+// else is left untouched.
+void HtmlString::endString()
+{
+  if( dir != textDirRightLeft || len <= 1 )
+    return;
+
+  // swap the outermost pair and move inwards until the indices meet
+  for (int lo = 0, hi = len - 1; lo < hi; ++lo, --hi)
+  {
+    const Unicode held = text[hi];
+    text[hi] = text[lo];
+    text[lo] = held;
+  }
+}
+
+//------------------------------------------------------------------------
+// HtmlPage
+//------------------------------------------------------------------------
+
+// Sets up an empty page.
+//   rawOrder  - stored and consulted when finished strings are inserted
+//   imgExtVal - image file extension, copied into imgExt
+HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) {
+  this->rawOrder = rawOrder;
+
+  // no strings yet
+  curStr = NULL;
+  yxStrings = NULL;
+  xyStrings = NULL;
+  yxCur1 = NULL;
+  yxCur2 = NULL;
+
+  // owned containers
+  fonts = new HtmlFontAccu();
+  links = new HtmlLinks();
+  imgList = new GooList();
+
+  // page geometry and bookkeeping
+  pageWidth = 0;
+  pageHeight = 0;
+  fontsPageMarker = 0;
+  DocName = NULL;
+  firstPage = -1;
+  imgExt = new GooString(imgExtVal);
+}
+
+// Frees the strings (through clear()) and then every object owned by the page.
+HtmlPage::~HtmlPage() {
+  // clear() must run first: it replaces fonts and links with fresh objects,
+  // which are the ones deleted below
+  clear();
+  delete imgExt;
+  delete links;
+  delete fonts;
+  delete DocName;
+  deleteGooList(imgList, HtmlImage);
+}
+
+// Refreshes fontSize from the graphics state, with an extra correction for
+// Type 3 fonts.
+void HtmlPage::updateFont(GfxState *state) {
+  fontSize = state->getTransformedFontSize();
+
+  GfxFont *font = state->getFont();
+  if (!font || font->getType() != fontType3)
+    return;
+
+  // This is a hack which makes it possible to deal with some Type 3
+  // fonts. The base coordinate system of such a font cannot be known
+  // without rendering it, so it is guessed from the width of the
+  // character 'm' (which fails if the font is a subset without 'm').
+  Gfx8BitFont *type3Font = (Gfx8BitFont *)font;
+  int code = 0;
+  for (; code < 256; ++code) {
+    char *name = type3Font->getCharName(code);
+    if (name && name[0] == 'm' && name[1] == '\0')
+      break;
+  }
+  if (code < 256) {
+    const double w = type3Font->getWidth(code);
+    // 0.6 stands for a generic average 'm' width -- yes, this is a hack
+    if (w != 0)
+      fontSize *= w / 0.6;
+  }
+
+  double *fm = font->getFontMatrix();
+  if (fm[0] != 0)
+    fontSize *= fabs(fm[3] / fm[0]);
+}
+
+// Starts a new current string at the text position of 'state', using the
+// current fontSize and the page's font accumulator. The parameter s is unused.
+void HtmlPage::beginString(GfxState *state, GooString *s) {
+ curStr = new HtmlString(state, fontSize, fonts);
+}
+
+
+// Regenerates the HTML text of every string on the page from its characters
+// and font, and attaches the link whose area contains the string, if any.
+void HtmlPage::conv(){
+  int linkIndex = 0;
+
+  HtmlString *cur = yxStrings;
+  while (cur) {
+    HtmlFont *font = fonts->Get(cur->fontpos);
+
+    // deleting a null pointer is harmless, so no check is needed
+    delete cur->htext;
+    cur->htext = HtmlFont::simple(font, cur->text, cur->len);
+
+    if (links->inLink(cur->xMin, cur->yMin, cur->xMax, cur->yMax, linkIndex))
+      cur->link = links->getLink(linkIndex);
+
+    cur = cur->yxNext;
+  }
+}
+
+
+// Adds the characters of one glyph to the current string, starting a new
+// string first when the glyph is too far from the previous one and the text
+// rotation changed.
+//   x, y   - glyph position before transformation
+//   dx, dy - glyph advance; the character spacing is subtracted from it
+//   ox, oy - accepted but not used here
+//   u,uLen - the code points produced by the glyph; the advance is divided
+//            evenly among them
+void HtmlPage::addChar(GfxState *state, double x, double y,
+                       double dx, double dy,
+                       double ox, double oy, Unicode *u, int uLen) {
+  double x1, y1, w1, h1, dx2, dy2;
+  int n, i;
+  state->transform(x, y, &x1, &y1);
+  n = curStr->len;
+
+  // check that new character is in the same direction as current string
+  // and is not too far away from it before adding
+  //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) ||
+  // XXX
+  if (debug) {
+    double *text_mat = state->getTextMat();
+    // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
+    // sin q is zero iff there is no rotation, or 180 deg. rotation;
+    // for 180 rotation, cos q will be negative
+    if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
+      std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl;
+      std::cerr << "text " << print_matrix(state->getTextMat());
+    }
+  }
+  if (n > 0 && // don't start a new string, unless there is already a string
+      // TODO: the following line assumes that text is flowing left to
+      // right, which will not necessarily be the case, e.g. if rotated;
+      // It assesses whether or not two characters are close enough to
+      // be part of the same string
+      fabs(x1 - curStr->xRight[n-1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) &&
+      // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
+      // sin q is zero iff there is no rotation, or 180 deg. rotation;
+      // for 180 rotation, cos q will be negative
+      !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat()))
+  {
+    endString();
+    beginString(state, NULL);
+  }
+  state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(),
+                            0, &dx2, &dy2);
+  dx -= dx2;
+  dy -= dy2;
+  state->transformDelta(dx, dy, &w1, &h1);
+  if (uLen != 0) {
+    w1 /= uLen;
+    h1 /= uLen;
+  }
+  for (i = 0; i < uLen; ++i) {
+    Unicode u1 = u[i];
+    // A high surrogate can only start a pair when another element follows it.
+    // The test must be on i + 1: testing i alone is always true inside this
+    // loop and let u[i + 1] be read past the end of the array.
+    if (u1 >= 0xd800 && u1 <= 0xdbff && i + 1 < uLen) {
+      // surrogate pair
+      const Unicode u2 = u[i + 1];
+      if (u2 >= 0xdc00 && u2 <= 0xdfff) {
+        u1 = 0x10000 + ((u1 - 0xd800) << 10) + (u2 - 0xdc00);
+
+        curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u1);
+      }
+      // NOTE(review): the following element is skipped even when it was not a
+      // low surrogate, so both elements are dropped in that case -- confirm
+      // this is intended.
+      ++i;
+    } else {
+      curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u1);
+    }
+  }
+}
+
+// Finishes the current string and links it into the y-major list (yxStrings).
+// Empty strings are discarded. In raw order the string is placed after the
+// most recently inserted one (yxCur1); otherwise the insertion point is the
+// yxCur1/yxCur2 pair when the string fits between them, or else it is found by
+// scanning the list from the start. curStr is NULL on return.
+void HtmlPage::endString() {
+ HtmlString *p1, *p2;
+ double h, y1, y2;
+
+ // throw away zero-length strings -- they don't have valid xMin/xMax
+ // values, and they're useless anyway
+ if (curStr->len == 0) {
+ delete curStr;
+ curStr = NULL;
+ return;
+ }
+
+ curStr->endString();
+
+#if 0 //~tmp
+ if (curStr->yMax - curStr->yMin > 20) {
+ delete curStr;
+ curStr = NULL;
+ return;
+ }
+#endif
+
+ // insert string in y-major list
+ // y1 and y2 are reference heights at 50% and 80% of the string's height
+ h = curStr->yMax - curStr->yMin;
+ y1 = curStr->yMin + 0.5 * h;
+ y2 = curStr->yMin + 0.8 * h;
+ if (rawOrder) {
+ p1 = yxCur1;
+ p2 = NULL;
+ } else if ((!yxCur1 ||
+ (y1 >= yxCur1->yMin &&
+ (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) &&
+ (!yxCur2 ||
+ (y1 < yxCur2->yMin ||
+ (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) {
+ // the string belongs between the cached neighbours
+ p1 = yxCur1;
+ p2 = yxCur2;
+ } else {
+ // search from the head for the first string that must come after curStr
+ for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) {
+ if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin))
+ break;
+ }
+ yxCur2 = p2;
+ }
+ yxCur1 = curStr;
+ // splice curStr between p1 and p2; a NULL p1 means it becomes the new head
+ if (p1)
+ p1->yxNext = curStr;
+ else
+ yxStrings = curStr;
+ curStr->yxNext = p2;
+ curStr = NULL;
+}
+
+// Finds the last occurrence of ss inside s.
+// Returns a pointer to its first character, or NULL when ss does not occur.
+static const char *strrstr( const char *s, const char *ss )
+{
+  const char *last = NULL;
+  const char *hit = strstr( s, ss );
+  while( hit != NULL ){
+    // remember this match and look for a later one starting one character on
+    last = hit;
+    hit = strstr( hit + 1, ss );
+  }
+  return last;
+}
+
+// Appends the closing tags requested by the three flags to htext.
+// When more than one tag has to be closed, the position of the last opening
+// occurrence of each tag is looked up so that the tag opened last is closed
+// first. The flags are passed by reference and are reset to false for a tag
+// that has been closed ahead of the default order.
+static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold )
+{
+ // each position is only searched for when its tag competes with another tag
+ const char *last_italic = finish_italic && ( finish_bold || finish_a ) ? strrstr( htext->getCString(), "<i>" ) : NULL;
+ const char *last_bold = finish_bold && ( finish_italic || finish_a ) ? strrstr( htext->getCString(), "<b>" ) : NULL;
+ const char *last_a = finish_a && ( finish_italic || finish_bold ) ? strrstr( htext->getCString(), "<a " ) : NULL;
+ // the anchor was opened after both other tags: close it first
+ if( finish_a && ( finish_italic || finish_bold ) && last_a > ( last_italic > last_bold ? last_italic : last_bold ) ){
+ htext->append("</a>", 4);
+ finish_a = false;
+ }
+ // italic was opened after bold: close italic first
+ if( finish_italic && finish_bold && last_italic > last_bold ){
+ htext->append("</i>", 4);
+ finish_italic = false;
+ }
+ // whatever is still pending is closed in the order bold, italic, anchor
+ if( finish_bold )
+ htext->append("</b>", 4);
+ if( finish_italic )
+ htext->append("</i>", 4);
+ if( finish_a )
+ htext->append("</a>");
+}
+
+// Strings are lines of text.
+// This function aims to combine strings into lines and paragraphs if !noMerge.
+// It may also strip out duplicate strings (if they are on top of each other);
+// sometimes such duplicates are used to create a font effect.
+// The function works on the y-major list (yxStrings): a merged string is
+// absorbed into its predecessor and deleted, and the <b>, <i> and link tags
+// are opened and closed around the HTML text as fonts and links change.
+void HtmlPage::coalesce() {
+ HtmlString *str1, *str2;
+ HtmlFont *hfont1, *hfont2;
+ double space, horSpace, vertSpace, vertOverlap;
+ GBool addSpace, addLineBreak;
+ int n, i;
+ double curX, curY;
+
+#if 0 //~ for debugging
+ for (str1 = yxStrings; str1; str1 = str1->yxNext) {
+ printf("x=%f..%f y=%f..%f size=%2d '",
+ str1->xMin, str1->xMax, str1->yMin, str1->yMax,
+ (int)(str1->yMax - str1->yMin));
+ for (i = 0; i < str1->len; ++i) {
+ fputc(str1->text[i] & 0xff, stdout);
+ }
+ printf("'\n");
+ }
+ printf("\n------------------------------------------------------------\n\n");
+#endif
+ str1 = yxStrings;
+
+ // nothing to do for a page without strings
+ if( !str1 ) return;
+
+ //----- discard duplicated text (fake boldface, drop shadows)
+ if( !complexMode )
+ { /* if not in complex mode get rid of duplicate strings */
+ HtmlString *str3;
+ GBool found;
+ while (str1)
+ {
+ double size = str1->yMax - str1->yMin;
+ double xLimit = str1->xMin + size * 0.2;
+ found = gFalse;
+ // str3 is the candidate duplicate, str2 its predecessor in the list
+ for (str2 = str1, str3 = str1->yxNext;
+ str3 && str3->xMin < xLimit;
+ str2 = str3, str3 = str2->yxNext)
+ {
+ if (str3->len == str1->len &&
+ !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) &&
+ fabs(str3->yMin - str1->yMin) < size * 0.2 &&
+ fabs(str3->yMax - str1->yMax) < size * 0.2 &&
+ fabs(str3->xMax - str1->xMax) < size * 0.2)
+ {
+ found = gTrue;
+ //printf("found duplicate!\n");
+ break;
+ }
+ }
+ if (found)
+ {
+ // unlink and delete the duplicate; str1 is kept so that it is compared
+ // against the remaining candidates again
+ str2->xyNext = str3->xyNext;
+ str2->yxNext = str3->yxNext;
+ delete str3;
+ }
+ else
+ {
+ str1 = str1->yxNext;
+ }
+ }
+ } /*- !complexMode */
+
+ str1 = yxStrings;
+
+ // open the tags required by the first string
+ hfont1 = getFont(str1);
+ if( hfont1->isBold() )
+ str1->htext->insert(0,"<b>",3);
+ if( hfont1->isItalic() )
+ str1->htext->insert(0,"<i>",3);
+ if( str1->getLink() != NULL ) {
+ GooString *ls = str1->getLink()->getLinkStart();
+ str1->htext->insert(0, ls);
+ delete ls;
+ }
+ // curX/curY remember the origin of the string being built; they are written
+ // back to xMin/yMin once the string is complete
+ curX = str1->xMin; curY = str1->yMin;
+
+ while (str1 && (str2 = str1->yxNext)) {
+ hfont2 = getFont(str2);
+ space = str1->yMax - str1->yMin; // the height of the font's bounding box
+ horSpace = str2->xMin - str1->xMax;
+ // if strings line up on left-hand side AND they are on subsequent lines, we need a line break
+ addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax);
+ vertSpace = str2->yMin - str1->yMax;
+
+//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d);
+
+ if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax)
+ {
+ vertOverlap = str1->yMax - str2->yMin;
+ } else
+ if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax)
+ {
+ vertOverlap = str2->yMax - str1->yMin;
+ } else
+ {
+ vertOverlap = 0;
+ }
+
+ // Combine strings if:
+ // They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following:
+ // 1. They appear to be part of the same line of text
+ // 2. They appear to be subsequent lines of a paragraph
+ // We assume (1) or (2) above, respectively, based on:
+ // (1) strings overlap vertically AND
+ // horizontal space between end of str1 and start of str2 is consistent with a single space or less;
+ // when rawOrder, the strings have to overlap vertically by at least 50%
+ // (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left
+ if (
+ (
+ (
+ (
+ (rawOrder && vertOverlap > 0.5 * space)
+ ||
+ (!rawOrder && str2->yMin < str1->yMax)
+ ) &&
+ (horSpace > -0.5 * space && horSpace < space)
+ ) ||
+ (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)
+ ) &&
+ (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not matter
+ str1->dir == str2->dir // text direction the same
+ )
+ {
+// printf("yes\n");
+ // n is the number of characters of the merged string, including the
+ // optional separating space and line break
+ n = str1->len + str2->len;
+ if ((addSpace = horSpace > wordBreakThreshold * space)) {
+ ++n;
+ }
+ if (addLineBreak) {
+ ++n;
+ }
+
+ // round the capacity up to a multiple of 16
+ str1->size = (n + 15) & ~15;
+ str1->text = (Unicode *)grealloc(str1->text,
+ str1->size * sizeof(Unicode));
+ str1->xRight = (double *)grealloc(str1->xRight,
+ str1->size * sizeof(double));
+ if (addSpace) {
+ str1->text[str1->len] = 0x20;
+ str1->htext->append(xml?" ":"&#160;");
+ str1->xRight[str1->len] = str2->xMin;
+ ++str1->len;
+ }
+ if (addLineBreak) {
+ str1->text[str1->len] = '\n';
+ str1->htext->append("<br/>");
+ str1->xRight[str1->len] = str2->xMin;
+ ++str1->len;
+ str1->yMin = str2->yMin;
+ str1->yMax = str2->yMax;
+ str1->xMax = str2->xMax;
+ int fontLineSize = hfont1->getLineSize();
+ int curLineSize = (int)(vertSpace + space);
+ if( curLineSize != fontLineSize )
+ {
+ // register a copy of the font carrying the measured line size
+ HtmlFont *newfnt = new HtmlFont(*hfont1);
+ newfnt->setLineSize(curLineSize);
+ str1->fontpos = fonts->AddFont(*newfnt);
+ delete newfnt;
+ hfont1 = getFont(str1);
+ // we have to reget hfont2 because its location could have
+ // changed on resize
+ hfont2 = getFont(str2);
+ }
+ }
+ for (i = 0; i < str2->len; ++i) {
+ str1->text[str1->len] = str2->text[i];
+ str1->xRight[str1->len] = str2->xRight[i];
+ ++str1->len;
+ }
+
+ /* fix <i>, <b> if str1 and str2 differ and handle switch of links */
+ HtmlLink *hlink1 = str1->getLink();
+ HtmlLink *hlink2 = str2->getLink();
+ bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2);
+ GBool finish_a = switch_links && hlink1 != NULL;
+ GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a );
+ GBool finish_bold = hfont1->isBold() && ( !hfont2->isBold() || finish_a || finish_italic );
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+ if( switch_links && hlink2 != NULL ) {
+ GooString *ls = hlink2->getLinkStart();
+ str1->htext->append(ls);
+ delete ls;
+ }
+ if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() )
+ str1->htext->append("<i>", 3);
+ if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() )
+ str1->htext->append("<b>", 3);
+
+
+ str1->htext->append(str2->htext);
+ // str1 now contains href for link of str2 (if it is defined)
+ str1->link = str2->link;
+ hfont1 = hfont2;
+ if (str2->xMax > str1->xMax) {
+ str1->xMax = str2->xMax;
+ }
+ if (str2->yMax > str1->yMax) {
+ str1->yMax = str2->yMax;
+ }
+ // str2 has been absorbed: unlink and delete it, then compare str1 with
+ // its new successor on the next iteration
+ str1->yxNext = str2->yxNext;
+ delete str2;
+ } else { // keep strings separate
+// printf("no\n");
+ // close every tag still open in str1
+ GBool finish_a = str1->getLink() != NULL;
+ GBool finish_bold = hfont1->isBold();
+ GBool finish_italic = hfont1->isItalic();
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+
+ str1->xMin = curX; str1->yMin = curY;
+ str1 = str2;
+ curX = str1->xMin; curY = str1->yMin;
+ hfont1 = hfont2;
+ // open the tags required by the string that starts here
+ if( hfont1->isBold() )
+ str1->htext->insert(0,"<b>",3);
+ if( hfont1->isItalic() )
+ str1->htext->insert(0,"<i>",3);
+ if( str1->getLink() != NULL ) {
+ GooString *ls = str1->getLink()->getLinkStart();
+ str1->htext->insert(0, ls);
+ delete ls;
+ }
+ }
+ }
+ str1->xMin = curX; str1->yMin = curY;
+
+ // close the tags of the last string
+ GBool finish_bold = hfont1->isBold();
+ GBool finish_italic = hfont1->isItalic();
+ GBool finish_a = str1->getLink() != NULL;
+ CloseTags( str1->htext, finish_a, finish_italic, finish_bold );
+
+#if 0 //~ for debugging
+ for (str1 = yxStrings; str1; str1 = str1->yxNext) {
+ printf("x=%3d..%3d y=%3d..%3d size=%2d ",
+ (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax,
+ (int)(str1->yMax - str1->yMin));
+ printf("'%s'\n", str1->htext->getCString());
+ }
+ printf("\n------------------------------------------------------------\n\n");
+#endif
+
+}
+
+// Writes the page to f as a <page> element containing the font styles added
+// since fontsPageMarker, one <image> element per recorded image and one <text>
+// element per string. The image list is emptied (and its entries deleted) in
+// the process.
+void HtmlPage::dumpAsXML(FILE* f,int page){
+ fprintf(f, "<page number=\"%d\" position=\"absolute\"", page);
+ fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth);
+
+ for(int i=fontsPageMarker;i < fonts->size();i++) {
+ GooString *fontCSStyle = fonts->CSStyle(i);
+ fprintf(f,"\t%s\n",fontCSStyle->getCString());
+ delete fontCSStyle;
+ }
+
+ // the length is taken once because every iteration removes the first entry
+ int listlen=imgList->getLength();
+ for (int i = 0; i < listlen; i++) {
+ HtmlImage *img = (HtmlImage*)imgList->del(0);
+ fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin));
+ fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin));
+ fprintf(f,"src=\"%s\"/>\n",img->fName->getCString());
+ delete img;
+ }
+
+ for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){
+ if (tmp->htext){
+ fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin));
+ fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin));
+ fprintf(f,"font=\"%d\">", tmp->fontpos);
+ fputs(tmp->htext->getCString(),f);
+ fputs("</text>\n",f);
+ }
+ }
+ fputs("</page>\n",f);
+}
+
+// Writes a <STYLE> element defining the classes xflip, yflip and xyflip to f.
+// HtmlPage::dump() refers to these class names when it emits mirrored images.
+static void printCSS(FILE *f)
+{
+ // Image flip/flop CSS
+ // Source:
+ // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
+ // tested in Chrome, Fx (Linux) and IE9 (W7)
+ static const char css[] =
+ "<STYLE type=\"text/css\">" "\n"
+ "<!--" "\n"
+ ".xflip {" "\n"
+ " -moz-transform: scaleX(-1);" "\n"
+ " -webkit-transform: scaleX(-1);" "\n"
+ " -o-transform: scaleX(-1);" "\n"
+ " transform: scaleX(-1);" "\n"
+ " filter: fliph;" "\n"
+ "}" "\n"
+ ".yflip {" "\n"
+ " -moz-transform: scaleY(-1);" "\n"
+ " -webkit-transform: scaleY(-1);" "\n"
+ " -o-transform: scaleY(-1);" "\n"
+ " transform: scaleY(-1);" "\n"
+ " filter: flipv;" "\n"
+ "}" "\n"
+ ".xyflip {" "\n"
+ " -moz-transform: scaleX(-1) scaleY(-1);" "\n"
+ " -webkit-transform: scaleX(-1) scaleY(-1);" "\n"
+ " -o-transform: scaleX(-1) scaleY(-1);" "\n"
+ " transform: scaleX(-1) scaleY(-1);" "\n"
+ " filter: fliph + flipv;" "\n"
+ "}" "\n"
+ "-->" "\n"
+ "</STYLE>" "\n";
+
+ // sizeof(css)-1 leaves out the terminating NUL of the literal
+ fwrite( css, sizeof(css)-1, 1, f );
+}
+
+// Selects the output file for a page in complex mode and writes its header.
+//   file     - the shared output file, used as page file when noframes is set
+//   pageFile - receives the file the page has to be written to
+//   page     - page number
+// With frames, a file named after DocName is opened: "<DocName>-<page>.html"
+// for writing, or "<DocName>-html.html" for appending when singleHtml is set.
+// Returns 0 on success and 1 when that file cannot be opened.
+int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page) {
+ GooString* tmp;
+
+ if( !noframes )
+ {
+ GooString* pgNum=GooString::fromInt(page);
+ tmp = new GooString(DocName);
+ if (!singleHtml){
+ tmp->append('-')->append(pgNum)->append(".html");
+ pageFile = fopen(tmp->getCString(), "w");
+ } else {
+ tmp->append("-html")->append(".html");
+ pageFile = fopen(tmp->getCString(), "a");
+ }
+ delete pgNum;
+ if (!pageFile) {
+ error(errIO, -1, "Couldn't open html file '{0:t}'", tmp);
+ delete tmp;
+ return 1;
+ }
+
+ // the title is the page number, or the file name in single-file mode
+ if (!singleHtml)
+ fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page);
+ else
+ fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString());
+
+ delete tmp;
+
+ GooString *htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
+ if (!singleHtml)
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
+ else
+ fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding->getCString());
+ delete htmlEncoding;
+ }
+ else
+ {
+ // without frames every page goes to the shared file, marked by a comment
+ // and a named anchor
+ pageFile = file;
+ fprintf(pageFile,"<!-- Page %d -->\n", page);
+ fprintf(pageFile,"<a name=\"%d\"></a>\n", page);
+ }
+
+ return 0;
+}
+
+// Writes the page in complex mode: the font styles added since
+// fontsPageMarker, a DIV of the page's size, optionally the background image,
+// and one absolutely positioned <P> element per string. With frames the page
+// file is opened by dumpComplexHeaders() and closed again here.
+void HtmlPage::dumpComplex(FILE *file, int page){
+ FILE* pageFile;
+ GooString* tmp;
+
+ // remember the first page dumped; background image numbers are relative to it
+ if( firstPage == -1 ) firstPage = page;
+
+ if (dumpComplexHeaders(file, pageFile, page)) { error(errIO, -1, "Couldn't write headers."); return; }
+
+ tmp=basename(DocName);
+
+ fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile);
+ fputs("\tp {margin: 0; padding: 0;}",pageFile);
+ for(int i=fontsPageMarker;i!=fonts->size();i++) {
+ GooString *fontCSStyle;
+ if (!singleHtml)
+ fontCSStyle = fonts->CSStyle(i);
+ else
+ fontCSStyle = fonts->CSStyle(i,page);
+ fprintf(pageFile,"\t%s\n",fontCSStyle->getCString());
+ delete fontCSStyle;
+ }
+
+ fputs("-->\n</STYLE>\n",pageFile);
+
+ if( !noframes )
+ {
+ fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile);
+ }
+
+ fprintf(pageFile,"<DIV id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n",
+ page, pageWidth, pageHeight);
+
+ if( !ignore )
+ {
+ // background image: base name of the document plus a three-digit index
+ fprintf(pageFile,
+ "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" alt=\"background image\"/>\n",
+ pageWidth, pageHeight, tmp->getCString(),
+ (page-firstPage+1), imgExt->getCString());
+ }
+
+ delete tmp;
+
+ for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){
+ if (tmp1->htext){
+ fprintf(pageFile,
+ "<P style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft",
+ xoutRound(tmp1->yMin),
+ xoutRound(tmp1->xMin));
+ // the class name is "ft" + ('0' or the page number) + the font index
+ if (!singleHtml) {
+ fputc('0', pageFile);
+ } else {
+ fprintf(pageFile, "%d", page);
+ }
+ fprintf(pageFile,"%d\">", tmp1->fontpos);
+ fputs(tmp1->htext->getCString(), pageFile);
+ fputs("</P>\n", pageFile);
+ }
+ }
+
+ fputs("</DIV>\n", pageFile);
+
+ if( !noframes )
+ {
+ fputs("</BODY>\n</HTML>\n",pageFile);
+ fclose(pageFile);
+ }
+}
+
+
+// Writes the page to f. Complex and single-file modes are delegated to
+// dumpAsXML() or dumpComplex(); otherwise a page anchor, the images recorded
+// for the page and the text of every string are emitted, followed by a rule.
+void HtmlPage::dump(FILE *f, int pageNum)
+{
+  if (complexMode || singleHtml)
+  {
+    if (xml)
+      dumpAsXML(f, pageNum);
+    else
+      dumpComplex(f, pageNum);
+    return;
+  }
+
+  fprintf(f,"<A name=%d></a>",pageNum);
+
+  // Emit and discard the images of this page; the first entry is removed on
+  // every pass, so the count is fixed up front.
+  // see printCSS() for class names
+  static const char *const flipClass[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" };
+  for (int remaining = imgList->getLength(); remaining > 0; --remaining) {
+    HtmlImage *img = (HtmlImage*)imgList->del(0);
+
+    // bit 0 selects a horizontal flip, bit 1 a vertical one
+    const int which = (img->xMin > img->xMax ? 1 : 0) + (img->yMin > img->yMax ? 2 : 0);
+
+    fprintf(f,"<IMG%s src=\"%s\"/><br/>\n",flipClass[which],img->fName->getCString());
+    delete img;
+  }
+
+  for (HtmlString *line = yxStrings; line; line = line->yxNext) {
+    if (!line->htext)
+      continue;
+    fputs(line->htext->getCString(),f);
+    fputs("<br/>\n",f);
+  }
+  fputs("<hr>\n",f);
+}
+
+
+
+// Discards all strings and links of the page. The font accumulator is
+// replaced when frames are in use; without frames it is kept and only the
+// marker separating this page's fonts from earlier ones is moved.
+void HtmlPage::clear() {
+  // deleting a null pointer is harmless
+  delete curStr;
+  curStr = NULL;
+
+  HtmlString *node = yxStrings;
+  while (node) {
+    HtmlString *following = node->yxNext;
+    delete node;
+    node = following;
+  }
+  yxStrings = NULL;
+  xyStrings = NULL;
+  yxCur1 = NULL;
+  yxCur2 = NULL;
+
+  if( noframes )
+  {
+    fontsPageMarker = fonts->size();
+  }
+  else
+  {
+    delete fonts;
+    fonts = new HtmlFontAccu();
+    fontsPageMarker = 0;
+  }
+
+  delete links;
+  links = new HtmlLinks();
+}
+
+// Stores a copy of fname as the document name used to build output file names.
+// Any previously stored name is released first, so calling this more than
+// once no longer leaks the old string (DocName starts out as NULL, and
+// deleting NULL is harmless).
+void HtmlPage::setDocName(char *fname){
+  delete DocName;
+  DocName=new GooString(fname);
+}
+
+// Records an image for this page; its placement is derived from 'state' by
+// the HtmlImage constructor, and fname is handed over to the new entry.
+void HtmlPage::addImage(GooString *fname, GfxState *state) {
+  imgList->append(new HtmlImage(fname, state));
+}
+
+//------------------------------------------------------------------------
+// HtmlMetaVar
+//------------------------------------------------------------------------
+
+// Stores private copies of the name and the content of a META variable.
+HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content)
+{
+ name = new GooString(_name);
+ content = new GooString(_content);
+}
+
+// Releases the two strings allocated by the constructor.
+HtmlMetaVar::~HtmlMetaVar()
+{
+ delete name;
+ delete content;
+}
+
+// Builds the META element for this variable.
+// Returns a newly allocated string of the form
+// <META name="NAME" content="CONTENT"/>.
+GooString* HtmlMetaVar::toString()
+{
+  GooString *element = new GooString("<META name=\"");
+  // append() returns the string it was called on, so the pieces can be chained
+  element->append(name)
+         ->append("\" content=\"")
+         ->append(content)
+         ->append("\"/>");
+  return element;
+}
+
+//------------------------------------------------------------------------
+// HtmlOutputDev
+//------------------------------------------------------------------------
+
+// Pairs of encoding names: the name looked up by mapEncodingToHtml() and the
+// name it is replaced with. The table ends with a {NULL, NULL} entry.
+static const char* HtmlEncodings[][2] = {
+ {"Latin1", "ISO-8859-1"},
+ {NULL, NULL}
+};
+
+// Translates an encoding name through the HtmlEncodings table.
+// When the name is listed, 'encoding' is deleted and a newly allocated string
+// with the replacement name is returned; otherwise 'encoding' itself is
+// returned. Either way the caller ends up owning exactly the returned string.
+GooString* HtmlOutputDev::mapEncodingToHtml(GooString* encoding)
+{
+  int row = 0;
+  while (HtmlEncodings[row][0] != NULL)
+  {
+    if( encoding->cmp(HtmlEncodings[row][0]) == 0 )
+    {
+      delete encoding;
+      return new GooString(HtmlEncodings[row][1]);
+    }
+    ++row;
+  }
+  return encoding;
+}
+
+// Writes the frameset document "<Docname>.html", which shows the link index
+// ("<base>_ind.html") beside the contents. The contents frame starts at
+// "<base>-<firstPage>.html" in complex mode and at "<base>s.html" otherwise.
+// An error is reported and nothing is written when the file cannot be opened.
+// fContentsFrame is used as the file handle and is closed before returning.
+void HtmlOutputDev::doFrame(int firstPage){
+ GooString* fName=new GooString(Docname);
+ GooString* htmlEncoding;
+ fName->append(".html");
+
+ if (!(fContentsFrame = fopen(fName->getCString(), "w"))){
+ error(errIO, -1, "Couldn't open html file '{0:t}'", fName);
+ delete fName;
+ return;
+ }
+
+ delete fName;
+
+ // from here on fName holds the base name used inside the frame references
+ fName=basename(Docname);
+ fputs(DOCTYPE, fContentsFrame);
+ fputs("\n<HTML>",fContentsFrame);
+ fputs("\n<HEAD>",fContentsFrame);
+ fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString());
+ htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
+ fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
+ dumpMetaVars(fContentsFrame);
+ fprintf(fContentsFrame, "</HEAD>\n");
+ fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame);
+ fprintf(fContentsFrame,"<FRAME name=\"links\" src=\"%s_ind.html\">\n",fName->getCString());
+ fputs("<FRAME name=\"contents\" src=",fContentsFrame);
+ if (complexMode)
+ fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage);
+ else
+ fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString());
+
+ fputs(">\n</FRAMESET>\n</HTML>\n",fContentsFrame);
+
+ delete fName;
+ delete htmlEncoding;
+ fclose(fContentsFrame);
+}
+
+// Set up the output device and open the output files that the selected mode
+// (xml / noframes / singleHtml / complexMode / stout globals) requires.
+//
+//   catalogA   catalog used later to resolve link destinations
+//   fileName   path prefix for every generated file
+//   title      text written into <TITLE>
+//   author, keywords, subject, date
+//              optional (may be NULL) values emitted as meta variables
+//   extension  passed through to HtmlPage
+//   rawOrder   passed through to HtmlPage and stored
+//   firstPage  page number the frameset initially points at (complex mode)
+//   outline    whether an "Outline" entry is added to the index frame
+//
+// On any fopen() failure the constructor reports the error and returns with
+// ok == gFalse; callers are expected to check isOk().
+HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title,
+	char *author, char *keywords, char *subject, char *date,
+	char *extension,
+	GBool rawOrder, int firstPage, GBool outline)
+{
+  catalog = catalogA;
+  fContentsFrame = NULL;
+  // The destructor tests `page != NULL`, and several paths below return (or
+  // simply finish, in complex mode with frames) without ever assigning it.
+  // Start from a known value so the destructor never reads an indeterminate
+  // pointer.
+  page = NULL;
+  // Assigned by checkPageSlice(); give it a defined value until then.
+  docPage = NULL;
+  docTitle = new GooString(title);
+  pages = NULL;
+  dumpJPEG=gTrue;
+  this->rawOrder = rawOrder;
+  this->doOutline = outline;
+  ok = gFalse;
+  needClose = gFalse;
+  pages = new HtmlPage(rawOrder, extension);
+
+  glMetaVars = new GooList();
+  glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36"));
+  if( author ) glMetaVars->append(new HtmlMetaVar("author", author));
+  if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords));
+  if( date ) glMetaVars->append(new HtmlMetaVar("date", date));
+  if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject));
+
+  maxPageWidth = 0;
+  maxPageHeight = 0;
+
+  pages->setDocName(fileName);
+  Docname=new GooString (fileName);
+
+  // for non-xml output (complex or simple) with frames generate the left frame
+  if(!xml && !noframes)
+  {
+     if (!singleHtml)
+     {
+         GooString* left=new GooString(fileName);
+         left->append("_ind.html");
+
+         // doFrame() borrows fContentsFrame for the frameset file and closes
+         // it again; the member is re-pointed at the index file right below.
+         doFrame(firstPage);
+
+         if (!(fContentsFrame = fopen(left->getCString(), "w")))
+         {
+             error(errIO, -1, "Couldn't open html file '{0:t}'", left);
+             delete left;
+             return;
+         }
+         delete left;
+         fputs(DOCTYPE, fContentsFrame);
+         fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n", fContentsFrame);
+
+         if (doOutline)
+         {
+             GooString *str = basename(Docname);
+             fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline");
+             delete str;
+         }
+     }
+     if (!complexMode)
+     {	/* not in complex mode */
+
+       GooString* right=new GooString(fileName);
+       right->append("s.html");
+
+       if (!(page=fopen(right->getCString(),"w"))){
+         error(errIO, -1, "Couldn't open html file '{0:t}'", right);
+         delete right;
+         return;
+       }
+       delete right;
+       fputs(DOCTYPE, page);
+       fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n",page);
+       printCSS(page);
+       fputs("</HEAD>\n<BODY>\n",page);
+     }
+  }
+
+  if (noframes) {
+    if (stout) page=stdout;
+    else {
+      GooString* right=new GooString(fileName);
+      if (!xml) right->append(".html");
+      if (xml) right->append(".xml");
+      if (!(page=fopen(right->getCString(),"w"))){
+        error(errIO, -1, "Couldn't open html file '{0:t}'", right);
+        delete right;
+        return;
+      }
+      delete right;
+    }
+
+    GooString *htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
+    if (xml)
+    {
+      fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding->getCString());
+      fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
+      fprintf(page,"<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
+    }
+    else
+    {
+      fprintf(page,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n", DOCTYPE, docTitle->getCString());
+
+      fprintf(page, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString());
+
+      dumpMetaVars(page);
+      printCSS(page);
+      fprintf(page,"</HEAD>\n");
+      fprintf(page,"<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
+    }
+    delete htmlEncoding;
+  }
+  ok = gTrue;
+}
+
+// Release owned objects and finish any output files that are still open.
+// The index frame always receives the closing BODY/HTML tags; the page file
+// receives an XML or HTML trailer depending on the output mode, and is left
+// alone in complex mode with frames.
+HtmlOutputDev::~HtmlOutputDev() {
+  HtmlFont::clear();
+
+  delete Docname;
+  delete docTitle;
+
+  deleteGooList(glMetaVars, HtmlMetaVar);
+
+  if (fContentsFrame != NULL) {
+    fputs("</BODY>\n</HTML>\n",fContentsFrame);
+    fclose(fContentsFrame);
+  }
+
+  if (page != NULL) {
+    // Select the trailer for the current mode; NULL means "leave the file
+    // untouched".
+    const char *trailer = NULL;
+    if (xml) {
+      trailer = "</pdf2xml>\n";
+    } else if (!complexMode || noframes) {
+      trailer = "</BODY>\n</HTML>\n";
+    }
+    if (trailer != NULL) {
+      fputs(trailer,page);
+      fclose(page);
+    }
+  }
+
+  // delete on a null pointer is a no-op, so no guard is needed.
+  delete pages;
+}
+
+// Begin a new page: remember its number, reset the per-page text container,
+// add a "Page N" entry to the index frame (when frames are in use and the
+// index file is open) and record the page dimensions.
+void HtmlOutputDev::startPage(int pageNum, GfxState *state) {
+#if 0
+ if (mode&&!xml){
+ if (write){
+ write=gFalse;
+ GooString* fname=Dirname(Docname);
+ fname->append("image.log");
+ if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){
+ printf("Error : can not open %s",fname);
+ exit(1);
+ }
+ delete fname;
+ // if(state->getRotation()!=0)
+ // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1());
+ // else
+ fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1());
+ }
+ }
+#endif
+
+  this->pageNum = pageNum;
+  pages->clear();
+
+  GooString *docBase = basename(Docname);
+  if (!noframes && fContentsFrame != NULL) {
+    // The link target differs between complex mode (one file per page) and
+    // simple mode (anchors inside a single file).
+    if (!complexMode) {
+      fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",docBase->getCString(),pageNum);
+    } else {
+      fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",docBase->getCString(),pageNum);
+    }
+    fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum);
+  }
+  delete docBase;
+
+  pages->pageWidth=static_cast<int>(state->getPageWidth());
+  pages->pageHeight=static_cast<int>(state->getPageHeight());
+}
+
+
+// Finish the current page: register every link annotation of docPage with
+// the page container, post-process the collected text and dump it to the
+// page file.
+void HtmlOutputDev::endPage() {
+  Links *pageLinks = docPage->getLinks();
+  const int linkCount = pageLinks->getNumLinks();
+  for (int idx = 0; idx != linkCount; ++idx) {
+    doProcessLink(pageLinks->getLink(idx));
+  }
+  delete pageLinks;
+
+  pages->conv();
+  pages->coalesce();
+  pages->dump(page, pageNum);
+
+  // Pages of different sizes are not handled for complex output yet (running
+  // ghostscript once per size would be very inefficient), so despite their
+  // names these members simply hold the size of the most recent page.
+  maxPageWidth = pages->pageWidth;
+  maxPageHeight = pages->pageHeight;
+
+  // Progress report; suppressed when writing to stdout or in quiet mode.
+  if (!(stout || globalParams->getErrQuiet())) {
+    printf("Page-%d\n",(pageNum));
+  }
+}
+
+// Forward a font change in the graphics state to the current page container.
+void HtmlOutputDev::updateFont(GfxState *state) {
+ pages->updateFont(state);
+}
+
+// Forward the start of a text string to the current page container.
+void HtmlOutputDev::beginString(GfxState *state, GooString *s) {
+ pages->beginString(state, s);
+}
+
+// Forward the end of a text string to the current page container.
+// The graphics state argument is not used here.
+void HtmlOutputDev::endString(GfxState *state) {
+ pages->endString();
+}
+
+// Add one character to the current page, unless hidden text is being
+// suppressed (showHidden is off) and the low two bits of the text render
+// mode are both set. The character code and byte count are not used; only
+// the Unicode mapping is passed on.
+void HtmlOutputDev::drawChar(GfxState *state, double x, double y,
+	      double dx, double dy,
+	      double originX, double originY,
+	      CharCode code, int /*nBytes*/, Unicode *u, int uLen)
+{
+  if (showHidden || (state->getRender() & 3) != 3) {
+    pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
+  }
+}
+
+// Dump a DCT (JPEG) encoded image unchanged into "<Docname>-<page>_<n>.jpg"
+// and register the file with the current page.
+//
+//   state  graphics state, passed on to HtmlPage::addImage
+//   str    the DCT filter stream; its underlying stream (getNextStream) is
+//          copied byte for byte, i.e. the data is not decoded
+//
+// The file name string is handed to pages->addImage() on success and freed
+// here on failure.
+void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
+{
+  // open the image file
+  GooString *fName=createImageFileName("jpg");
+  FILE *f1 = fopen(fName->getCString(), "wb");
+  if (!f1) {
+    // error() is used with "{0:t}" placeholders elsewhere in this file
+    // (see the "Couldn't open html file" messages); a printf-style "%s"
+    // is not one of them.
+    error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
+    delete fName;
+    return;
+  }
+
+  // initialize stream
+  str = str->getNextStream();
+  str->reset();
+
+  // copy the stream
+  int c;
+  while ((c = str->getChar()) != EOF)
+    fputc(c, f1);
+
+  fclose(f1);
+
+  // fName cannot be NULL here (operator new does not return NULL), so the
+  // image is registered unconditionally.
+  pages->addImage(fName, state);
+}
+
+// Decode an image (or image mask) and write it as
+// "<Docname>-<page>_<n>.png", then register the file with the current page.
+// Compiled to a no-op when ENABLE_LIBPNG is not defined.
+//
+//   state     graphics state, passed on to HtmlPage::addImage
+//   str       image data stream
+//   width     image width in pixels
+//   height    image height in pixels
+//   colorMap  colour map used to convert samples to RGB; may be NULL only
+//             when isMask is true
+//   isMask    gTrue to write a monochrome mask, gFalse for an RGB image
+//
+// Every failure path releases the row buffer and the file name string;
+// fName is handed to pages->addImage() only on success.
+void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height,
+                                 GfxImageColorMap *colorMap, GBool isMask)
+{
+#ifdef ENABLE_LIBPNG
+  FILE *f1;
+
+  if (!colorMap && !isMask) {
+    error(errInternal, -1, "Can't have color image without a color map");
+    return;
+  }
+
+  // open the image file
+  GooString *fName=createImageFileName("png");
+  if (!(f1 = fopen(fName->getCString(), "wb"))) {
+    error(errIO, -1, "Couldn't open image file '{0:t}'", fName);
+    delete fName;
+    return;
+  }
+
+  PNGWriter *writer = new PNGWriter( isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB );
+  // TODO can we calculate the resolution of the image?
+  if (!writer->init(f1, width, height, 72, 72)) {
+    error(errInternal, -1, "Can't init PNG for image '{0:t}'", fName);
+    delete writer;
+    fclose(f1);
+    delete fName;
+    return;
+  }
+
+  // Cleared as soon as a row cannot be written; the cleanup common to both
+  // branches then happens in one place below.
+  GBool written = gTrue;
+
+  if (!isMask) {
+    Guchar *p;
+    GfxRGB rgb;
+    png_byte *row = (png_byte *) gmalloc(3 * width);   // 3 bytes/pixel: RGB
+    png_bytep *row_pointer= &row;
+
+    // Initialize the image stream
+    ImageStream *imgStr = new ImageStream(str, width,
+                          colorMap->getNumPixelComps(), colorMap->getBits());
+    imgStr->reset();
+
+    // For each line...
+    for (int y = 0; written && y < height; y++) {
+
+      // Convert into a PNG row
+      p = imgStr->getLine();
+      for (int x = 0; x < width; x++) {
+        colorMap->getRGB(p, &rgb);
+        // Write the RGB pixels into the row
+        row[3*x]= colToByte(rgb.r);
+        row[3*x+1]= colToByte(rgb.g);
+        row[3*x+2]= colToByte(rgb.b);
+        p += colorMap->getNumPixelComps();
+      }
+
+      if (!writer->writeRow(row_pointer)) {
+        written = gFalse;
+      }
+    }
+    gfree(row);
+    // As before, the image stream is only close()d on the success path.
+    if (written) {
+      imgStr->close();
+    }
+    delete imgStr;
+  }
+  else { // isMask == true
+    ImageStream *imgStr = new ImageStream(str, width, 1, 1);
+    imgStr->reset();
+
+    Guchar *png_row = (Guchar *)gmalloc( width );
+
+    for (int ri = 0; written && ri < height; ++ri)
+    {
+      // read the row of the mask
+      Guchar *bit_row = imgStr->getLine();
+
+      // invert for PNG
+      for(int i = 0; i < width; i++)
+        png_row[i] = bit_row[i] ? 0xff : 0x00;
+
+      if (!writer->writeRow( &png_row ))
+      {
+        written = gFalse;
+      }
+    }
+    if (written) {
+      imgStr->close();
+    }
+    delete imgStr;
+    gfree(png_row);
+  }
+
+  if (!written) {
+    error(errIO, -1, "Failed to write into PNG '{0:t}'", fName);
+    delete writer;
+    fclose(f1);
+    delete fName;
+    return;
+  }
+
+  str->close();
+
+  writer->close();
+  delete writer;
+  fclose(f1);
+
+  pages->addImage(fName, state);
+#else
+  return;
+#endif
+}
+
+// Build the file name for the next image on the current page:
+//   <Docname>-<pageNum>_<imageIndex>.<ext>
+// where imageIndex is one more than the number of images already registered
+// on the page. The caller owns the returned string.
+GooString *HtmlOutputDev::createImageFileName(const char *ext)
+{
+  GooString *pageStr = GooString::fromInt(pageNum);
+  GooString *indexStr = GooString::fromInt(pages->getNumImages()+1);
+
+  GooString *result = new GooString(Docname);
+  result->append("-");
+  result->append(pageStr);
+  result->append("_");
+  result->append(indexStr);
+  result->append(".");
+  result->append(ext);
+
+  delete indexStr;
+  delete pageStr;
+
+  return result;
+}
+
+// Handle an image mask. When images are ignored, or in complex non-XML mode,
+// the call is delegated to the base class. Otherwise DCT streams are dumped
+// as JPEG files (if dumpJPEG is set) and everything else is written as a
+// monochrome PNG when libpng support is compiled in.
+void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
+				  int width, int height, GBool invert,
+				  GBool interpolate, GBool inlineImg) {
+
+  const GBool delegate = ignore || (complexMode && !xml);
+  if (delegate) {
+    OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
+    return;
+  }
+
+  // dump JPEG file
+  if (dumpJPEG && str->getKind() == strDCT) {
+    drawJpegImage(state, str);
+    return;
+  }
+
+#ifdef ENABLE_LIBPNG
+  drawPngImage(state, str, width, height, NULL, gTrue);
+#else
+  OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
+#endif
+}
+
+// Handle a colour image. When images are ignored, or in complex non-XML
+// mode, the call is delegated to the base class. Otherwise DCT streams are
+// dumped as JPEG files (if dumpJPEG is set) and everything else is written
+// as an RGB PNG when libpng support is compiled in.
+void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
+			      int width, int height, GfxImageColorMap *colorMap,
+			      GBool interpolate, int *maskColors, GBool inlineImg) {
+
+  const GBool delegate = ignore || (complexMode && !xml);
+  if (delegate) {
+    OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate,
+			 maskColors, inlineImg);
+    return;
+  }
+
+  // dump JPEG file
+  if (dumpJPEG && str->getKind() == strDCT) {
+    drawJpegImage(state, str);
+    return;
+  }
+
+#ifdef ENABLE_LIBPNG
+  drawPngImage(state, str, width, height, colorMap );
+#else
+  OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate,
+		       maskColors, inlineImg);
+#endif
+}
+
+
+
+// Convert a link annotation's rectangle from user space to device space and
+// register it, together with its destination string, on the current page.
+void HtmlOutputDev::doProcessLink(AnnotLink* link){
+  double userX1, userY1, userX2, userY2;
+  link->getRect(&userX1, &userY1, &userX2, &userY2);
+
+  int devX1, devY1, devX2, devY2;
+  cvtUserToDev(userX1, userY1, &devX1, &devY1);
+  cvtUserToDev(userX2, userY2, &devX2, &devY2);
+
+  GooString *target = getLinkDest(link);
+  // Note the argument order: the two y values are passed swapped relative to
+  // the x values (x1, y2, x2, y1).
+  HtmlLink htmlLink(static_cast<double>(devX1), static_cast<double>(devY2),
+                    static_cast<double>(devX2), static_cast<double>(devY1),
+                    target);
+  pages->AddLink(htmlLink);
+  delete target;
+}
+
+// If <file> ends in ".pdf" or ".PDF", replace that suffix with ".html".
+// Names shorter than four characters are left untouched, so the suffix
+// pointer never points before the start of the buffer.
+static void pdfSuffixToHtml(GooString *file)
+{
+  const int len = file->getLength();
+  if (len < 4)
+    return;
+  const char *suffix = file->getCString() + len - 4;
+  if (!strcmp(suffix, ".pdf") || !strcmp(suffix, ".PDF")) {
+    file->del(len - 4, 4);
+    file->append(".html");
+  }
+}
+
+// Translate a link annotation's action into the href string used in the
+// generated output. Always returns a newly allocated string owned by the
+// caller; it is empty when the link has no action, the destination cannot be
+// resolved, or the action kind is not handled.
+//
+//   actionGoTo    link into this document; the target file/anchor depends on
+//                 the noframes / complexMode globals (see table below)
+//   actionGoToR   link into another file; with printHtml a ".pdf" suffix is
+//                 rewritten to ".html" and "#<page>" is appended
+//   actionURI     the URI itself
+//   actionLaunch  with printHtml, the file name with ".pdf" rewritten to
+//                 ".html"; otherwise an empty string
+GooString* HtmlOutputDev::getLinkDest(AnnotLink *link){
+  if (!link->getAction())
+    return new GooString();
+  switch(link->getAction()->getKind())
+  {
+      case actionGoTo:
+	  {
+	  int page=1;
+	  LinkGoTo *ha=(LinkGoTo *)link->getAction();
+	  LinkDest *dest=NULL;
+	  if (ha->getDest()!=NULL)
+	      dest=ha->getDest()->copy();
+	  else if (ha->getNamedDest()!=NULL)
+	      dest=catalog->findDest(ha->getNamedDest());
+
+	  // Resolve the destination before allocating the result so that the
+	  // unresolved case has nothing to clean up.
+	  if (!dest)
+	      return new GooString();
+
+	  if (dest->isPageRef()){
+	      Ref pageref=dest->getPageRef();
+	      page=catalog->findPage(pageref.num,pageref.gen);
+	  }
+	  else {
+	      page=dest->getPageNum();
+	  }
+	  delete dest;
+
+	  GooString* file=basename(Docname);
+	  GooString *str=GooString::fromInt(page);
+	  /* 		complex 	simple
+	   frames		file-4.html	files.html#4
+	   noframes	file.html#4	file.html#4
+	   */
+	  if (noframes)
+	  {
+	      file->append(".html#");
+	      file->append(str);
+	  }
+	  else if( complexMode )
+	  {
+	      file->append("-");
+	      file->append(str);
+	      file->append(".html");
+	  }
+	  else
+	  {
+	      file->append("s.html#");
+	      file->append(str);
+	  }
+
+	  if (printCommands) printf(" link to page %d ",page);
+	  delete str;
+	  return file;
+	  }
+      case actionGoToR:
+	  {
+	  LinkGoToR *ha=(LinkGoToR *) link->getAction();
+	  LinkDest *dest=NULL;
+	  int page=1;
+	  GooString *file;
+	  if (ha->getFileName())
+	      file=new GooString(ha->getFileName()->getCString());
+	  else
+	      file=new GooString();
+	  if (ha->getDest()!=NULL)  dest=ha->getDest()->copy();
+	  if (dest){
+	      // Page references cannot be resolved against a remote file, so
+	      // the page number stays at 1 for them.
+	      if (!(dest->isPageRef()))  page=dest->getPageNum();
+	      delete dest;
+
+	      if (printCommands) printf(" link to page %d ",page);
+	      if (printHtml){
+		  pdfSuffixToHtml(file);
+		  file->append('#');
+		  // fromInt() returns a new string; append() copies from it,
+		  // so it has to be freed here.
+		  GooString *pageStr=GooString::fromInt(page);
+		  file->append(pageStr);
+		  delete pageStr;
+	      }
+	  }
+	  if (printCommands) printf("filename %s\n",file->getCString());
+	  return file;
+	  }
+      case actionURI:
+	  {
+	  LinkURI *ha=(LinkURI *) link->getAction();
+	  GooString* file=new GooString(ha->getURI()->getCString());
+	  return file;
+	  }
+      case actionLaunch:
+	  {
+	  // Without printHtml a launch action yields an empty destination,
+	  // which is what the former fall-through into the default case
+	  // produced; the string is now only allocated when it is returned.
+	  if (printHtml) {
+	      LinkLaunch *ha=(LinkLaunch *) link->getAction();
+	      GooString* file=new GooString(ha->getFileName()->getCString());
+	      pdfSuffixToHtml(file);
+	      if (printCommands) printf("filename %s",file->getCString());
+	      return file;
+	  }
+	  return new GooString();
+	  }
+      default:
+	  return new GooString();
+  }
+}
+
+// Write every collected meta variable to <file>, one per line, using the
+// textual form produced by HtmlMetaVar::toString().
+void HtmlOutputDev::dumpMetaVars(FILE *file)
+{
+  const int count = glMetaVars->getLength();
+  for (int idx = 0; idx < count; ++idx)
+  {
+     HtmlMetaVar *metaVar = static_cast<HtmlMetaVar*>(glMetaVars->get(idx));
+     GooString *text = metaVar->toString();
+     fprintf(file, "%s\n", text->getCString());
+     delete text;
+  }
+}
+
+// Write the document outline (bookmarks), if the document has one.
+//
+// Output target:
+//   - simple mode or XML: the current page file
+//   - complex mode, noframes: the current page file, after an <hr>
+//   - complex mode with frames: a separate "<Docname>-outline.html"
+//
+// Returns gFalse when outlines are compiled out, the device is not ok, the
+// document has no outline, or the separate outline file cannot be opened;
+// gTrue otherwise.
+GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc)
+{
+#ifdef DISABLE_OUTLINE
+	return gFalse;
+#else
+	FILE * output = NULL;
+	GBool bClose = gFalse;
+	Catalog *catalog = doc->getCatalog();
+
+	if (!ok)
+		return gFalse;
+
+	Outline *outline = doc->getOutline();
+	if (!outline)
+		return gFalse;
+
+	GooList *outlines = outline->getItems();
+	if (!outlines)
+		return gFalse;
+
+	if (!complexMode || xml)
+	{
+		output = page;
+	}
+	else if (noframes)
+	{
+		// complex mode, single file: the outline follows the pages
+		output = page;
+		fputs("<hr>\n", output);
+	}
+	else
+	{
+		// complex mode with frames: the outline gets its own file
+		GooString *str = Docname->copy();
+		str->append("-outline.html");
+		output = fopen(str->getCString(), "w");
+		if (output == NULL)
+		{
+			// Report the failure like the other fopen() call sites do
+			// and release the name string before bailing out.
+			error(errIO, -1, "Couldn't open html file '{0:t}'", str);
+			delete str;
+			return gFalse;
+		}
+		delete str;
+		bClose = gTrue;
+
+		GooString *htmlEncoding =
+			HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());
+
+		fprintf(output, "<HTML xmlns=\"http://www.w3.org/1999/xhtml\" " \
+                                "lang=\"\" xml:lang=\"\">\n"            \
+                                "<HEAD>\n"                              \
+                                "<TITLE>Document Outline</TITLE>\n"     \
+                                "<META http-equiv=\"Content-Type\" content=\"text/html; " \
+                                "charset=%s\"/>\n"                      \
+                                "</HEAD>\n<BODY>\n", htmlEncoding->getCString());
+		delete htmlEncoding;
+	}
+
+	if (!xml)
+	{
+		GBool done = newHtmlOutlineLevel(output, outlines, catalog);
+		if (done && !complexMode)
+			fputs("<hr>\n", output);
+
+		if (bClose)
+		{
+			fputs("</BODY>\n</HTML>\n", output);
+			fclose(output);
+		}
+	}
+	else
+		newXmlOutlineLevel(output, outlines, catalog);
+
+	return gTrue;
+#endif
+}
+
+// Emit one level of the outline as a nested HTML <ul>, recursing into child
+// items. At level 1 an "outline" anchor and a heading are written first.
+// Items whose page number can be determined become links; the others are
+// written as plain text. Returns gTrue when the list had at least one item.
+GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level)
+{
+#ifdef DISABLE_OUTLINE
+	return gFalse;
+#else
+	if (level == 1)
+	{
+		fputs("<A name=\"outline\"></a>", output);
+		fputs("<h1>Document Outline</h1>\n", output);
+	}
+	fputs("<ul>\n",output);
+
+	const int itemCount = outlines->getLength();
+	for (int idx = 0; idx < itemCount; idx++)
+	{
+		OutlineItem *entry = (OutlineItem*)outlines->get(idx);
+		GooString *escapedTitle = HtmlFont::HtmlFilter(entry->getTitle(),
+							       entry->getTitleLength());
+		const int targetPage = getOutlinePageNum(entry);
+
+		fputs("<li>",output);
+		if (targetPage > 0)
+		{
+			/*		complex		simple
+			frames		file-4.html	files.html#4
+			noframes	file.html#4	file.html#4
+			*/
+			GooString *href = basename(Docname);
+			GooString *pageStr = GooString::fromInt(targetPage);
+			if (noframes) {
+				href->append(".html#");
+				href->append(pageStr);
+			} else if (complexMode) {
+				href->append("-");
+				href->append(pageStr);
+				href->append(".html");
+			} else {
+				href->append("s.html#");
+				href->append(pageStr);
+			}
+			delete pageStr;
+
+			fprintf(output,"<A href=\"%s\">", href->getCString());
+			fputs(escapedTitle->getCString(),output);
+			fputs("</A>",output);
+			delete href;
+		}
+		else
+		{
+			fputs(escapedTitle->getCString(),output);
+		}
+		delete escapedTitle;
+
+		// Children are only available between open() and close().
+		entry->open();
+		if (entry->hasKids())
+		{
+			fputs("\n",output);
+			newHtmlOutlineLevel(output, entry->getKids(), catalog, level+1);
+		}
+		entry->close();
+		fputs("</li>\n",output);
+	}
+	fputs("</ul>\n",output);
+
+	return itemCount > 0 ? gTrue : gFalse;
+#endif
+}
+
+// Emit one level of the outline as an XML <outline> element, recursing into
+// child items. Each item becomes an <item> element; a page attribute is
+// added when the item's page number can be determined.
+void HtmlOutputDev::newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog)
+{
+#ifndef DISABLE_OUTLINE
+    fputs("<outline>\n", output);
+
+    const int itemCount = outlines->getLength();
+    for (int idx = 0; idx < itemCount; idx++)
+    {
+        OutlineItem *entry = (OutlineItem*)outlines->get(idx);
+        GooString *escapedTitle = HtmlFont::HtmlFilter(entry->getTitle(),
+                                                       entry->getTitleLength());
+        const int targetPage = getOutlinePageNum(entry);
+        if (targetPage <= 0)
+        {
+            fprintf(output, "<item>%s</item>\n", escapedTitle->getCString());
+        }
+        else
+        {
+            fprintf(output, "<item page=\"%d\">%s</item>\n",
+                    targetPage, escapedTitle->getCString());
+        }
+        delete escapedTitle;
+
+        // Children are only available between open() and close().
+        entry->open();
+        if (entry->hasKids())
+        {
+            newXmlOutlineLevel(output, entry->getKids(), catalog);
+        }
+        entry->close();
+    }
+
+    fputs("</outline>\n", output);
+#endif
+}
+
+#ifndef DISABLE_OUTLINE
+// Resolve the page number an outline item points at. Returns -1 when the
+// item has no action, the action is not a valid GoTo link, or no destination
+// (explicit or named) can be found; otherwise returns whatever the catalog
+// lookup or the destination itself reports.
+int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
+{
+    LinkAction *action = item->getAction();
+    if (action == NULL || action->getKind() != actionGoTo)
+        return -1;
+
+    LinkGoTo *gotoLink = dynamic_cast<LinkGoTo*>(action);
+    if (gotoLink == NULL || !gotoLink->isOk())
+        return -1;
+
+    // An explicit destination takes precedence over a named one.
+    LinkDest *target = NULL;
+    if (gotoLink->getDest())
+        target = gotoLink->getDest()->copy();
+    else if (gotoLink->getNamedDest())
+        target = catalog->findDest(gotoLink->getNamedDest());
+
+    if (target == NULL)
+        return -1;
+
+    int result;
+    if (target->isPageRef()) {
+        const Ref ref = target->getPageRef();
+        result = catalog->findPage(ref.num, ref.gen);
+    } else {
+        result = target->getPageNum();
+    }
+
+    delete target;
+    return result;
+}
+#endif
diff --git a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h
new file mode 100644
index 00000000..12b16bff
--- /dev/null
+++ b/utils/HtmlOutputDev.h
@@ -0,0 +1,354 @@
+//========================================================================
+//
+// HtmlOutputDev.h
+//
+// Copyright 1997 Derek B. Noonburg
+//
+// Changed 1999 by G.Ovtcharov
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006, 2007, 2009, 2012 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2008, 2009 Warren Toomey <wkt@tuhs.org>
+// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
+// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
+// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat@gmail.com>
+// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef HTMLOUTPUTDEV_H
+#define HTMLOUTPUTDEV_H
+
+#ifdef __GNUC__
+#pragma interface
+#endif
+
+#include <stdio.h>
+#include "goo/gtypes.h"
+#include "goo/GooList.h"
+#include "GfxFont.h"
+#include "OutputDev.h"
+#include "HtmlLinks.h"
+#include "HtmlFonts.h"
+#include "Link.h"
+#include "Catalog.h"
+#include "UnicodeMap.h"
+
+
+#ifdef _WIN32
+# define SLASH '\\'
+#else
+# define SLASH '/'
+#endif
+
+// Round to the nearest int by adding 0.5 and truncating (intended for
+// non-negative values). The argument is parenthesised so that expressions
+// with lower precedence than '+', such as "a ? b : c", are rounded as a whole.
+#define xoutRound(x) ((int)((x) + 0.5))
+
+#define DOCTYPE "<!DOCTYPE html>"
+
+class GfxState;
+class GooString;
+class PDFDoc;
+class OutlineItem;
+//------------------------------------------------------------------------
+// HtmlString
+//------------------------------------------------------------------------
+
+// Direction tag stored per HtmlString (see HtmlString::dir).
+enum UnicodeTextDirection {
+ textDirUnknown,
+ textDirLeftRight,
+ textDirRightLeft,
+ textDirTopBottom
+};
+
+
+// One run of text on a page: the characters, the right-hand x coordinate of
+// each character, a bounding box, and an index into the shared font
+// accumulator. Strings are chained into y-major and x-major linked lists;
+// HtmlPage is a friend and works on the members directly.
+class HtmlString {
+public:
+
+ // Constructor.
+ HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts);
+
+ // Destructor.
+ ~HtmlString();
+
+ // Add a character to the string.
+ void addChar(GfxState *state, double x, double y,
+ double dx, double dy,
+ Unicode u);
+ // Link attached to this string.
+ HtmlLink* getLink() { return link; }
+ // Font of this string, looked up in the font accumulator by fontpos.
+ const HtmlFont &getFont() const { return *fonts->Get(fontpos); }
+ void endString(); // postprocessing
+
+private:
+// (translated from German) change the text variable
+ HtmlLink *link;
+ double xMin, xMax; // bounding box x coordinates
+ double yMin, yMax; // bounding box y coordinates
+ int col; // starting column
+ Unicode *text; // the text
+ double *xRight; // right-hand x coord of each char
+ HtmlString *yxNext; // next string in y-major order
+ HtmlString *xyNext; // next string in x-major order
+ int fontpos; // index of this string's font in *fonts
+ GooString* htext;
+ int len; // length of text and xRight
+ int size; // size of text and xRight arrays
+ UnicodeTextDirection dir; // direction (left to right/right to left)
+ HtmlFontAccu *fonts; // font accumulator that fontpos indexes into
+
+ friend class HtmlPage;
+
+};
+
+
+//------------------------------------------------------------------------
+// HtmlPage
+//------------------------------------------------------------------------
+
+
+
+// Container for everything collected on the current page: text strings,
+// fonts, links and images. HtmlOutputDev (a friend) feeds it during
+// rendering and calls conv(), coalesce() and dump() at the end of each page.
+class HtmlPage {
+public:
+
+ // Constructor.
+ HtmlPage(GBool rawOrder, char *imgExtVal);
+
+ // Destructor.
+ ~HtmlPage();
+
+ // Begin a new string.
+ void beginString(GfxState *state, GooString *s);
+
+ // Add a character to the current string.
+ void addChar(GfxState *state, double x, double y,
+ double dx, double dy,
+ double ox, double oy,
+ Unicode *u, int uLen); //Guchar c);
+
+ void updateFont(GfxState *state);
+
+ // End the current string, sorting it into the list of strings.
+ void endString();
+
+ // Coalesce strings that look like parts of the same line.
+ void coalesce();
+
+ // NOTE(review): the comment below describes a "find a string" operation,
+ // but no matching declaration follows it in this class.
+ // Find a string. If <top> is true, starts looking at top of page;
+ // otherwise starts looking at <xMin>,<yMin>. If <bottom> is true,
+ // stops looking at bottom of page; otherwise stops looking at
+ // <xMax>,<yMax>. If found, sets the text bounding rectangle and
+ // returns true; otherwise returns false.
+
+
+ // new functions
+ // Register a link on the current page.
+ void AddLink(const HtmlLink& x){
+ links->AddLink(x);
+ }
+
+ // add an image to the current page
+ void addImage(GooString *fname, GfxState *state);
+
+ // number of images on the current page
+ int getNumImages() { return imgList->getLength(); }
+
+ // Write the page to <f>.
+ void dump(FILE *f, int pageNum);
+
+ // Clear the page.
+ void clear();
+
+ void conv();
+private:
+ // Font of <hStr>, looked up in the font accumulator.
+ HtmlFont* getFont(HtmlString *hStr) { return fonts->Get(hStr->fontpos); }
+
+ double fontSize; // current font size
+ GBool rawOrder; // keep strings in content stream order
+
+ HtmlString *curStr; // currently active string
+
+ HtmlString *yxStrings; // strings in y-major order
+ HtmlString *xyStrings; // strings in x-major order
+ HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list
+
+ void setDocName(char* fname);
+ void dumpAsXML(FILE* f,int page);
+ void dumpComplex(FILE* f, int page);
+ int dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page);
+
+ // marks the position of the fonts that belong to current page (for noframes)
+ int fontsPageMarker;
+ HtmlFontAccu *fonts;
+ HtmlLinks *links;
+ GooList *imgList;
+
+ GooString *DocName;
+ GooString *imgExt;
+ int pageWidth;
+ int pageHeight;
+ int firstPage; // used to begin the numeration of pages
+
+ friend class HtmlOutputDev;
+};
+
+//------------------------------------------------------------------------
+// HtmlMetaVar
+//------------------------------------------------------------------------
+// A name/content pair that HtmlOutputDev::dumpMetaVars() writes into the
+// document head.
+class HtmlMetaVar {
+public:
+ HtmlMetaVar(const char *_name, const char *_content);
+ ~HtmlMetaVar();
+
+ // Textual form of the variable; dumpMetaVars() deletes the returned
+ // string after writing it.
+ GooString* toString();
+
+private:
+
+ GooString *name;
+ GooString *content;
+};
+
+//------------------------------------------------------------------------
+// HtmlOutputDev
+//------------------------------------------------------------------------
+
+// Output device that collects the text, links and images of each page in an
+// HtmlPage and writes them out as HTML or XML.
+class HtmlOutputDev: public OutputDev {
+public:
+
+ // Create the output device and open the output files derived from
+ // <fileName>. <title>, <author>, <keywords>, <subject> and <date>
+ // describe the document. If <rawOrder> is true, the text is kept in
+ // content stream order. Check isOk() afterwards.
+ // NOTE(review): earlier wording of this comment described a <useASCII7>
+ // parameter and a NULL <fileName> mode; neither appears in this
+ // signature.
+ HtmlOutputDev(Catalog *catalogA, char *fileName, char *title,
+ char *author,
+ char *keywords,
+ char *subject,
+ char *date,
+ char *extension,
+ GBool rawOrder,
+ int firstPage = 1,
+ GBool outline = 0);
+
+ // Destructor.
+ virtual ~HtmlOutputDev();
+
+ // Check if file was successfully created.
+ virtual GBool isOk() { return ok; }
+
+ //---- get info about output device
+
+ // Does this device use upside-down coordinates?
+ // (Upside-down means (0,0) is the top left corner of the page.)
+ virtual GBool upsideDown() { return gTrue; }
+
+ // Does this device use drawChar() or drawString()?
+ virtual GBool useDrawChar() { return gTrue; }
+
+ // Does this device use beginType3Char/endType3Char? Otherwise,
+ // text in Type 3 fonts will be drawn with drawChar/drawString.
+ virtual GBool interpretType3Chars() { return gFalse; }
+
+ // Does this device need non-text content?
+ virtual GBool needNonText() { return gTrue; }
+
+ //----- initialization and control
+
+ // Remembers the page being rendered in docPage (endPage() reads its
+ // links) and always returns gTrue. All other arguments are ignored.
+ virtual GBool checkPageSlice(Page *page, double hDPI, double vDPI,
+ int rotate, GBool useMediaBox, GBool crop,
+ int sliceX, int sliceY, int sliceW, int sliceH,
+ GBool printing,
+ GBool (* abortCheckCbk)(void *data) = NULL,
+ void * abortCheckCbkData = NULL,
+ GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = NULL,
+ void *annotDisplayDecideCbkData = NULL)
+ {
+ docPage = page;
+ return gTrue;
+ }
+
+
+ // Start a page.
+ virtual void startPage(int pageNum, GfxState *state);
+
+ // End a page.
+ virtual void endPage();
+
+ //----- update text state
+ virtual void updateFont(GfxState *state);
+
+ //----- text drawing
+ virtual void beginString(GfxState *state, GooString *s);
+ virtual void endString(GfxState *state);
+ virtual void drawChar(GfxState *state, double x, double y,
+ double dx, double dy,
+ double originX, double originY,
+ CharCode code, int nBytes, Unicode *u, int uLen);
+
+ //----- image drawing
+ virtual void drawImageMask(GfxState *state, Object *ref,
+ Stream *str,
+ int width, int height, GBool invert,
+ GBool interpolate, GBool inlineImg);
+ virtual void drawImage(GfxState *state, Object *ref, Stream *str,
+ int width, int height, GfxImageColorMap *colorMap,
+ GBool interpolate, int *maskColors, GBool inlineImg);
+
+ //new feature
+ virtual int DevType() {return 1234;}
+
+ // Despite the names, endPage() stores the size of the most recently
+ // finished page in these.
+ int getPageWidth() { return maxPageWidth; }
+ int getPageHeight() { return maxPageHeight; }
+
+ // Write the document outline; returns gFalse if nothing was written.
+ GBool dumpDocOutline(PDFDoc* doc);
+
+private:
+ // convert encoding into a HTML standard, or encoding->getCString if not
+ // recognized. Will delete encoding for you and return a new one
+ // that you have to delete
+ static GooString* mapEncodingToHtml(GooString* encoding);
+ void doProcessLink(AnnotLink *link);
+ GooString* getLinkDest(AnnotLink *link);
+ void dumpMetaVars(FILE *);
+ void doFrame(int firstPage);
+ GBool newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1);
+ void newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog);
+#ifndef DISABLE_OUTLINE
+ int getOutlinePageNum(OutlineItem *item);
+#endif
+ void drawJpegImage(GfxState *state, Stream *str);
+ void drawPngImage(GfxState *state, Stream *str, int width, int height,
+ GfxImageColorMap *colorMap, GBool isMask = gFalse);
+ GooString *createImageFileName(const char *ext);
+
+ FILE *fContentsFrame; // frameset file while doFrame() runs, then the index frame
+ FILE *page; // html file
+ //FILE *tin; // image log file
+ //GBool write;
+ GBool needClose; // need to close the file?
+ HtmlPage *pages; // text for the current page
+ GBool rawOrder; // keep text in content stream order
+ GBool doOutline; // output document outline
+ GBool ok; // set up ok?
+ GBool dumpJPEG; // dump DCT streams as .jpg files
+ int pageNum; // number of the current page, set by startPage()
+ int maxPageWidth;
+ int maxPageHeight;
+ GooString *Docname; // output file name prefix
+ GooString *docTitle;
+ GooList *glMetaVars; // list of HtmlMetaVar
+ Catalog *catalog;
+ Page *docPage; // page being rendered, set by checkPageSlice()
+ friend class HtmlPage;
+};
+
+#endif
diff --git a/utils/HtmlUtils.h b/utils/HtmlUtils.h
new file mode 100644
index 00000000..bdb89b9a
--- /dev/null
+++ b/utils/HtmlUtils.h
@@ -0,0 +1,51 @@
+//
+// HtmlUtils.h
+//
+// Created on: Jun 8, 2011
+// Author: Joshua Richardson <jric@chegg.com>
+// Copyright 2011
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2011 Joshua Richardson <jric@chegg.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef HTMLUTILS_H_
+#define HTMLUTILS_H_
+
+#include <math.h> // fabs
+#include "goo/gtypes.h" // GBool
+
+// Fuzzy equality for floating-point values: true iff |a - b| is strictly
+// smaller than thresh. Used instead of exact comparison because decimal
+// numbers are imprecise.
+inline GBool is_within(double a, double thresh, double b) {
+  const double delta = a - b;
+  return fabs(delta) < thresh;
+}
+
+// True iff the first four entries of both matrices agree pairwise to
+// within 0.1.
+inline GBool rot_matrices_equal(const double * const mat0, const double * const mat1) {
+  for (int idx = 0; idx < 4; ++idx) {
+    if (!is_within(mat0[idx], .1, mat1[idx])) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// A pure rotation matrix is (cos q, sin q, -sin q, cos q, 0, 0).
+// sin q is zero only for no rotation or a 180 degree rotation, and in the
+// 180 degree case cos q is negative. So the matrix counts as rotated or
+// skewed when its first entry is negative or its second entry is not within
+// 0.1 of zero.
+inline GBool isMatRotOrSkew(const double * const mat) {
+  const GBool negativeCos = mat[0] < 0;
+  return negativeCos || !is_within(mat[1], .1, 0);
+}
+
+// Divide the first four matrix entries by |mat[0] + mat[1]| so that the
+// matrix no longer scales a vector's x component; if the matrix does not
+// skew, this also normalizes the y component, keeping any rotation but
+// removing scaling. The matrix is left untouched when that divisor is zero.
+// NOTE(review): the divisor is the absolute sum of the two entries, not
+// their Euclidean norm - confirm this is intended for rotations that are
+// not multiples of 90 degrees.
+inline void normalizeRotMat(double *mat) {
+  const double divisor = fabs(mat[0] + mat[1]);
+  if (divisor == 0.0) {
+    return;
+  }
+  double * const end = mat + 4;
+  for (double *entry = mat; entry != end; ++entry) {
+    *entry /= divisor;
+  }
+}
+
+#endif /* HTMLUTILS_H_ */
diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc
new file mode 100644
index 00000000..0c06513c
--- /dev/null
+++ b/utils/ImageOutputDev.cc
@@ -0,0 +1,425 @@
+//========================================================================
+//
+// ImageOutputDev.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2005, 2007, 2011 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2006 Rainer Keller <class321@gmx.de>
+// Copyright (C) 2008 Timothy Lee <timothy.lee@siriushk.com>
+// Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu>
+// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
+// Copyright (C) 2009 William Bader <williambader@hotmail.com>
+// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de>
+// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+
+#ifdef USE_GCC_PRAGMAS
+#pragma implementation
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <ctype.h>
+#include "goo/gmem.h"
+#include "Error.h"
+#include "GfxState.h"
+#include "Object.h"
+#include "Stream.h"
+#include "ImageOutputDev.h"
+
+// Sets up the device either for listing images (listImagesA set), in which
+// case the table header is printed right away, or for dumping images to
+// files whose names start with fileRootA.
+ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA) {
+  listImages = listImagesA;
+  if (!listImages) {
+    fileRoot = copyString(fileRootA);
+    // 45 spare bytes for the "-<page>-<img>.<ext>" suffix that setFilename()
+    // appends to the root
+    fileName = (char *)gmalloc(strlen(fileRoot) + 45);
+  } else {
+    // no files are written in list mode; give the pointers a defined value
+    // instead of leaving them indeterminate
+    fileRoot = NULL;
+    fileName = NULL;
+  }
+  dumpJPEG = dumpJPEGA;
+  pageNames = pageNamesA;
+  imgNum = 0;
+  pageNum = 0;
+  ok = gTrue;
+  if (listImages) {
+    printf("page num type width height color comp bpc enc interp object ID\n");
+    printf("---------------------------------------------------------------------\n");
+  }
+}
+
+
+// Releases the file name buffers.  They are only allocated when images are
+// dumped to files, so nothing is freed in list mode.
+ImageOutputDev::~ImageOutputDev() {
+  if (listImages) {
+    return;
+  }
+  gfree(fileName);
+  gfree(fileRoot);
+}
+
+// Rebuilds fileName for the current image: "<root>-<page>-<img>.<ext>" when
+// page names are enabled, "<root>-<img>.<ext>" otherwise.  Numbers are
+// zero-padded to at least three digits.
+void ImageOutputDev::setFilename(const char *fileExt) {
+  if (!pageNames) {
+    sprintf(fileName, "%s-%03d.%s", fileRoot, imgNum, fileExt);
+    return;
+  }
+  sprintf(fileName, "%s-%03d-%03d.%s", fileRoot, pageNum, imgNum, fileExt);
+}
+
+// Prints one row of the image table for the given image: page and image
+// number, type, dimensions, colour space, number of components, bits per
+// component, stream encoding, interpolation flag and object ID.  The image
+// counter is advanced afterwards.  Without a usable colour map (masks and
+// stencils) the row reports colour space "-", 1 component and 1 bit.
+void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str,
+                               int width, int height,
+                               GfxImageColorMap *colorMap,
+                               GBool interpolate, GBool inlineImg,
+                               ImageType imageType) {
+  // page / image number columns
+  printf("%4d %5d ", pageNum, imgNum);
+
+  // type and size columns; an unrecognised type leaves the label empty
+  const char *typeLabel = "";
+  switch (imageType) {
+  case imgImage:   typeLabel = "image";   break;
+  case imgStencil: typeLabel = "stencil"; break;
+  case imgMask:    typeLabel = "mask";    break;
+  case imgSmask:   typeLabel = "smask";   break;
+  }
+  printf("%-7s %5d %5d ", typeLabel, width, height);
+
+  // colour columns
+  const char *csLabel = "-";
+  int nComps = 1;
+  int bitsPerComp = 1;
+  if (colorMap && colorMap->isOk()) {
+    switch (colorMap->getColorSpace()->getMode()) {
+    case csDeviceGray:
+    case csCalGray:
+      csLabel = "gray";
+      break;
+    case csDeviceRGB:
+    case csCalRGB:
+      csLabel = "rgb";
+      break;
+    case csDeviceCMYK:
+      csLabel = "cmyk";
+      break;
+    case csLab:
+      csLabel = "lab";
+      break;
+    case csICCBased:
+      csLabel = "icc";
+      break;
+    case csIndexed:
+      csLabel = "index";
+      break;
+    case csSeparation:
+      csLabel = "sep";
+      break;
+    case csDeviceN:
+      csLabel = "devn";
+      break;
+    default:
+      // csPattern and anything else keep the "-" placeholder
+      break;
+    }
+    nComps = colorMap->getNumPixelComps();
+    bitsPerComp = colorMap->getBits();
+  }
+  printf("%-5s %2d %2d ", csLabel, nComps, bitsPerComp);
+
+  // encoding column: only the four dedicated image codecs get their own
+  // label, every other stream kind is reported as plain "image"
+  const char *encLabel = "image";
+  switch (str->getKind()) {
+  case strCCITTFax:
+    encLabel = "ccitt";
+    break;
+  case strDCT:
+    encLabel = "jpeg";
+    break;
+  case strJPX:
+    encLabel = "jpx";
+    break;
+  case strJBIG2:
+    encLabel = "jbig2";
+    break;
+  default:
+    break;
+  }
+  printf("%-5s ", encLabel);
+
+  // interpolation column
+  printf("%-3s ", interpolate ? "yes" : "no");
+
+  // object ID column
+  if (inlineImg) {
+    printf("[inline]\n");
+  } else {
+    GBool idPrinted = gFalse;
+    if (ref->isRef()) {
+      const Ref imageRef = ref->getRef();
+      // generation numbers of 100000 and above are not printed as an ID
+      if (imageRef.gen < 100000) {
+        printf(" %6d %2d\n", imageRef.num, imageRef.gen);
+        idPrinted = gTrue;
+      }
+    }
+    if (!idPrinted) {
+      printf("[none]\n");
+    }
+  }
+
+  ++imgNum;
+}
+
+// Writes an image mask to the next numbered output file.  When JPEG dumping
+// is enabled, a DCT-encoded mask that is not inline is copied out as a .jpg
+// file; every other mask is written as a raw PBM (P4) bitmap.  The image
+// counter advances even when the output file cannot be opened.
+void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str,
+                               int width, int height, GBool invert,
+                               GBool interpolate, GBool inlineImg) {
+  const GBool asJPEG = dumpJPEG && str->getKind() == strDCT && !inlineImg;
+
+  // open the image file
+  setFilename(asJPEG ? "jpg" : "pbm");
+  ++imgNum;
+  FILE *out = fopen(fileName, "wb");
+  if (!out) {
+    error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
+    return;
+  }
+
+  if (asJPEG) {
+    // dump JPEG file: switch to the stream returned by getNextStream()
+    // (presumably the still-encoded JPEG data -- confirm against Stream.h)
+    // and copy it byte for byte until EOF
+    str = str->getNextStream();
+    str->reset();
+    for (int ch = str->getChar(); ch != EOF; ch = str->getChar()) {
+      fputc(ch, out);
+    }
+  } else {
+    // dump PBM file: header, then height rows of (width + 7) / 8 bytes
+    fprintf(out, "P4\n");
+    fprintf(out, "%d %d\n", width, height);
+    str->reset();
+    const int nBytes = height * ((width + 7) / 8);
+    for (int k = 0; k < nBytes; ++k) {
+      fputc(str->getChar(), out);
+    }
+  }
+
+  str->close();
+  fclose(out);
+}
+
+// Writes an image to the next numbered output file, choosing the format as
+// follows:
+//  - .jpg: JPEG dumping is enabled and the image is a DCT stream with 1 or
+//    3 components that is not inline; the stream is copied out unchanged
+//  - .pbm: 1 component at 1 bit per component; raw P4 bitmap
+//  - .ppm: everything else; raw P6 pixmap converted through the colour map
+// The image counter advances even when the output file cannot be opened.
+void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str,
+                                int width, int height,
+                                GfxImageColorMap *colorMap,
+                                GBool interpolate, int *maskColors, GBool inlineImg) {
+  enum OutputFormat { fmtJPEG, fmtPBM, fmtPPM };
+  OutputFormat fmt;
+  const char *ext;
+
+  // pick the output format
+  if (dumpJPEG && str->getKind() == strDCT &&
+      (colorMap->getNumPixelComps() == 1 ||
+       colorMap->getNumPixelComps() == 3) &&
+      !inlineImg) {
+    fmt = fmtJPEG;
+    ext = "jpg";
+  } else if (colorMap->getNumPixelComps() == 1 &&
+             colorMap->getBits() == 1) {
+    fmt = fmtPBM;
+    ext = "pbm";
+  } else {
+    fmt = fmtPPM;
+    ext = "ppm";
+  }
+
+  // open the image file
+  setFilename(ext);
+  ++imgNum;
+  FILE *out = fopen(fileName, "wb");
+  if (!out) {
+    error(errIO, -1, "Couldn't open image file '{0:s}'", fileName);
+    return;
+  }
+
+  switch (fmt) {
+  case fmtJPEG: {
+    // copy the stream returned by getNextStream() byte for byte until EOF
+    Stream *raw = str->getNextStream();
+    raw->reset();
+    int ch;
+    while ((ch = raw->getChar()) != EOF) {
+      fputc(ch, out);
+    }
+    raw->close();
+    break;
+  }
+
+  case fmtPBM: {
+    // write the PBM header
+    fprintf(out, "P4\n");
+    fprintf(out, "%d %d\n", width, height);
+
+    // initialize stream
+    str->reset();
+
+    // if 0 comes out as 0 in the color map, then we _flip_ the stream bits,
+    // otherwise we pass the stream bits through unchanged
+    Guchar zeroSample = 0;
+    GfxGray grayOfZero;
+    colorMap->getGray(&zeroSample, &grayOfZero);
+    const int flipMask = colToByte(grayOfZero) ? 0 : 0xff;
+
+    // copy the stream
+    const int nBytes = height * ((width + 7) / 8);
+    for (int k = 0; k < nBytes; ++k) {
+      fputc(str->getChar() ^ flipMask, out);
+    }
+
+    str->close();
+    break;
+  }
+
+  case fmtPPM: {
+    // write the PPM header
+    fprintf(out, "P6\n");
+    fprintf(out, "%d %d\n", width, height);
+    fprintf(out, "255\n");
+
+    // initialize stream
+    ImageStream *pixels = new ImageStream(str, width,
+                                          colorMap->getNumPixelComps(),
+                                          colorMap->getBits());
+    pixels->reset();
+
+    GfxRGB rgb;
+    for (int row = 0; row < height; ++row) {
+      Guchar *px = pixels->getLine();
+      if (!px) {
+        // no data for this row: pad it with black pixels
+        for (int col = 0; col < width; ++col) {
+          fputc(0, out);
+          fputc(0, out);
+          fputc(0, out);
+        }
+        continue;
+      }
+      for (int col = 0; col < width; ++col) {
+        colorMap->getRGB(px, &rgb);
+        fputc(colToByte(rgb.r), out);
+        fputc(colToByte(rgb.g), out);
+        fputc(colToByte(rgb.b), out);
+        px += colorMap->getNumPixelComps();
+      }
+    }
+    pixels->close();
+    delete pixels;
+    break;
+  }
+  }
+
+  fclose(out);
+}
+
+// Returns gTrue without doing any work for tiling pattern fills; this
+// avoids the potentially slow loop in Gfx.cc.
+GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
+                                        double *pmat, int paintType, int tilingType, Dict *resDict,
+                                        double *mat, double *bbox,
+                                        int x0, int y0, int x1, int y1,
+                                        double xStep, double yStep) {
+  // do nothing
+  return gTrue;
+}
+
+// Image mask callback: writes the mask to a file, or prints a table row of
+// type imgMask for it when the device is in list mode.
+void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str,
+                                   int width, int height, GBool invert,
+                                   GBool interpolate, GBool inlineImg) {
+  if (!listImages) {
+    writeMask(state, ref, str, width, height, invert, interpolate, inlineImg);
+    return;
+  }
+  listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgMask);
+}
+
+// Image callback: writes the image to a file, or prints a table row of type
+// imgImage for it when the device is in list mode.
+void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str,
+                               int width, int height,
+                               GfxImageColorMap *colorMap,
+                               GBool interpolate, int *maskColors, GBool inlineImg) {
+  if (!listImages) {
+    writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
+    return;
+  }
+  listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage);
+}
+
+// Callback for an image that comes with a separate mask stream.  The base
+// image and the mask are handled as two consecutive images: in list mode
+// each gets its own table row, otherwise each is written to its own file.
+// Neither is treated as an inline image.
+void ImageOutputDev::drawMaskedImage(
+  GfxState *state, Object *ref, Stream *str,
+  int width, int height, GfxImageColorMap *colorMap, GBool interpolate,
+  Stream *maskStr, int maskWidth, int maskHeight, GBool maskInvert, GBool maskInterpolate) {
+  if (listImages) {
+    listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
+    // describe the mask from its own stream, so that the encoding column
+    // reflects the mask data rather than the base image
+    listImage(state, ref, maskStr, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask);
+  } else {
+    drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
+    drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert,
+                  maskInterpolate, gFalse);
+  }
+}
+
+// Callback for an image that comes with a soft mask, which is itself an
+// image with its own colour map.  The base image and the soft mask are
+// handled as two consecutive images: in list mode each gets its own table
+// row, otherwise each is written to its own file.  Neither is treated as an
+// inline image.
+void ImageOutputDev::drawSoftMaskedImage(
+  GfxState *state, Object *ref, Stream *str,
+  int width, int height, GfxImageColorMap *colorMap, GBool interpolate,
+  Stream *maskStr, int maskWidth, int maskHeight,
+  GfxImageColorMap *maskColorMap, GBool maskInterpolate) {
+  if (listImages) {
+    listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage);
+    // report the soft mask with its own dimensions (maskHeight, not the
+    // height of the base image)
+    listImage(state, ref, maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate, gFalse, imgSmask);
+  } else {
+    drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse);
+    drawImage(state, ref, maskStr, maskWidth, maskHeight,
+              maskColorMap, maskInterpolate, NULL, gFalse);
+  }
+}
diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h
new file mode 100644
index 00000000..6201a249
--- /dev/null
+++ b/utils/ImageOutputDev.h
@@ -0,0 +1,148 @@
+//========================================================================
+//
+// ImageOutputDev.h
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006 Rainer Keller <class321@gmx.de>
+// Copyright (C) 2008 Timothy Lee <timothy.lee@siriushk.com>
+// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org>
+// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de>
+// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#ifndef IMAGEOUTPUTDEV_H
+#define IMAGEOUTPUTDEV_H
+
+#include "poppler/poppler-config.h"
+
+#ifdef USE_GCC_PRAGMAS
+#pragma interface
+#endif
+
+#include <stdio.h>
+#include "goo/gtypes.h"
+#include "OutputDev.h"
+
+class GfxState;
+
+//------------------------------------------------------------------------
+// ImageOutputDev
+//------------------------------------------------------------------------
+
+// OutputDev that only acts on the image drawing callbacks: depending on
+// listImages it either prints a table describing every image it is handed
+// or writes each image to its own file.
+class ImageOutputDev: public OutputDev {
+public:
+ // Kind of image being reported; listImage() prints these as "image",
+ // "stencil", "mask" and "smask" respectively.
+ enum ImageType {
+ imgImage,
+ imgStencil,
+ imgMask,
+ imgSmask
+ };
+
+ // Create an OutputDev which will write images to files named
+ // <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if
+ // <pageNames> is set. Normally, all images are written as PBM
+ // (.pbm) or PPM (.ppm) files. If <dumpJPEG> is set, JPEG images
+ // are written as JPEG (.jpg) files.
+ // If <listImages> is set, no files are written; a table describing
+ // the images is printed to stdout instead and <fileRoot> is not used.
+ ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA);
+
+ // Destructor.
+ virtual ~ImageOutputDev();
+
+ // Check if file was successfully created.
+ virtual GBool isOk() { return ok; }
+
+ // Does this device use tilingPatternFill()? If this returns false,
+ // tiling pattern fills will be reduced to a series of other drawing
+ // operations.
+ virtual GBool useTilingPatternFill() { return gTrue; }
+
+ // Does this device use beginType3Char/endType3Char? Otherwise,
+ // text in Type 3 fonts will be drawn with drawChar/drawString.
+ virtual GBool interpretType3Chars() { return gFalse; }
+
+ // Does this device need non-text content?
+ virtual GBool needNonText() { return gTrue; }
+
+ // Start a page
+ // Only records the page number, which is used in the listing and,
+ // when pageNames is set, in the output file names.
+ virtual void startPage(int pageNumA, GfxState *state)
+ { pageNum = pageNumA; }
+
+ //---- get info about output device
+
+ // Does this device use upside-down coordinates?
+ // (Upside-down means (0,0) is the top left corner of the page.)
+ virtual GBool upsideDown() { return gTrue; }
+
+ // Does this device use drawChar() or drawString()?
+ virtual GBool useDrawChar() { return gFalse; }
+
+ //----- path painting
+ // Returns gTrue without drawing anything.
+ virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
+ double *pmat, int paintType, int tilingType, Dict *resDict,
+ double *mat, double *bbox,
+ int x0, int y0, int x1, int y1,
+ double xStep, double yStep);
+
+ //----- image drawing
+ // Each callback lists the image(s) when listImages is set and writes
+ // them to files otherwise; the masked variants handle the base image
+ // and its mask as two separate images.
+ virtual void drawImageMask(GfxState *state, Object *ref, Stream *str,
+ int width, int height, GBool invert,
+ GBool interpolate, GBool inlineImg);
+ virtual void drawImage(GfxState *state, Object *ref, Stream *str,
+ int width, int height, GfxImageColorMap *colorMap,
+ GBool interpolate, int *maskColors, GBool inlineImg);
+ virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
+ int width, int height,
+ GfxImageColorMap *colorMap,
+ GBool interpolate,
+ Stream *maskStr, int maskWidth, int maskHeight,
+ GBool maskInvert, GBool maskInterpolate);
+ virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
+ int width, int height,
+ GfxImageColorMap *colorMap,
+ GBool interpolate,
+ Stream *maskStr,
+ int maskWidth, int maskHeight,
+ GfxImageColorMap *maskColorMap,
+ GBool maskInterpolate);
+
+private:
+ // Sets the output filename with a given file extension
+ void setFilename(const char *fileExt);
+ // Prints one table row describing the image and increments imgNum.
+ void listImage(GfxState *state, Object *ref, Stream *str,
+ int width, int height,
+ GfxImageColorMap *colorMap,
+ GBool interpolate, GBool inlineImg,
+ ImageType imageType);
+ // Writes an image mask to a .pbm file (or .jpg when dumpJPEG applies)
+ // and increments imgNum.
+ void writeMask(GfxState *state, Object *ref, Stream *str,
+ int width, int height, GBool invert,
+ GBool interpolate, GBool inlineImg);
+ // Writes an image to a .pbm or .ppm file (or .jpg when dumpJPEG
+ // applies) and increments imgNum.
+ void writeImage(GfxState *state, Object *ref, Stream *str,
+ int width, int height, GfxImageColorMap *colorMap,
+ GBool interpolate, int *maskColors, GBool inlineImg);
+
+
+ char *fileRoot; // root of output file names
+ char *fileName; // buffer for output file names
+ GBool listImages; // list images instead of dumping
+ GBool dumpJPEG; // set to dump native JPEG files
+ GBool pageNames; // set to include page number in file names
+ int pageNum; // current page number
+ int imgNum; // current image number
+ GBool ok; // set up ok?
+};
+
+#endif
diff --git a/utils/Makefile.am b/utils/Makefile.am
new file mode 100644
index 00000000..ad845c19
--- /dev/null
+++ b/utils/Makefile.am
@@ -0,0 +1,137 @@
+# pdftoppm is only built when the BUILD_SPLASH_OUTPUT conditional is true.
+# Its binary and man page names go into helper variables that stay empty
+# otherwise, so they can be used unconditionally in the lists further down.
+if BUILD_SPLASH_OUTPUT
+
+pdftoppm_SOURCES = \
+ pdftoppm.cc \
+ $(common)
+
+pdftoppm_binary = pdftoppm
+
+pdftoppm_manpage = pdftoppm.1
+
+endif
+
+# Include paths and compiler flags shared by every program in this directory.
+INCLUDES = \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/goo \
+ -I$(top_srcdir)/utils \
+ -I$(top_srcdir)/poppler \
+ $(UTILS_CFLAGS) \
+ $(FONTCONFIG_CFLAGS) \
+ $(PDFTOCAIRO_CFLAGS)
+
+# Default link line for programs without their own <program>_LDADD.
+LDADD = \
+ $(top_builddir)/poppler/libpoppler.la \
+ $(UTILS_LIBS) \
+ $(FONTCONFIG_LIBS)
+
+# pdftocairo is only built when the BUILD_CAIRO_OUTPUT conditional is true;
+# it links against libpoppler-cairo in addition to the default LDADD and
+# picks up the LCMS flags when USE_CMS is true.
+if BUILD_CAIRO_OUTPUT
+
+pdftocairo_SOURCES = \
+ pdftocairo.cc \
+ $(common)
+
+if USE_CMS
+PDFTOCAIRO_CFLAGS += $(LCMS_CFLAGS)
+PDFTOCAIRO_LIBS += $(LCMS_LIBS)
+endif
+
+pdftocairo_LDADD = \
+ $(top_builddir)/poppler/libpoppler-cairo.la \
+ $(LDADD) $(PDFTOCAIRO_LIBS)
+
+
+pdftocairo_binary = pdftocairo
+
+pdftocairo_manpage = pdftocairo.1
+
+endif
+
+# Linker flags substituted by configure.
+AM_LDFLAGS = @auto_import_flags@
+
+# Programs to build and install.  $(pdftoppm_binary) and
+# $(pdftocairo_binary) are only set inside their conditionals and expand to
+# nothing when the corresponding backend is disabled.
+bin_PROGRAMS = \
+ pdfdetach \
+ pdffonts \
+ pdfimages \
+ pdfinfo \
+ pdftops \
+ pdftotext \
+ pdftohtml \
+ pdfseparate \
+ pdfunite \
+ $(pdftoppm_binary) \
+ $(pdftocairo_binary)
+
+# Section 1 man pages, one per program; the last two entries are
+# conditional in the same way as the binaries above.
+dist_man1_MANS = \
+ pdfdetach.1 \
+ pdffonts.1 \
+ pdfimages.1 \
+ pdfinfo.1 \
+ pdftops.1 \
+ pdftotext.1 \
+ pdftohtml.1 \
+ pdfseparate.1 \
+ pdfunite.1 \
+ $(pdftoppm_manpage) \
+ $(pdftocairo_manpage)
+
+# Command-line argument parser compiled into every program.
+common = parseargs.cc parseargs.h
+
+pdfdetach_SOURCES = \
+ pdfdetach.cc \
+ $(common)
+
+pdffonts_SOURCES = \
+ pdffonts.cc \
+ $(common)
+
+pdfimages_SOURCES = \
+ pdfimages.cc \
+ ImageOutputDev.cc \
+ ImageOutputDev.h \
+ $(common)
+
+pdfinfo_SOURCES = \
+ pdfinfo.cc \
+ printencodings.cc \
+ printencodings.h \
+ $(common)
+
+pdftops_SOURCES = \
+ pdftops.cc \
+ $(common)
+
+pdftotext_SOURCES = \
+ pdftotext.cc \
+ printencodings.cc \
+ printencodings.h \
+ $(common)
+
+pdftohtml_SOURCES = \
+ pdftohtml.cc \
+ HtmlFonts.cc \
+ HtmlFonts.h \
+ HtmlLinks.cc \
+ HtmlLinks.h \
+ HtmlOutputDev.cc \
+ HtmlOutputDev.h \
+ HtmlUtils.h \
+ $(common)
+
+# HtmlOutputDev uses goo/PNGWriter.h that may depend on libpng header
+pdftohtml_CXXFLAGS = $(AM_CXXFLAGS)
+if BUILD_LIBPNG
+pdftohtml_CXXFLAGS += $(LIBPNG_CFLAGS)
+endif
+
+pdfseparate_SOURCES = \
+ pdfseparate.cc \
+ $(common)
+
+pdfunite_SOURCES = \
+ pdfunite.cc \
+ $(common)
+
+# Yay, automake! It should be able to figure out that it has to dist
+# pdftoppm.1, but nooo. So we just add it here.
+# pdftocairo.1 is listed for the same reason: like pdftoppm.1 it only
+# reaches dist_man1_MANS through a variable that is set inside a
+# conditional, so it would be missing from a tarball rolled with that
+# conditional disabled.
+
+EXTRA_DIST = pdf2xml.dtd pdftoppm.1 pdftocairo.1
diff --git a/utils/Makefile.in b/utils/Makefile.in
new file mode 100644
index 00000000..cfda1da2
--- /dev/null
+++ b/utils/Makefile.in
@@ -0,0 +1,1052 @@
+# Makefile.in generated by automake 1.11.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@am__append_1 = $(LCMS_CFLAGS)
+@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@am__append_2 = $(LCMS_LIBS)
+bin_PROGRAMS = pdfdetach$(EXEEXT) pdffonts$(EXEEXT) pdfimages$(EXEEXT) \
+ pdfinfo$(EXEEXT) pdftops$(EXEEXT) pdftotext$(EXEEXT) \
+ pdftohtml$(EXEEXT) pdfseparate$(EXEEXT) pdfunite$(EXEEXT) \
+ $(am__EXEEXT_1) $(am__EXEEXT_2)
+@BUILD_LIBPNG_TRUE@am__append_3 = $(LIBPNG_CFLAGS)
+subdir = utils
+DIST_COMMON = $(dist_man1_MANS) $(srcdir)/Makefile.am \
+ $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \
+ $(top_srcdir)/m4/define-dir.m4 $(top_srcdir)/m4/gtk-doc.m4 \
+ $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/introspection.m4 \
+ $(top_srcdir)/m4/libjpeg.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h \
+ $(top_builddir)/poppler/poppler-config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+@BUILD_SPLASH_OUTPUT_TRUE@am__EXEEXT_1 = pdftoppm$(EXEEXT)
+@BUILD_CAIRO_OUTPUT_TRUE@am__EXEEXT_2 = pdftocairo$(EXEEXT)
+am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"
+PROGRAMS = $(bin_PROGRAMS)
+am__objects_1 = parseargs.$(OBJEXT)
+am_pdfdetach_OBJECTS = pdfdetach.$(OBJEXT) $(am__objects_1)
+pdfdetach_OBJECTS = $(am_pdfdetach_OBJECTS)
+pdfdetach_LDADD = $(LDADD)
+am__DEPENDENCIES_1 =
+pdfdetach_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am_pdffonts_OBJECTS = pdffonts.$(OBJEXT) $(am__objects_1)
+pdffonts_OBJECTS = $(am_pdffonts_OBJECTS)
+pdffonts_LDADD = $(LDADD)
+pdffonts_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am_pdfimages_OBJECTS = pdfimages.$(OBJEXT) ImageOutputDev.$(OBJEXT) \
+ $(am__objects_1)
+pdfimages_OBJECTS = $(am_pdfimages_OBJECTS)
+pdfimages_LDADD = $(LDADD)
+pdfimages_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am_pdfinfo_OBJECTS = pdfinfo.$(OBJEXT) printencodings.$(OBJEXT) \
+ $(am__objects_1)
+pdfinfo_OBJECTS = $(am_pdfinfo_OBJECTS)
+pdfinfo_LDADD = $(LDADD)
+pdfinfo_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am_pdfseparate_OBJECTS = pdfseparate.$(OBJEXT) $(am__objects_1)
+pdfseparate_OBJECTS = $(am_pdfseparate_OBJECTS)
+pdfseparate_LDADD = $(LDADD)
+pdfseparate_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am__pdftocairo_SOURCES_DIST = pdftocairo.cc parseargs.cc parseargs.h
+@BUILD_CAIRO_OUTPUT_TRUE@am_pdftocairo_OBJECTS = pdftocairo.$(OBJEXT) \
+@BUILD_CAIRO_OUTPUT_TRUE@ $(am__objects_1)
+pdftocairo_OBJECTS = $(am_pdftocairo_OBJECTS)
+am__DEPENDENCIES_2 = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@am__DEPENDENCIES_3 = \
+@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@ $(am__DEPENDENCIES_1)
+am__DEPENDENCIES_4 = $(am__DEPENDENCIES_3)
+@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_DEPENDENCIES = $(top_builddir)/poppler/libpoppler-cairo.la \
+@BUILD_CAIRO_OUTPUT_TRUE@ $(am__DEPENDENCIES_2) \
+@BUILD_CAIRO_OUTPUT_TRUE@ $(am__DEPENDENCIES_4)
+am__objects_2 = pdftohtml-parseargs.$(OBJEXT)
+am_pdftohtml_OBJECTS = pdftohtml-pdftohtml.$(OBJEXT) \
+ pdftohtml-HtmlFonts.$(OBJEXT) pdftohtml-HtmlLinks.$(OBJEXT) \
+ pdftohtml-HtmlOutputDev.$(OBJEXT) $(am__objects_2)
+pdftohtml_OBJECTS = $(am_pdftohtml_OBJECTS)
+pdftohtml_LDADD = $(LDADD)
+pdftohtml_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+pdftohtml_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(pdftohtml_CXXFLAGS) \
+ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+am__pdftoppm_SOURCES_DIST = pdftoppm.cc parseargs.cc parseargs.h
+@BUILD_SPLASH_OUTPUT_TRUE@am_pdftoppm_OBJECTS = pdftoppm.$(OBJEXT) \
+@BUILD_SPLASH_OUTPUT_TRUE@ $(am__objects_1)
+pdftoppm_OBJECTS = $(am_pdftoppm_OBJECTS)
+pdftoppm_LDADD = $(LDADD)
+pdftoppm_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am_pdftops_OBJECTS = pdftops.$(OBJEXT) $(am__objects_1)
+pdftops_OBJECTS = $(am_pdftops_OBJECTS)
+pdftops_LDADD = $(LDADD)
+pdftops_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am_pdftotext_OBJECTS = pdftotext.$(OBJEXT) printencodings.$(OBJEXT) \
+ $(am__objects_1)
+pdftotext_OBJECTS = $(am_pdftotext_OBJECTS)
+pdftotext_LDADD = $(LDADD)
+pdftotext_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+am_pdfunite_OBJECTS = pdfunite.$(OBJEXT) $(am__objects_1)
+pdfunite_OBJECTS = $(am_pdfunite_OBJECTS)
+pdfunite_LDADD = $(LDADD)
+pdfunite_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \
+ $(am__DEPENDENCIES_1)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/poppler
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo " CXX " $@;
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo " CXXLD " $@;
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+SOURCES = $(pdfdetach_SOURCES) $(pdffonts_SOURCES) \
+ $(pdfimages_SOURCES) $(pdfinfo_SOURCES) $(pdfseparate_SOURCES) \
+ $(pdftocairo_SOURCES) $(pdftohtml_SOURCES) $(pdftoppm_SOURCES) \
+ $(pdftops_SOURCES) $(pdftotext_SOURCES) $(pdfunite_SOURCES)
+DIST_SOURCES = $(pdfdetach_SOURCES) $(pdffonts_SOURCES) \
+ $(pdfimages_SOURCES) $(pdfinfo_SOURCES) $(pdfseparate_SOURCES) \
+ $(am__pdftocairo_SOURCES_DIST) $(pdftohtml_SOURCES) \
+ $(am__pdftoppm_SOURCES_DIST) $(pdftops_SOURCES) \
+ $(pdftotext_SOURCES) $(pdfunite_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+man1dir = $(mandir)/man1
+NROFF = nroff
+MANS = $(dist_man1_MANS)
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CAIRO_CFLAGS = @CAIRO_CFLAGS@
+CAIRO_FEATURE = @CAIRO_FEATURE@
+CAIRO_LIBS = @CAIRO_LIBS@
+CAIRO_REQ = @CAIRO_REQ@
+CAIRO_VERSION = @CAIRO_VERSION@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+FONTCONFIG_CFLAGS = @FONTCONFIG_CFLAGS@
+FONTCONFIG_LIBS = @FONTCONFIG_LIBS@
+FREETYPE_CFLAGS = @FREETYPE_CFLAGS@
+FREETYPE_CONFIG = @FREETYPE_CONFIG@
+FREETYPE_LIBS = @FREETYPE_LIBS@
+GLIB_MKENUMS = @GLIB_MKENUMS@
+GLIB_REQ = @GLIB_REQ@
+GLIB_REQUIRED = @GLIB_REQUIRED@
+GREP = @GREP@
+GTKDOC_CHECK = @GTKDOC_CHECK@
+GTKDOC_DEPS_CFLAGS = @GTKDOC_DEPS_CFLAGS@
+GTKDOC_DEPS_LIBS = @GTKDOC_DEPS_LIBS@
+GTKDOC_MKPDF = @GTKDOC_MKPDF@
+GTKDOC_REBASE = @GTKDOC_REBASE@
+GTK_TEST_CFLAGS = @GTK_TEST_CFLAGS@
+GTK_TEST_LIBS = @GTK_TEST_LIBS@
+HTML_DIR = @HTML_DIR@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+INTROSPECTION_CFLAGS = @INTROSPECTION_CFLAGS@
+INTROSPECTION_COMPILER = @INTROSPECTION_COMPILER@
+INTROSPECTION_GENERATE = @INTROSPECTION_GENERATE@
+INTROSPECTION_GIRDIR = @INTROSPECTION_GIRDIR@
+INTROSPECTION_LIBS = @INTROSPECTION_LIBS@
+INTROSPECTION_MAKEFILE = @INTROSPECTION_MAKEFILE@
+INTROSPECTION_SCANNER = @INTROSPECTION_SCANNER@
+INTROSPECTION_TYPELIBDIR = @INTROSPECTION_TYPELIBDIR@
+LCMS_CFLAGS = @LCMS_CFLAGS@
+LCMS_LIBS = @LCMS_LIBS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBCURL_CFLAGS = @LIBCURL_CFLAGS@
+LIBCURL_LIBS = @LIBCURL_LIBS@
+LIBICONV = @LIBICONV@
+LIBJPEG_CFLAGS = @LIBJPEG_CFLAGS@
+LIBJPEG_LIBS = @LIBJPEG_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBOPENJPEG_CFLAGS = @LIBOPENJPEG_CFLAGS@
+LIBOPENJPEG_LIBS = @LIBOPENJPEG_LIBS@
+LIBPNG_CFLAGS = @LIBPNG_CFLAGS@
+LIBPNG_LIBS = @LIBPNG_LIBS@
+LIBS = @LIBS@
+LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@
+LIBTIFF_CFLAGSS = @LIBTIFF_CFLAGSS@
+LIBTIFF_LIBS = @LIBTIFF_LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBICONV = @LTLIBICONV@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+MOCQT4 = @MOCQT4@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PC_REQUIRES = @PC_REQUIRES@
+PC_REQUIRES_PRIVATE = @PC_REQUIRES_PRIVATE@
+PDFTOCAIRO_CFLAGS = @PDFTOCAIRO_CFLAGS@ $(am__append_1)
+PDFTOCAIRO_LIBS = @PDFTOCAIRO_LIBS@ $(am__append_2)
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POPPLER_DATADIR = @POPPLER_DATADIR@
+POPPLER_GLIB_CFLAGS = @POPPLER_GLIB_CFLAGS@
+POPPLER_GLIB_DISABLE_DEPRECATED = @POPPLER_GLIB_DISABLE_DEPRECATED@
+POPPLER_GLIB_DISABLE_SINGLE_INCLUDES = @POPPLER_GLIB_DISABLE_SINGLE_INCLUDES@
+POPPLER_GLIB_LIBS = @POPPLER_GLIB_LIBS@
+POPPLER_MAJOR_VERSION = @POPPLER_MAJOR_VERSION@
+POPPLER_MICRO_VERSION = @POPPLER_MICRO_VERSION@
+POPPLER_MINOR_VERSION = @POPPLER_MINOR_VERSION@
+POPPLER_QT4_CFLAGS = @POPPLER_QT4_CFLAGS@
+POPPLER_QT4_CXXFLAGS = @POPPLER_QT4_CXXFLAGS@
+POPPLER_QT4_LIBS = @POPPLER_QT4_LIBS@
+POPPLER_QT4_TEST_CFLAGS = @POPPLER_QT4_TEST_CFLAGS@
+POPPLER_QT4_TEST_LIBS = @POPPLER_QT4_TEST_LIBS@
+POPPLER_VERSION = @POPPLER_VERSION@
+PTHREAD_CC = @PTHREAD_CC@
+PTHREAD_CFLAGS = @PTHREAD_CFLAGS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+TESTDATADIR = @TESTDATADIR@
+VERSION = @VERSION@
+XMKMF = @XMKMF@
+X_CFLAGS = @X_CFLAGS@
+X_EXTRA_LIBS = @X_EXTRA_LIBS@
+X_LIBS = @X_LIBS@
+X_PRE_LIBS = @X_PRE_LIBS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+acx_pthread_config = @acx_pthread_config@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+auto_import_flags = @auto_import_flags@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+create_shared_lib = @create_shared_lib@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+win32_libs = @win32_libs@
+@BUILD_SPLASH_OUTPUT_TRUE@pdftoppm_SOURCES = \
+@BUILD_SPLASH_OUTPUT_TRUE@ pdftoppm.cc \
+@BUILD_SPLASH_OUTPUT_TRUE@ $(common)
+
+@BUILD_SPLASH_OUTPUT_TRUE@pdftoppm_binary = pdftoppm
+@BUILD_SPLASH_OUTPUT_TRUE@pdftoppm_manpage = pdftoppm.1
+INCLUDES = \
+ -I$(top_srcdir) \
+ -I$(top_srcdir)/goo \
+ -I$(top_srcdir)/utils \
+ -I$(top_srcdir)/poppler \
+ $(UTILS_CFLAGS) \
+ $(FONTCONFIG_CFLAGS) \
+ $(PDFTOCAIRO_CFLAGS)
+
+LDADD = \
+ $(top_builddir)/poppler/libpoppler.la \
+ $(UTILS_LIBS) \
+ $(FONTCONFIG_LIBS)
+
+@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_SOURCES = \
+@BUILD_CAIRO_OUTPUT_TRUE@ pdftocairo.cc \
+@BUILD_CAIRO_OUTPUT_TRUE@ $(common)
+
+@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_LDADD = \
+@BUILD_CAIRO_OUTPUT_TRUE@ $(top_builddir)/poppler/libpoppler-cairo.la \
+@BUILD_CAIRO_OUTPUT_TRUE@ $(LDADD) $(PDFTOCAIRO_LIBS)
+
+@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_binary = pdftocairo
+@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_manpage = pdftocairo.1
+AM_LDFLAGS = @auto_import_flags@
+dist_man1_MANS = \
+ pdfdetach.1 \
+ pdffonts.1 \
+ pdfimages.1 \
+ pdfinfo.1 \
+ pdftops.1 \
+ pdftotext.1 \
+ pdftohtml.1 \
+ pdfseparate.1 \
+ pdfunite.1 \
+ $(pdftoppm_manpage) \
+ $(pdftocairo_manpage)
+
+common = parseargs.cc parseargs.h
+pdfdetach_SOURCES = \
+ pdfdetach.cc \
+ $(common)
+
+pdffonts_SOURCES = \
+ pdffonts.cc \
+ $(common)
+
+pdfimages_SOURCES = \
+ pdfimages.cc \
+ ImageOutputDev.cc \
+ ImageOutputDev.h \
+ $(common)
+
+pdfinfo_SOURCES = \
+ pdfinfo.cc \
+ printencodings.cc \
+ printencodings.h \
+ $(common)
+
+pdftops_SOURCES = \
+ pdftops.cc \
+ $(common)
+
+pdftotext_SOURCES = \
+ pdftotext.cc \
+ printencodings.cc \
+ printencodings.h \
+ $(common)
+
+pdftohtml_SOURCES = \
+ pdftohtml.cc \
+ HtmlFonts.cc \
+ HtmlFonts.h \
+ HtmlLinks.cc \
+ HtmlLinks.h \
+ HtmlOutputDev.cc \
+ HtmlOutputDev.h \
+ HtmlUtils.h \
+ $(common)
+
+
+# HtmlOutputDev includes goo/PNGWriter.h, which may in turn need the libpng headers
+pdftohtml_CXXFLAGS = $(AM_CXXFLAGS) $(am__append_3)
+pdfseparate_SOURCES = \
+ pdfseparate.cc \
+ $(common)
+
+pdfunite_SOURCES = \
+ pdfunite.cc \
+ $(common)
+
+
+# pdftoppm.1 and pdftocairo.1 reach dist_man1_MANS only through conditional
+# variables, so a partial configuration would not dist them; list them here.
+EXTRA_DIST = pdf2xml.dtd pdftoppm.1 pdftocairo.1
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign utils/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binPROGRAMS: $(bin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+ fi; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p || test -f $$p1; \
+ then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(bindir)" && rm -f $$files
+
+clean-binPROGRAMS:
+ @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+pdfdetach$(EXEEXT): $(pdfdetach_OBJECTS) $(pdfdetach_DEPENDENCIES) $(EXTRA_pdfdetach_DEPENDENCIES)
+ @rm -f pdfdetach$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdfdetach_OBJECTS) $(pdfdetach_LDADD) $(LIBS)
+pdffonts$(EXEEXT): $(pdffonts_OBJECTS) $(pdffonts_DEPENDENCIES) $(EXTRA_pdffonts_DEPENDENCIES)
+ @rm -f pdffonts$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdffonts_OBJECTS) $(pdffonts_LDADD) $(LIBS)
+pdfimages$(EXEEXT): $(pdfimages_OBJECTS) $(pdfimages_DEPENDENCIES) $(EXTRA_pdfimages_DEPENDENCIES)
+ @rm -f pdfimages$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdfimages_OBJECTS) $(pdfimages_LDADD) $(LIBS)
+pdfinfo$(EXEEXT): $(pdfinfo_OBJECTS) $(pdfinfo_DEPENDENCIES) $(EXTRA_pdfinfo_DEPENDENCIES)
+ @rm -f pdfinfo$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdfinfo_OBJECTS) $(pdfinfo_LDADD) $(LIBS)
+pdfseparate$(EXEEXT): $(pdfseparate_OBJECTS) $(pdfseparate_DEPENDENCIES) $(EXTRA_pdfseparate_DEPENDENCIES)
+ @rm -f pdfseparate$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdfseparate_OBJECTS) $(pdfseparate_LDADD) $(LIBS)
+pdftocairo$(EXEEXT): $(pdftocairo_OBJECTS) $(pdftocairo_DEPENDENCIES) $(EXTRA_pdftocairo_DEPENDENCIES)
+ @rm -f pdftocairo$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdftocairo_OBJECTS) $(pdftocairo_LDADD) $(LIBS)
+pdftohtml$(EXEEXT): $(pdftohtml_OBJECTS) $(pdftohtml_DEPENDENCIES) $(EXTRA_pdftohtml_DEPENDENCIES)
+ @rm -f pdftohtml$(EXEEXT)
+ $(AM_V_CXXLD)$(pdftohtml_LINK) $(pdftohtml_OBJECTS) $(pdftohtml_LDADD) $(LIBS)
+pdftoppm$(EXEEXT): $(pdftoppm_OBJECTS) $(pdftoppm_DEPENDENCIES) $(EXTRA_pdftoppm_DEPENDENCIES)
+ @rm -f pdftoppm$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdftoppm_OBJECTS) $(pdftoppm_LDADD) $(LIBS)
+pdftops$(EXEEXT): $(pdftops_OBJECTS) $(pdftops_DEPENDENCIES) $(EXTRA_pdftops_DEPENDENCIES)
+ @rm -f pdftops$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdftops_OBJECTS) $(pdftops_LDADD) $(LIBS)
+pdftotext$(EXEEXT): $(pdftotext_OBJECTS) $(pdftotext_DEPENDENCIES) $(EXTRA_pdftotext_DEPENDENCIES)
+ @rm -f pdftotext$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdftotext_OBJECTS) $(pdftotext_LDADD) $(LIBS)
+pdfunite$(EXEEXT): $(pdfunite_OBJECTS) $(pdfunite_DEPENDENCIES) $(EXTRA_pdfunite_DEPENDENCIES)
+ @rm -f pdfunite$(EXEEXT)
+ $(AM_V_CXXLD)$(CXXLINK) $(pdfunite_OBJECTS) $(pdfunite_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ImageOutputDev.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parseargs.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfdetach.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdffonts.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfimages.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfinfo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfseparate.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftocairo.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-HtmlFonts.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-HtmlLinks.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-HtmlOutputDev.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-parseargs.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-pdftohtml.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftoppm.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftops.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftotext.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfunite.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/printencodings.Po@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+pdftohtml-pdftohtml.o: pdftohtml.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-pdftohtml.o -MD -MP -MF $(DEPDIR)/pdftohtml-pdftohtml.Tpo -c -o pdftohtml-pdftohtml.o `test -f 'pdftohtml.cc' || echo '$(srcdir)/'`pdftohtml.cc
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-pdftohtml.Tpo $(DEPDIR)/pdftohtml-pdftohtml.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='pdftohtml.cc' object='pdftohtml-pdftohtml.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-pdftohtml.o `test -f 'pdftohtml.cc' || echo '$(srcdir)/'`pdftohtml.cc
+
+pdftohtml-pdftohtml.obj: pdftohtml.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-pdftohtml.obj -MD -MP -MF $(DEPDIR)/pdftohtml-pdftohtml.Tpo -c -o pdftohtml-pdftohtml.obj `if test -f 'pdftohtml.cc'; then $(CYGPATH_W) 'pdftohtml.cc'; else $(CYGPATH_W) '$(srcdir)/pdftohtml.cc'; fi`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-pdftohtml.Tpo $(DEPDIR)/pdftohtml-pdftohtml.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='pdftohtml.cc' object='pdftohtml-pdftohtml.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-pdftohtml.obj `if test -f 'pdftohtml.cc'; then $(CYGPATH_W) 'pdftohtml.cc'; else $(CYGPATH_W) '$(srcdir)/pdftohtml.cc'; fi`
+
+pdftohtml-HtmlFonts.o: HtmlFonts.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlFonts.o -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlFonts.Tpo -c -o pdftohtml-HtmlFonts.o `test -f 'HtmlFonts.cc' || echo '$(srcdir)/'`HtmlFonts.cc
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlFonts.Tpo $(DEPDIR)/pdftohtml-HtmlFonts.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlFonts.cc' object='pdftohtml-HtmlFonts.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlFonts.o `test -f 'HtmlFonts.cc' || echo '$(srcdir)/'`HtmlFonts.cc
+
+pdftohtml-HtmlFonts.obj: HtmlFonts.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlFonts.obj -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlFonts.Tpo -c -o pdftohtml-HtmlFonts.obj `if test -f 'HtmlFonts.cc'; then $(CYGPATH_W) 'HtmlFonts.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlFonts.cc'; fi`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlFonts.Tpo $(DEPDIR)/pdftohtml-HtmlFonts.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlFonts.cc' object='pdftohtml-HtmlFonts.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlFonts.obj `if test -f 'HtmlFonts.cc'; then $(CYGPATH_W) 'HtmlFonts.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlFonts.cc'; fi`
+
+pdftohtml-HtmlLinks.o: HtmlLinks.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlLinks.o -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlLinks.Tpo -c -o pdftohtml-HtmlLinks.o `test -f 'HtmlLinks.cc' || echo '$(srcdir)/'`HtmlLinks.cc
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlLinks.Tpo $(DEPDIR)/pdftohtml-HtmlLinks.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlLinks.cc' object='pdftohtml-HtmlLinks.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlLinks.o `test -f 'HtmlLinks.cc' || echo '$(srcdir)/'`HtmlLinks.cc
+
+pdftohtml-HtmlLinks.obj: HtmlLinks.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlLinks.obj -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlLinks.Tpo -c -o pdftohtml-HtmlLinks.obj `if test -f 'HtmlLinks.cc'; then $(CYGPATH_W) 'HtmlLinks.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlLinks.cc'; fi`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlLinks.Tpo $(DEPDIR)/pdftohtml-HtmlLinks.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlLinks.cc' object='pdftohtml-HtmlLinks.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlLinks.obj `if test -f 'HtmlLinks.cc'; then $(CYGPATH_W) 'HtmlLinks.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlLinks.cc'; fi`
+
+pdftohtml-HtmlOutputDev.o: HtmlOutputDev.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlOutputDev.o -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo -c -o pdftohtml-HtmlOutputDev.o `test -f 'HtmlOutputDev.cc' || echo '$(srcdir)/'`HtmlOutputDev.cc
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo $(DEPDIR)/pdftohtml-HtmlOutputDev.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlOutputDev.cc' object='pdftohtml-HtmlOutputDev.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlOutputDev.o `test -f 'HtmlOutputDev.cc' || echo '$(srcdir)/'`HtmlOutputDev.cc
+
+pdftohtml-HtmlOutputDev.obj: HtmlOutputDev.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlOutputDev.obj -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo -c -o pdftohtml-HtmlOutputDev.obj `if test -f 'HtmlOutputDev.cc'; then $(CYGPATH_W) 'HtmlOutputDev.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlOutputDev.cc'; fi`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo $(DEPDIR)/pdftohtml-HtmlOutputDev.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlOutputDev.cc' object='pdftohtml-HtmlOutputDev.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlOutputDev.obj `if test -f 'HtmlOutputDev.cc'; then $(CYGPATH_W) 'HtmlOutputDev.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlOutputDev.cc'; fi`
+
+pdftohtml-parseargs.o: parseargs.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-parseargs.o -MD -MP -MF $(DEPDIR)/pdftohtml-parseargs.Tpo -c -o pdftohtml-parseargs.o `test -f 'parseargs.cc' || echo '$(srcdir)/'`parseargs.cc
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-parseargs.Tpo $(DEPDIR)/pdftohtml-parseargs.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='parseargs.cc' object='pdftohtml-parseargs.o' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-parseargs.o `test -f 'parseargs.cc' || echo '$(srcdir)/'`parseargs.cc
+
+pdftohtml-parseargs.obj: parseargs.cc
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-parseargs.obj -MD -MP -MF $(DEPDIR)/pdftohtml-parseargs.Tpo -c -o pdftohtml-parseargs.obj `if test -f 'parseargs.cc'; then $(CYGPATH_W) 'parseargs.cc'; else $(CYGPATH_W) '$(srcdir)/parseargs.cc'; fi`
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-parseargs.Tpo $(DEPDIR)/pdftohtml-parseargs.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='parseargs.cc' object='pdftohtml-parseargs.obj' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-parseargs.obj `if test -f 'parseargs.cc'; then $(CYGPATH_W) 'parseargs.cc'; else $(CYGPATH_W) '$(srcdir)/parseargs.cc'; fi`
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-man1: $(dist_man1_MANS)
+ @$(NORMAL_INSTALL)
+ @list1='$(dist_man1_MANS)'; \
+ list2=''; \
+ test -n "$(man1dir)" \
+ && test -n "`echo $$list1$$list2`" \
+ || exit 0; \
+ echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \
+ { for i in $$list1; do echo "$$i"; done; \
+ if test -n "$$list2"; then \
+ for i in $$list2; do echo "$$i"; done \
+ | sed -n '/\.1[a-z]*$$/p'; \
+ fi; \
+ } | while read p; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; echo "$$p"; \
+ done | \
+ sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \
+ sed 'N;N;s,\n, ,g' | { \
+ list=; while read file base inst; do \
+ if test "$$base" = "$$inst"; then list="$$list $$file"; else \
+ echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \
+ $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \
+ fi; \
+ done; \
+ for i in $$list; do echo "$$i"; done | $(am__base_list) | \
+ while read files; do \
+ test -z "$$files" || { \
+ echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \
+ $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \
+ done; }
+
+uninstall-man1:
+ @$(NORMAL_UNINSTALL)
+ @list='$(dist_man1_MANS)'; test -n "$(man1dir)" || exit 0; \
+ files=`{ for i in $$list; do echo "$$i"; done; \
+ } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \
+ -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \
+ dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir)
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @list='$(MANS)'; if test -n "$$list"; then \
+ list=`for p in $$list; do \
+ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
+ if test -n "$$list" && \
+ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
+ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
+ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
+ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
+ echo " typically \`make maintainer-clean' will remove them" >&2; \
+ exit 1; \
+ else :; fi; \
+ else :; fi
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS) $(MANS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-man
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man: install-man1
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binPROGRAMS uninstall-man
+
+uninstall-man: uninstall-man1
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \
+ clean-generic clean-libtool ctags distclean distclean-compile \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-binPROGRAMS install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-man1 install-pdf install-pdf-am install-ps \
+ install-ps-am install-strip installcheck installcheck-am \
+ installdirs maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
+ uninstall-am uninstall-binPROGRAMS uninstall-man \
+ uninstall-man1
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/utils/parseargs.cc b/utils/parseargs.cc
new file mode 100644
index 00000000..a457a885
--- /dev/null
+++ b/utils/parseargs.cc
@@ -0,0 +1,220 @@
+/*
+ * parseargs.cc
+ *
+ * Command line argument parser.
+ *
+ * Copyright 1996-2003 Glyph & Cog, LLC
+ */
+
+/*========================================================================
+
+ Modified under the Poppler project - http://poppler.freedesktop.org
+
+ Poppler project changes to this file are under the GPLv2 or later license
+
+ All changes made under the Poppler project to this file are licensed
+ under GPL version 2 or later
+
+ Copyright (C) 2008, 2009 Albert Astals Cid <aacid@kde.org>
+ Copyright (C) 2011, 2012 Adrian Johnson <ajohnson@redneon.com>
+
+ To see a description of the changes please see the Changelog file that
+ came with your tarball or type make ChangeLog if you are building from git
+
+========================================================================*/
+
+#include <stdio.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include "parseargs.h"
+
+#include "goo/gstrtod.h"
+#include "goo/GooString.h"
+
+static const ArgDesc *findArg(const ArgDesc *args, char *arg);
+static GBool grabArg(const ArgDesc *arg, int i, int *argc, char *argv[]);
+
+/*
+ * Walk argv[1..*argc-1] and hand every switch that appears in <args> to
+ * grabArg(), which stores its value and removes it from argv.  Parsing
+ * stops at a literal "--", which is itself removed.  Returns gFalse if
+ * any recognised switch could not be processed.
+ */
+GBool parseArgs(const ArgDesc *args, int *argc, char *argv[]) {
+  GBool allOk = gTrue;
+  int pos = 1;
+
+  while (pos < *argc) {
+    if (strcmp(argv[pos], "--") == 0) {
+      // drop the "--" marker and leave everything after it untouched
+      --*argc;
+      for (int k = pos; k < *argc; ++k) {
+        argv[k] = argv[k + 1];
+      }
+      break;
+    }
+
+    const ArgDesc *match = findArg(args, argv[pos]);
+    if (!match) {
+      // not one of ours: keep it and look at the next entry
+      ++pos;
+      continue;
+    }
+
+    // grabArg() shifts argv down, so <pos> already indexes the next
+    // unprocessed entry and must not be advanced here
+    if (!grabArg(match, pos, argc, argv)) {
+      allOk = gFalse;
+    }
+  }
+  return allOk;
+}
+
+/*
+ * Write a usage summary to stderr: a "Usage:" line followed by one line
+ * per entry of <args> (switch, value type placeholder, description),
+ * with the descriptions aligned on the longest switch name.
+ */
+void printUsage(const char *program, const char *otherArgs, const ArgDesc *args) {
+  const ArgDesc *entry;
+  int widest = 0;
+
+  // the longest switch name determines the padding of every line
+  for (entry = args; entry->arg; ++entry) {
+    const int len = strlen(entry->arg);
+    if (len > widest) {
+      widest = len;
+    }
+  }
+
+  fprintf(stderr, "Usage: %s [options]", program);
+  if (otherArgs) {
+    fprintf(stderr, " %s", otherArgs);
+  }
+  fprintf(stderr, "\n");
+
+  for (entry = args; entry->arg; ++entry) {
+    const char *typeLabel = "";
+
+    switch (entry->kind) {
+    case argInt:
+    case argIntDummy:
+      typeLabel = " <int>";
+      break;
+    case argFP:
+    case argFPDummy:
+      typeLabel = " <fp>";
+      break;
+    case argString:
+    case argStringDummy:
+    case argGooString:
+      typeLabel = " <string>";
+      break;
+    default:
+      // flags (and anything unknown) take no value
+      break;
+    }
+
+    const int padWidth = 9 + widest - (int)strlen(entry->arg);
+    fprintf(stderr, " %s", entry->arg);
+    fprintf(stderr, "%-*s", padWidth, typeLabel);
+    if (entry->usage) {
+      fprintf(stderr, ": %s", entry->usage);
+    }
+    fprintf(stderr, "\n");
+  }
+}
+
+/*
+ * Return the descriptor in <args> whose switch text equals <arg>, or
+ * NULL if there is none.  Dummy kinds (argFlagDummy and above) are
+ * usage-only and are never matched.
+ */
+static const ArgDesc *findArg(const ArgDesc *args, char *arg) {
+  const ArgDesc *cur = args;
+
+  while (cur->arg) {
+    const GBool isRealSwitch = cur->kind < argFlagDummy;
+    if (isRealSwitch && strcmp(cur->arg, arg) == 0) {
+      return cur;
+    }
+    ++cur;
+  }
+  return NULL;
+}
+
+/*
+ * Process the switch at argv[i], described by <arg>: store its value
+ * through arg->val, then remove the switch (and its value, if one was
+ * consumed) from argv and adjust *argc.  Returns gFalse when a required
+ * value is missing or malformed; in that case only the switch itself is
+ * removed.
+ */
+static GBool grabArg(const ArgDesc *arg, int i, int *argc, char *argv[]) {
+  const GBool hasValue = i + 1 < *argc;
+  GBool success = gTrue;
+  int used = 1;   // argv entries to remove: 1 = switch only, 2 = switch + value
+
+  switch (arg->kind) {
+  case argFlag:
+    *(GBool *)arg->val = gTrue;
+    break;
+
+  case argInt:
+    if (hasValue && isInt(argv[i + 1])) {
+      *(int *)arg->val = atoi(argv[i + 1]);
+      used = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+
+  case argFP:
+    if (hasValue && isFP(argv[i + 1])) {
+      *(double *)arg->val = gatof(argv[i + 1]);
+      used = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+
+  case argString:
+    if (hasValue) {
+      // bounded copy into the caller's buffer, always NUL-terminated
+      char *dest = (char *)arg->val;
+      strncpy(dest, argv[i + 1], arg->size - 1);
+      dest[arg->size - 1] = '\0';
+      used = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+
+  case argGooString:
+    if (hasValue) {
+      ((GooString*)arg->val)->Set(argv[i + 1]);
+      used = 2;
+    } else {
+      success = gFalse;
+    }
+    break;
+
+  default:
+    // dummy kinds are filtered out by findArg(), so this indicates a bad table
+    fprintf(stderr, "Internal error in arg table\n");
+    break;
+  }
+
+  // close the gap left by the consumed entries
+  *argc -= used;
+  for (int k = i; k < *argc; ++k) {
+    argv[k] = argv[k + used];
+  }
+  return success;
+}
+
+/*
+ * Return gTrue if <s> is an optionally signed decimal integer: an
+ * optional '+' or '-' followed by one or more digits and nothing else.
+ * The empty string and a lone sign are rejected.
+ */
+GBool isInt(char *s) {
+  int nDigits = 0;
+
+  if (*s == '-' || *s == '+')
+    ++s;
+  // cast: passing a negative char to isdigit() is undefined behaviour
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++nDigits;
+  }
+  if (nDigits == 0 || *s)
+    return gFalse;
+  return gTrue;
+}
+
+/*
+ * Return gTrue if <s> is a decimal floating point literal:
+ *   [+-] digits [. digits] [ (e|E) [+-] digits ]
+ * At least one mantissa digit is required (before or after the '.'),
+ * and an exponent, if present, must contain at least one digit.
+ * The empty string, a lone sign and a lone "." are rejected.
+ */
+GBool isFP(char *s) {
+  int nMantissaDigits = 0;
+
+  if (*s == '-' || *s == '+')
+    ++s;
+  // casts: passing a negative char to isdigit() is undefined behaviour
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++nMantissaDigits;
+  }
+  if (*s == '.')
+    ++s;
+  while (isdigit((unsigned char)*s)) {
+    ++s;
+    ++nMantissaDigits;
+  }
+  // an exponent is only accepted after a non-empty mantissa
+  if (nMantissaDigits > 0 && (*s == 'e' || *s == 'E')) {
+    ++s;
+    if (*s == '-' || *s == '+')
+      ++s;
+    if (!isdigit((unsigned char)*s))
+      return gFalse;
+    do {
+      ++s;
+    } while (isdigit((unsigned char)*s));
+  }
+  if (nMantissaDigits == 0 || *s)
+    return gFalse;
+  return gTrue;
+}
diff --git a/utils/parseargs.h b/utils/parseargs.h
new file mode 100644
index 00000000..f035fa14
--- /dev/null
+++ b/utils/parseargs.h
@@ -0,0 +1,88 @@
+/*
+ * parseargs.h
+ *
+ * Command line argument parser.
+ *
+ * Copyright 1996-2003 Glyph & Cog, LLC
+ */
+
+/*========================================================================
+
+ Modified under the Poppler project - http://poppler.freedesktop.org
+
+ All changes made under the Poppler project to this file are licensed
+ under GPL version 2 or later
+
+ Copyright (C) 2008 Albert Astals Cid <aacid@kde.org>
+ Copyright (C) 2011 Adrian Johnson <ajohnson@redneon.com>
+
+ To see a description of the changes please see the Changelog file that
+ came with your tarball or type make ChangeLog if you are building from git
+
+========================================================================*/
+
+#ifndef PARSEARGS_H
+#define PARSEARGS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "goo/gtypes.h"
+
+/*
+ * Argument kinds.
+ *
+ * NOTE: the order matters.  findArg() in parseargs.cc only matches
+ * entries whose kind is < argFlagDummy, so every real kind must be
+ * listed before argFlagDummy and every usage-only dummy kind at or
+ * after it.
+ */
+typedef enum {
+ argFlag, /* flag (present / not-present) */
+ /* [val: GBool *] */
+ argInt, /* integer arg */
+ /* [val: int *] */
+ argFP, /* floating point arg */
+ /* [val: double *] */
+ argString, /* string arg */
+ /* [val: char *] */
+ argGooString, /* string arg */
+ /* [val: GooString *] */
+ /* dummy entries -- these show up in the usage listing only; */
+ /* useful for X args, for example */
+ argFlagDummy,
+ argIntDummy,
+ argFPDummy,
+ argStringDummy
+} ArgKind;
+
+/*
+ * Argument descriptor.
+ */
+typedef struct {
+ const char *arg; /* the command line switch */
+ ArgKind kind; /* kind of arg */
+ void *val; /* place to store value */
+ int size; /* for argString: size of string */
+ const char *usage; /* usage string */
+} ArgDesc;
+
+/*
+ * Parse command line. Removes all args which are found in the arg
+ * descriptor list <args>. Stops parsing if "--" is found (and removes
+ * it). Returns gFalse if there was an error.
+ */
+extern GBool parseArgs(const ArgDesc *args, int *argc, char *argv[]);
+
+/*
+ * Print usage message, based on arg descriptor list.
+ */
+extern void printUsage(const char *program, const char *otherArgs, const ArgDesc *args);
+
+/*
+ * Check if a string is a valid integer or floating point number.
+ */
+extern GBool isInt(char *s);
+extern GBool isFP(char *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/utils/pdf2xml.dtd b/utils/pdf2xml.dtd
new file mode 100644
index 00000000..bf7f14f6
--- /dev/null
+++ b/utils/pdf2xml.dtd
@@ -0,0 +1,49 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!ELEMENT pdf2xml (page+, outline?)>
+<!ATTLIST pdf2xml
+ producer CDATA #REQUIRED
+ version CDATA #REQUIRED
+>
+<!ELEMENT page (fontspec*, image*, text*)>
+<!ATTLIST page
+ number CDATA #REQUIRED
+ position CDATA #REQUIRED
+ top CDATA #REQUIRED
+ left CDATA #REQUIRED
+ height CDATA #REQUIRED
+ width CDATA #REQUIRED
+>
+<!ELEMENT fontspec EMPTY>
+<!ATTLIST fontspec
+ id CDATA #REQUIRED
+ size CDATA #REQUIRED
+ family CDATA #REQUIRED
+ color CDATA #REQUIRED
+>
+<!ELEMENT text (#PCDATA | b | i | a)*>
+<!ATTLIST text
+ top CDATA #REQUIRED
+ left CDATA #REQUIRED
+ width CDATA #REQUIRED
+ height CDATA #REQUIRED
+ font CDATA #REQUIRED
+>
+<!ELEMENT b (#PCDATA | i)*>
+<!ELEMENT i (#PCDATA | b)*>
+<!ELEMENT a (#PCDATA)>
+<!ATTLIST a
+ href CDATA #REQUIRED
+>
+<!ELEMENT image EMPTY>
+<!ATTLIST image
+ top CDATA #REQUIRED
+ left CDATA #REQUIRED
+ width CDATA #REQUIRED
+ height CDATA #REQUIRED
+ src CDATA #REQUIRED
+>
+<!ELEMENT outline (item | outline)*>
+<!ELEMENT item (#PCDATA)>
+<!ATTLIST item
+ page CDATA #IMPLIED
+>
diff --git a/utils/pdfdetach.1 b/utils/pdfdetach.1
new file mode 100644
index 00000000..c80dde72
--- /dev/null
+++ b/utils/pdfdetach.1
@@ -0,0 +1,103 @@
+.\" Copyright 2011 Glyph & Cog, LLC
+.TH pdfdetach 1 "15 August 2011"
+.SH NAME
+pdfdetach \- Portable Document Format (PDF) document embedded file
+extractor (version 3.03)
+.SH SYNOPSIS
+.B pdfdetach
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdfdetach
+lists or extracts embedded files (attachments) from a Portable
+Document Format (PDF) file.
+.SH CONFIGURATION FILE
+Pdfdetach reads a configuration file at startup. It first tries to
+find the user's private config file, ~/.xpdfrc. If that doesn't
+exist, it looks for a system-wide config file, typically
+/usr/local/etc/xpdfrc (but this location can be changed when pdfdetach
+is built). See the
+.BR xpdfrc (5)
+man page for details.
+.SH OPTIONS
+Some of the following options can be set with configuration file
+commands. These are listed in square brackets with the description of
+the corresponding command line option.
+.TP
+.B \-list
+List all of the embedded files in the PDF file. File names are
+converted to the text encoding specified by the "\-enc" switch.
+.TP
+.BI \-save " number"
+Save the specified embedded file. By default, this uses the file name
+associated with the embedded file (as printed by the "\-list" switch);
+the file name can be changed with the "\-o" switch.
+.TP
+.BI \-saveall
+Save all of the embedded files. This uses the file names associated
+with the embedded files (as printed by the "\-list" switch). By
+default, the files are saved in the current directory; this can be
+changed with the "\-o" switch.
+.TP
+.BI \-o " path"
+Set the file name used when saving an embedded file with the "\-save"
+switch, or the directory used by "\-saveall".
+.TP
+.BI \-enc " encoding-name"
+Sets the encoding to use for text output (embedded file names). The
+.I encoding\-name
+must be defined with the unicodeMap command (see
+.BR xpdfrc (5)).
+This defaults to "Latin1" (which is a built-in encoding).
+.RB "[config file: " textEncoding ]
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.BI \-cfg " config-file"
+Read
+.I config-file
+in place of ~/.xpdfrc or the system-wide config file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfdetach software and documentation are copyright 1996-2011 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfimages (1),
+.BR pdffonts (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1) \ No newline at end of file
diff --git a/utils/pdfdetach.cc b/utils/pdfdetach.cc
new file mode 100644
index 00000000..3fbdfb7e
--- /dev/null
+++ b/utils/pdfdetach.cc
@@ -0,0 +1,318 @@
+//========================================================================
+//
+// pdfdetach.cc
+//
+// Copyright 2010 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include "goo/gtypes.h"
+#include "goo/gmem.h"
+#include "goo/GooList.h"
+#include "parseargs.h"
+#include "Annot.h"
+#include "GlobalParams.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "FileSpec.h"
+#include "CharTypes.h"
+#include "Catalog.h"
+#include "UnicodeMap.h"
+#include "PDFDocEncoding.h"
+#include "Error.h"
+
+// Command line option state.  These are written by parseArgs() through
+// the argDesc table below and read by main().
+static GBool doList = gFalse;
+// 0 means "-save" was not given; main() counts (saveNum != 0) as one of
+// the three mutually exclusive actions.
+static int saveNum = 0;
+static GBool saveAll = gFalse;
+static char savePath[1024] = "";
+static char textEncName[128] = "";
+// A first byte of '\001' marks "password not supplied"; main() only
+// builds a password string when that byte has been overwritten.
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Switch table: {switch, kind, storage, storage size (argString only),
+// usage text}.  The list is terminated by an entry whose switch is NULL.
+static ArgDesc argDesc[] = {
+ {"-list", argFlag, &doList, 0,
+ "list all embedded files"},
+ {"-save", argInt, &saveNum, 0,
+ "save the specified embedded file"},
+ {"-saveall", argFlag, &saveAll, 0,
+ "save all embedded files"},
+ {"-o", argString, savePath, sizeof(savePath),
+ "file name for the saved embedded file"},
+ {"-enc", argString, textEncName, sizeof(textEncName),
+ "output text encoding name"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+ "configuration file to use in place of .xpdfrc"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// pdfdetach entry point.
+//
+// Exactly one of -list, -save <n> or -saveall must be given, together
+// with one PDF file name.  Embedded files are collected from the
+// document catalog and from file-attachment annotations on every page,
+// then listed or written to disk.
+//
+// Exit codes: 0 success (also for -v / -h), 1 the PDF could not be
+// opened, 2 an embedded file could not be saved, 99 any other error.
+int main(int argc, char *argv[]) {
+  GooString *fileName;
+  UnicodeMap *uMap;
+  GooString *ownerPW, *userPW;
+  PDFDoc *doc;
+  char uBuf[8];
+  char path[1024];
+  char *p;
+  GBool ok;
+  int exitCode;
+  GooList *embeddedFiles = NULL;
+  int nFiles, nPages, n, i, j;
+  FileSpec *fileSpec;
+  Page *page;
+  Annots *annots;
+  Annot *annot;
+  GooString *s1;
+  Unicode u;
+  GBool isUnicode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  // exactly one action must be selected
+  if ((doList ? 1 : 0) +
+      ((saveNum != 0) ? 1 : 0) +
+      (saveAll ? 1 : 0) != 1) {
+    ok = gFalse;
+  }
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfdetach", "<PDF-file>", argDesc);
+    }
+    // an explicit request for version/help is not an error
+    // (same convention as pdffonts)
+    if (printVersion || printHelp)
+      exitCode = 0;
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+  }
+
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(errConfig, -1, "Couldn't get text encoding");
+    delete fileName;
+    goto err1;
+  }
+
+  // open PDF file
+  // ('\001' in the first byte means the password was not supplied)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+  // the name is not needed after this point; release it here so it is
+  // freed on every exit path (pdffonts does the same)
+  delete fileName;
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
+  }
+
+  // collect the embedded files named in the catalog ...
+  embeddedFiles = new GooList();
+  for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
+    embeddedFiles->append(doc->getCatalog()->embeddedFile(i));
+
+  // ... and those attached through file-attachment annotations
+  nPages = doc->getCatalog()->getNumPages();
+  for (i = 0; i < nPages; ++i) {
+    page = doc->getCatalog()->getPage(i + 1);
+    annots = page->getAnnots();
+    // a page without annotations must not stop the scan of later pages
+    if (!annots)
+      continue;
+
+    for (j = 0; j < annots->getNumAnnots(); ++j) {
+      annot = annots->getAnnot(j);
+      if (annot->getType() != Annot::typeFileAttachment)
+        continue;
+      embeddedFiles->append(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
+    }
+  }
+
+  nFiles = embeddedFiles->getLength();
+
+  // list embedded files
+  if (doList) {
+    printf("%d embedded files\n", nFiles);
+    for (i = 0; i < nFiles; ++i) {
+      fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
+      printf("%d: ", i+1);
+      s1 = fileSpec->getFileName();
+      // a leading FE FF byte pair selects 2-byte big-endian characters;
+      // otherwise each byte is mapped through pdfDocEncoding
+      if (s1->getLength() >= 2 &&
+          (s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
+        isUnicode = gTrue;
+        j = 2;
+      } else {
+        isUnicode = gFalse;
+        j = 0;
+      }
+      while (j < s1->getLength()) {
+        if (isUnicode) {
+          u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
+          j += 2;
+        } else {
+          u = pdfDocEncoding[s1->getChar(j) & 0xff];
+          ++j;
+        }
+        n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
+        fwrite(uBuf, 1, n, stdout);
+      }
+      fputc('\n', stdout);
+    }
+
+  // save all embedded files
+  } else if (saveAll) {
+    for (i = 0; i < nFiles; ++i) {
+      fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
+      // start the output path with "<savePath>/" when -o was given
+      if (savePath[0]) {
+        n = strlen(savePath);
+        if (n > (int)sizeof(path) - 2) {
+          n = sizeof(path) - 2;
+        }
+        memcpy(path, savePath, n);
+        path[n] = '/';
+        p = path + n + 1;
+      } else {
+        p = path;
+      }
+      // NOTE(review): the name below comes straight from the PDF and is
+      // appended unchecked, so it may contain '/' or ".." components;
+      // consider sanitising it before using it as an output path.
+      s1 = fileSpec->getFileName();
+      if (s1->getLength() >= 2 &&
+          (s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
+        isUnicode = gTrue;
+        j = 2;
+      } else {
+        isUnicode = gFalse;
+        j = 0;
+      }
+      while (j < s1->getLength()) {
+        if (isUnicode) {
+          u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
+          j += 2;
+        } else {
+          u = pdfDocEncoding[s1->getChar(j) & 0xff];
+          ++j;
+        }
+        n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
+        // truncate rather than overflow; keeps room for the final NUL
+        if (p + n >= path + sizeof(path))
+          break;
+        memcpy(p, uBuf, n);
+        p += n;
+      }
+      *p = '\0';
+
+      if (!fileSpec->getEmbeddedFile()->save(path)) {
+        // report the full path; <p> points at the terminating NUL here
+        error(errIO, -1, "Error saving embedded file as '{0:s}'", path);
+        exitCode = 2;
+        goto err2;
+      }
+    }
+
+  // save an embedded file
+  } else {
+    if (saveNum < 1 || saveNum > nFiles) {
+      error(errCommandLine, -1, "Invalid file number");
+      goto err2;
+    }
+
+    fileSpec = static_cast<FileSpec *>(embeddedFiles->get(saveNum - 1));
+    if (savePath[0]) {
+      // -o names the output file directly
+      p = savePath;
+    } else {
+      // otherwise use the embedded file's own name, converted to the
+      // output text encoding
+      // NOTE(review): same unchecked-name caveat as in the -saveall branch.
+      p = path;
+      s1 = fileSpec->getFileName();
+      if (s1->getLength() >= 2 &&
+          (s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
+        isUnicode = gTrue;
+        j = 2;
+      } else {
+        isUnicode = gFalse;
+        j = 0;
+      }
+      while (j < s1->getLength()) {
+        if (isUnicode) {
+          u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
+          j += 2;
+        } else {
+          u = pdfDocEncoding[s1->getChar(j) & 0xff];
+          ++j;
+        }
+        n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
+        if (p + n >= path + sizeof(path))
+          break;
+        memcpy(p, uBuf, n);
+        p += n;
+      }
+      *p = '\0';
+      p = path;
+    }
+
+    if (!fileSpec->getEmbeddedFile()->save(p)) {
+      error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
+      exitCode = 2;
+      goto err2;
+    }
+  }
+
+  exitCode = 0;
+
+  // clean up
+ err2:
+  if (embeddedFiles)
+    deleteGooList(embeddedFiles, FileSpec);
+  uMap->decRefCnt();
+  delete doc;
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdffonts.1 b/utils/pdffonts.1
new file mode 100644
index 00000000..4afc3956
--- /dev/null
+++ b/utils/pdffonts.1
@@ -0,0 +1,119 @@
+.\" Copyright 1999-2011 Glyph & Cog, LLC
+.TH pdffonts 1 "15 August 2011"
+.SH NAME
+pdffonts \- Portable Document Format (PDF) font analyzer (version
+3.03)
+.SH SYNOPSIS
+.B pdffonts
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdffonts
+lists the fonts used in a Portable Document Format (PDF) file along
+with various information for each font.
+.PP
+The following information is listed for each font:
+.TP
+.B name
+the font name, exactly as given in the PDF file (potentially including
+a subset prefix)
+.TP
+.B type
+the font type -- see below for details
+.TP
+.B encoding
+the font encoding
+.TP
+.B emb
+"yes" if the font is embedded in the PDF file
+.TP
+.B sub
+"yes" if the font is a subset
+.TP
+.B uni
+"yes" if there is an explicit "ToUnicode" map in the PDF file (the
+absence of a ToUnicode map doesn't necessarily mean that the text
+can't be converted to Unicode)
+.TP
+.B object ID
+the font dictionary object ID (number and generation)
+.PP
+PDF files can contain the following types of fonts:
+.PP
+.RS
+Type 1
+.RE
+.RS
+Type 1C -- aka Compact Font Format (CFF)
+.RE
+.RS
+Type 3
+.RE
+.RS
+TrueType
+.RE
+.RS
+CID Type 0 -- 16-bit font with no specified type
+.RE
+.RS
+CID Type 0C -- 16-bit PostScript CFF font
+.RE
+.RS
+CID TrueType -- 16-bit TrueType font
+.RE
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to analyze.
+.TP
+.BI \-l " number"
+Specifies the last page to analyze.
+.TP
+.BI \-subst
+List the substitute fonts that poppler will use for non-embedded fonts.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdffonts software and documentation are copyright 1996-2011 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdffonts.cc b/utils/pdffonts.cc
new file mode 100644
index 00000000..820abbf7
--- /dev/null
+++ b/utils/pdffonts.cc
@@ -0,0 +1,224 @@
+//========================================================================
+//
+// pdffonts.cc
+//
+// Copyright 2001-2007 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006 Dominic Lachowicz <cinamod@hotmail.com>
+// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "FontInfo.h"
+
+// Display names for the "type" column; main() indexes this table with
+// FontInfo::getType().
+// NOTE(review): presumably the entries must stay in the same order as the
+// font type enumeration returned by getType() -- confirm against FontInfo.h.
+static const char *fontTypeNames[] = {
+ "unknown",
+ "Type 1",
+ "Type 1C",
+ "Type 1C (OT)",
+ "Type 3",
+ "TrueType",
+ "TrueType (OT)",
+ "CID Type 0",
+ "CID Type 0C",
+ "CID Type 0C (OT)",
+ "CID TrueType",
+ "CID TrueType (OT)"
+};
+
+// Command line option state, written by parseArgs() through argDesc.
+static int firstPage = 1;
+// Values below 1 (including the default 0) make main() use the
+// document's last page.
+static int lastPage = 0;
+static GBool showSubst = gFalse;
+// A first byte of '\001' marks "password not supplied" (checked in main()).
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Switch table: {switch, kind, storage, storage size (argString only),
+// usage text}, terminated by an entry whose switch is NULL.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to examine"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to examine"},
+ {"-subst", argFlag, &showSubst, 0,
+ "show font substitutions"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// pdffonts entry point: prints one table row per font found on pages
+// firstPage..lastPage of the given PDF, or, with -subst, the substitute
+// font and font file for each font that has a file.
+// Exit codes: 0 on success (also for -v / -h), 1 if the PDF could not be
+// opened, 99 for a command line error.
+int main(int argc, char *argv[]) {
+ PDFDoc *doc;
+ GooString *fileName;
+ GooString *ownerPW, *userPW;
+ GBool ok;
+ int exitCode;
+
+ exitCode = 99;
+
+ // parse args
+ ok = parseArgs(argDesc, &argc, argv);
+ if (!ok || argc != 2 || printVersion || printHelp) {
+ fprintf(stderr, "pdffonts version %s\n", PACKAGE_VERSION);
+ fprintf(stderr, "%s\n", popplerCopyright);
+ fprintf(stderr, "%s\n", xpdfCopyright);
+ if (!printVersion) {
+ printUsage("pdffonts", "<PDF-file>", argDesc);
+ }
+ // an explicit -v / -h request is reported as success
+ if (printVersion || printHelp)
+ exitCode = 0;
+ goto err0;
+ }
+ fileName = new GooString(argv[1]);
+
+ // read config file
+ globalParams = new GlobalParams();
+
+ // open PDF file
+ // ('\001' in the first byte means the password was not supplied)
+ if (ownerPassword[0] != '\001') {
+ ownerPW = new GooString(ownerPassword);
+ } else {
+ ownerPW = NULL;
+ }
+ if (userPassword[0] != '\001') {
+ userPW = new GooString(userPassword);
+ } else {
+ userPW = NULL;
+ }
+ // a file name of "-" is replaced by the URI "fd://0"
+ // (presumably standard input -- confirm against PDFDocFactory)
+ if (fileName->cmp("-") == 0) {
+ delete fileName;
+ fileName = new GooString("fd://0");
+ }
+
+ doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+ delete fileName;
+
+ if (userPW) {
+ delete userPW;
+ }
+ if (ownerPW) {
+ delete ownerPW;
+ }
+ if (!doc->isOk()) {
+ exitCode = 1;
+ goto err1;
+ }
+
+ // get page range
+ // (clamp to the document: first page >= 1, last page <= page count)
+ if (firstPage < 1) {
+ firstPage = 1;
+ }
+ if (lastPage < 1 || lastPage > doc->getNumPages()) {
+ lastPage = doc->getNumPages();
+ }
+
+ // get the fonts
+ // The extra scope keeps these declarations out of the way of the
+ // goto err0 / err1 jumps above and destroys the scanner before doc is
+ // deleted at err1.
+ {
+ FontInfoScanner scanner(doc, firstPage - 1);
+ GooList *fonts = scanner.scan(lastPage - firstPage + 1);
+
+ if (showSubst) {
+ // print the font substitutions
+ printf("name object ID substitute font substitute font file\n");
+ printf("------------------------------------ --------- ------------------------------------ ------------------------------------\n");
+ if (fonts) {
+ for (int i = 0; i < fonts->getLength(); ++i) {
+ FontInfo *font = (FontInfo *)fonts->get(i);
+ // only fonts that have a font file are listed in this mode
+ if (font->getFile()) {
+ printf("%-36s",
+ font->getName() ? font->getName()->getCString() : "[none]");
+ const Ref fontRef = font->getRef();
+ // NOTE(review): gen >= 100000 appears to mark a font without a real
+ // object reference -- confirm against FontInfo
+ if (fontRef.gen >= 100000) {
+ printf(" [none]");
+ } else {
+ printf(" %6d %2d", fontRef.num, fontRef.gen);
+ }
+ printf(" %-36s %s\n",
+ font->getSubstituteName() ? font->getSubstituteName()->getCString() : "[none]",
+ font->getFile()->getCString());
+ }
+ // the list elements and the list itself are freed here
+ delete font;
+ }
+ delete fonts;
+ }
+ } else {
+ // print the font info
+ printf("name type encoding emb sub uni object ID\n");
+ printf("------------------------------------ ----------------- ---------------- --- --- --- ---------\n");
+ if (fonts) {
+ for (int i = 0; i < fonts->getLength(); ++i) {
+ FontInfo *font = (FontInfo *)fonts->get(i);
+ printf("%-36s %-17s %-16s %-3s %-3s %-3s",
+ font->getName() ? font->getName()->getCString() : "[none]",
+ fontTypeNames[font->getType()],
+ font->getEncoding()->getCString(),
+ font->getEmbedded() ? "yes" : "no",
+ font->getSubset() ? "yes" : "no",
+ font->getToUnicode() ? "yes" : "no");
+ const Ref fontRef = font->getRef();
+ // NOTE(review): same gen >= 100000 convention as in the -subst branch
+ if (fontRef.gen >= 100000) {
+ printf(" [none]\n");
+ } else {
+ printf(" %6d %2d\n", fontRef.num, fontRef.gen);
+ }
+ delete font;
+ }
+ delete fonts;
+ }
+ }
+ }
+
+ exitCode = 0;
+
+ // clean up; err0 is the target for exits taken before anything was allocated
+ err1:
+ delete doc;
+ delete globalParams;
+ err0:
+
+ // check for memory leaks
+ Object::memCheck(stderr);
+ gMemReport(stderr);
+
+ return exitCode;
+}
+
+
diff --git a/utils/pdfimages.1 b/utils/pdfimages.1
new file mode 100644
index 00000000..955d8b3c
--- /dev/null
+++ b/utils/pdfimages.1
@@ -0,0 +1,189 @@
+.\" Copyright 1998-2011 Glyph & Cog, LLC
+.TH pdfimages 1 "15 August 2011"
+.SH NAME
+pdfimages \- Portable Document Format (PDF) image extractor
+(version 3.03)
+.SH SYNOPSIS
+.B pdfimages
+[options]
+.I PDF-file image-root
+.SH DESCRIPTION
+.B Pdfimages
+saves images from a Portable Document Format (PDF) file as Portable
+Pixmap (PPM), Portable Bitmap (PBM), or JPEG files.
+.PP
+Pdfimages reads the PDF file
+.IR PDF-file ,
+scans one or more pages, and writes one PPM, PBM, or JPEG file for each image,
+.IR image-root - nnn . xxx ,
+where
+.I nnn
+is the image number and
+.I xxx
+is the image type (.ppm, .pbm, .jpg).
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to scan.
+.TP
+.BI \-l " number"
+Specifies the last page to scan.
+.TP
+.B \-j
+Normally, all images are written as PBM (for monochrome images) or PPM
+(for non-monochrome images) files. With this option, images in DCT
+format are saved as JPEG files. All non-DCT images are saved in
+PBM/PPM format as usual.
+.TP
+.B \-list
+Instead of writing the images, list the images along with various information for each image. Do not specify an
+.IR image-root
+with this option.
+.IP
+The following information is listed for each image:
+.RS
+.TP
+.B page
+the page number containing the image
+.TP
+.B num
+the image number
+.TP
+.B type
+the image type:
+.PP
+.RS
+image - an opaque image
+.RE
+.RS
+mask - a monochrome mask image
+.RE
+.RS
+smask - a soft-mask image
+.RE
+.RS
+stencil - a monochrome mask image used for painting a color or pattern
+.RE
+.PP
+Note: Transparency in images is represented in PDF using a separate image for the image and the mask/smask.
+The mask/smask used as part of a transparent image always immediately follows the image in the image list.
+.TP
+.B width
+image width (in pixels)
+.TP
+.B height
+image height (in pixels)
+.PP
+Note: the image width/height is the size of the embedded image, not the size the image will be rendered at.
+.TP
+.B color
+image color space:
+.PP
+.RS
+gray - Gray
+.RE
+.RS
+rgb - RGB
+.RE
+.RS
+cmyk - CMYK
+.RE
+.RS
+lab - L*a*b
+.RE
+.RS
+icc - ICC Based
+.RE
+.RS
+index - Indexed Color
+.RE
+.RS
+sep - Separation
+.RE
+.RS
+devn - DeviceN
+.RE
+.TP
+.B comp
+number of color components
+.TP
+.B bpc
+bits per component
+.TP
+.B enc
+encoding:
+.PP
+.RS
+image - raster image (may be Flate or LZW compressed but does not use an image encoding)
+.RE
+.RS
+jpeg - Joint Photographic Experts Group
+.RE
+.RS
+jp2 - JPEG2000
+.RE
+.RS
+jbig2 - Joint Bi-Level Image Experts Group
+.RE
+.RS
+ccitt - CCITT Group 3 or Group 4 Fax
+.RE
+.TP
+.B interp
+"yes" if the interpolation is to be performed when scaling up the image
+.TP
+.B object ID
+the image object ID (number and generation)
+.RE
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-p
+Include page numbers in output file names.
+.TP
+.B \-q
+Don't print any messages or errors.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfimages software and documentation are copyright 1998-2011 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc
new file mode 100644
index 00000000..82c301c7
--- /dev/null
+++ b/utils/pdfimages.cc
@@ -0,0 +1,191 @@
+//========================================================================
+//
+// pdfimages.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de>
+// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "ImageOutputDev.h"
+#include "Error.h"
+
+// Command-line option state for pdfimages. Each variable below is the
+// storage target of one or more entries in argDesc; the initializers are
+// the defaults used when the option is not given.
+static int firstPage = 1;
+// main() replaces any value < 1 with the document's page count.
+static int lastPage = 0;
+static GBool listImages = gFalse;
+static GBool dumpJPEG = gFalse;
+static GBool pageNames = gFalse;
+// A first byte of '\001' marks "password not supplied"; main() tests for it
+// before building the GooString passed to the document factory.
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool quiet = gFalse;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Option table handed to parseArgs() and printUsage(). Each entry is
+// {flag, kind, storage, storage size (string options only), help text}.
+// The trailing {NULL} entry presumably terminates the table -- confirm
+// against parseargs.h.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to convert"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to convert"},
+ {"-j", argFlag, &dumpJPEG, 0,
+ "write JPEG images as JPEG files"},
+ {"-list", argFlag, &listImages, 0,
+ "print list of images instead of saving"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-p", argFlag, &pageNames, 0,
+ "include page numbers in output file names"},
+ {"-q", argFlag, &quiet, 0,
+ "don't print any messages or errors"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ // the four help spellings all set the same flag
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// Entry point for pdfimages.
+//
+// Parses the command line, opens the requested PDF ("-" is mapped to
+// "fd://0"), and feeds every page in the selected range to an
+// ImageOutputDev.
+//
+// Exit code: 0 on success and for -v / -h, 1 when the document is not ok,
+// 3 when copying is not allowed (only if ENFORCE_PERMISSIONS is defined),
+// 99 for a usage error.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  char *imgRoot = NULL;
+  GooString *ownerPW, *userPW;
+  ImageOutputDev *imgOut;
+  GBool ok;
+  int exitCode = 99;
+  int wantedArgc;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  // -list needs only the PDF file; otherwise an image root is required too
+  wantedArgc = listImages ? 2 : 3;
+  if (!ok || argc != wantedArgc || printVersion || printHelp) {
+    fprintf(stderr, "pdfimages version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfimages", "<PDF-file> <image-root>", argDesc);
+    }
+    // an explicit request for version or help is not an error
+    if (printVersion || printHelp) {
+      exitCode = 0;
+    }
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+  if (!listImages) {
+    imgRoot = argv[2];
+  }
+
+  // read config file
+  globalParams = new GlobalParams();
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file; a leading '\001' means the password option was not given
+  ownerPW = NULL;
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  }
+  userPW = NULL;
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  }
+  if (fileName->cmp("-") == 0) {
+    delete fileName;
+    fileName = new GooString("fd://0");
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+  delete fileName;
+
+  // deleting a null pointer is a no-op, so no guards are needed here
+  delete userPW;
+  delete ownerPW;
+
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // check for copy permission
+#ifdef ENFORCE_PERMISSIONS
+  if (!doc->okToCopy()) {
+    error(errNotAllowed, -1, "Copying of images from this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // write image files
+  imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages);
+  if (imgOut->isOk()) {
+    doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
+                      gTrue, gFalse, gFalse);
+  }
+  delete imgOut;
+
+  exitCode = 0;
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
new file mode 100644
index 00000000..a3ad1c36
--- /dev/null
+++ b/utils/pdfinfo.1
@@ -0,0 +1,146 @@
+.\" Copyright 1999-2011 Glyph & Cog, LLC
+.TH pdfinfo 1 "15 August 2011"
+.SH NAME
+pdfinfo \- Portable Document Format (PDF) document information
+extractor (version 3.03)
+.SH SYNOPSIS
+.B pdfinfo
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdfinfo
+prints the contents of the \'Info' dictionary (plus some other useful
+information) from a Portable Document Format (PDF) file.
+.PP
+The \'Info' dictionary contains the following values:
+.PP
+.RS
+title
+.RE
+.RS
+subject
+.RE
+.RS
+keywords
+.RE
+.RS
+author
+.RE
+.RS
+creator
+.RE
+.RS
+producer
+.RE
+.RS
+creation date
+.RE
+.RS
+modification date
+.RE
+.PP
+In addition, the following information is printed:
+.PP
+.RS
+tagged (yes/no)
+.RE
+.RS
+form (AcroForm / XFA / none)
+.RE
+.RS
+page count
+.RE
+.RS
+encrypted flag (yes/no)
+.RE
+.RS
+print and copy permissions (if encrypted)
+.RE
+.RS
+page size
+.RE
+.RS
+file size
+.RE
+.RS
+linearized (yes/no)
+.RE
+.RS
+PDF version
+.RE
+.RS
+metadata (only if requested)
+.RE
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to examine. If multiple pages are requested
+using the "\-f" and "\-l" options, the size of each requested page (and,
+optionally, the bounding boxes for each requested page) are printed.
+Otherwise, only page one is examined.
+.TP
+.BI \-l " number"
+Specifies the last page to examine.
+.TP
+.B \-box
+Prints the page box bounding boxes: MediaBox, CropBox, BleedBox,
+TrimBox, and ArtBox.
+.TP
+.B \-meta
+Prints document-level metadata. (This is the "Metadata" stream from
+the PDF file's Catalog object.)
+.TP
+.B \-rawdates
+Prints the raw (undecoded) date strings, directly from the PDF file.
+.TP
+.BI \-enc " encoding-name"
+Sets the encoding to use for text output. This defaults to "UTF-8".
+.TP
+.B \-listenc
+Lists the available encodings.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdfinfo software and documentation are copyright 1996-2011 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc
new file mode 100644
index 00000000..cdc5375d
--- /dev/null
+++ b/utils/pdfinfo.cc
@@ -0,0 +1,464 @@
+//========================================================================
+//
+// pdfinfo.cc
+//
+// Copyright 1998-2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com>
+// Copyright (C) 2007-2010, 2012 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com>
+// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <time.h>
+#include <math.h>
+#include "parseargs.h"
+#include "printencodings.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "CharTypes.h"
+#include "UnicodeMap.h"
+#include "PDFDocEncoding.h"
+#include "Error.h"
+#include "DateInfo.h"
+
+// Helpers defined after main().
+static void printInfoString(Dict *infoDict, const char *key, const char *text,
+ UnicodeMap *uMap);
+static void printInfoDate(Dict *infoDict, const char *key, const char *text);
+static void printBox(const char *text, PDFRectangle *box);
+
+// Command-line option state for pdfinfo. Each variable is the storage target
+// of one or more entries in argDesc; the initializers are the defaults.
+static int firstPage = 1;
+// 0 means "-l not given"; main() then reports a single page only.
+static int lastPage = 0;
+static GBool printBoxes = gFalse;
+static GBool printMetadata = gFalse;
+static GBool rawDates = gFalse;
+// Empty string: main() leaves the text encoding at its configured default.
+static char textEncName[128] = "";
+// A first byte of '\001' marks "password not supplied" (tested in main()).
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+static GBool printEnc = gFalse;
+
+// Option table handed to parseArgs() and printUsage(). Each entry is
+// {flag, kind, storage, storage size (string options only), help text}.
+// The trailing {NULL} entry presumably terminates the table -- confirm
+// against parseargs.h.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to convert"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to convert"},
+ {"-box", argFlag, &printBoxes, 0,
+ "print the page bounding boxes"},
+ {"-meta", argFlag, &printMetadata, 0,
+ "print the document metadata (XML)"},
+ {"-rawdates", argFlag, &rawDates, 0,
+ "print the undecoded date strings directly from the PDF file"},
+ {"-enc", argString, textEncName, sizeof(textEncName),
+ "output text encoding name"},
+ {"-listenc",argFlag, &printEnc, 0,
+ "list available encodings"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ // the four help spellings all set the same flag
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// Entry point for pdfinfo.
+//
+// Parses the command line, opens the PDF ("-" is mapped to "fd://0") and
+// prints, in order: the Info dictionary entries, tagging and form status,
+// page count, encryption flags, per-page size and rotation, optional page
+// boxes, file size, linearization flag, PDF version and optional metadata.
+//
+// Exit code: 0 on success and for -v / -h / -listenc, 1 when the document is
+// not ok, 99 for a usage error or when no text encoding can be obtained.
+int main(int argc, char *argv[]) {
+ PDFDoc *doc;
+ GooString *fileName;
+ GooString *ownerPW, *userPW;
+ UnicodeMap *uMap;
+ Page *page;
+ Object info, xfa;
+ Object *acroForm;
+ char buf[256];
+ double w, h, wISO, hISO;
+ FILE *f;
+ GooString *metadata;
+ GBool ok;
+ int exitCode;
+ int pg, i;
+ GBool multiPage;
+ int r;
+
+ exitCode = 99;
+
+ // parse args
+ ok = parseArgs(argDesc, &argc, argv);
+ // -listenc is the only mode that does not need a PDF file argument
+ if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) {
+ fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION);
+ fprintf(stderr, "%s\n", popplerCopyright);
+ fprintf(stderr, "%s\n", xpdfCopyright);
+ if (!printVersion) {
+ printUsage("pdfinfo", "<PDF-file>", argDesc);
+ }
+ // an explicit request for version or help is not an error
+ if (printVersion || printHelp)
+ exitCode = 0;
+ goto err0;
+ }
+
+ // read config file
+ globalParams = new GlobalParams();
+
+ // -listenc: print the encoding list and exit without opening any file
+ if (printEnc) {
+ printEncodings();
+ delete globalParams;
+ exitCode = 0;
+ goto err0;
+ }
+
+ fileName = new GooString(argv[1]);
+
+ if (textEncName[0]) {
+ globalParams->setTextEncoding(textEncName);
+ }
+
+ // get mapping to output encoding
+ if (!(uMap = globalParams->getTextEncoding())) {
+ error(errCommandLine, -1, "Couldn't get text encoding");
+ // doc and uMap do not exist yet, so skip err2 and release fileName here
+ delete fileName;
+ goto err1;
+ }
+
+ // open PDF file
+ // a leading '\001' means the corresponding password option was not given
+ if (ownerPassword[0] != '\001') {
+ ownerPW = new GooString(ownerPassword);
+ } else {
+ ownerPW = NULL;
+ }
+ if (userPassword[0] != '\001') {
+ userPW = new GooString(userPassword);
+ } else {
+ userPW = NULL;
+ }
+
+ if (fileName->cmp("-") == 0) {
+ delete fileName;
+ fileName = new GooString("fd://0");
+ }
+
+ doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+
+ if (userPW) {
+ delete userPW;
+ }
+ if (ownerPW) {
+ delete ownerPW;
+ }
+ if (!doc->isOk()) {
+ exitCode = 1;
+ goto err2;
+ }
+
+ // get page range
+ if (firstPage < 1) {
+ firstPage = 1;
+ }
+ // without -l only a single page is reported and the output lines carry no
+ // page number
+ // NOTE(review): with -f N (N > 1) and no -l, lastPage becomes 1, so the
+ // page-size loop below runs over an empty range -- confirm this is intended
+ if (lastPage == 0) {
+ multiPage = gFalse;
+ lastPage = 1;
+ } else {
+ multiPage = gTrue;
+ }
+ if (lastPage < 1 || lastPage > doc->getNumPages()) {
+ lastPage = doc->getNumPages();
+ }
+
+ // print doc info
+ doc->getDocInfo(&info);
+ if (info.isDict()) {
+ printInfoString(info.getDict(), "Title", "Title: ", uMap);
+ printInfoString(info.getDict(), "Subject", "Subject: ", uMap);
+ printInfoString(info.getDict(), "Keywords", "Keywords: ", uMap);
+ printInfoString(info.getDict(), "Author", "Author: ", uMap);
+ printInfoString(info.getDict(), "Creator", "Creator: ", uMap);
+ printInfoString(info.getDict(), "Producer", "Producer: ", uMap);
+ // -rawdates prints the date strings as stored instead of formatting them
+ if (rawDates) {
+ printInfoString(info.getDict(), "CreationDate", "CreationDate: ",
+ uMap);
+ printInfoString(info.getDict(), "ModDate", "ModDate: ",
+ uMap);
+ } else {
+ printInfoDate(info.getDict(), "CreationDate", "CreationDate: ");
+ printInfoDate(info.getDict(), "ModDate", "ModDate: ");
+ }
+ }
+ info.free();
+
+ // print tagging info
+ printf("Tagged: %s\n",
+ doc->getStructTreeRoot()->isDict() ? "yes" : "no");
+
+ // print form info
+ // an AcroForm dictionary whose XFA entry is a stream or array is reported
+ // as XFA, any other AcroForm dictionary as AcroForm
+ if ((acroForm = doc->getCatalog()->getAcroForm())->isDict()) {
+ acroForm->dictLookup("XFA", &xfa);
+ if (xfa.isStream() || xfa.isArray()) {
+ printf("Form: XFA\n");
+ } else {
+ printf("Form: AcroForm\n");
+ }
+ xfa.free();
+ } else {
+ printf("Form: none\n");
+ }
+
+ // print page count
+ printf("Pages: %d\n", doc->getNumPages());
+
+ // print encryption info
+ printf("Encrypted: ");
+ if (doc->isEncrypted()) {
+ printf("yes (print:%s copy:%s change:%s addNotes:%s)\n",
+ doc->okToPrint(gTrue) ? "yes" : "no",
+ doc->okToCopy(gTrue) ? "yes" : "no",
+ doc->okToChange(gTrue) ? "yes" : "no",
+ doc->okToAddNotes(gTrue) ? "yes" : "no");
+ } else {
+ printf("no\n");
+ }
+
+ // print page size
+ for (pg = firstPage; pg <= lastPage; ++pg) {
+ w = doc->getPageCropWidth(pg);
+ h = doc->getPageCropHeight(pg);
+ if (multiPage) {
+ printf("Page %4d size: %g x %g pts", pg, w, h);
+ } else {
+ printf("Page size: %g x %g pts", w, h);
+ }
+ // 612 x 792 pt is US letter; either orientation matches
+ if ((fabs(w - 612) < 0.1 && fabs(h - 792) < 0.1) ||
+ (fabs(w - 792) < 0.1 && fabs(h - 612) < 0.1)) {
+ printf(" (letter)");
+ } else {
+ // start from A0 (height 2^(1/4) m, converted to points at 72 per inch)
+ // and step down through A6; each step turns the old width into the new
+ // height and divides the width by sqrt(2)
+ hISO = sqrt(sqrt(2.0)) * 7200 / 2.54;
+ wISO = hISO / sqrt(2.0);
+ for (i = 0; i <= 6; ++i) {
+ if ((fabs(w - wISO) < 1 && fabs(h - hISO) < 1) ||
+ (fabs(w - hISO) < 1 && fabs(h - wISO) < 1)) {
+ printf(" (A%d)", i);
+ break;
+ }
+ hISO = wISO;
+ wISO /= sqrt(2.0);
+ }
+ }
+ printf("\n");
+ r = doc->getPageRotate(pg);
+ if (multiPage) {
+ printf("Page %4d rot: %d\n", pg, r);
+ } else {
+ printf("Page rot: %d\n", r);
+ }
+ }
+
+ // print the boxes
+ if (printBoxes) {
+ if (multiPage) {
+ for (pg = firstPage; pg <= lastPage; ++pg) {
+ page = doc->getPage(pg);
+ if (!page) {
+ error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg);
+ continue;
+ }
+ // buf holds the per-page label passed to printBox()
+ sprintf(buf, "Page %4d MediaBox: ", pg);
+ printBox(buf, page->getMediaBox());
+ sprintf(buf, "Page %4d CropBox: ", pg);
+ printBox(buf, page->getCropBox());
+ sprintf(buf, "Page %4d BleedBox: ", pg);
+ printBox(buf, page->getBleedBox());
+ sprintf(buf, "Page %4d TrimBox: ", pg);
+ printBox(buf, page->getTrimBox());
+ sprintf(buf, "Page %4d ArtBox: ", pg);
+ printBox(buf, page->getArtBox());
+ }
+ } else {
+ // single-page mode: boxes of firstPage, labels without a page number
+ page = doc->getPage(firstPage);
+ if (!page) {
+ error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage);
+ } else {
+ printBox("MediaBox: ", page->getMediaBox());
+ printBox("CropBox: ", page->getCropBox());
+ printBox("BleedBox: ", page->getBleedBox());
+ printBox("TrimBox: ", page->getTrimBox());
+ printBox("ArtBox: ", page->getArtBox());
+ }
+ }
+ }
+
+ // print file size
+ // the file is reopened by name; if fopen() fails the line is simply omitted
+ // NOTE(review): the casts to Guint / int below presumably truncate sizes
+ // that do not fit in those types -- confirm for very large files
+#ifdef VMS
+ f = fopen(fileName->getCString(), "rb", "ctx=stm");
+#else
+ f = fopen(fileName->getCString(), "rb");
+#endif
+ if (f) {
+#if HAVE_FSEEKO
+ fseeko(f, 0, SEEK_END);
+ printf("File size: %u bytes\n", (Guint)ftello(f));
+#elif HAVE_FSEEK64
+ fseek64(f, 0, SEEK_END);
+ printf("File size: %u bytes\n", (Guint)ftell64(f));
+#else
+ fseek(f, 0, SEEK_END);
+ printf("File size: %d bytes\n", (int)ftell(f));
+#endif
+ fclose(f);
+ }
+
+ // print linearization info
+ printf("Optimized: %s\n", doc->isLinearized() ? "yes" : "no");
+
+ // print PDF version
+ printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion());
+
+ // print the metadata
+ if (printMetadata && (metadata = doc->readMetadata())) {
+ fputs("Metadata:\n", stdout);
+ fputs(metadata->getCString(), stdout);
+ fputc('\n', stdout);
+ delete metadata;
+ }
+
+ exitCode = 0;
+
+ // clean up
+ // the labels are ordered so that each failure point above jumps to the one
+ // that releases exactly what had been acquired by then
+ err2:
+ uMap->decRefCnt();
+ delete doc;
+ delete fileName;
+ err1:
+ delete globalParams;
+ err0:
+
+ // check for memory leaks
+ Object::memCheck(stderr);
+ gMemReport(stderr);
+
+ return exitCode;
+}
+
+// Prints one string entry of the Info dictionary to stdout.
+//
+// infoDict  dictionary to look the entry up in
+// key       name of the entry
+// text      label written before the value
+// uMap      mapping used to convert each code point to the output encoding
+//
+// Nothing is printed unless the entry exists and is a string. A value that
+// starts with the bytes 0xFE 0xFF is decoded as big-endian UTF-16 (including
+// surrogate pairs); any other value is mapped byte by byte through
+// pdfDocEncoding.
+static void printInfoString(Dict *infoDict, const char *key, const char *text,
+                            UnicodeMap *uMap) {
+  Object obj;
+  GooString *s1;
+  GBool isUnicode;
+  Unicode u, u2;
+  char buf[8];
+  int i, n, len;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text, stdout);
+    s1 = obj.getString();
+    len = s1->getLength();
+    // only look for the byte-order mark when both of its bytes exist
+    if (len >= 2 &&
+        (s1->getChar(0) & 0xff) == 0xfe &&
+        (s1->getChar(1) & 0xff) == 0xff) {
+      isUnicode = gTrue;
+      i = 2;
+    } else {
+      isUnicode = gFalse;
+      i = 0;
+    }
+    while (i < len) {
+      if (isUnicode) {
+        u = (s1->getChar(i) & 0xff) << 8;
+        // a dangling final byte of an odd-length string is treated as the
+        // high byte of a unit whose low byte is zero
+        if (i + 1 < len) {
+          u |= s1->getChar(i + 1) & 0xff;
+        }
+        i += 2;
+        if (u >= 0xd800 && u <= 0xdbff && i + 1 < len) {
+          // possible surrogate pair: peek at the next unit, and consume it
+          // only if it really is a low surrogate, so that a character
+          // following an unpaired high surrogate is not lost
+          u2 = ((s1->getChar(i) & 0xff) << 8) |
+               (s1->getChar(i + 1) & 0xff);
+          if (u2 >= 0xdc00 && u2 <= 0xdfff) {
+            u = 0x10000 + ((u - 0xd800) << 10) + (u2 - 0xdc00);
+            i += 2;
+          }
+        }
+      } else {
+        u = pdfDocEncoding[s1->getChar(i) & 0xff];
+        ++i;
+      }
+      n = uMap->mapUnicode(u, buf, sizeof(buf));
+      fwrite(buf, 1, n, stdout);
+    }
+    fputc('\n', stdout);
+  }
+  obj.free();
+}
+
+// Prints one date entry of the Info dictionary to stdout.
+//
+// infoDict  dictionary to look the entry up in
+// key       name of the entry
+// text      label written before the value
+//
+// Nothing is printed unless the entry exists and is a string. The value is
+// shown in strftime() "%c" form when it can be parsed and converted;
+// otherwise the stored string is printed unchanged.
+static void printInfoDate(Dict *infoDict, const char *key, const char *text) {
+  Object dateObj;
+  char *raw;
+  const char *shown;
+  int year, mon, day, hour, min, sec, tz_hour, tz_minute;
+  char tz;
+  struct tm when;
+  char formatted[256];
+
+  if (!infoDict->lookup(key, &dateObj)->isString()) {
+    dateObj.free();
+    return;
+  }
+
+  fputs(text, stdout);
+  raw = dateObj.getString()->getCString();
+  // fall back to the raw string unless every conversion step succeeds
+  shown = raw;
+  // TODO do something with the timezone info
+  if (parseDateString(raw, &year, &mon, &day, &hour, &min, &sec,
+                      &tz, &tz_hour, &tz_minute)) {
+    when.tm_year = year - 1900;
+    when.tm_mon = mon - 1;
+    when.tm_mday = day;
+    when.tm_hour = hour;
+    when.tm_min = min;
+    when.tm_sec = sec;
+    when.tm_wday = -1;
+    when.tm_yday = -1;
+    when.tm_isdst = -1;
+    // mktime() computes the tm_wday and tm_yday fields
+    if (mktime(&when) != (time_t)-1 &&
+        strftime(formatted, sizeof(formatted), "%c", &when)) {
+      shown = formatted;
+    }
+  }
+  fputs(shown, stdout);
+  fputc('\n', stdout);
+  dateObj.free();
+}
+
+// Prints the label `text` followed by the four coordinates of `box`
+// (x1, y1, x2, y2), each as a width-8 field with two decimals, and a newline.
+static void printBox(const char *text, PDFRectangle *box) {
+  fputs(text, stdout);
+  printf("%8.2f %8.2f %8.2f %8.2f\n",
+         box->x1, box->y1, box->x2, box->y2);
+}
diff --git a/utils/pdfseparate.1 b/utils/pdfseparate.1
new file mode 100644
index 00000000..077faec8
--- /dev/null
+++ b/utils/pdfseparate.1
@@ -0,0 +1,49 @@
+.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
+.TH pdfseparate 1 "15 September 2011"
+.SH NAME
+pdfseparate \- Portable Document Format (PDF) page extractor
+.SH SYNOPSIS
+.B pdfseparate
+[options]
+.I PDF-file PDF-page-pattern
+.SH DESCRIPTION
+.B pdfseparate
+extracts single pages from a Portable Document Format (PDF) file.
+.PP
+pdfseparate reads the PDF file
+.IR PDF-file ,
+extracts one or more pages, and writes one PDF file for each page to
+.IR PDF-page-pattern ,
+PDF-page-pattern should contain
+.BR %d .
+%d is replaced by the page number.
+.TP
+The PDF-file should not be encrypted.
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to extract. If \-f is omitted, extraction starts with page 1.
+.TP
+.BI \-l " number"
+Specifies the last page to extract. If \-l is omitted, extraction ends with the last page.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXAMPLE
+pdfseparate sample.pdf sample-%d.pdf
+.TP
+extracts all pages from sample.pdf; e.g. if sample.pdf has 3 pages, it produces
+.TP
+sample-1.pdf, sample-2.pdf, sample-3.pdf
+.SH AUTHOR
+The pdfseparate software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
+.SH "SEE ALSO"
+.BR pdfunite (1)
diff --git a/utils/pdfseparate.cc b/utils/pdfseparate.cc
new file mode 100644
index 00000000..25fac5a9
--- /dev/null
+++ b/utils/pdfseparate.cc
@@ -0,0 +1,122 @@
+//========================================================================
+//
+// pdfseparate.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011, 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
+// Copyright (C) 2012 Albert Astals Cid <aacid@kde.org>
+//
+//========================================================================
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "PDFDoc.h"
+#include "ErrorCodes.h"
+#include "GlobalParams.h"
+
+// Command-line option state for pdfseparate. 0 means "not given" for both
+// page options; extractPages() then substitutes page 1 and the document's
+// page count respectively.
+static int firstPage = 0;
+static int lastPage = 0;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Option table handed to parseArgs() and printUsage(). Each entry is
+// {flag, kind, storage, storage size, help text}. The trailing {NULL} entry
+// presumably terminates the table -- confirm against parseargs.h.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to extract"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to extract"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ // the four help spellings all set the same flag
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// Returns true when `pattern` can be used as a printf-style format with
+// exactly one int argument: apart from ordinary characters it may contain
+// "%%" and at most one integer conversion of the form "%d" or "%<digits>d"
+// (e.g. "%03d"). Any other conversion, or a '%' at the very end, makes it
+// unsafe.
+static bool isSafePagePattern(const char *pattern) {
+  int intConversions = 0;
+
+  for (const char *p = pattern; *p; ++p) {
+    if (*p != '%') {
+      continue;
+    }
+    ++p;
+    if (*p == '%') {
+      continue;  // literal percent sign
+    }
+    while (*p >= '0' && *p <= '9') {
+      ++p;  // zero flag and/or field width
+    }
+    // also rejects a '%' at the very end (*p is the terminator here)
+    if (*p != 'd' || ++intConversions > 1) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Writes each page in [firstPage, lastPage] of srcFileName to its own PDF.
+//
+// srcFileName   path of the PDF to read
+// destFileName  printf-style pattern for the output names; the page number
+//               is substituted for its "%d"
+//
+// A firstPage / lastPage of 0 means "not given" and is replaced by 1 and the
+// document's page count. Returns false (after reporting through error()
+// where a message applies) when the document is not ok or is encrypted, when
+// the pattern is unusable, when an output name does not fit the buffer, or
+// when saving a page fails; returns true otherwise.
+//
+// NOTE(review): `doc` is never deleted and the early error returns release
+// nothing; whether PDFDoc takes ownership of gfileName is not visible here,
+// so the original release behaviour is left untouched -- confirm and tidy up.
+bool extractPages (const char *srcFileName, const char *destFileName) {
+  char pathName[1024];
+  GooString *gfileName = new GooString (srcFileName);
+  PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL);
+
+  if (!doc->isOk()) {
+    error(errSyntaxError, -1, "Could not extract page(s) from damaged file ('{0:s}')", srcFileName);
+    return false;
+  }
+  if (doc->isEncrypted()) {
+    error(errSyntaxError, -1, "Could not extract page(s) from encrypted file ('{0:s}')", srcFileName);
+    return false;
+  }
+
+  if (firstPage == 0 && lastPage == 0) {
+    firstPage = 1;
+    lastPage = doc->getNumPages();
+  }
+  if (lastPage == 0)
+    lastPage = doc->getNumPages();
+  if (firstPage == 0)
+    firstPage = 1;
+  if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) {
+    error(errSyntaxError, -1, "'{0:s}' must contain '%%d' if more than one page should be extracted", destFileName);
+    return false;
+  }
+  // The pattern comes straight from the command line and is used as a format
+  // string below, so refuse anything that would make snprintf() read
+  // arguments that are not there or of the wrong type.
+  if (!isSafePagePattern(destFileName)) {
+    error(errSyntaxError, -1, "'{0:s}' is not a valid page pattern: only a single page-number placeholder is allowed", destFileName);
+    delete gfileName;
+    return false;
+  }
+  for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) {
+    // bounded formatting: a long pattern must not overflow pathName
+    int len = snprintf (pathName, sizeof(pathName), destFileName, pageNo);
+    if (len < 0 || len >= (int)sizeof(pathName)) {
+      error(errSyntaxError, -1, "Output file name for page {0:d} is too long", pageNo);
+      delete gfileName;
+      return false;
+    }
+    GooString *gpageName = new GooString (pathName);
+    int errCode = doc->savePageAs(gpageName, pageNo);
+    if ( errCode != errNone) {
+      delete gpageName;
+      delete gfileName;
+      return false;
+    }
+    delete gpageName;
+  }
+  delete gfileName;
+  return true;
+}
+
+// Entry point for pdfseparate.
+//
+// Expects exactly two positional arguments (source PDF and destination
+// pattern) and hands them to extractPages(). Exit code: 0 on success and for
+// -v / -h, 99 for a usage error or when extraction fails.
+int
+main (int argc, char *argv[])
+{
+  Object info;  // NOTE(review): appears unused in this function
+  int exitCode = 99;
+
+  // parse args
+  GBool ok = parseArgs (argDesc, &argc, argv);
+  if (!ok || argc != 3 || printVersion || printHelp)
+    {
+      fprintf (stderr, "pdfseparate version %s\n", PACKAGE_VERSION);
+      fprintf (stderr, "%s\n", popplerCopyright);
+      fprintf (stderr, "%s\n", xpdfCopyright);
+      if (!printVersion)
+        {
+          printUsage ("pdfseparate", "<PDF-sourcefile> <PDF-pattern-destfile>",
+                      argDesc);
+        }
+      // an explicit request for version or help is not an error
+      return (printVersion || printHelp) ? 0 : exitCode;
+    }
+
+  globalParams = new GlobalParams();
+  if (extractPages (argv[1], argv[2]))
+    {
+      exitCode = 0;
+    }
+  delete globalParams;
+
+  return exitCode;
+}
diff --git a/utils/pdftocairo.1 b/utils/pdftocairo.1
new file mode 100644
index 00000000..19b62efb
--- /dev/null
+++ b/utils/pdftocairo.1
@@ -0,0 +1,261 @@
+.TH pdftocairo 1
+.SH NAME
+pdftocairo \- Portable Document Format (PDF) to PNG/JPEG/PDF/PS/EPS/SVG using cairo
+.SH SYNOPSIS
+.B pdftocairo
+[options]
+.IR PDF-file
+.RI [ output-file ]
+.SH DESCRIPTION
+.B pdftocairo
+converts Portable Document Format (PDF) files, using the cairo output device of the poppler PDF library, to any of the following output formats:
+.IP \(bu
+Portable Network Graphics (PNG)
+.IP \(bu
+JPEG Interchange Format (JPEG)
+.IP \(bu
+Portable Document Format (PDF)
+.IP \(bu
+PostScript (PS)
+.IP \(bu
+Encapsulated PostScript (EPS)
+.IP \(bu
+Scalable Vector Graphics (SVG)
+.PP
+.B pdftocairo
+reads the PDF file,
+.IR PDF-file ,
+and writes to
+.IR output-file .
+The image formats (PNG and JPEG) generate one file per page with the page number and file type appended to
+.IR output-file
+(except when \-singlefile is used).
+When the output format is a vector format (PDF, PS, EPS, and SVG) or when \-singlefile is used,
+.IR output-file
+is the full filename.
+
+If the
+.IR PDF-file
+is \*(lq\-\*(rq , the PDF is read from stdin.
+If the
+.IR output-file
+is \*(lq\-\*(rq , the output file will be written to stdout. Using stdout is not valid with image formats unless \-singlefile is used.
+If
+.IR output-file
+is not used, the output filename will be derived from the
+.IR PDF-file
+filename.
+.PP
+Not all options are valid with all output formats. One (and only one) of the output format options (\-png, \-jpeg, \-pdf, \-ps, \-eps, or \-svg) must be used.
+.PP
+The resolution options (\-r, \-rx, \-ry) set the resolution of the
+image output formats. The image dimensions will depend on the PDF page
+size and the resolution. For the vector outputs, regions of the page
+that can not be represented natively in the output format (eg
+translucency in PS) will be rasterized at the resolution specified by
+the resolution options.
+.PP
+The \-scale-to options may be used to set a fixed image size. The
+image resolution will vary with the page size.
+.PP
+The cropping options (\-x, \-y, \-W, and \-H) use units of pixels with
+the image formats and PostScript points (1/72 inch) with the vector
+formats. When cropping is used with vector output the cropped region is
+centered unless \-nocenter is used in which case the cropped region is
+at the top left (SVG) or bottom left (PDF, PS, EPS).
+.PP
+.SH OPTIONS
+.TP
+.BI \-png
+Generates a PNG file(s)
+.TP
+.BI \-jpeg
+Generates a JPEG file(s)
+.TP
+.BI \-pdf
+Generates a PDF file
+.TP
+.BI \-ps
+Generate a PS file
+.TP
+.BI \-eps
+Generate an EPS file. An EPS file contains a single image, so if you
+use this option with a multi-page PDF file, you must use \-f and \-l
+to specify a single page. The page size options (\-origpagesizes,
+\-paper, \-paperw, \-paperh) can not be used with this option.
+.TP
+.BI \-svg
+Generate an SVG (Scalable Vector Graphics) file
+.TP
+.BI \-f " number"
+Specifies the first page to convert.
+.TP
+.BI \-l " number"
+Specifies the last page to convert.
+.TP
+.B \-o
+Generates only the odd numbered pages.
+.TP
+.B \-e
+Generates only the even numbered pages.
+.TP
+.BI \-singlefile
+Writes only the first page and does not add digits.
+.TP
+.BI \-r " number"
+Specifies the X and Y resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI.
+.TP
+.BI \-rx " number"
+Specifies the X resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI.
+.TP
+.BI \-ry " number"
+Specifies the Y resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI.
+.TP
+.BI \-scale-to " number"
+Scales the long side of each page (width for landscape pages, height
+for portrait pages) to fit in scale-to pixels. The size of the short
+side will be determined by the aspect ratio of the page (PNG/JPEG only).
+.TP
+.BI \-scale-to-x " number"
+Scales each page horizontally to fit in scale-to-x pixels. If
+scale-to-y is set to -1, the vertical size will be determined by the
+aspect ratio of the page (PNG/JPEG only).
+.TP
+.BI \-scale-to-y " number"
+Scales each page vertically to fit in scale-to-y pixels. If scale-to-x
+is set to -1, the horizontal size will be determined by the aspect ratio
+of the page (PNG/JPEG only).
+.TP
+.BI \-x " number"
+Specifies the x-coordinate of the crop area top left corner in pixels (image output) or points (vector output)
+.TP
+.BI \-y " number"
+Specifies the y-coordinate of the crop area top left corner in pixels (image output) or points (vector output)
+.TP
+.BI \-W " number"
+Specifies the width of crop area in pixels (image output) or points (vector output) (default is 0)
+.TP
+.BI \-H " number"
+Specifies the height of crop area in pixels (image output) or points (vector output) (default is 0)
+.TP
+.BI \-sz " number"
+Specifies the size of crop square in pixels (image output) or points (vector output) (sets \-W and \-H)
+.TP
+.B \-cropbox
+Uses the crop box rather than media box when generating the files
+.TP
+.B \-mono
+Generate a monochrome file (PNG only).
+.TP
+.B \-gray
+Generate a grayscale file (PNG and JPEG only).
+.TP
+.B \-transp
+Use a transparent page color instead of white (PNG only).
+.TP
+.BI \-icc " icc-file"
+Use the specified ICC file as the output profile (PNG only). The profile will be embedded in the PNG file.
+.TP
+.B \-level2
+Generate Level 2 PostScript (PS only).
+.TP
+.B \-level3
+Generate Level 3 PostScript (PS only). This enables all Level 2 features plus
+shading patterns and masked images. This is the default setting.
+.TP
+.B \-origpagesizes
+Generate a file with variable page sizes and orientations (PS and PDF
+only). The size of each page will be the original page in the PDF
+file. If the output is PS the file will contain %%DocumentMedia and
+%%PageMedia DSC comments specifying the size of each page. Any
+specification of the page size via \-paper, \-paperw, or \-paperh will
+get overridden as long as each page of the PDF file has a defined
+paper size.
+.TP
+.BI \-paper " size"
+Set the paper size to one of "letter", "legal", "A4", or "A3"
+(PS,PDF,SVG only). This can also be set to "match", which will set
+the paper size to match the size specified in the PDF
+file. \-origpagesizes overrides this setting if the PDF file has
+defined page sizes.
+.TP
+.BI \-paperw " size"
+Set the paper width, in points (PS,PDF,SVG only). \-origpagesizes overrides this setting
+if the PDF file has defined page sizes.
+.TP
+.BI \-paperh " size"
+Set the paper height, in points (PS,PDF,SVG only). \-origpagesizes overrides this setting
+if the PDF file has defined page sizes.
+.TP
+.B \-nocrop
+By default, output is cropped to the CropBox specified in the PDF
+file. This option disables cropping.
+.TP
+.B \-expand
+Expand PDF pages smaller than the paper to fill the paper (PS,PDF,SVG only). By
+default, these pages are not scaled.
+.TP
+.B \-noshrink
+Don't scale PDF pages which are larger than the paper (PS,PDF,SVG only). By default,
+pages larger than the paper are shrunk to fit.
+.TP
+.B \-nocenter
+By default, PDF pages smaller than the paper (after any scaling) are
+centered on the paper. This option causes them to be aligned to the
+lower-left corner of the paper instead (PS,PDF,SVG only).
+.TP
+.B \-duplex
+Adds the %%IncludeFeature: *Duplex DuplexNoTumble DSC comment to the
+PostScript file (PS only). This tells the print manager to enable duplexing.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The poppler tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+4
+Error related to ICC profile.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftocairo software and documentation are copyright 1996-2004 Glyph
+& Cog, LLC and copyright 2005-2011 The Poppler Developers.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdftocairo.cc b/utils/pdftocairo.cc
new file mode 100644
index 00000000..8d13e316
--- /dev/null
+++ b/utils/pdftocairo.cc
@@ -0,0 +1,1015 @@
+//========================================================================
+//
+// pdftocairo.cc
+//
+// Copyright 2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2007 Ilmari Heikkinen <ilmari.heikkinen@gmail.com>
+// Copyright (C) 2008 Richard Airlie <richard.airlie@maglabs.net>
+// Copyright (C) 2009 Michael K. Johnson <a1237@danlj.org>
+// Copyright (C) 2009 Shen Liang <shenzhuxi@gmail.com>
+// Copyright (C) 2009 Stefan Thomas <thomas@eload24.com>
+// Copyright (C) 2009, 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010, 2011, 2012 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 Jonathan Liu <net147@gmail.com>
+// Copyright (C) 2010 William Bader <williambader@hotmail.com>
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
+// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
+// Copyright (C) 2012 Koji Otani <sho@bbr.jp>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/gmem.h"
+#include "goo/gtypes.h"
+#include "goo/gtypes_p.h"
+#include "goo/GooString.h"
+#include "goo/ImgWriter.h"
+#include "goo/JpegWriter.h"
+#include "goo/PNGWriter.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "CairoOutputDev.h"
+#if USE_CMS
+#ifdef USE_LCMS1
+#include <lcms.h>
+#else
+#include <lcms2.h>
+#endif
+#endif
+#include <cairo.h>
+#if CAIRO_HAS_PS_SURFACE
+#include <cairo-ps.h>
+#endif
+#if CAIRO_HAS_PDF_SURFACE
+#include <cairo-pdf.h>
+#endif
+#if CAIRO_HAS_SVG_SURFACE
+#include <cairo-svg.h>
+#endif
+
+
+// Output format selection. Each flag is set by the command-line option of the
+// same name in argDesc; main() insists that exactly one of them is set.
+static GBool png = gFalse;
+static GBool jpeg = gFalse;
+static GBool ps = gFalse;
+static GBool eps = gFalse;
+static GBool pdf = gFalse;
+static GBool svg = gFalse;
+
+// Page selection (-f, -l, -o, -e, -singlefile). lastPage == 0 means "not
+// given"; main() then replaces it with the document's page count.
+static int firstPage = 1;
+static int lastPage = 0;
+static GBool printOnlyOdd = gFalse;
+static GBool printOnlyEven = gFalse;
+static GBool singleFile = gFalse;
+// Resolution and scaling (-r, -rx, -ry, -scale-to, -scale-to-x, -scale-to-y).
+static double resolution = 0.0;
+static double x_resolution = 150.0;
+static double y_resolution = 150.0;
+static int scaleTo = 0;
+static int x_scaleTo = 0;
+static int y_scaleTo = 0;
+// Crop rectangle (-x, -y, -W, -H); -sz sets both crop_w and crop_h in main().
+static int crop_x = 0;
+static int crop_y = 0;
+static int crop_w = 0;
+static int crop_h = 0;
+static int sz = 0;
+// Page box and image colour options (-cropbox, -mono, -gray, -transp, -icc).
+static GBool useCropBox = gFalse;
+static GBool mono = gFalse;
+static GBool gray = gFalse;
+static GBool transp = gFalse;
+static GooString icc;
+
+// Options for the PS/EPS/PDF/SVG outputs. paperWidth/paperHeight stay at -1
+// until set by -paperw/-paperh or -paper, or taken from the first page in main().
+static GBool level2 = gFalse;
+static GBool level3 = gFalse;
+static GBool doOrigPageSizes = gFalse;
+static char paperSize[15] = "";
+static int paperWidth = -1;
+static int paperHeight = -1;
+static GBool noCrop = gFalse;
+static GBool expand = gFalse;
+static GBool noShrink = gFalse;
+static GBool noCenter = gFalse;
+static GBool duplex = gFalse;
+
+// Passwords and general options (-opw, -upw, -q, -v, -h and its aliases).
+static char ownerPassword[33] = "";
+static char userPassword[33] = "";
+static GBool quiet = gFalse;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Command-line option table passed to parseArgs() and printUsage() in main().
+// Each entry lists the option name, its argument kind, the variable that
+// receives the value, a size field (0 here except for the argString buffers,
+// which pass sizeof the buffer), and the help text. The output-format options
+// are only present when the matching library / cairo surface is compiled in.
+static const ArgDesc argDesc[] = {
+#if ENABLE_LIBPNG
+ {"-png", argFlag, &png, 0,
+ "generate a PNG file"},
+#endif
+#if ENABLE_LIBJPEG
+ {"-jpeg", argFlag, &jpeg, 0,
+ "generate a JPEG file"},
+#endif
+#if CAIRO_HAS_PS_SURFACE
+ {"-ps", argFlag, &ps, 0,
+ "generate PostScript file"},
+ {"-eps", argFlag, &eps, 0,
+ "generate Encapsulated PostScript (EPS)"},
+#endif
+#if CAIRO_HAS_PDF_SURFACE
+ {"-pdf", argFlag, &pdf, 0,
+ "generate a PDF file"},
+#endif
+#if CAIRO_HAS_SVG_SURFACE
+ {"-svg", argFlag, &svg, 0,
+ "generate a Scalable Vector Graphics (SVG) file"},
+#endif
+
+ {"-f", argInt, &firstPage, 0,
+ "first page to print"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to print"},
+ {"-o", argFlag, &printOnlyOdd, 0,
+ "print only odd pages"},
+ {"-e", argFlag, &printOnlyEven, 0,
+ "print only even pages"},
+ {"-singlefile", argFlag, &singleFile, 0,
+ "write only the first page and do not add digits"},
+
+ {"-r", argFP, &resolution, 0,
+ "resolution, in PPI (default is 150)"},
+ {"-rx", argFP, &x_resolution, 0,
+ "X resolution, in PPI (default is 150)"},
+ {"-ry", argFP, &y_resolution, 0,
+ "Y resolution, in PPI (default is 150)"},
+ {"-scale-to", argInt, &scaleTo, 0,
+ "scales each page to fit within scale-to*scale-to pixel box"},
+ {"-scale-to-x", argInt, &x_scaleTo, 0,
+ "scales each page horizontally to fit in scale-to-x pixels"},
+ {"-scale-to-y", argInt, &y_scaleTo, 0,
+ "scales each page vertically to fit in scale-to-y pixels"},
+
+ {"-x", argInt, &crop_x, 0,
+ "x-coordinate of the crop area top left corner"},
+ {"-y", argInt, &crop_y, 0,
+ "y-coordinate of the crop area top left corner"},
+ {"-W", argInt, &crop_w, 0,
+ "width of crop area in pixels (default is 0)"},
+ {"-H", argInt, &crop_h, 0,
+ "height of crop area in pixels (default is 0)"},
+ {"-sz", argInt, &sz, 0,
+ "size of crop square in pixels (sets W and H)"},
+ {"-cropbox",argFlag, &useCropBox, 0,
+ "use the crop box rather than media box"},
+
+ {"-mono", argFlag, &mono, 0,
+ "generate a monochrome image file (PNG, JPEG)"},
+ {"-gray", argFlag, &gray, 0,
+ "generate a grayscale image file (PNG, JPEG)"},
+ {"-transp", argFlag, &transp, 0,
+ "use a transparent background instead of white (PNG)"},
+#if USE_CMS
+ {"-icc", argGooString, &icc, 0,
+ "ICC color profile to use"},
+#endif
+
+ {"-level2", argFlag, &level2, 0,
+ "generate Level 2 PostScript (PS, EPS)"},
+ {"-level3", argFlag, &level3, 0,
+ "generate Level 3 PostScript (PS, EPS)"},
+ {"-origpagesizes",argFlag, &doOrigPageSizes,0,
+ "conserve original page sizes (PS, PDF, SVG)"},
+ {"-paper", argString, paperSize, sizeof(paperSize),
+ "paper size (letter, legal, A4, A3, match)"},
+ {"-paperw", argInt, &paperWidth, 0,
+ "paper width, in points"},
+ {"-paperh", argInt, &paperHeight, 0,
+ "paper height, in points"},
+ {"-nocrop", argFlag, &noCrop, 0,
+ "don't crop pages to CropBox"},
+ {"-expand", argFlag, &expand, 0,
+ "expand pages smaller than the paper size"},
+ {"-noshrink", argFlag, &noShrink, 0,
+ "don't shrink pages larger than the paper size"},
+ {"-nocenter", argFlag, &noCenter, 0,
+ "don't center pages smaller than the paper size"},
+ {"-duplex", argFlag, &duplex, 0,
+ "enable duplex printing"},
+
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+
+ {"-q", argFlag, &quiet, 0,
+ "don't print any messages or errors"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+
+// State shared by the begin*/render*/end* helpers below.
+// surface: the cairo surface currently being drawn to (one per document when
+// printing, one per page for image output).
+static cairo_surface_t *surface;
+// printing: set in main(); gTrue unless -png or -jpeg was selected.
+static GBool printing;
+// output_file: stream that writeStream() writes to for PS/EPS/PDF/SVG output.
+static FILE *output_file;
+
+#if USE_CMS
+// Raw bytes of the profile named by -icc (NULL when none was given), their
+// length, and the lcms profile handle created in main().
+static unsigned char *icc_data;
+static int icc_data_size;
+static cmsHPROFILE profile;
+#endif
+
+
+// Encode the current image surface (global `surface`) as PNG or JPEG and write
+// it to `filename`; the name "fd://0" selects stdout instead of a file.
+// Returns without writing anything if no writer could be created for the
+// selected format (for example when its library support is not compiled in).
+// Exits with status 2 if the output file cannot be opened or the writer fails
+// to initialise.
+void writePageImage(GooString *filename)
+{
+ ImgWriter *writer = 0;
+ FILE *file;
+ int height, width, stride;
+ unsigned char *data;
+
+ if (png) {
+#if ENABLE_LIBPNG
+ // pick the PNG pixel format; -transp takes precedence over -gray and -mono
+ if (transp)
+ writer = new PNGWriter(PNGWriter::RGBA);
+ else if (gray)
+ writer = new PNGWriter(PNGWriter::GRAY);
+ else if (mono)
+ writer = new PNGWriter(PNGWriter::MONOCHROME);
+ else
+ writer = new PNGWriter(PNGWriter::RGB);
+
+#if USE_CMS
+ // embed the user-supplied ICC profile if one was loaded, otherwise tag as sRGB
+#ifdef USE_LCMS1
+ if (icc_data)
+ static_cast<PNGWriter*>(writer)->setICCProfile(cmsTakeProductName(profile), icc_data, icc_data_size);
+ else
+ static_cast<PNGWriter*>(writer)->setSRGBProfile();
+#else
+ if (icc_data) {
+ // the 16-byte header profile ID, NUL-terminated, is used as the profile name
+ cmsUInt8Number profileID[17];
+ profileID[16] = '\0';
+
+ cmsGetHeaderProfileID(profile,profileID);
+ static_cast<PNGWriter*>(writer)->setICCProfile(reinterpret_cast<char *>(profileID), icc_data, icc_data_size);
+ } else {
+ static_cast<PNGWriter*>(writer)->setSRGBProfile();
+ }
+#endif
+#endif
+#endif
+
+ } else if (jpeg) {
+#if ENABLE_LIBJPEG
+ if (gray)
+ writer = new JpegWriter(JCS_GRAYSCALE);
+ else
+ writer = new JpegWriter(JCS_RGB);
+#endif
+ }
+ if (!writer)
+ return;
+
+ if (filename->cmp("fd://0") == 0)
+ file = stdout;
+ else
+ file = fopen(filename->getCString(), "wb");
+
+ if (!file) {
+ fprintf(stderr, "Error opening output file %s\n", filename->getCString());
+ exit(2);
+ }
+
+ height = cairo_image_surface_get_height(surface);
+ width = cairo_image_surface_get_width(surface);
+ stride = cairo_image_surface_get_stride(surface);
+ data = cairo_image_surface_get_data(surface);
+
+ if (!writer->init(file, width, height, x_resolution, y_resolution)) {
+ fprintf(stderr, "Error writing %s\n", filename->getCString());
+ exit(2);
+ }
+ // scratch row sized for the widest output case (4 bytes per pixel, RGBA)
+ unsigned char *row = (unsigned char *) gmallocn(width, 4);
+
+ for (int y = 0; y < height; y++ ) {
+ // each source pixel is read as one 32-bit word: alpha in the top byte,
+ // then red, green and blue in the lower three bytes
+ uint32_t *pixel = (uint32_t *) (data + y*stride);
+ unsigned char *rowp = row;
+ for (int x = 0; x < width; x++, pixel++) {
+ if (transp) {
+ // unpremultiply into RGBA format
+ uint8_t a;
+ a = (*pixel & 0xff000000) >> 24;
+ if (a == 0) {
+ *rowp++ = 0;
+ *rowp++ = 0;
+ *rowp++ = 0;
+ } else {
+ // divide each channel by alpha, rounding to nearest (the "+ a / 2" term)
+ *rowp++ = (((*pixel & 0xff0000) >> 16) * 255 + a / 2) / a;
+ *rowp++ = (((*pixel & 0x00ff00) >> 8) * 255 + a / 2) / a;
+ *rowp++ = (((*pixel & 0x0000ff) >> 0) * 255 + a / 2) / a;
+ }
+ *rowp++ = a;
+ } else if (gray || mono) {
+ // convert to gray
+ // The PDF Reference specifies the DeviceRGB to DeviceGray conversion as
+ // gray = 0.3*red + 0.59*green + 0.11*blue
+ int r = (*pixel & 0x00ff0000) >> 16;
+ int g = (*pixel & 0x0000ff00) >> 8;
+ int b = (*pixel & 0x000000ff) >> 0;
+ // an arbitrary integer approximation of .3*r + .59*g + .11*b
+ // (this inner y is the luminance value and shadows the row index y)
+ int y = (r*19661+g*38666+b*7209 + 32829)>>16;
+ *rowp++ = y;
+ } else {
+ // copy into RGB format
+ *rowp++ = (*pixel & 0x00ff0000) >> 16;
+ *rowp++ = (*pixel & 0x0000ff00) >> 8;
+ *rowp++ = (*pixel & 0x000000ff) >> 0;
+ }
+ }
+ writer->writeRow(&row);
+ }
+ gfree(row);
+ writer->close();
+ delete writer;
+ // stdout is only flushed, never closed here
+ if (file == stdout) fflush(file);
+ else fclose(file);
+}
+
+// Compute the size of the crop area for a page of page_w x page_h.
+// A crop width/height of 0 (the default for -W / -H) means "the whole page,
+// rounded up". The result is clamped so that the crop rectangle, which starts
+// at (crop_x, crop_y), does not extend past the page.
+static void getCropSize(double page_w, double page_h, double *width, double *height)
+{
+  const int cropWidth = (crop_w != 0) ? crop_w : (int)ceil(page_w);
+  const int cropHeight = (crop_h != 0) ? crop_h : (int)ceil(page_h);
+
+  if (crop_x + cropWidth > page_w)
+    *width = (int)ceil(page_w - crop_x);
+  else
+    *width = cropWidth;
+
+  if (crop_y + cropHeight > page_h)
+    *height = (int)ceil(page_h - crop_y);
+  else
+    *height = cropHeight;
+}
+
+// Determine the size of the output page.
+// Image output: the page size scaled from points to pixels at the current
+// resolution, then reduced to the crop area.
+// Print output: the page's own size with -origpagesizes, otherwise the paper size.
+static void getOutputSize(double page_w, double page_h, double *width, double *height)
+{
+  if (!printing) {
+    const double pixel_w = page_w * (x_resolution / 72.0);
+    const double pixel_h = page_h * (y_resolution / 72.0);
+    getCropSize(pixel_w, pixel_h, width, height);
+    return;
+  }
+
+  if (doOrigPageSizes) {
+    *width = page_w;
+    *height = page_h;
+    return;
+  }
+
+  *width = paperWidth;
+  *height = paperHeight;
+}
+
+// Build in *m the matrix that places a page_w x page_h page on a
+// paper_w x paper_h sheet, honouring -expand, -nocenter and -noshrink.
+// The matrix is left as the identity when no adjustment applies.
+static void getFitToPageTransform(double page_w, double page_h,
+                                  double paper_w, double paper_h,
+                                  cairo_matrix_t *m)
+{
+  const double x_scale = paper_w / page_w;
+  const double y_scale = paper_h / page_h;
+  // uniform scale: the smaller of the two axis ratios
+  const double scale = (x_scale < y_scale) ? x_scale : y_scale;
+
+  cairo_matrix_init_identity (m);
+
+  if (scale > 1.0) {
+    // page is smaller than paper
+    if (expand) {
+      // expand to fit
+      cairo_matrix_scale (m, scale, scale);
+      return;
+    }
+    if (!noCenter) {
+      // centre page
+      cairo_matrix_translate (m, (paper_w - page_w)/2, (paper_h - page_h)/2);
+      return;
+    }
+    if (!svg) {
+      // move to PostScript origin
+      cairo_matrix_translate (m, 0, (paper_h - page_h));
+    }
+    return;
+  }
+
+  // page is larger than paper: shrink to fit unless -noshrink was given
+  if (scale < 1.0 && !noShrink)
+    cairo_matrix_scale (m, scale, scale);
+}
+
+// cairo stream-write callback: append `length` bytes from `data` to the FILE*
+// passed as `closure`.
+// Returns CAIRO_STATUS_SUCCESS when everything was written and
+// CAIRO_STATUS_WRITE_ERROR otherwise.
+static cairo_status_t writeStream(void *closure, const unsigned char *data, unsigned int length)
+{
+  FILE *file = (FILE *)closure;
+
+  // fwrite() returns 0 when asked to write zero bytes, which the check below
+  // would misreport as a failure; an empty chunk is trivially written.
+  if (length == 0)
+    return CAIRO_STATUS_SUCCESS;
+
+  if (fwrite(data, length, 1, file) == 1)
+    return CAIRO_STATUS_SUCCESS;
+
+  return CAIRO_STATUS_WRITE_ERROR;
+}
+
+// Start a PS/EPS/PDF/SVG document: open the output stream ("fd://0" selects
+// stdout) and create the matching cairo surface of size w x h points.
+// Does nothing for image output, where a surface is created per page.
+// Exits with status 2 if the output file cannot be opened.
+static void beginDocument(GooString *outputFileName, double w, double h)
+{
+  if (printing) {
+    if (outputFileName->cmp("fd://0") == 0)
+      output_file = stdout;
+    else
+      output_file = fopen(outputFileName->getCString(), "wb");
+
+    // A failed fopen() would otherwise hand a NULL stream to writeStream()
+    // and, later, to fclose() in endDocument().
+    if (!output_file) {
+      fprintf(stderr, "Error opening output file %s\n", outputFileName->getCString());
+      exit(2);
+    }
+
+    if (ps || eps) {
+#if CAIRO_HAS_PS_SURFACE
+      surface = cairo_ps_surface_create_for_stream(writeStream, output_file, w, h);
+      if (level2)
+        cairo_ps_surface_restrict_to_level (surface, CAIRO_PS_LEVEL_2);
+      if (eps)
+        cairo_ps_surface_set_eps (surface, 1);
+      if (duplex) {
+        cairo_ps_surface_dsc_comment(surface, "%%Requirements: duplex");
+        cairo_ps_surface_dsc_begin_setup(surface);
+        cairo_ps_surface_dsc_comment(surface, "%%IncludeFeature: *Duplex DuplexNoTumble");
+      }
+      // DSC comments emitted from here on go to the page setup section
+      cairo_ps_surface_dsc_begin_page_setup (surface);
+#endif
+    } else if (pdf) {
+#if CAIRO_HAS_PDF_SURFACE
+      surface = cairo_pdf_surface_create_for_stream(writeStream, output_file, w, h);
+#endif
+    } else if (svg) {
+#if CAIRO_HAS_SVG_SURFACE
+      surface = cairo_svg_surface_create_for_stream(writeStream, output_file, w, h);
+      cairo_svg_surface_restrict_to_version (surface, CAIRO_SVG_VERSION_1_2);
+#endif
+    }
+  }
+}
+
+// Prepare the surface for the next page of size w x h.
+// Image output: create a fresh ARGB32 image surface, dimensions rounded up.
+// Print output: set the per-page size on the existing PS/PDF surface (PS pages
+// wider than tall are declared Landscape with swapped dimensions) and set the
+// fallback resolution.
+static void beginPage(double w, double h)
+{
+  if (!printing) {
+    surface = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, ceil(w), ceil(h));
+    return;
+  }
+
+#if CAIRO_HAS_PS_SURFACE
+  if (ps || eps) {
+    const bool landscape = w > h;
+    if (landscape) {
+      cairo_ps_surface_dsc_comment (surface, "%%PageOrientation: Landscape");
+      cairo_ps_surface_set_size (surface, h, w);
+    } else {
+      cairo_ps_surface_dsc_comment (surface, "%%PageOrientation: Portrait");
+      cairo_ps_surface_set_size (surface, w, h);
+    }
+  }
+#endif
+
+#if CAIRO_HAS_PDF_SURFACE
+  if (pdf)
+    cairo_pdf_surface_set_size (surface, w, h);
+#endif
+
+  cairo_surface_set_fallback_resolution (surface, x_resolution, y_resolution);
+}
+
+// Render page `pg` of `doc` through `cairoOut` onto the global `surface`.
+// page_w/page_h are the page dimensions in points; output_w/output_h the size
+// of the output page (see getOutputSize). Any cairo error left on the context
+// afterwards is reported through error().
+static void renderPage(PDFDoc *doc, CairoOutputDev *cairoOut, int pg,
+ double page_w, double page_h,
+ double output_w, double output_h)
+{
+ cairo_t *cr;
+ cairo_status_t status;
+ cairo_matrix_t m;
+
+ cr = cairo_create(surface);
+ cairoOut->setCairo(cr);
+ cairoOut->setPrinting(printing);
+
+ // the transform set up below applies to the page content only; it is undone
+ // by the matching cairo_restore() before the white background is painted
+ cairo_save(cr);
+ if (ps && output_w > output_h) {
+ // rotate 90 deg for landscape
+ cairo_translate (cr, 0, output_w);
+ cairo_matrix_init (&m, 0, -1, 1, 0, 0, 0);
+ cairo_transform (cr, &m);
+ }
+ // shift so that the crop origin lands on the surface origin
+ cairo_translate (cr, -crop_x, -crop_y);
+ if (printing) {
+ // fit the cropped page onto the paper and clip drawing to the crop rectangle
+ double cropped_w, cropped_h;
+ getCropSize(page_w, page_h, &cropped_w, &cropped_h);
+ getFitToPageTransform(cropped_w, cropped_h, output_w, output_h, &m);
+ cairo_transform (cr, &m);
+ cairo_rectangle(cr, crop_x, crop_y, cropped_w, cropped_h);
+ cairo_clip(cr);
+ } else {
+ // image output: scale from points (72 per inch) to pixels
+ cairo_scale (cr, x_resolution/72.0, y_resolution/72.0);
+ }
+ doc->displayPageSlice(cairoOut,
+ pg,
+ 72.0, 72.0,
+ 0, /* rotate */
+ !useCropBox, /* useMediaBox */
+ gFalse, /* Crop */
+ printing,
+ -1, -1, -1, -1);
+ cairo_restore(cr);
+ cairoOut->setCairo(NULL);
+
+ // Blend onto white page
+ if (!printing && !transp) {
+ cairo_save(cr);
+ // DEST_OVER paints the white source underneath what is already drawn
+ cairo_set_operator(cr, CAIRO_OPERATOR_DEST_OVER);
+ cairo_set_source_rgb(cr, 1, 1, 1);
+ cairo_paint(cr);
+ cairo_restore(cr);
+ }
+
+ status = cairo_status(cr);
+ if (status)
+ error(errInternal, -1, "cairo error: {0:s}\n", cairo_status_to_string(status));
+ cairo_destroy (cr);
+}
+
+// Finish the current page.
+// Print output: emit the page on the document surface.
+// Image output: write the surface to imageFileName, then finish and destroy
+// it, reporting any cairo error left on the surface.
+static void endPage(GooString *imageFileName)
+{
+  if (printing) {
+    cairo_surface_show_page(surface);
+    return;
+  }
+
+  writePageImage(imageFileName);
+  cairo_surface_finish(surface);
+
+  const cairo_status_t surfaceStatus = cairo_surface_status(surface);
+  if (surfaceStatus != CAIRO_STATUS_SUCCESS)
+    error(errInternal, -1, "cairo error: {0:s}\n", cairo_status_to_string(surfaceStatus));
+
+  cairo_surface_destroy(surface);
+}
+
+// Finish a PS/EPS/PDF/SVG document: finish and destroy the document surface,
+// report any cairo error left on it, and close the output stream.
+// Nothing to do for image output, where each page is finalised in endPage().
+static void endDocument()
+{
+  if (!printing)
+    return;
+
+  cairo_surface_finish(surface);
+
+  const cairo_status_t surfaceStatus = cairo_surface_status(surface);
+  if (surfaceStatus != CAIRO_STATUS_SUCCESS)
+    error(errInternal, -1, "cairo error: {0:s}\n", cairo_status_to_string(surfaceStatus));
+
+  cairo_surface_destroy(surface);
+  fclose(output_file);
+}
+
+// Translate a paper size name into dimensions in points.
+// Recognised names (case-sensitive): "match" (both dimensions set to -1),
+// "letter", "legal", "A4" and "A3".
+// Returns gTrue on success; returns gFalse and leaves the outputs untouched
+// when the name is not recognised.
+static GBool setPSPaperSize(char *size, int &psPaperWidth, int &psPaperHeight) {
+  static const struct {
+    const char *name;
+    int width;
+    int height;
+  } knownSizes[] = {
+    { "match",   -1,   -1 },
+    { "letter", 612,  792 },
+    { "legal",  612, 1008 },
+    { "A4",     595,  842 },
+    { "A3",     842, 1190 },
+  };
+  const unsigned int count = sizeof(knownSizes) / sizeof(knownSizes[0]);
+
+  for (unsigned int i = 0; i < count; ++i) {
+    if (strcmp(size, knownSizes[i].name) == 0) {
+      psPaperWidth = knownSizes[i].width;
+      psPaperHeight = knownSizes[i].height;
+      return gTrue;
+    }
+  }
+  return gFalse;
+}
+
+// Return the number of decimal digits needed to print n (1 for n == 0).
+static int numberOfCharacters(unsigned int n)
+{
+  int digits = 1;
+  for (unsigned int rest = n; rest >= 10; rest /= 10)
+    ++digits;
+  return digits;
+}
+
+// Build the file name for one rendered page image.
+// Unless -singlefile is set, "-<page>" is appended with the page number
+// zero-padded to numDigits digits; then ".png" or ".jpg" is appended
+// according to the output format. The stdout sentinel "fd://0" gets no
+// extension, so that writePageImage() still recognises it as stdout.
+// Returns a newly allocated GooString owned by the caller.
+static GooString *getImageFileName(GooString *outputFileName, int numDigits, int page)
+{
+  // room for '-', a sign, ten digits and the terminating NUL
+  char buf[16];
+  GooString *imageName = new GooString(outputFileName);
+  if (!singleFile) {
+    snprintf(buf, sizeof(buf), "-%0*d", numDigits, page);
+    // plain append: buf is data, not a GooString format string
+    imageName->append(buf);
+  }
+  // Appending an extension to "fd://0" would make writePageImage() try to
+  // open a file of that name instead of writing to stdout.
+  if (outputFileName->cmp("fd://0") != 0) {
+    if (png)
+      imageName->append(".png");
+    else if (jpeg)
+      imageName->append(".jpg");
+  }
+
+  return imageName;
+}
+
+// Work out the output file name.
+// If (printing || singleFile) the output file name includes the
+// extension. Otherwise it is the file name base.
+//
+// With an explicit outputName, "-" maps to the stdout sentinel "fd://0"
+// (allowed only when printing or with -singlefile) and anything else is used
+// as given. Without one, the name is derived from the input file name: its
+// directory part and a trailing ".pdf" (any case) are removed and the
+// extension of the print format, if any, is added.
+// Returns a newly allocated GooString; exits with status 99 on invalid input.
+static GooString *getOutputFileName(GooString *fileName, GooString *outputName)
+{
+  if (outputName) {
+    if (outputName->cmp("-") != 0)
+      return new GooString(outputName);
+
+    if (!printing && !singleFile) {
+      fprintf(stderr, "Error: stdout may only be used with the ps, eps, pdf, svg output options or if -singlefile is used.\n");
+      exit(99);
+    }
+    return new GooString("fd://0");
+  }
+
+  if (fileName->cmp("fd://0") == 0) {
+    fprintf(stderr, "Error: an output filename or '-' must be supplied when the PDF file is stdin.\n");
+    exit(99);
+  }
+
+  // be careful not to overwrite the input file when the output format is PDF
+  const GBool isRemoteInput = fileName->cmpN("http://", 7) == 0 ||
+                              fileName->cmpN("https://", 8) == 0;
+  if (pdf && !isRemoteInput) {
+    fprintf(stderr, "Error: an output filename or '-' must be supplied when the output format is PDF and input PDF file is a local file.\n");
+    exit(99);
+  }
+
+  // strip everything up to last '/'
+  char *inputPath = fileName->getCString();
+  char *lastSlash = strrchr(inputPath, '/');
+  if (lastSlash && lastSlash[1] == 0) {
+    fprintf(stderr, "Error: invalid output filename.\n");
+    exit(99);
+  }
+  GooString *result = new GooString(lastSlash ? lastSlash + 1 : inputPath);
+
+  // remove .pdf extension
+  char *lastDot = strrchr(result->getCString(), '.');
+  if (lastDot && strcasecmp(lastDot, ".pdf") == 0) {
+    GooString *stem = new GooString(result->getCString(), result->getLength() - 4);
+    delete result;
+    result = stem;
+  }
+
+  // append new extension
+  const char *extension = NULL;
+  if (ps)
+    extension = ".ps";
+  else if (eps)
+    extension = ".eps";
+  else if (pdf)
+    extension = ".pdf";
+  else if (svg)
+    extension = ".svg";
+  if (extension)
+    result->append(extension);
+
+  return result;
+}
+
+// Reject an image-only option while a print format (PS/EPS/PDF/SVG) is
+// selected: if `option` is set, print an error naming it and exit with
+// status 99.
+static void checkInvalidPrintOption(GBool option, const char *option_name)
+{
+  if (!option)
+    return;
+
+  fprintf(stderr, "Error: %s may only be used with the -png or -jpeg output options.\n", option_name);
+  exit(99);
+}
+
+// Reject a print-only option while an image format (PNG/JPEG) is selected:
+// if `option` is set, print an error naming it and exit with status 99.
+static void checkInvalidImageOption(GBool option, const char *option_name)
+{
+  if (!option)
+    return;
+
+  fprintf(stderr, "Error: %s may only be used with the -ps, -eps, -pdf, or -svg output options.\n", option_name);
+  exit(99);
+}
+
+// Program entry point: parse and validate the command line, open the PDF and
+// render every selected page to the chosen cairo backend.
+// Exit status: 0 on success (and for -v / -h), 1 if the PDF cannot be opened,
+// 3 if printing is not permitted (only when ENFORCE_PERMISSIONS is defined),
+// 4 for ICC profile errors, 99 for usage errors. The helpers called from here
+// exit with status 2 when an output file cannot be opened.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName = NULL;
+  GooString *outputName = NULL;
+  GooString *outputFileName = NULL;
+  GooString *imageFileName = NULL;
+  GooString *ownerPW, *userPW;
+  CairoOutputDev *cairoOut;
+  int pg, pg_num_len;
+  double pg_w, pg_h, tmp, output_w, output_h;
+  int num_outputs;
+
+  // parse args
+  if (!parseArgs(argDesc, &argc, argv))
+    exit(99);
+
+  // -r sets both axes as long as at least one of -rx / -ry still has its
+  // default value
+  if ( resolution != 0.0 &&
+       (x_resolution == 150.0 ||
+        y_resolution == 150.0)) {
+    x_resolution = resolution;
+    y_resolution = resolution;
+  }
+  if (argc < 2 || argc > 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftocairo version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftocairo", "<PDF-file> [<output-file>]", argDesc);
+    }
+    if (printVersion || printHelp)
+      exit(0);
+    else
+      exit(99);
+  }
+
+  // exactly one output format must be selected
+  num_outputs = (png ? 1 : 0) +
+                (jpeg ? 1 : 0) +
+                (ps ? 1 : 0) +
+                (eps ? 1 : 0) +
+                (pdf ? 1 : 0) +
+                (svg ? 1 : 0);
+  if (num_outputs == 0) {
+    fprintf(stderr, "Error: one of the output format options (-png, -jpeg, -ps, -eps, -pdf, -svg) must be used.\n");
+    exit(99);
+  }
+  if (num_outputs > 1) {
+    fprintf(stderr, "Error: use only one of the output format options (-png, -jpeg, -ps, -eps, -pdf, -svg).\n");
+    exit(99);
+  }
+  if (png || jpeg)
+    printing = gFalse;
+  else
+    printing = gTrue;
+
+  if (printing) {
+    checkInvalidPrintOption(mono, "-mono");
+    checkInvalidPrintOption(gray, "-gray");
+    checkInvalidPrintOption(transp, "-transp");
+    checkInvalidPrintOption(icc.getCString()[0], "-icc");
+    checkInvalidPrintOption(singleFile, "-singlefile");
+  } else {
+    checkInvalidImageOption(level2, "-level2");
+    checkInvalidImageOption(level3, "-level3");
+    checkInvalidImageOption(doOrigPageSizes, "-origpagesizes");
+    checkInvalidImageOption(paperSize[0], "-paper");
+    checkInvalidImageOption(paperWidth > 0, "-paperw");
+    checkInvalidImageOption(paperHeight > 0, "-paperh");
+    checkInvalidImageOption(noCrop, "-nocrop");
+    checkInvalidImageOption(expand, "-expand");
+    checkInvalidImageOption(noShrink, "-noshrink");
+    checkInvalidImageOption(noCenter, "-nocenter");
+    checkInvalidImageOption(duplex, "-duplex");
+  }
+
+  if (icc.getCString()[0] && !png) {
+    fprintf(stderr, "Error: -icc may only be used with png output.\n");
+    exit(99);
+  }
+
+  if (transp && !png) {
+    fprintf(stderr, "Error: -transp may only be used with png output.\n");
+    exit(99);
+  }
+
+  if (mono && gray) {
+    fprintf(stderr, "Error: -mono and -gray may not be used together.\n");
+    exit(99);
+  }
+
+  if (mono && !png) {
+    fprintf(stderr, "Error: -mono may only be used with png output.\n");
+    exit(99);
+  }
+
+  // -o and -e together select no page at all; without a rendered page the
+  // document is never begun and endDocument() would act on an unopened stream
+  if (printOnlyOdd && printOnlyEven) {
+    fprintf(stderr, "Error: -o and -e may not be used together.\n");
+    exit(99);
+  }
+
+  if (level2 && level3) {
+    fprintf(stderr, "Error: use only one of the 'level' options.\n");
+    exit(99);
+  }
+  if (!level2 && !level3)
+    level3 = gTrue;
+
+  if (eps && (doOrigPageSizes || paperSize[0] || paperWidth > 0 || paperHeight > 0)) {
+    fprintf(stderr, "Error: page size options may not be used with eps output.\n");
+    exit(99);
+  }
+
+  if (paperSize[0]) {
+    if (!setPSPaperSize(paperSize, paperWidth, paperHeight)) {
+      fprintf(stderr, "Invalid paper size\n");
+      exit(99);
+    }
+  }
+
+  globalParams = new GlobalParams();
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  if (ownerPassword[0]) {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  // "-" as the input name means stdin, represented as "fd://0"
+  fileName = new GooString(argv[1]);
+  if (fileName->cmp("-") == 0) {
+    delete fileName;
+    fileName = new GooString("fd://0");
+  }
+  if (argc == 3)
+    outputName = new GooString(argv[2]);
+  else
+    outputName = NULL;
+
+  outputFileName = getOutputFileName(fileName, outputName);
+
+#if USE_CMS
+  // load the ICC profile given with -icc, or fall back to sRGB
+  icc_data = NULL;
+  if (icc.getCString()[0]) {
+    FILE *file = fopen(icc.getCString(), "rb");
+    if (!file) {
+      fprintf(stderr, "Error: unable to open icc profile %s\n", icc.getCString());
+      exit(4);
+    }
+    fseek (file, 0, SEEK_END);
+    icc_data_size = ftell(file);
+    fseek (file, 0, SEEK_SET);
+    icc_data = (unsigned char*)gmalloc(icc_data_size);
+    if (fread(icc_data, icc_data_size, 1, file) != 1) {
+      fprintf(stderr, "Error: unable to read icc profile %s\n", icc.getCString());
+      exit(4);
+    }
+    fclose(file);
+    profile = cmsOpenProfileFromMem(icc_data, icc_data_size);
+    if (!profile) {
+      fprintf(stderr, "Error: lcms error opening profile\n");
+      exit(4);
+    }
+  } else {
+    profile = cmsCreate_sRGBProfile();
+  }
+  GfxColorSpace::setDisplayProfile(profile);
+#endif
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+  if (!doc->isOk()) {
+    fprintf(stderr, "Error opening PDF file.\n");
+    exit(1);
+  }
+
+#ifdef ENFORCE_PERMISSIONS
+  // check for print permission
+  if (printing && !doc->okToPrint()) {
+    fprintf(stderr, "Printing this document is not allowed.\n");
+    exit(3);
+  }
+#endif
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (singleFile && lastPage < 1)
+    lastPage = firstPage;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  // An empty range would skip the page loop entirely, so beginDocument()
+  // would never run before endDocument().
+  if (firstPage > lastPage) {
+    fprintf(stderr, "Error: the requested page range (%d to %d) contains no pages.\n",
+            firstPage, lastPage);
+    exit(99);
+  }
+
+  if (eps && firstPage != lastPage) {
+    fprintf(stderr, "EPS files can only contain one page.\n");
+    exit(99);
+  }
+
+  if (singleFile && firstPage < lastPage) {
+    if (!quiet) {
+      fprintf(stderr,
+              "Warning: Single file will write only the first of the %d pages.\n",
+              lastPage + 1 - firstPage);
+    }
+    lastPage = firstPage;
+  }
+
+  // Make sure firstPage is always used so that beginDocument() is called:
+  // if the parity filter (-e keeps even pages, -o keeps odd pages) would
+  // skip it, start from the following page instead.
+  if ((printOnlyEven && firstPage % 2 == 1) || (printOnlyOdd && firstPage % 2 == 0))
+    firstPage++;
+
+  if (firstPage > lastPage) {
+    fprintf(stderr, "Error: no %s pages in the requested page range.\n",
+            printOnlyEven ? "even" : "odd");
+    exit(99);
+  }
+
+  cairoOut = new CairoOutputDev();
+  cairoOut->startDoc(doc);
+  if (sz != 0)
+    crop_w = crop_h = sz;
+  pg_num_len = numberOfCharacters(doc->getNumPages());
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    // -e prints only even pages, -o prints only odd pages
+    if (printOnlyEven && pg % 2 == 1) continue;
+    if (printOnlyOdd && pg % 2 == 0) continue;
+    if (useCropBox) {
+      pg_w = doc->getPageCropWidth(pg);
+      pg_h = doc->getPageCropHeight(pg);
+    } else {
+      pg_w = doc->getPageMediaWidth(pg);
+      pg_h = doc->getPageMediaHeight(pg);
+    }
+
+    // with no paper size given, the first page defines it
+    if (printing && pg == firstPage) {
+      if (paperWidth < 0 || paperHeight < 0) {
+        paperWidth = (int)ceil(pg_w);
+        paperHeight = (int)ceil(pg_h);
+      }
+    }
+
+    // derive the resolution from the -scale-to* targets, if any
+    if (scaleTo != 0) {
+      resolution = (72.0 * scaleTo) / (pg_w > pg_h ? pg_w : pg_h);
+      x_resolution = y_resolution = resolution;
+    } else {
+      if (x_scaleTo > 0) {
+        x_resolution = (72.0 * x_scaleTo) / pg_w;
+        // -1 on the other axis means "keep the aspect ratio"
+        if (y_scaleTo == -1)
+          y_resolution = x_resolution;
+      }
+      if (y_scaleTo > 0) {
+        y_resolution = (72.0 * y_scaleTo) / pg_h;
+        if (x_scaleTo == -1)
+          x_resolution = y_resolution;
+      }
+    }
+    // pages rotated by a quarter turn have their width and height swapped
+    if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) {
+      tmp = pg_w;
+      pg_w = pg_h;
+      pg_h = tmp;
+    }
+    if (imageFileName) {
+      delete imageFileName;
+      imageFileName = NULL;
+    }
+    if (!printing)
+      imageFileName = getImageFileName(outputFileName, pg_num_len, pg);
+    getOutputSize(pg_w, pg_h, &output_w, &output_h);
+
+    if (pg == firstPage)
+      beginDocument(outputFileName, output_w, output_h);
+    beginPage(output_w, output_h);
+    renderPage(doc, cairoOut, pg, pg_w, pg_h, output_w, output_h);
+    endPage(imageFileName);
+  }
+  endDocument();
+
+  // clean up
+  delete cairoOut;
+  delete doc;
+  delete globalParams;
+  if (fileName)
+    delete fileName;
+  if (outputName)
+    delete outputName;
+  if (outputFileName)
+    delete outputFileName;
+  if (imageFileName)
+    delete imageFileName;
+  if (ownerPW)
+    delete ownerPW;
+  // free the user password itself (not ownerPW a second time)
+  if (userPW)
+    delete userPW;
+
+#if USE_CMS
+  cmsCloseProfile(profile);
+  if (icc_data)
+    gfree(icc_data);
+#endif
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return 0;
+}
diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1
new file mode 100644
index 00000000..44137e4d
--- /dev/null
+++ b/utils/pdftohtml.1
@@ -0,0 +1,108 @@
+.TH PDFTOHTML 1
+.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection
+.\" other parms are allowed: see man(7), man(1)
+.SH NAME
+pdftohtml \- program to convert PDF files into HTML, XML and PNG images
+.SH SYNOPSIS
+.B pdftohtml
+.I "[options] <PDF-file> [<HTML-file> <XML-file>]"
+.SH "DESCRIPTION"
+This manual page briefly documents the
+.BR pdftohtml
+command.
+This manual page was written for the Debian GNU/Linux distribution
+because the original program does not have a manual page.
+.PP
+.B pdftohtml
+is a program that converts PDF documents into HTML. It generates its output in
+the current working directory.
+.SH OPTIONS
+A summary of options is included below.
+.TP
+.B \-h, \-help
+Show summary of options.
+.TP
+.B \-f <int>
+first page to print
+.TP
+.B \-l <int>
+last page to print
+.TP
+.B \-q
+do not print any messages or errors
+.TP
+.B \-v
+print copyright and version info
+.TP
+.B \-p
+exchange .pdf links with .html
+.TP
+.B \-c
+generate complex output
+.TP
+.B \-s
+generate single HTML that includes all pages
+.TP
+.B \-i
+ignore images
+.TP
+.B \-noframes
+generate no frames. Not supported in complex output mode.
+.TP
+.B \-stdout
+use standard output
+.TP
+.B \-zoom <fp>
+zoom the PDF document (default 1.5)
+.TP
+.B \-xml
+output for XML post-processing
+.TP
+.B \-enc <string>
+output text encoding name
+.TP
+.B \-opw <string>
+owner password (for encrypted files)
+.TP
+.B \-upw <string>
+user password (for encrypted files)
+.TP
+.B \-hidden
+force hidden text extraction
+.TP
+.B \-dev
+output device name for Ghostscript (png16m, jpeg etc).
+Unless this option is specified, Splash will be used
+.TP
+.B \-fmt
+image file format for Splash output (png or jpg).
+If complex is selected, but neither \-fmt nor \-dev is specified,
+\-fmt png will be assumed
+.TP
+.B \-nomerge
+do not merge paragraphs
+.TP
+.B \-nodrm
+override document DRM settings
+.TP
+.B \-wbt <fp>
+adjust the word break threshold percent. Default is 10.
+Word break occurs when distance between two adjacent characters is
+greater than this percent of character height.
+
+.SH AUTHOR
+
+Pdftohtml was developed by Gueorgui Ovtcharov and Rainer Dorsch. It is
+based on, and benefits a lot from, Derek Noonburg's xpdf package.
+
+This manual page was written by Søren Boll Overgaard <boll@debian.org>,
+for the Debian GNU/Linux system (but may be used by others).
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc
new file mode 100644
index 00000000..d6475b01
--- /dev/null
+++ b/utils/pdftohtml.cc
@@ -0,0 +1,603 @@
+//========================================================================
+//
+// pdftohtml.cc
+//
+//
+// Copyright 1999-2000 G. Ovtcharov
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2007-2008, 2010, 2012 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 Mike Slegeir <tehpola@yahoo.com>
+// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
+// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in)
+// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk>
+// Copyright (C) 2012 Igor Slepchin <igor.redhat@gmail.com>
+// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#ifdef HAVE_DIRENT_H
+#include <dirent.h>
+#endif
+#include <time.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "Outline.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "HtmlOutputDev.h"
+#ifdef HAVE_SPLASH
+#include "SplashOutputDev.h"
+#include "splash/SplashBitmap.h"
+#endif
+#include "PSOutputDev.h"
+#include "GlobalParams.h"
+#include "PDFDocEncoding.h"
+#include "Error.h"
+#include "DateInfo.h"
+#include "goo/gfile.h"
+
+#ifndef GHOSTSCRIPT
+# define GHOSTSCRIPT "gs"
+#endif
+
+// Command-line option state. Each variable is either bound to an option in
+// argDesc[] below or assigned in main(). The variables without `static`
+// have external linkage.
+// NOTE(review): the non-static flags are presumably read by the HTML output
+// code through extern declarations -- confirm against HtmlOutputDev.
+static int firstPage = 1; // -f
+static int lastPage = 0; // -l; main() replaces values < 1 with the page count
+static GBool rawOrder = gTrue; // overwritten in main() from complexMode / singleHtml
+GBool printCommands = gTrue; // cleared in main() when -q is given
+static GBool printHelp = gFalse; // -h, -help
+GBool printHtml = gFalse; // -p
+GBool complexMode=gFalse; // -c
+GBool singleHtml=gFalse; // -s
+GBool ignore=gFalse; // -i
+static GBool useSplash=gTrue; // cleared in main() when -dev is not "none"
+static char extension[5]="png"; // -fmt; main() may overwrite it from the -dev name
+static double scale=1.5; // -zoom; main() clamps it to [0.5, 3.0]
+GBool noframes=gFalse; // -noframes
+GBool stout=gFalse; // -stdout
+GBool xml=gFalse; // -xml
+static GBool errQuiet=gFalse; // -q
+static GBool noDrm=gFalse; // -nodrm
+double wordBreakThreshold=10; // -wbt; 10%, below converted into a coefficient - 0.1
+
+GBool showHidden = gFalse; // -hidden
+GBool noMerge = gFalse; // -nomerge
+static char ownerPassword[33] = ""; // -opw
+static char userPassword[33] = ""; // -upw
+static char gsDevice[33] = "none"; // -dev; "none" keeps the Splash path selected
+static GBool printVersion = gFalse; // -v
+
+// Helpers defined after main(); both look up one key of the document info
+// dictionary and return a GooString, or NULL when the entry is not a string.
+static GooString* getInfoString(Dict *infoDict, const char *key);
+static GooString* getInfoDate(Dict *infoDict, const char *key);
+
+static char textEncName[128] = ""; // -enc
+
+// Option table passed to parseArgs() and printUsage() in main().
+// Each entry lists the option name, the argument kind, the variable that
+// receives the value, a size (sizeof the buffer for argString entries,
+// 0 otherwise) and the help text. The {NULL} entry ends the table.
+// NOTE(review): the exact field meanings come from ArgDesc in parseargs.h,
+// which is not visible here -- confirm.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to convert"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to convert"},
+ /*{"-raw", argFlag, &rawOrder, 0,
+ "keep strings in content stream order"},*/
+ {"-q", argFlag, &errQuiet, 0,
+ "don't print any messages or errors"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-p", argFlag, &printHtml, 0,
+ "exchange .pdf links by .html"},
+ {"-c", argFlag, &complexMode, 0,
+ "generate complex document"},
+ {"-s", argFlag, &singleHtml, 0,
+ "generate single document that includes all pages"},
+ {"-i", argFlag, &ignore, 0,
+ "ignore images"},
+ {"-noframes", argFlag, &noframes, 0,
+ "generate no frames"},
+ {"-stdout" ,argFlag, &stout, 0,
+ "use standard output"},
+ {"-zoom", argFP, &scale, 0,
+ "zoom the pdf document (default 1.5)"},
+ {"-xml", argFlag, &xml, 0,
+ "output for XML post-processing"},
+ {"-hidden", argFlag, &showHidden, 0,
+ "output hidden text"},
+ {"-nomerge", argFlag, &noMerge, 0,
+ "do not merge paragraphs"},
+ {"-enc", argString, textEncName, sizeof(textEncName),
+ "output text encoding name"},
+ {"-dev", argString, gsDevice, sizeof(gsDevice),
+ "output device name for Ghostscript (png16m, jpeg etc)"},
+ {"-fmt", argString, extension, sizeof(extension),
+ "image file format for Splash output (png or jpg)"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-nodrm", argFlag, &noDrm, 0,
+ "override document DRM settings"},
+ {"-wbt", argFP, &wordBreakThreshold, 0,
+ "word break threshold (default 10 percent)"},
+ {NULL}
+};
+
+#ifdef HAVE_SPLASH
+// SplashOutputDev variant whose text-related callbacks are all empty.
+// main() uses it to render the per-page background images, while the text
+// itself is emitted through HtmlOutputDev.
+class SplashOutputDevNoText : public SplashOutputDev {
+public:
+  // Forwards every argument unchanged to the SplashOutputDev constructor.
+  SplashOutputDevNoText(SplashColorMode colorModeA, int bitmapRowPadA,
+                        GBool reverseVideoA, SplashColorPtr paperColorA,
+                        GBool bitmapTopDownA = gTrue,
+                        GBool allowAntialiasA = gTrue)
+    : SplashOutputDev(colorModeA, bitmapRowPadA, reverseVideoA,
+                      paperColorA, bitmapTopDownA, allowAntialiasA)
+  {
+  }
+
+  virtual ~SplashOutputDevNoText()
+  {
+  }
+
+  // ----- glyph drawing: intentionally empty
+  void drawChar(GfxState *state, double x, double y,
+                double dx, double dy,
+                double originX, double originY,
+                CharCode code, int nBytes, Unicode *u, int uLen)
+  {
+  }
+
+  // ----- Type 3 glyphs: reports gFalse and does nothing
+  GBool beginType3Char(GfxState *state, double x, double y,
+                       double dx, double dy,
+                       CharCode code, Unicode *u, int uLen)
+  {
+    return gFalse;
+  }
+
+  void endType3Char(GfxState *state)
+  {
+  }
+
+  GBool interpretType3Chars()
+  {
+    return gFalse;
+  }
+
+  // ----- text objects and text clipping: intentionally empty
+  void beginTextObject(GfxState *state)
+  {
+  }
+
+  GBool deviceHasTextClip(GfxState *state)
+  {
+    return gFalse;
+  }
+
+  void endTextObject(GfxState *state)
+  {
+  }
+};
+#endif
+
+// Entry point of pdftohtml.
+//
+// Parses the command line, opens the PDF, writes the HTML/XML text through
+// HtmlOutputDev and, in complex or single-document mode (unless -xml or -i
+// is given), renders one background image per page, either with Splash or
+// by writing a temporary PostScript file and running Ghostscript on it.
+//
+// Exit status: 0 when the HTML output device was usable and the pages were
+// written; 1 when the run was aborted (usage error, bad -enc value,
+// unreadable PDF, copy protection without -nodrm, missing output name for
+// stdin input, unusable output device); -1 when Splash output is required
+// but the binary was built without it.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc = NULL;
+  GooString *fileName = NULL;
+  GooString *docTitle = NULL;
+  GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL;
+  GooString *htmlFileName = NULL;
+  GooString *psFileName = NULL;
+  HtmlOutputDev *htmlOut = NULL;
+#ifdef HAVE_SPLASH
+  SplashOutputDev *splashOut = NULL;
+#endif
+  PSOutputDev *psOut = NULL;
+  GBool doOutline;
+  GBool ok;
+  char *p;
+  GooString *ownerPW, *userPW;
+  Object info;
+  const char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL};
+  // Declared before the first "goto error" so no jump crosses its
+  // initialisation. Stays 1 until the text output has been written.
+  int exitCode = 1;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printHelp || printVersion) {
+    fprintf(stderr, "pdftohtml version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", "Copyright 1999-2003 Gueorgui Ovtcharov and Rainer Dorsch");
+    fprintf(stderr, "%s\n\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftohtml", "<PDF-file> [<html-file> <xml-file>]", argDesc);
+    }
+    exit(1);
+  }
+
+  // init error file
+  //errorInit();
+
+  // read config file
+  globalParams = new GlobalParams();
+
+  if (errQuiet) {
+    globalParams->setErrQuiet(errQuiet);
+    printCommands = gFalse; // I'm not 100% sure what the difference between them is
+  }
+
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+    if( !globalParams->getTextEncoding() ) {
+      goto error;
+    }
+  }
+
+  // convert from user-friendly percents into a coefficient
+  wordBreakThreshold /= 100.0;
+
+  // open PDF file
+  if (ownerPassword[0]) {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  fileName = new GooString(argv[1]);
+
+  // "-" selects standard input
+  if (fileName->cmp("-") == 0) {
+    delete fileName;
+    fileName = new GooString("fd://0");
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    goto error;
+  }
+
+  // check for copy permission
+  if (!doc->okToCopy()) {
+    if (!noDrm) {
+      error(errNotAllowed, -1, "Copying of text from this document is not allowed.");
+      goto error;
+    }
+    fprintf(stderr, "Document has copy-protection bit set.\n");
+  }
+
+  // construct text file name
+  if (argc == 3) {
+    // explicit output name: strip a trailing .html/.HTML (or .xml/.XML with -xml)
+    GooString* tmp = new GooString(argv[2]);
+    if (!xml) {
+      if (tmp->getLength() >= 5) {
+        p = tmp->getCString() + tmp->getLength() - 5;
+        if (!strcmp(p, ".html") || !strcmp(p, ".HTML")) {
+          htmlFileName = new GooString(tmp->getCString(), tmp->getLength() - 5);
+        }
+      }
+    } else {
+      if (tmp->getLength() >= 4) {
+        p = tmp->getCString() + tmp->getLength() - 4;
+        if (!strcmp(p, ".xml") || !strcmp(p, ".XML")) {
+          htmlFileName = new GooString(tmp->getCString(), tmp->getLength() - 4);
+        }
+      }
+    }
+    if (!htmlFileName) {
+      htmlFileName =new GooString(tmp);
+    }
+    delete tmp;
+  } else if (fileName->cmp("fd://0") == 0) {
+    error(errCommandLine, -1, "You have to provide an output filename when reading from stdin.");
+    goto error;
+  } else {
+    // Derive the output name from the input name by stripping ".pdf"/".PDF".
+    // The length check keeps p inside the buffer for names shorter than
+    // four characters.
+    if (fileName->getLength() >= 4) {
+      p = fileName->getCString() + fileName->getLength() - 4;
+      if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))
+        htmlFileName = new GooString(fileName->getCString(),
+                                     fileName->getLength() - 4);
+    }
+    if (!htmlFileName)
+      htmlFileName = fileName->copy();
+    // htmlFileName->append(".html");
+  }
+
+  // clamp the zoom factor
+  if (scale>3.0) scale=3.0;
+  if (scale<0.5) scale=0.5;
+
+  // resolve option combinations: -c/-s disable -stdout, -stdout implies
+  // -noframes, -xml forces complex, frameless, unmerged output
+  if (complexMode || singleHtml) {
+    //noframes=gFalse;
+    stout=gFalse;
+  }
+
+  if (stout) {
+    noframes=gTrue;
+    complexMode=gFalse;
+    singleHtml=gFalse;
+  }
+
+  if (xml)
+  {
+    complexMode = gTrue;
+    singleHtml = gFalse;
+    noframes = gTrue;
+    noMerge = gTrue;
+  }
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  // collect document metadata; the date prefers ModDate over CreationDate
+  doc->getDocInfo(&info);
+  if (info.isDict()) {
+    docTitle = getInfoString(info.getDict(), "Title");
+    author = getInfoString(info.getDict(), "Author");
+    keywords = getInfoString(info.getDict(), "Keywords");
+    subject = getInfoString(info.getDict(), "Subject");
+    date = getInfoDate(info.getDict(), "ModDate");
+    if( !date )
+      date = getInfoDate(info.getDict(), "CreationDate");
+  }
+  info.free();
+  if( !docTitle ) docTitle = new GooString(htmlFileName);
+
+  // a -dev value other than "none" selects the Ghostscript path
+  if( strcmp("none", gsDevice) ) {
+    useSplash = gFalse;
+    /* determine extensions of output background images */
+    int i;
+    for(i = 0; extsList[i]; i++)
+    {
+      if( strstr(gsDevice, extsList[i]) != (char *) NULL )
+      {
+        strncpy(extension, extsList[i], sizeof(extension));
+        break;
+      }
+    }
+  }
+
+#ifndef HAVE_SPLASH
+  if( useSplash ) {
+    fprintf(stderr, "You are trying to use the -fmt option but your pdftohtml was built without support for it. Please use the -dev option\n");
+    delete docTitle;
+    delete author;
+    delete keywords;
+    delete subject;
+    delete date;
+    delete htmlFileName;
+    delete globalParams;
+    delete fileName;
+    delete doc;
+    return -1;
+  }
+#endif
+
+  if (!singleHtml)
+    rawOrder = complexMode; // todo: figure out what exactly rawOrder do :)
+  else
+    rawOrder = singleHtml;
+
+#ifdef DISABLE_OUTLINE
+  doOutline = gFalse;
+#else
+  doOutline = doc->getOutline()->getItems() != NULL;
+#endif
+  // write text file
+  htmlOut = new HtmlOutputDev(doc->getCatalog(), htmlFileName->getCString(),
+                              docTitle->getCString(),
+                              author ? author->getCString() : NULL,
+                              keywords ? keywords->getCString() : NULL,
+                              subject ? subject->getCString() : NULL,
+                              date ? date->getCString() : NULL,
+                              extension,
+                              rawOrder,
+                              firstPage,
+                              doOutline);
+  delete docTitle;
+  if( author )
+  {
+    delete author;
+  }
+  if( keywords )
+  {
+    delete keywords;
+  }
+  if( subject )
+  {
+    delete subject;
+  }
+  if( date )
+  {
+    delete date;
+  }
+
+  if (htmlOut->isOk())
+  {
+    doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0,
+                      gTrue, gFalse, gFalse);
+    htmlOut->dumpDocOutline(doc);
+    // the text output was produced, so report success
+    exitCode = 0;
+  }
+
+  // background images, one per page
+  if ((complexMode || singleHtml) && !xml && !ignore) {
+    if(useSplash) {
+#ifdef HAVE_SPLASH
+      GooString *imgFileName = NULL;
+      // White paper color
+      SplashColor color;
+      color[0] = color[1] = color[2] = 255;
+      // If the user specified "jpg" use JPEG, otherwise PNG
+      SplashImageFileFormat format = strcmp(extension, "jpg") ?
+          splashFormatPng : splashFormatJpeg;
+
+      splashOut = new SplashOutputDevNoText(splashModeRGB8, 4, gFalse, color);
+      splashOut->startDoc(doc);
+
+      for (int pg = firstPage; pg <= lastPage; ++pg) {
+        doc->displayPage(splashOut, pg,
+                         72 * scale, 72 * scale,
+                         0, gTrue, gFalse, gFalse);
+        SplashBitmap *bitmap = splashOut->getBitmap();
+
+        imgFileName = GooString::format("{0:s}{1:03d}.{2:s}",
+            htmlFileName->getCString(), pg, extension);
+
+        bitmap->writeImgFile(format, imgFileName->getCString(),
+                             72 * scale, 72 * scale);
+
+        delete imgFileName;
+      }
+
+      delete splashOut;
+#endif
+    } else {
+      int h=xoutRound(htmlOut->getPageHeight()/scale);
+      int w=xoutRound(htmlOut->getPageWidth()/scale);
+      //int h=xoutRound(doc->getPageHeight(1)/scale);
+      //int w=xoutRound(doc->getPageWidth(1)/scale);
+
+      // temporary PostScript file without text, removed after the gs run
+      psFileName = new GooString(htmlFileName->getCString());
+      psFileName->append(".ps");
+
+      psOut = new PSOutputDev(psFileName->getCString(), doc,
+                              NULL, firstPage, lastPage, psModePS, w, h);
+      psOut->setDisplayText(gFalse);
+      doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0,
+                        gTrue, gFalse, gFalse);
+      delete psOut;
+
+      /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r%d -sOutputFile=%s%%03d.png -g%dx%d -q %s", GHOSTSCRIPT, resolution, htmlFileName->getCString(), w, h,
+      psFileName->getCString());*/
+
+      // NOTE(review): the device name and both file names come from the
+      // command line and are only wrapped in double quotes, not escaped.
+      // executeCommand() presumably hands the string to a shell -- confirm,
+      // and consider escaping. Behaviour is left unchanged here.
+      GooString *gsCmd = new GooString(GHOSTSCRIPT);
+      GooString *tw, *th, *sc;
+      gsCmd->append(" -sDEVICE=");
+      gsCmd->append(gsDevice);
+      gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r");
+      sc = GooString::fromInt(static_cast<int>(72*scale));
+      gsCmd->append(sc);
+      gsCmd->append(" -sOutputFile=");
+      gsCmd->append("\"");
+      gsCmd->append(htmlFileName);
+      gsCmd->append("%03d.");
+      gsCmd->append(extension);
+      gsCmd->append("\" -g");
+      tw = GooString::fromInt(static_cast<int>(scale*w));
+      gsCmd->append(tw);
+      gsCmd->append("x");
+      th = GooString::fromInt(static_cast<int>(scale*h));
+      gsCmd->append(th);
+      gsCmd->append(" -q \"");
+      gsCmd->append(psFileName);
+      gsCmd->append("\"");
+      // printf("running: %s\n", gsCmd->getCString());
+      if( !executeCommand(gsCmd->getCString()) && !errQuiet) {
+        error(errIO, -1, "Failed to launch Ghostscript!\n");
+      }
+      unlink(psFileName->getCString());
+      delete tw;
+      delete th;
+      delete sc;
+      delete gsCmd;
+      delete psFileName;
+    }
+  }
+
+  delete htmlOut;
+
+  // clean up
+ error:
+  if(doc) delete doc;
+  delete fileName;
+  if(globalParams) delete globalParams;
+
+  if(htmlFileName) delete htmlFileName;
+  HtmlFont::clear();
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
+
+// Looks up `key` in the document info dictionary. When the entry is a
+// string, it is converted to Unicode and passed through
+// HtmlFont::HtmlFilter(); the filtered GooString is returned (main()
+// deletes it). Returns NULL when the entry is not a string.
+static GooString* getInfoString(Dict *infoDict, const char *key) {
+  Object entry;
+  GooString *filtered = NULL;
+
+  if (infoDict->lookup(key, &entry)->isString()) {
+    // Raw bytes as stored in the PDF: either two bytes per character after
+    // a two-byte Unicode marker, or one pdfDocEncoding byte per character.
+    GooString *raw = entry.getString();
+    const GBool wide = raw->hasUnicodeMarker();
+    const int count = wide ? (raw->getLength() - 2) / 2 : raw->getLength();
+    Unicode *chars = new Unicode[count];
+
+    if (wide) {
+      // skip the marker, then combine high byte and low byte
+      for (int i = 0; i < count; ++i) {
+        const int pos = (i + 1) * 2;
+        chars[i] = ((raw->getChar(pos) & 0xff) << 8) |
+                   (raw->getChar(pos + 1) & 0xff);
+      }
+    } else {
+      for (int i = 0; i < count; ++i) {
+        chars[i] = pdfDocEncoding[raw->getChar(i) & 0xff];
+      }
+    }
+
+    // HTML escape and encode unicode
+    filtered = HtmlFont::HtmlFilter(chars, count);
+    delete[] chars;
+  }
+
+  entry.free();
+  return filtered;
+}
+
+// Looks up `key` in the document info dictionary and returns the date as
+// a new GooString, or NULL when the entry is not a string. A value that
+// parseDateString() accepts is reformatted as
+// "YYYY-MM-DDTHH:MM:SS+00:00"; otherwise (or when strftime() yields
+// nothing) the raw string is returned unchanged.
+static GooString* getInfoDate(Dict *infoDict, const char *key) {
+  Object entry;
+  GooString *dateStr = NULL;
+
+  if (infoDict->lookup(key, &entry)->isString()) {
+    char *raw = entry.getString()->getCString();
+    int year, mon, day, hour, min, sec, tz_hour, tz_minute;
+    char tz;
+    char formatted[256];
+    GBool haveFormatted = gFalse;
+
+    // TODO do something with the timezone info
+    if ( parseDateString( raw, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) {
+      struct tm when;
+      when.tm_year = year - 1900;
+      when.tm_mon = mon - 1;
+      when.tm_mday = day;
+      when.tm_hour = hour;
+      when.tm_min = min;
+      when.tm_sec = sec;
+      when.tm_wday = -1;
+      when.tm_yday = -1;
+      when.tm_isdst = -1;
+      mktime(&when); // compute the tm_wday and tm_yday fields
+      haveFormatted = strftime(formatted, sizeof(formatted),
+                               "%Y-%m-%dT%H:%M:%S+00:00", &when) != 0;
+    }
+
+    dateStr = new GooString(haveFormatted ? formatted : raw);
+  }
+
+  entry.free();
+  return dateStr;
+}
+
diff --git a/utils/pdftoppm.1 b/utils/pdftoppm.1
new file mode 100644
index 00000000..2321d6d0
--- /dev/null
+++ b/utils/pdftoppm.1
@@ -0,0 +1,158 @@
+.\" Copyright 2005-2011 Glyph & Cog, LLC
+.TH pdftoppm 1 "15 August 2011"
+.SH NAME
+pdftoppm \- Portable Document Format (PDF) to Portable Pixmap (PPM)
+converter (version 3.03)
+.SH SYNOPSIS
+.B pdftoppm
+[options]
+.I PDF-file PPM-root
+.SH DESCRIPTION
+.B Pdftoppm
+converts Portable Document Format (PDF) files to color image files in
+Portable Pixmap (PPM) format, grayscale image files in Portable
+Graymap (PGM) format, or monochrome image files in Portable Bitmap
+(PBM) format.
+.PP
+Pdftoppm reads the PDF file,
+.IR PDF-file ,
+and writes one PPM file for each page,
+.IR PPM-root - number .ppm,
+where
+.I number
+is the page number.
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to convert.
+.TP
+.BI \-l " number"
+Specifies the last page to convert.
+.TP
+.B \-o
+Generates only the odd numbered pages.
+.TP
+.B \-e
+Generates only the even numbered pages.
+.TP
+.BI \-singlefile
+Writes only the first page and does not add digits.
+.TP
+.BI \-r " number"
+Specifies the X and Y resolution, in DPI. The default is 150 DPI.
+.TP
+.BI \-rx " number"
+Specifies the X resolution, in DPI. The default is 150 DPI.
+.TP
+.BI \-ry " number"
+Specifies the Y resolution, in DPI. The default is 150 DPI.
+.TP
+.BI \-scale-to " number"
+Scales the long side of each page (width for landscape pages, height
+for portrait pages) to fit in scale-to pixels. The size of the short
+side will be determined by the aspect ratio of the page.
+.TP
+.BI \-scale-to-x " number"
+Scales each page horizontally to fit in scale-to-x pixels. If
+scale-to-y is set to -1, the vertical size will be determined by the
+aspect ratio of the page.
+.TP
+.BI \-scale-to-y " number"
+Scales each page vertically to fit in scale-to-y pixels. If scale-to-x
+is set to -1, the horizontal size will be determined by the aspect ratio
+of the page.
+.TP
+.BI \-x " number"
+Specifies the x-coordinate of the crop area top left corner
+.TP
+.BI \-y " number"
+Specifies the y-coordinate of the crop area top left corner
+.TP
+.BI \-W " number"
+Specifies the width of crop area in pixels (default is 0)
+.TP
+.BI \-H " number"
+Specifies the height of crop area in pixels (default is 0)
+.TP
+.BI \-sz " number"
+Specifies the size of crop square in pixels (sets W and H)
+.TP
+.B \-cropbox
+Uses the crop box rather than media box when generating the files
+.TP
+.B \-mono
+Generate a monochrome PBM file (instead of a color PPM file).
+.TP
+.B \-gray
+Generate a grayscale PGM file (instead of a color PPM file).
+.TP
+.B \-png
+Generates a PNG file instead of a PPM file.
+.TP
+.B \-jpeg
+Generates a JPEG file instead of a PPM file.
+.TP
+.B \-tiff
+Generates a TIFF file instead of a PPM file.
+.TP
+.BI \-tiffcompression " none | packbits | jpeg | lzw | deflate"
+Specifies the TIFF compression type. This defaults to "none".
+.TP
+.BI \-freetype " yes | no"
+Enable or disable FreeType (a TrueType / Type 1 font rasterizer).
+This defaults to "yes".
+.TP
+.BI \-aa " yes | no"
+Enable or disable font anti-aliasing. This defaults to "yes".
+.TP
+.BI \-aaVector " yes | no"
+Enable or disable vector anti-aliasing. This defaults to "yes".
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftoppm software and documentation are copyright 1996-2011 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdftoppm.cc b/utils/pdftoppm.cc
new file mode 100644
index 00000000..04a0dfba
--- /dev/null
+++ b/utils/pdftoppm.cc
@@ -0,0 +1,440 @@
+//========================================================================
+//
+// pdftoppm.cc
+//
+// Copyright 2003 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2007 Ilmari Heikkinen <ilmari.heikkinen@gmail.com>
+// Copyright (C) 2008 Richard Airlie <richard.airlie@maglabs.net>
+// Copyright (C) 2009 Michael K. Johnson <a1237@danlj.org>
+// Copyright (C) 2009 Shen Liang <shenzhuxi@gmail.com>
+// Copyright (C) 2009 Stefan Thomas <thomas@eload24.com>
+// Copyright (C) 2009-2011 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010, 2012 Adrian Johnson <ajohnson@redneon.com>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 Jonathan Liu <net147@gmail.com>
+// Copyright (C) 2010 William Bader <williambader@hotmail.com>
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#ifdef _WIN32
+#include <fcntl.h> // for O_BINARY
+#include <io.h> // for setmode
+#endif
+#include <stdio.h>
+#include <math.h>
+#include "parseargs.h"
+#include "goo/gmem.h"
+#include "goo/GooString.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "splash/SplashBitmap.h"
+#include "splash/Splash.h"
+#include "SplashOutputDev.h"
+
+// Command-line option state; each variable is bound to an option in
+// argDesc[] below, and several are adjusted again in main().
+static int firstPage = 1; // -f
+static int lastPage = 0; // -l; main() replaces values < 1 with the page count
+static GBool printOnlyOdd = gFalse; // -o
+static GBool printOnlyEven = gFalse; // -e
+static GBool singleFile = gFalse; // -singlefile
+static double resolution = 0.0; // -r; 0.0 means the option was not given
+static double x_resolution = 150.0; // -rx; main() may overwrite it from -r or -scale-to*
+static double y_resolution = 150.0; // -ry; main() may overwrite it from -r or -scale-to*
+static int scaleTo = 0; // -scale-to
+static int x_scaleTo = 0; // -scale-to-x
+static int y_scaleTo = 0; // -scale-to-y
+static int x = 0; // -x
+static int y = 0; // -y
+static int w = 0; // -W; 0 selects the full page width in savePageSlice()
+static int h = 0; // -H; 0 selects the full page height in savePageSlice()
+static int sz = 0; // -sz; when non-zero main() copies it into w and h
+static GBool useCropBox = gFalse; // -cropbox
+static GBool mono = gFalse; // -mono
+static GBool gray = gFalse; // -gray
+static GBool png = gFalse; // -png
+static GBool jpeg = gFalse; // -jpeg
+static GBool jpegcmyk = gFalse; // -jpegcmyk
+static GBool tiff = gFalse; // -tiff
+#if SPLASH_CMYK
+static GBool overprint = gFalse; // -overprint
+#endif
+static char enableFreeTypeStr[16] = ""; // -freetype
+static char antialiasStr[16] = ""; // -aa
+static char vectorAntialiasStr[16] = ""; // -aaVector
+static char ownerPassword[33] = ""; // -opw
+static char userPassword[33] = ""; // -upw
+static char TiffCompressionStr[16] = ""; // -tiffcompression
+static GBool quiet = gFalse; // -q
+static GBool printVersion = gFalse; // -v
+static GBool printHelp = gFalse; // -h, -help, --help, -?
+
+// Option table passed to parseArgs() and printUsage() in main().
+// Each entry lists the option name, the argument kind, the variable that
+// receives the value, a size (sizeof the buffer for argString entries,
+// 0 otherwise) and the help text. The {NULL} entry ends the table.
+// The image-format and FreeType options are only compiled in when the
+// corresponding ENABLE_* / SPLASH_CMYK / HAVE_* macro is set.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to print"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to print"},
+ {"-o", argFlag, &printOnlyOdd, 0,
+ "print only odd pages"},
+ {"-e", argFlag, &printOnlyEven, 0,
+ "print only even pages"},
+ {"-singlefile", argFlag, &singleFile, 0,
+ "write only the first page and do not add digits"},
+
+ {"-r", argFP, &resolution, 0,
+ "resolution, in DPI (default is 150)"},
+ {"-rx", argFP, &x_resolution, 0,
+ "X resolution, in DPI (default is 150)"},
+ {"-ry", argFP, &y_resolution, 0,
+ "Y resolution, in DPI (default is 150)"},
+ {"-scale-to", argInt, &scaleTo, 0,
+ "scales each page to fit within scale-to*scale-to pixel box"},
+ {"-scale-to-x", argInt, &x_scaleTo, 0,
+ "scales each page horizontally to fit in scale-to-x pixels"},
+ {"-scale-to-y", argInt, &y_scaleTo, 0,
+ "scales each page vertically to fit in scale-to-y pixels"},
+
+ {"-x", argInt, &x, 0,
+ "x-coordinate of the crop area top left corner"},
+ {"-y", argInt, &y, 0,
+ "y-coordinate of the crop area top left corner"},
+ {"-W", argInt, &w, 0,
+ "width of crop area in pixels (default is 0)"},
+ {"-H", argInt, &h, 0,
+ "height of crop area in pixels (default is 0)"},
+ {"-sz", argInt, &sz, 0,
+ "size of crop square in pixels (sets W and H)"},
+ {"-cropbox",argFlag, &useCropBox, 0,
+ "use the crop box rather than media box"},
+
+ {"-mono", argFlag, &mono, 0,
+ "generate a monochrome PBM file"},
+ {"-gray", argFlag, &gray, 0,
+ "generate a grayscale PGM file"},
+#if ENABLE_LIBPNG
+ {"-png", argFlag, &png, 0,
+ "generate a PNG file"},
+#endif
+#if ENABLE_LIBJPEG
+ {"-jpeg", argFlag, &jpeg, 0,
+ "generate a JPEG file"},
+#if SPLASH_CMYK
+ {"-jpegcmyk",argFlag, &jpegcmyk, 0,
+ "generate a CMYK JPEG file"},
+#endif
+#endif
+#if SPLASH_CMYK
+ {"-overprint",argFlag, &overprint, 0,
+ "enable overprint"},
+#endif
+#if ENABLE_LIBTIFF
+ {"-tiff", argFlag, &tiff, 0,
+ "generate a TIFF file"},
+ {"-tiffcompression", argString, TiffCompressionStr, sizeof(TiffCompressionStr),
+ "set TIFF compression: none, packbits, jpeg, lzw, deflate"},
+#endif
+#if HAVE_FREETYPE_FREETYPE_H | HAVE_FREETYPE_H
+ {"-freetype", argString, enableFreeTypeStr, sizeof(enableFreeTypeStr),
+ "enable FreeType font rasterizer: yes, no"},
+#endif
+
+ {"-aa", argString, antialiasStr, sizeof(antialiasStr),
+ "enable font anti-aliasing: yes, no"},
+ {"-aaVector", argString, vectorAntialiasStr, sizeof(vectorAntialiasStr),
+ "enable vector anti-aliasing: yes, no"},
+
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+
+ {"-q", argFlag, &quiet, 0,
+ "don't print any messages or errors"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// Renders one page, or a crop rectangle of it, with Splash and writes the
+// resulting bitmap.
+//
+//   doc, splashOut  document and output device used for rendering
+//   pg              page number handed to displayPageSlice()
+//   x, y, w, h      crop rectangle; w == 0 / h == 0 select the full page
+//                   width / height, and the rectangle is shortened when it
+//                   extends past pg_w / pg_h
+//   pg_w, pg_h      page size in pixels at the current resolution
+//   ppmFile         output file name, or NULL to write to stdout
+//
+// The format follows the global png / jpeg / jpegcmyk / tiff flags (tested
+// in that order) and falls back to PNM. The stdout branch handles the same
+// set of formats as the file branch, including CMYK JPEG.
+static void savePageSlice(PDFDoc *doc,
+                          SplashOutputDev *splashOut,
+                          int pg, int x, int y, int w, int h,
+                          double pg_w, double pg_h,
+                          char *ppmFile) {
+  if (w == 0) w = (int)ceil(pg_w);
+  if (h == 0) h = (int)ceil(pg_h);
+  w = (x+w > pg_w ? (int)ceil(pg_w-x) : w);
+  h = (y+h > pg_h ? (int)ceil(pg_h-y) : h);
+  doc->displayPageSlice(splashOut,
+                        pg, x_resolution, y_resolution,
+                        0,
+                        !useCropBox, gFalse, gFalse,
+                        x, y, w, h
+                        );
+
+  SplashBitmap *bitmap = splashOut->getBitmap();
+
+  if (ppmFile != NULL) {
+    if (png) {
+      bitmap->writeImgFile(splashFormatPng, ppmFile, x_resolution, y_resolution);
+    } else if (jpeg) {
+      bitmap->writeImgFile(splashFormatJpeg, ppmFile, x_resolution, y_resolution);
+    } else if (jpegcmyk) {
+      bitmap->writeImgFile(splashFormatJpegCMYK, ppmFile, x_resolution, y_resolution);
+    } else if (tiff) {
+      bitmap->writeImgFile(splashFormatTiff, ppmFile, x_resolution, y_resolution, TiffCompressionStr);
+    } else {
+      bitmap->writePNMFile(ppmFile);
+    }
+  } else {
+#ifdef _WIN32
+    // image data must not go through text-mode newline translation
+    setmode(fileno(stdout), O_BINARY);
+#endif
+
+    if (png) {
+      bitmap->writeImgFile(splashFormatPng, stdout, x_resolution, y_resolution);
+    } else if (jpeg) {
+      bitmap->writeImgFile(splashFormatJpeg, stdout, x_resolution, y_resolution);
+    } else if (jpegcmyk) {
+      // previously missing here: -jpegcmyk fell through to writePNMFile()
+      bitmap->writeImgFile(splashFormatJpegCMYK, stdout, x_resolution, y_resolution);
+    } else if (tiff) {
+      bitmap->writeImgFile(splashFormatTiff, stdout, x_resolution, y_resolution, TiffCompressionStr);
+    } else {
+      bitmap->writePNMFile(stdout);
+    }
+  }
+}
+
+// Returns how many decimal digits are needed to print n (1 for n == 0).
+// main() uses it to zero-pad page numbers in output file names.
+static int numberOfCharacters(unsigned int n)
+{
+  int digits = 1;
+  for (; n >= 10; n /= 10) {
+    ++digits;
+  }
+  return digits;
+}
+
+// Entry point of pdftoppm.
+//
+// Parses the command line, opens the PDF (standard input when no file name
+// or "-" is given), renders every selected page with Splash and writes it
+// either to "<PPM-root>-<page>.<ext>" ("<PPM-root>.<ext>" with -singlefile)
+// or, when no root is given, to stdout.
+//
+// Exit status: 0 on success and for -v / -h, 1 when the PDF cannot be
+// opened, 99 for a usage error.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName = NULL;
+  char *ppmRoot = NULL;
+  char *ppmFile;
+  GooString *ownerPW, *userPW;
+  SplashColor paperColor;
+  SplashOutputDev *splashOut;
+  GBool ok;
+  int exitCode;
+  int pg, pg_num_len;
+  double pg_w, pg_h, tmp;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (mono && gray) {
+    ok = gFalse;
+  }
+  // -r sets both axes when at least one of -rx / -ry is still at its default
+  if ( resolution != 0.0 &&
+       (x_resolution == 150.0 ||
+        y_resolution == 150.0)) {
+    x_resolution = resolution;
+    y_resolution = resolution;
+  }
+  if (!ok || argc > 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftoppm version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftoppm", "[PDF-file [PPM-file-prefix]]", argDesc);
+    }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    goto err0;
+  }
+  if (argc > 1) fileName = new GooString(argv[1]);
+  if (argc == 3) ppmRoot = argv[2];
+
+  // read config file
+  globalParams = new GlobalParams();
+  if (enableFreeTypeStr[0]) {
+    if (!globalParams->setEnableFreeType(enableFreeTypeStr)) {
+      fprintf(stderr, "Bad '-freetype' value on command line\n");
+    }
+  }
+  if (antialiasStr[0]) {
+    if (!globalParams->setAntialias(antialiasStr)) {
+      fprintf(stderr, "Bad '-aa' value on command line\n");
+    }
+  }
+  if (vectorAntialiasStr[0]) {
+    if (!globalParams->setVectorAntialias(vectorAntialiasStr)) {
+      fprintf(stderr, "Bad '-aaVector' value on command line\n");
+    }
+  }
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  if (ownerPassword[0]) {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0]) {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  // no file name, or "-", selects standard input
+  if (fileName == NULL) {
+    fileName = new GooString("fd://0");
+  }
+  if (fileName->cmp("-") == 0) {
+    delete fileName;
+    fileName = new GooString("fd://0");
+  }
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+  delete fileName;
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (singleFile && lastPage < 1)
+    lastPage = firstPage;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  if (singleFile && firstPage < lastPage) {
+    if (!quiet) {
+      fprintf(stderr,
+        "Warning: Single file will write only the first of the %d pages.\n",
+        lastPage + 1 - firstPage);
+    }
+    lastPage = firstPage;
+  }
+
+  // write PPM files
+#if SPLASH_CMYK
+  if (jpegcmyk || overprint) {
+    globalParams->setOverprintPreview(gTrue);
+    paperColor[0] = 0;
+    paperColor[1] = 0;
+    paperColor[2] = 0;
+    paperColor[3] = 0;
+  } else
+#endif
+  {
+    paperColor[0] = 255;
+    paperColor[1] = 255;
+    paperColor[2] = 255;
+  }
+  splashOut = new SplashOutputDev(mono ? splashModeMono1 :
+                                  gray ? splashModeMono8 :
+#if SPLASH_CMYK
+                                  (jpegcmyk || overprint) ? splashModeCMYK8 :
+#endif
+                                  splashModeRGB8, 4,
+                                  gFalse, paperColor);
+  splashOut->startDoc(doc);
+  if (sz != 0) w = h = sz;
+  // page numbers in file names are zero-padded to the width of the page count
+  pg_num_len = numberOfCharacters(doc->getNumPages());
+  for (pg = firstPage; pg <= lastPage; ++pg) {
+    // -e keeps even pages, so skip the odd ones; -o keeps odd pages, so
+    // skip the even ones. (The two tests used to be the wrong way round.)
+    if (printOnlyEven && pg % 2 == 1) continue;
+    if (printOnlyOdd && pg % 2 == 0) continue;
+    if (useCropBox) {
+      pg_w = doc->getPageCropWidth(pg);
+      pg_h = doc->getPageCropHeight(pg);
+    } else {
+      pg_w = doc->getPageMediaWidth(pg);
+      pg_h = doc->getPageMediaHeight(pg);
+    }
+
+    // derive the resolution from the requested pixel size, if any
+    if (scaleTo != 0) {
+      resolution = (72.0 * scaleTo) / (pg_w > pg_h ? pg_w : pg_h);
+      x_resolution = y_resolution = resolution;
+    } else {
+      if (x_scaleTo > 0) {
+        x_resolution = (72.0 * x_scaleTo) / pg_w;
+        if (y_scaleTo == -1)
+          y_resolution = x_resolution;
+      }
+      if (y_scaleTo > 0) {
+        y_resolution = (72.0 * y_scaleTo) / pg_h;
+        if (x_scaleTo == -1)
+          x_resolution = y_resolution;
+      }
+    }
+    // page size in pixels; swap the sides for pages rotated by 90 or 270
+    pg_w = pg_w * (x_resolution / 72.0);
+    pg_h = pg_h * (y_resolution / 72.0);
+    if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) {
+      tmp = pg_w;
+      pg_w = pg_h;
+      pg_h = tmp;
+    }
+    if (ppmRoot != NULL) {
+      const char *ext = png ? "png" : (jpeg || jpegcmyk) ? "jpg" : tiff ? "tif" : mono ? "pbm" : gray ? "pgm" : "ppm";
+      if (singleFile) {
+        // root + '.' + ext + NUL
+        ppmFile = new char[strlen(ppmRoot) + 1 + strlen(ext) + 1];
+        sprintf(ppmFile, "%s.%s", ppmRoot, ext);
+      } else {
+        // root + '-' + padded page number + '.' + ext + NUL
+        ppmFile = new char[strlen(ppmRoot) + 1 + pg_num_len + 1 + strlen(ext) + 1];
+        sprintf(ppmFile, "%s-%0*d.%s", ppmRoot, pg_num_len, pg, ext);
+      }
+      savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, ppmFile);
+      delete[] ppmFile;
+    } else {
+      savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, NULL);
+    }
+  }
+  delete splashOut;
+
+  exitCode = 0;
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdftops.1 b/utils/pdftops.1
new file mode 100644
index 00000000..c7aeaf52
--- /dev/null
+++ b/utils/pdftops.1
@@ -0,0 +1,227 @@
+.\" Copyright 1996-2011 Glyph & Cog, LLC
+.TH pdftops 1 "15 August 2011"
+.SH NAME
+pdftops \- Portable Document Format (PDF) to PostScript converter
+(version 3.03)
+.SH SYNOPSIS
+.B pdftops
+[options]
+.RI <PDF-file>
+.RI [<PS-file>]
+.SH DESCRIPTION
+.B Pdftops
+converts Portable Document Format (PDF) files to PostScript so they
+can be printed.
+.PP
+Pdftops reads the PDF file,
+.IR PDF-file ,
+and writes a PostScript file,
+.IR PS-file .
+If
+.I PS-file
+is not specified, pdftops converts
+.I file.pdf
+to
+.I file.ps
+(or
+.I file.eps
+with the \-eps option). If
+.I PS-file
+is \'-', the PostScript is sent to stdout.
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to print.
+.TP
+.BI \-l " number"
+Specifies the last page to print.
+.TP
+.B \-level1
+Generate Level 1 PostScript. The resulting PostScript files will be
+significantly larger (if they contain images), but will print on Level
+1 printers. This also converts all images to black and white. No
+more than one of the PostScript level options (\-level1, \-level1sep,
+\-level2, \-level2sep, \-level3, \-level3sep) may be given.
+.TP
+.B \-level1sep
+Generate Level 1 separable PostScript. All colors are converted to
+CMYK. Images are written with separate stream data for the four
+components.
+.TP
+.B \-level2
+Generate Level 2 PostScript. Level 2 supports color images and image
+compression. This is the default setting.
+.TP
+.B \-level2sep
+Generate Level 2 separable PostScript. All colors are converted to
+CMYK. The PostScript separation convention operators are used to
+handle custom (spot) colors.
+.TP
+.B \-level3
+Generate Level 3 PostScript. This enables all Level 2 features plus
+CID font embedding.
+.TP
+.B \-level3sep
+Generate Level 3 separable PostScript. The separation handling is the
+same as for \-level2sep.
+.TP
+.B \-origpagesizes
+Generate a PostScript file with variable page sizes and orientations,
+taking for each page the size of the original page in the PDF file.
+The PostScript file contains "<</PageSize [WIDTH HEIGHT]>> setpagedevice"
+lines in each page header, so that the paper input tray gets correctly
+changed on the printer. This option should be used when pdftops is used
+as a print filter. Any specification of the page size via \-paper,
+\-paperw, or \-paperh will get overridden as long as each page of the
+PDF file has a defined paper size. No more than one of the mode options
+(\-origpagesizes, \-eps, \-form) may be given.
+.TP
+.B \-eps
+Generate an Encapsulated PostScript (EPS) file. An EPS file contains
+a single image, so if you use this option with a multi-page PDF file,
+you must use \-f and \-l to specify a single page. No more than one of
+the mode options (\-origpagesizes, \-eps, \-form) may be given.
+.TP
+.B \-form
+Generate a PostScript form which can be imported by software that
+understands forms. A form contains a single page, so if you use this
+option with a multi-page PDF file, you must use \-f and \-l to specify a
+single page. The \-level1 option cannot be used with \-form. No more
+than one of the mode options (\-origpagesizes, \-eps, \-form) may be
+given.
+.TP
+.B \-opi
+Generate OPI comments for all images and forms which have OPI
+information. (This option is only available if pdftops was compiled
+with OPI support.)
+.TP
+.B \-binary
+Write binary data in Level 1 PostScript. By default, pdftops writes
+hex-encoded data in Level 1 PostScript. Binary data is non-standard
+in Level 1 PostScript but reduces the file size and can be useful
+when Level 1 PostScript is required only for its restricted use
+of PostScript operators.
+.TP
+.BI \-r " number"
+Set the resolution in DPI when pdftops rasterizes images with
+transparencies or, for Level 1 PostScript, when pdftops
+rasterizes images with color masks.
+By default, pdftops rasterizes images to 300 DPI.
+.TP
+.B \-noembt1
+By default, any Type 1 fonts which are embedded in the PDF file are
+copied into the PostScript file. This option causes pdftops to
+substitute base fonts instead. Embedded fonts make PostScript files
+larger, but may be necessary for readable output.
+.TP
+.B \-noembtt
+By default, any TrueType fonts which are embedded in the PDF file are
+copied into the PostScript file. This option causes pdftops to
+substitute base fonts instead. Embedded fonts make PostScript files
+larger, but may be necessary for readable output. Also, some
+PostScript interpreters do not have TrueType rasterizers.
+.TP
+.B \-noembcidps
+By default, any CID PostScript fonts which are embedded in the PDF
+file are copied into the PostScript file. This option disables that
+embedding. No attempt is made to substitute for non-embedded CID
+PostScript fonts.
+.TP
+.B \-noembcidtt
+By default, any CID TrueType fonts which are embedded in the PDF file
+are copied into the PostScript file. This option disables that
+embedding. No attempt is made to substitute for non-embedded CID
+TrueType fonts.
+.TP
+.B \-passfonts
+By default, references to non-embedded 8-bit fonts in the PDF file are
+substituted with the closest "Helvetica", "Times-Roman", or "Courier" font.
+This option passes references to non-embedded fonts
+through to the PostScript file.
+.TP
+.B \-preload
+Preload images and forms.
+.TP
+.BI \-paper " size"
+Set the paper size to one of "letter", "legal", "A4", or "A3". This
+can also be set to "match", which will set the paper size to match the
+size specified in the PDF file. \-origpagesizes overrides this setting
+if the PDF file has defined page sizes.
+.TP
+.BI \-paperw " size"
+Set the paper width, in points. \-origpagesizes overrides this setting
+if the PDF file has defined page sizes.
+.TP
+.BI \-paperh " size"
+Set the paper height, in points. \-origpagesizes overrides this setting
+if the PDF file has defined page sizes.
+.TP
+.B \-nocrop
+By default, output is cropped to the CropBox specified in the PDF
+file. This option disables cropping.
+.TP
+.B \-expand
+Expand PDF pages smaller than the paper to fill the paper. By
+default, these pages are not scaled.
+.TP
+.B \-noshrink
+Don't scale PDF pages which are larger than the paper. By default,
+pages larger than the paper are shrunk to fit.
+.TP
+.B \-nocenter
+By default, PDF pages smaller than the paper (after any scaling) are
+centered on the paper. This option causes them to be aligned to the
+lower-left corner of the paper instead.
+.TP
+.B \-duplex
+Set the Duplex pagedevice entry in the PostScript file. This tells
+duplex-capable printers to enable duplexing.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftops software and documentation are copyright 1996-2011 Glyph &
+Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftotext (1)
diff --git a/utils/pdftops.cc b/utils/pdftops.cc
new file mode 100644
index 00000000..7f5a0ce3
--- /dev/null
+++ b/utils/pdftops.cc
@@ -0,0 +1,422 @@
+//========================================================================
+//
+// pdftops.cc
+//
+// Copyright 1996-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com>
+// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2009 Till Kamppeter <till.kamppeter@gmail.com>
+// Copyright (C) 2009 Sanjoy Mahajan <sanjoy@mit.edu>
+// Copyright (C) 2009, 2011, 2012 William Bader <williambader@hotmail.com>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "PSOutputDev.h"
+#include "Error.h"
+
+// Translate a paper-size name into a width/height pair.
+//
+// Recognised names are "match" (both outputs set to -1), "letter", "legal",
+// "A4" and "A3"; the comparison is case-sensitive. On a match the two output
+// references are overwritten and gTrue is returned. For any other name the
+// outputs are left untouched and gFalse is returned.
+static GBool setPSPaperSize(char *size, int &psPaperWidth, int &psPaperHeight) {
+  static const struct {
+    const char *name;
+    int width;
+    int height;
+  } knownSizes[] = {
+    {"match",   -1,   -1},
+    {"letter", 612,  792},
+    {"legal",  612, 1008},
+    {"A4",     595,  842},
+    {"A3",     842, 1190}
+  };
+  const size_t count = sizeof(knownSizes) / sizeof(knownSizes[0]);
+
+  for (size_t i = 0; i < count; ++i) {
+    if (strcmp(size, knownSizes[i].name) == 0) {
+      psPaperWidth = knownSizes[i].width;
+      psPaperHeight = knownSizes[i].height;
+      return gTrue;
+    }
+  }
+  return gFalse;
+}
+
+
+// Storage for the command-line options. Each variable is bound to a flag
+// in the argDesc table below; the initialisers are the values used when
+// the flag is not given.
+static int firstPage = 1;
+// Values below 1 are replaced by the document's page count in main().
+static int lastPage = 0;
+static GBool level1 = gFalse;
+static GBool level1Sep = gFalse;
+static GBool level2 = gFalse;
+static GBool level2Sep = gFalse;
+static GBool level3 = gFalse;
+static GBool level3Sep = gFalse;
+static GBool doOrigPageSizes = gFalse;
+static GBool doEPS = gFalse;
+static GBool doForm = gFalse;
+#if OPI_SUPPORT
+static GBool doOPI = gFalse;
+#endif
+// Only forwarded to globalParams when greater than 0.
+static int splashResolution = 0;
+static GBool psBinary = gFalse;
+static GBool noEmbedT1Fonts = gFalse;
+static GBool noEmbedTTFonts = gFalse;
+static GBool noEmbedCIDPSFonts = gFalse;
+static GBool noEmbedCIDTTFonts = gFalse;
+static GBool fontPassthrough = gFalse;
+static GBool preload = gFalse;
+// Empty string means no -paper option; otherwise resolved by setPSPaperSize().
+static char paperSize[15] = "";
+static int paperWidth = -1;
+static int paperHeight = -1;
+static GBool noCrop = gFalse;
+static GBool expand = gFalse;
+static GBool noShrink = gFalse;
+static GBool noCenter = gFalse;
+static GBool duplex = gFalse;
+// A leading '\001' marks "no password supplied"; main() tests for it before
+// building the GooString passed to the document factory.
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool quiet = gFalse;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+#if SPLASH_CMYK
+static GBool overprint = gFalse;
+#endif
+
+// Option table handed to parseArgs() and printUsage() in main().
+// Each entry lists the flag spelling, the argument kind, the variable that
+// receives the value, a size (sizeof the buffer for argString entries, 0
+// elsewhere) and the usage text. The table ends with a {NULL} sentinel.
+// Flag spellings are all lower case (e.g. "-level3sep").
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to print"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to print"},
+ {"-level1", argFlag, &level1, 0,
+ "generate Level 1 PostScript"},
+ {"-level1sep", argFlag, &level1Sep, 0,
+ "generate Level 1 separable PostScript"},
+ {"-level2", argFlag, &level2, 0,
+ "generate Level 2 PostScript"},
+ {"-level2sep", argFlag, &level2Sep, 0,
+ "generate Level 2 separable PostScript"},
+ {"-level3", argFlag, &level3, 0,
+ "generate Level 3 PostScript"},
+ {"-level3sep", argFlag, &level3Sep, 0,
+ "generate Level 3 separable PostScript"},
+ {"-origpagesizes",argFlag, &doOrigPageSizes,0,
+ "conserve original page sizes"},
+ {"-eps", argFlag, &doEPS, 0,
+ "generate Encapsulated PostScript (EPS)"},
+ {"-form", argFlag, &doForm, 0,
+ "generate a PostScript form"},
+#if OPI_SUPPORT
+ {"-opi", argFlag, &doOPI, 0,
+ "generate OPI comments"},
+#endif
+ {"-r", argInt, &splashResolution, 0,
+ "resolution for rasterization, in DPI (default is 300)"},
+ {"-binary", argFlag, &psBinary, 0,
+ "write binary data in Level 1 PostScript"},
+ {"-noembt1", argFlag, &noEmbedT1Fonts, 0,
+ "don't embed Type 1 fonts"},
+ {"-noembtt", argFlag, &noEmbedTTFonts, 0,
+ "don't embed TrueType fonts"},
+ {"-noembcidps", argFlag, &noEmbedCIDPSFonts, 0,
+ "don't embed CID PostScript fonts"},
+ {"-noembcidtt", argFlag, &noEmbedCIDTTFonts, 0,
+ "don't embed CID TrueType fonts"},
+ {"-passfonts", argFlag, &fontPassthrough,0,
+ "don't substitute missing fonts"},
+ {"-preload", argFlag, &preload, 0,
+ "preload images and forms"},
+ {"-paper", argString, paperSize, sizeof(paperSize),
+ "paper size (letter, legal, A4, A3, match)"},
+ {"-paperw", argInt, &paperWidth, 0,
+ "paper width, in points"},
+ {"-paperh", argInt, &paperHeight, 0,
+ "paper height, in points"},
+ {"-nocrop", argFlag, &noCrop, 0,
+ "don't crop pages to CropBox"},
+ {"-expand", argFlag, &expand, 0,
+ "expand pages smaller than the paper size"},
+ {"-noshrink", argFlag, &noShrink, 0,
+ "don't shrink pages larger than the paper size"},
+ {"-nocenter", argFlag, &noCenter, 0,
+ "don't center pages smaller than the paper size"},
+ {"-duplex", argFlag, &duplex, 0,
+ "enable duplex printing"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+#if SPLASH_CMYK
+ {"-overprint",argFlag, &overprint, 0,
+ "enable overprint"},
+#endif
+ {"-q", argFlag, &quiet, 0,
+ "don't print any messages or errors"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// pdftops entry point: parse the command line, open the PDF and write it
+// out as PostScript through PSOutputDev.
+//
+// Returns 0 on success, 1 if the PDF cannot be opened, 2 if the output
+// device cannot be set up, 3 on a permission error (only when built with
+// ENFORCE_PERMISSIONS) and 99 for any other failure. Usage errors detected
+// before anything is allocated leave through exit(): 0 for -v/-h, 1 otherwise.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  GooString *psFileName;
+  PSLevel level;
+  PSOutMode mode;
+  GooString *ownerPW, *userPW;
+  PSOutputDev *psOut;
+  GBool ok;
+  char *p;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdftops version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdftops", "<PDF-file> [<PS-file>]", argDesc);
+    }
+    if (printVersion || printHelp)
+      exit(0);
+    else
+      exit(1);
+  }
+  // the PostScript level flags are mutually exclusive
+  if ((level1 ? 1 : 0) +
+      (level1Sep ? 1 : 0) +
+      (level2 ? 1 : 0) +
+      (level2Sep ? 1 : 0) +
+      (level3 ? 1 : 0) +
+      (level3Sep ? 1 : 0) > 1) {
+    fprintf(stderr, "Error: use only one of the 'level' options.\n");
+    exit(1);
+  }
+  // so are the output mode flags
+  if ((doOrigPageSizes ? 1 : 0) +
+      (doEPS ? 1 : 0) +
+      (doForm ? 1 : 0) > 1) {
+    fprintf(stderr, "Error: use only one of -origpagesizes, -eps, and -form\n");
+    exit(1);
+  }
+  // -level2 needs no branch of its own: it is the fall-through default
+  if (level1) {
+    level = psLevel1;
+  } else if (level1Sep) {
+    level = psLevel1Sep;
+  } else if (level2Sep) {
+    level = psLevel2Sep;
+  } else if (level3) {
+    level = psLevel3;
+  } else if (level3Sep) {
+    level = psLevel3Sep;
+  } else {
+    level = psLevel2;
+  }
+  if (doForm && level < psLevel2) {
+    fprintf(stderr, "Error: forms are only available with Level 2 output.\n");
+    exit(1);
+  }
+  mode = doOrigPageSizes ? psModePSOrigPageSizes
+       : doEPS ? psModeEPS
+       : doForm ? psModeForm
+       : psModePS;
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams();
+  if (paperSize[0]) {
+    if (!setPSPaperSize(paperSize, paperWidth, paperHeight)) {
+      fprintf(stderr, "Invalid paper size\n");
+      delete fileName;
+      goto err0;
+    }
+  }
+#if SPLASH_CMYK
+  if (overprint) {
+    globalParams->setOverprintPreview(gTrue);
+  }
+#endif
+  // only override a global setting when the matching flag was given
+  if (expand) {
+    globalParams->setPSExpandSmaller(gTrue);
+  }
+  if (noShrink) {
+    globalParams->setPSShrinkLarger(gFalse);
+  }
+  if (noCenter) {
+    globalParams->setPSCenter(gFalse);
+  }
+  if (level1 || level1Sep || level2 || level2Sep || level3 || level3Sep) {
+    globalParams->setPSLevel(level);
+  }
+  if (splashResolution > 0) {
+    globalParams->setPSRasterResolution(splashResolution);
+  }
+  if (noEmbedT1Fonts) {
+    globalParams->setPSEmbedType1(!noEmbedT1Fonts);
+  }
+  if (noEmbedTTFonts) {
+    globalParams->setPSEmbedTrueType(!noEmbedTTFonts);
+  }
+  if (noEmbedCIDPSFonts) {
+    globalParams->setPSEmbedCIDPostScript(!noEmbedCIDPSFonts);
+  }
+  if (noEmbedCIDTTFonts) {
+    globalParams->setPSEmbedCIDTrueType(!noEmbedCIDTTFonts);
+  }
+  if (fontPassthrough) {
+    globalParams->setPSFontPassthrough(fontPassthrough);
+  }
+  if (preload) {
+    globalParams->setPSPreload(preload);
+  }
+#if OPI_SUPPORT
+  if (doOPI) {
+    globalParams->setPSOPI(doOPI);
+  }
+#endif
+  if (psBinary) {
+    globalParams->setPSBinary(psBinary);
+  }
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  // a leading '\001' is the "no password given" sentinel
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  // "-" selects standard input, addressed as fd://0
+  if (fileName->cmp("-") == 0) {
+    delete fileName;
+    fileName = new GooString("fd://0");
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+#ifdef ENFORCE_PERMISSIONS
+  // check for print permission
+  if (!doc->okToPrint()) {
+    error(errNotAllowed, -1, "Printing this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // construct PostScript file name
+  if (argc == 3) {
+    psFileName = new GooString(argv[2]);
+  } else if (fileName->cmp("fd://0") == 0) {
+    error(errCommandLine, -1, "You have to provide an output filename when reading from stdin.");
+    goto err1;
+  } else {
+    // Look for a ".pdf"/".PDF" suffix only when the name is long enough to
+    // hold one; otherwise the pointer would land before the string buffer.
+    p = NULL;
+    if (fileName->getLength() >= 4) {
+      p = fileName->getCString() + fileName->getLength() - 4;
+    }
+    if (p && (!strcmp(p, ".pdf") || !strcmp(p, ".PDF"))) {
+      psFileName = new GooString(fileName->getCString(),
+                                 fileName->getLength() - 4);
+    } else {
+      psFileName = fileName->copy();
+    }
+    psFileName->append(doEPS ? ".eps" : ".ps");
+  }
+
+  // get page range
+  if (firstPage < 1) {
+    firstPage = 1;
+  }
+  if (lastPage < 1 || lastPage > doc->getNumPages()) {
+    lastPage = doc->getNumPages();
+  }
+
+  // check for multi-page EPS or form
+  if ((doEPS || doForm) && firstPage != lastPage) {
+    error(errCommandLine, -1, "EPS and form files can only contain one page.");
+    goto err2;
+  }
+
+  // write PostScript file
+  psOut = new PSOutputDev(psFileName->getCString(), doc,
+                          NULL, firstPage, lastPage, mode,
+                          paperWidth,
+                          paperHeight,
+                          duplex);
+  if (psOut->isOk()) {
+    doc->displayPages(psOut, firstPage, lastPage, 72, 72,
+                      0, noCrop, !noCrop, gTrue);
+  } else {
+    delete psOut;
+    exitCode = 2;
+    goto err2;
+  }
+  delete psOut;
+
+  exitCode = 0;
+
+  // clean up: each label releases what was allocated before the jump to it
+ err2:
+  delete psFileName;
+ err1:
+  delete doc;
+  delete fileName;
+ err0:
+  delete globalParams;
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdftotext.1 b/utils/pdftotext.1
new file mode 100644
index 00000000..0199b03c
--- /dev/null
+++ b/utils/pdftotext.1
@@ -0,0 +1,137 @@
+.\" Copyright 1997-2011 Glyph & Cog, LLC
+.TH pdftotext 1 "15 August 2011"
+.SH NAME
+pdftotext \- Portable Document Format (PDF) to text converter
+(version 3.03)
+.SH SYNOPSIS
+.B pdftotext
+[options]
+.RI [ PDF-file
+.RI [ text-file ]]
+.SH DESCRIPTION
+.B Pdftotext
+converts Portable Document Format (PDF) files to plain text.
+.PP
+Pdftotext reads the PDF file,
+.IR PDF-file ,
+and writes a text file,
+.IR text-file .
+If
+.I text-file
+is not specified, pdftotext converts
+.I file.pdf
+to
+.IR file.txt .
+If
+.I text-file
+is \'-', the text is sent to stdout.
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to convert.
+.TP
+.BI \-l " number"
+Specifies the last page to convert.
+.TP
+.BI \-r " number"
+Specifies the resolution, in DPI. The default is 72 DPI.
+.TP
+.BI \-x " number"
+Specifies the x-coordinate of the crop area top left corner
+.TP
+.BI \-y " number"
+Specifies the y-coordinate of the crop area top left corner
+.TP
+.BI \-W " number"
+Specifies the width of crop area in pixels (default is 0)
+.TP
+.BI \-H " number"
+Specifies the height of crop area in pixels (default is 0)
+.TP
+.B \-layout
+Maintain (as best as possible) the original physical layout of the
+text. The default is to \'undo' physical layout (columns,
+hyphenation, etc.) and output the text in reading order.
+.TP
+.BI \-fixed " number"
+Assume fixed-pitch (or tabular) text, with the specified character
+width (in points). This forces physical layout mode.
+.TP
+.B \-raw
+Keep the text in content stream order. This is a hack which often
+"undoes" column formatting, etc. Use of raw mode is no longer
+recommended.
+.TP
+.B \-htmlmeta
+Generate a simple HTML file, including the meta information. This
+simply wraps the text in <pre> and </pre> and prepends the meta
+headers.
+.TP
+.B \-bbox
+Generate an XHTML file containing bounding box information for each
+word in the file.
+.TP
+.BI \-enc " encoding-name"
+Sets the encoding to use for text output. This defaults to "UTF-8".
+.TP
+.B \-listenc
+Lists the available encodings.
+.TP
+.BI \-eol " unix | dos | mac"
+Sets the end-of-line convention to use for text output.
+.TP
+.B \-nopgbrk
+Don't insert page breaks (form feed characters) between pages.
+.TP
+.BI \-opw " password"
+Specify the owner password for the PDF file. Providing this will
+bypass all security restrictions.
+.TP
+.BI \-upw " password"
+Specify the user password for the PDF file.
+.TP
+.B \-q
+Don't print any messages or errors.
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH BUGS
+Some PDF files contain fonts whose encodings have been mangled beyond
+recognition. There is no way (short of OCR) to extract text from
+these files.
+.SH EXIT CODES
+The Xpdf tools use the following exit codes:
+.TP
+0
+No error.
+.TP
+1
+Error opening a PDF file.
+.TP
+2
+Error opening an output file.
+.TP
+3
+Error related to PDF permissions.
+.TP
+99
+Other error.
+.SH AUTHOR
+The pdftotext software and documentation are copyright 1996-2011 Glyph
+& Cog, LLC.
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdffonts (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1)
diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc
new file mode 100644
index 00000000..a170f1b7
--- /dev/null
+++ b/utils/pdftotext.cc
@@ -0,0 +1,486 @@
+//========================================================================
+//
+// pdftotext.cc
+//
+// Copyright 1997-2003 Glyph & Cog, LLC
+//
+// Modified for Debian by Hamish Moffatt, 22 May 2002.
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006 Dominic Lachowicz <cinamod@hotmail.com>
+// Copyright (C) 2007-2008, 2010, 2011 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2009 Jan Jockusch <jan@jockusch.de>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2010 Kenneth Berland <ken@hero.com>
+// Copyright (C) 2011 Tom Gleason <tom@buildadam.com>
+// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "parseargs.h"
+#include "printencodings.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "TextOutputDev.h"
+#include "CharTypes.h"
+#include "UnicodeMap.h"
+#include "PDFDocEncoding.h"
+#include "Error.h"
+#include <string>
+
+// Forward declarations of the helpers main() uses to write document-info
+// entries into the HTML header.
+static void printInfoString(FILE *f, Dict *infoDict, const char *key,
+ const char *text1, const char *text2, UnicodeMap *uMap);
+static void printInfoDate(FILE *f, Dict *infoDict, const char *key, const char *fmt);
+
+// Storage for the command-line options. Each variable is bound to a flag
+// in the argDesc table below; the initialisers are the values used when
+// the flag is not given.
+static int firstPage = 1;
+// Values below 1 are replaced by the document's page count in main().
+static int lastPage = 0;
+static double resolution = 72.0;
+// Crop area: top-left corner (x, y) and size (w, h), bound to -x/-y/-W/-H.
+static int x = 0;
+static int y = 0;
+static int w = 0;
+static int h = 0;
+// Setting bbox also forces htmlMeta on in main().
+static GBool bbox = gFalse;
+static GBool physLayout = gFalse;
+// A non-zero fixedPitch forces physLayout on in main().
+static double fixedPitch = 0;
+static GBool rawOrder = gFalse;
+static GBool htmlMeta = gFalse;
+// Empty strings mean the corresponding option was not given.
+static char textEncName[128] = "";
+static char textEOL[16] = "";
+static GBool noPageBreaks = gFalse;
+// A leading '\001' marks "no password supplied".
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool quiet = gFalse;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+static GBool printEnc = gFalse;
+
+// Option table handed to parseArgs() and printUsage() in main().
+// Each entry lists the flag spelling, the argument kind, the variable that
+// receives the value, a size (sizeof the buffer for argString entries, 0
+// elsewhere) and the usage text. The table ends with a {NULL} sentinel.
+static const ArgDesc argDesc[] = {
+ {"-f", argInt, &firstPage, 0,
+ "first page to convert"},
+ {"-l", argInt, &lastPage, 0,
+ "last page to convert"},
+ {"-r", argFP, &resolution, 0,
+ "resolution, in DPI (default is 72)"},
+ {"-x", argInt, &x, 0,
+ "x-coordinate of the crop area top left corner"},
+ {"-y", argInt, &y, 0,
+ "y-coordinate of the crop area top left corner"},
+ {"-W", argInt, &w, 0,
+ "width of crop area in pixels (default is 0)"},
+ {"-H", argInt, &h, 0,
+ "height of crop area in pixels (default is 0)"},
+ {"-layout", argFlag, &physLayout, 0,
+ "maintain original physical layout"},
+ {"-fixed", argFP, &fixedPitch, 0,
+ "assume fixed-pitch (or tabular) text"},
+ {"-raw", argFlag, &rawOrder, 0,
+ "keep strings in content stream order"},
+ {"-htmlmeta", argFlag, &htmlMeta, 0,
+ "generate a simple HTML file, including the meta information"},
+ {"-enc", argString, textEncName, sizeof(textEncName),
+ "output text encoding name"},
+ {"-listenc",argFlag, &printEnc, 0,
+ "list available encodings"},
+ {"-eol", argString, textEOL, sizeof(textEOL),
+ "output end-of-line convention (unix, dos, or mac)"},
+ {"-nopgbrk", argFlag, &noPageBreaks, 0,
+ "don't insert page breaks between pages"},
+ {"-bbox", argFlag, &bbox, 0,
+ "output bounding box for each word and page size to html. Sets -htmlmeta"},
+ {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+ "owner password (for encrypted files)"},
+ {"-upw", argString, userPassword, sizeof(userPassword),
+ "user password (for encrypted files)"},
+ {"-q", argFlag, &quiet, 0,
+ "don't print any messages or errors"},
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+// Return a copy of inString in which every occurrence of oldToken has been
+// replaced by newToken.
+//
+// Text inserted by a replacement is never rescanned, so newToken may itself
+// contain oldToken (e.g. "&" -> "&amp;"). An empty oldToken matches at
+// every position and could never be exhausted, so in that case the input is
+// returned unchanged.
+static std::string myStringReplace(const std::string &inString, const std::string &oldToken, const std::string &newToken) {
+  std::string result = inString;
+  if (oldToken.empty()) {
+    return result;
+  }
+  // size_t cursor: string offsets do not fit an int for very large inputs
+  size_t searchFrom = 0;
+  size_t foundLoc;
+  while ((foundLoc = result.find(oldToken, searchFrom)) != std::string::npos) {
+    result.replace(foundLoc, oldToken.length(), newToken);
+    // resume just past the text that was inserted
+    searchFrom = foundLoc + newToken.length();
+  }
+  return result;
+}
+
+// Escape the five XML special characters (& ' " < >) in inString and return
+// the result as a new string.
+static std::string myXmlTokenReplace(const char *inString){
+  // Order matters: '&' goes first, otherwise the ampersands introduced by
+  // the later entities would be escaped a second time.
+  static const char *const entities[][2] = {
+    {"&",  "&amp;"},
+    {"'",  "&apos;"},
+    {"\"", "&quot;"},
+    {"<",  "&lt;"},
+    {">",  "&gt;"}
+  };
+  std::string escaped(inString);
+  for (size_t i = 0; i < sizeof(entities) / sizeof(entities[0]); ++i) {
+    escaped = myStringReplace(escaped, entities[i][0], entities[i][1]);
+  }
+  return escaped;
+}
+
+int main(int argc, char *argv[]) {
+ PDFDoc *doc;
+ GooString *fileName;
+ GooString *textFileName;
+ GooString *ownerPW, *userPW;
+ TextOutputDev *textOut;
+ FILE *f;
+ UnicodeMap *uMap;
+ Object info;
+ GBool ok;
+ char *p;
+ int exitCode;
+
+ exitCode = 99;
+
+ // parse args
+ ok = parseArgs(argDesc, &argc, argv);
+ if (bbox) {
+ htmlMeta = gTrue;
+ }
+ if (!ok || (argc < 2 && !printEnc) || argc > 3 || printVersion || printHelp) {
+ fprintf(stderr, "pdftotext version %s\n", PACKAGE_VERSION);
+ fprintf(stderr, "%s\n", popplerCopyright);
+ fprintf(stderr, "%s\n", xpdfCopyright);
+ if (!printVersion) {
+ printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc);
+ }
+ if (printVersion || printHelp)
+ exitCode = 0;
+ goto err0;
+ }
+
+ // read config file
+ globalParams = new GlobalParams();
+
+ if (printEnc) {
+ printEncodings();
+ delete globalParams;
+ exitCode = 0;
+ goto err0;
+ }
+
+ fileName = new GooString(argv[1]);
+ if (fixedPitch) {
+ physLayout = gTrue;
+ }
+
+ if (textEncName[0]) {
+ globalParams->setTextEncoding(textEncName);
+ }
+ if (textEOL[0]) {
+ if (!globalParams->setTextEOL(textEOL)) {
+ fprintf(stderr, "Bad '-eol' value on command line\n");
+ }
+ }
+ if (noPageBreaks) {
+ globalParams->setTextPageBreaks(gFalse);
+ }
+ if (quiet) {
+ globalParams->setErrQuiet(quiet);
+ }
+
+ // get mapping to output encoding
+ if (!(uMap = globalParams->getTextEncoding())) {
+ error(errCommandLine, -1, "Couldn't get text encoding");
+ delete fileName;
+ goto err1;
+ }
+
+ // open PDF file
+ if (ownerPassword[0] != '\001') {
+ ownerPW = new GooString(ownerPassword);
+ } else {
+ ownerPW = NULL;
+ }
+ if (userPassword[0] != '\001') {
+ userPW = new GooString(userPassword);
+ } else {
+ userPW = NULL;
+ }
+
+ if (fileName->cmp("-") == 0) {
+ delete fileName;
+ fileName = new GooString("fd://0");
+ }
+
+ doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+
+ if (userPW) {
+ delete userPW;
+ }
+ if (ownerPW) {
+ delete ownerPW;
+ }
+ if (!doc->isOk()) {
+ exitCode = 1;
+ goto err2;
+ }
+
+#ifdef ENFORCE_PERMISSIONS
+ // check for copy permission
+ if (!doc->okToCopy()) {
+ error(errNotAllowed, -1, "Copying of text from this document is not allowed.");
+ exitCode = 3;
+ goto err2;
+ }
+#endif
+
+ // construct text file name
+ if (argc == 3) {
+ textFileName = new GooString(argv[2]);
+ } else if (fileName->cmp("fd://0") == 0) {
+ error(errCommandLine, -1, "You have to provide an output filename when reading form stdin.");
+ goto err2;
+ } else {
+ p = fileName->getCString() + fileName->getLength() - 4;
+ if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
+ textFileName = new GooString(fileName->getCString(),
+ fileName->getLength() - 4);
+ } else {
+ textFileName = fileName->copy();
+ }
+ textFileName->append(htmlMeta ? ".html" : ".txt");
+ }
+
+ // get page range
+ if (firstPage < 1) {
+ firstPage = 1;
+ }
+ if (lastPage < 1 || lastPage > doc->getNumPages()) {
+ lastPage = doc->getNumPages();
+ }
+
+ // write HTML header
+ if (htmlMeta) {
+ if (!textFileName->cmp("-")) {
+ f = stdout;
+ } else {
+ if (!(f = fopen(textFileName->getCString(), "wb"))) {
+ error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName);
+ exitCode = 2;
+ goto err3;
+ }
+ }
+ fputs("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", f);
+ fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n", f);
+ fputs("<head>\n", f);
+ doc->getDocInfo(&info);
+ if (info.isDict()) {
+ Object obj;
+ if (info.getDict()->lookup("Title", &obj)->isString()) {
+ printInfoString(f, info.getDict(), "Title", "<title>", "</title>\n", uMap);
+ } else {
+ fputs("<title></title>\n", f);
+ }
+ obj.free();
+ printInfoString(f, info.getDict(), "Subject",
+ "<meta name=\"Subject\" content=\"", "\"/>\n", uMap);
+ printInfoString(f, info.getDict(), "Keywords",
+ "<meta name=\"Keywords\" content=\"", "\"/>\n", uMap);
+ printInfoString(f, info.getDict(), "Author",
+ "<meta name=\"Author\" content=\"", "\"/>\n", uMap);
+ printInfoString(f, info.getDict(), "Creator",
+ "<meta name=\"Creator\" content=\"", "\"/>\n", uMap);
+ printInfoString(f, info.getDict(), "Producer",
+ "<meta name=\"Producer\" content=\"", "\"/>\n", uMap);
+ printInfoDate(f, info.getDict(), "CreationDate",
+ "<meta name=\"CreationDate\" content=\"\"/>\n");
+ printInfoDate(f, info.getDict(), "LastModifiedDate",
+ "<meta name=\"ModDate\" content=\"\"/>\n");
+ }
+ info.free();
+ fputs("</head>\n", f);
+ fputs("<body>\n", f);
+ if (!bbox) fputs("<pre>\n", f);
+ if (f != stdout) {
+ fclose(f);
+ }
+ }
+
+ // write text file
+ if (bbox) {
+ textOut = new TextOutputDev(NULL, physLayout, fixedPitch, rawOrder, htmlMeta);
+ if (!(f = fopen(textFileName->getCString(), "ab"))) {
+ error(errIO, -1, "Couldn't open text file '{0:t}' for append", textFileName);
+ exitCode = 2;
+ delete textOut;
+ goto err3;
+ }
+
+ if (textOut->isOk()) {
+ fprintf(f, "<doc>\n");
+ for (int page = firstPage; page <= lastPage; ++page) {
+ fprintf(f, " <page width=\"%f\" height=\"%f\">\n",doc->getPageMediaWidth(page), doc->getPageMediaHeight(page));
+ doc->displayPage(textOut, page, resolution, resolution, 0, gTrue, gFalse, gFalse);
+ TextWordList *wordlist = textOut->makeWordList();
+ const int word_length = wordlist != NULL ? wordlist->getLength() : 0;
+ TextWord *word;
+ double xMinA, yMinA, xMaxA, yMaxA;
+ if (word_length == 0)
+ fprintf(stderr, "no word list\n");
+
+ for (int i = 0; i < word_length; ++i) {
+ word = wordlist->get(i);
+ word->getBBox(&xMinA, &yMinA, &xMaxA, &yMaxA);
+ const std::string myString = myXmlTokenReplace(word->getText()->getCString());
+ fprintf(f," <word xMin=\"%f\" yMin=\"%f\" xMax=\"%f\" yMax=\"%f\">%s</word>\n", xMinA, yMinA, xMaxA, yMaxA, myString.c_str());
+ }
+ fprintf(f, " </page>\n");
+ delete wordlist;
+ }
+ fprintf(f, "</doc>\n");
+ }
+ fclose(f);
+ } else {
+ textOut = new TextOutputDev(textFileName->getCString(),
+ physLayout, fixedPitch, rawOrder, htmlMeta);
+ if (textOut->isOk()) {
+ if ((w==0) && (h==0) && (x==0) && (y==0)) {
+ doc->displayPages(textOut, firstPage, lastPage, resolution, resolution, 0,
+ gTrue, gFalse, gFalse);
+ } else {
+
+ for (int page = firstPage; page <= lastPage; ++page) {
+ doc->displayPageSlice(textOut, page, resolution, resolution, 0,
+ gTrue, gFalse, gFalse,
+ x, y, w, h);
+ }
+ }
+
+ } else {
+ delete textOut;
+ exitCode = 2;
+ goto err3;
+ }
+ }
+ delete textOut;
+
+ // write end of HTML file
+ if (htmlMeta) {
+ if (!textFileName->cmp("-")) {
+ f = stdout;
+ } else {
+ if (!(f = fopen(textFileName->getCString(), "ab"))) {
+ error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName);
+ exitCode = 2;
+ goto err3;
+ }
+ }
+ if (!bbox) fputs("</pre>\n", f);
+ fputs("</body>\n", f);
+ fputs("</html>\n", f);
+ if (f != stdout) {
+ fclose(f);
+ }
+ }
+
+ exitCode = 0;
+
+ // clean up
+ err3:
+ delete textFileName;
+ err2:
+ delete doc;
+ delete fileName;
+ uMap->decRefCnt();
+ err1:
+ delete globalParams;
+ err0:
+
+ // check for memory leaks
+ Object::memCheck(stderr);
+ gMemReport(stderr);
+
+ return exitCode;
+}
+
+// Writes the Info-dictionary string stored under <key> to <f>, framed
+// by <text1> before it and <text2> after it.  Nothing at all is written
+// when the entry is missing or is not a string.
+//
+// A value that starts with the bytes 0xFE 0xFF is decoded as 16-bit
+// big-endian code units; any other value is mapped byte by byte through
+// pdfDocEncoding.  Every code point is converted with <uMap> and then
+// run through myXmlTokenReplace() -- the helper main() already applies
+// to word text -- so that markup characters taken from the PDF metadata
+// cannot terminate the surrounding HTML element or attribute value.
+static void printInfoString(FILE *f, Dict *infoDict, const char *key,
+                            const char *text1, const char *text2, UnicodeMap *uMap) {
+  Object obj;
+  GooString *s1;
+  GBool isUnicode;
+  Unicode u;
+  char buf[9];  // up to 8 mapped bytes plus the terminating NUL
+  int i, n, len;
+
+  if (infoDict->lookup(key, &obj)->isString()) {
+    fputs(text1, f);
+    s1 = obj.getString();
+    len = s1->getLength();
+    // Only look for the byte-order mark when both bytes really exist.
+    if (len >= 2 &&
+        (s1->getChar(0) & 0xff) == 0xfe &&
+        (s1->getChar(1) & 0xff) == 0xff) {
+      isUnicode = gTrue;
+      i = 2;
+    } else {
+      isUnicode = gFalse;
+      i = 0;
+    }
+    while (i < len) {
+      if (isUnicode) {
+        // A dangling final byte becomes the high byte of a unit whose
+        // low byte is zero, without indexing past the string data.
+        u = (s1->getChar(i) & 0xff) << 8;
+        if (i + 1 < len) {
+          u |= s1->getChar(i + 1) & 0xff;
+        }
+        i += 2;
+      } else {
+        u = pdfDocEncoding[s1->getChar(i) & 0xff];
+        ++i;
+      }
+      // Leave room for the NUL that myXmlTokenReplace() needs.
+      n = uMap->mapUnicode(u, buf, sizeof(buf) - 1);
+      buf[n] = '\0';
+      // NOTE(review): output is now written as a C string, so a mapped
+      // NUL byte is dropped instead of being emitted into the HTML.
+      const std::string escaped = myXmlTokenReplace(buf);
+      fputs(escaped.c_str(), f);
+    }
+    fputs(text2, f);
+  }
+  obj.free();
+}
+
+// Looks up <key> in <infoDict> and, when the entry is a string, prints
+// it to <f> through the printf-style format <fmt>, which receives the
+// string as its single argument.  A leading "D:" is skipped first.
+// Entries that are missing or not strings produce no output.
+static void printInfoDate(FILE *f, Dict *infoDict, const char *key, const char *fmt) {
+  Object dateObj;
+
+  infoDict->lookup(key, &dateObj);
+  if (dateObj.isString()) {
+    const char *date = dateObj.getString()->getCString();
+    const bool hasPrefix = date[0] == 'D' && date[1] == ':';
+    fprintf(f, fmt, hasPrefix ? date + 2 : date);
+  }
+  dateObj.free();
+}
diff --git a/utils/pdfunite.1 b/utils/pdfunite.1
new file mode 100644
index 00000000..9b1f2e8f
--- /dev/null
+++ b/utils/pdfunite.1
@@ -0,0 +1,33 @@
+.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org
+.TH pdfunite 1 "15 September 2011"
+.SH NAME
+pdfunite \- Portable Document Format (PDF) page merger
+.SH SYNOPSIS
+.B pdfunite
+[options]
+.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile
+.SH DESCRIPTION
+.B pdfunite
+merges several PDF (Portable Document Format) files, in the order in which they appear on the command line, into one PDF result file.
+.TP
+None of the files PDF-sourcefile1 to PDF-sourcefilen may be encrypted.
+.SH OPTIONS
+.TP
+.B \-v
+Print copyright and version information.
+.TP
+.B \-h
+Print usage information.
+.RB ( \-help
+and
+.B \-\-help
+are equivalent.)
+.SH EXAMPLE
+pdfunite sample1.pdf sample2.pdf sample.pdf
+.TP
+merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf
+.SH AUTHOR
+The pdfunite software and documentation are copyright 1996-2004 Glyph & Cog, LLC
+and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org
+.SH "SEE ALSO"
+.BR pdfseparate (1)
diff --git a/utils/pdfunite.cc b/utils/pdfunite.cc
new file mode 100644
index 00000000..212f89be
--- /dev/null
+++ b/utils/pdfunite.cc
@@ -0,0 +1,182 @@
+//========================================================================
+//
+// pdfunite.cc
+//
+// This file is licensed under the GPLv2 or later
+//
+// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de>
+// Copyright (C) 2012 Arseny Solokha <asolokha@gmx.com>
+// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it>
+//
+//========================================================================
+#include <PDFDoc.h>
+#include <GlobalParams.h>
+#include "parseargs.h"
+#include "config.h"
+#include <poppler-config.h>
+#include <vector>
+
+// Flags bound to the version and help options in argDesc below; main()
+// checks both before it starts any merging work.
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+// Option table: each entry pairs one option spelling with the flag
+// variable it refers to and the help text shown for it.  main() hands
+// the table to printUsage().  All four help spellings share printHelp.
+// The closing {NULL} entry is presumably the end-of-table marker the
+// ArgDesc consumers look for -- confirm against parseargs.h.
+static const ArgDesc argDesc[] = {
+ {"-v", argFlag, &printVersion, 0,
+ "print copyright and version info"},
+ {"-h", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"--help", argFlag, &printHelp, 0,
+ "print usage information"},
+ {"-?", argFlag, &printHelp, 0,
+ "print usage information"},
+ {NULL}
+};
+
+///////////////////////////////////////////////////////////////////////////
+int main (int argc, char *argv[])
+///////////////////////////////////////////////////////////////////////////
+// Merge PDF files given by arguments 1 to argc-2 and write the result
+// to the file specified by argument argc-1.
+//
+// Option flags listed in argDesc are handled by parseArgs() first, so
+// the argument positions above refer to what is left afterwards.
+//
+// Returns 0 on success or when version/help output was requested, 99
+// when the command line is unusable, and -1 when a source file is
+// damaged or encrypted or the destination file cannot be opened.
+///////////////////////////////////////////////////////////////////////////
+{
+  int objectsCount = 0;
+  Guint numOffset = 0;
+  std::vector<Object> pages;
+  std::vector<Guint> offsets;
+  XRef *yRef, *countRef;
+  FILE *f;
+  OutStream *outStr;
+  int i;
+  int j, rootNum;
+  std::vector<PDFDoc *>docs;
+  int majorVersion = 0;
+  int minorVersion = 0;
+  char *fileName;
+  int exitCode;
+
+  exitCode = 99;
+  // Without this call printVersion/printHelp are never set, and an
+  // option such as "-v" would be opened as if it were a source file.
+  const GBool ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || argc <= 3 || printVersion || printHelp) {
+    fprintf(stderr, "pdfunite version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfunite", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>",
+                 argDesc);
+    }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    return exitCode;
+  }
+  exitCode = 0;
+  // Resolved only now because parseArgs() may have changed argc.
+  fileName = argv[argc - 1];
+  globalParams = new GlobalParams();
+
+  // Open every source file; the output header gets the highest PDF
+  // version found among them.
+  for (i = 1; i < argc - 1; i++) {
+    GooString *gfileName = new GooString(argv[i]);
+    PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL);
+    if (doc->isOk() && !doc->isEncrypted()) {
+      docs.push_back(doc);
+      if (doc->getPDFMajorVersion() > majorVersion) {
+        majorVersion = doc->getPDFMajorVersion();
+        minorVersion = doc->getPDFMinorVersion();
+      } else if (doc->getPDFMajorVersion() == majorVersion) {
+        if (doc->getPDFMinorVersion() > minorVersion) {
+          minorVersion = doc->getPDFMinorVersion();
+        }
+      }
+    } else if (doc->isOk()) {
+      error(errUnimplemented, -1, "Could not merge encrypted files ('{0:s}')", argv[i]);
+      return -1;
+    } else {
+      error(errSyntaxError, -1, "Could not merge damaged documents ('{0:s}')", argv[i]);
+      return -1;
+    }
+  }
+
+  if (!(f = fopen(fileName, "wb"))) {
+    error(errIO, -1, "Could not open file '{0:s}'", fileName);
+    return -1;
+  }
+  outStr = new FileOutStream(f, 0);
+
+  yRef = new XRef();
+  countRef = new XRef();
+  yRef->add(0, 65535, 0, gFalse);
+  PDFDoc::writeHeader(outStr, majorVersion, minorVersion);
+
+  // Collect the page objects of every document and write out what they
+  // refer to.  numOffset is handed to the PDFDoc helpers for each
+  // document and remembered per page for the writeObject() calls below
+  // (presumably the shift applied to that document's object numbers).
+  for (i = 0; i < (int) docs.size(); i++) {
+    for (j = 1; j <= docs[i]->getNumPages(); j++) {
+      PDFRectangle *cropBox = NULL;
+      if (docs[i]->getCatalog()->getPage(j)->isCropped())
+        cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox();
+      docs[i]->replacePageDict(j,
+        docs[i]->getCatalog()->getPage(j)->getRotate(),
+        docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL);
+      Ref *refPage = docs[i]->getCatalog()->getPageRef(j);
+      Object page;
+      docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page);
+      pages.push_back(page);
+      offsets.push_back(numOffset);
+      Dict *pageDict = page.getDict();
+      docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset);
+    }
+    objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset);
+    numOffset = yRef->getNumObjects() + 1;
+  }
+
+  // Catalog object; it points at the page tree root written next.
+  rootNum = yRef->getNumObjects() + 1;
+  yRef->add(rootNum, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum);
+  outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1);
+  outStr->printf(">>\nendobj\n");
+  objectsCount++;
+
+  // Page tree root listing every merged page as a direct kid.
+  yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue);
+  outStr->printf("%d 0 obj\n", rootNum + 1);
+  outStr->printf("<< /Type /Pages /Kids [");
+  for (j = 0; j < (int) pages.size(); j++)
+    outStr->printf(" %d 0 R", rootNum + j + 2);
+  // size() yields size_t; it must be narrowed to match the %d conversion.
+  outStr->printf(" ] /Count %d >>\nendobj\n", (int) pages.size());
+  objectsCount++;
+
+  // One object per page: the dictionary is copied key by key, except
+  // that /Parent is redirected to the new page tree root.
+  for (i = 0; i < (int) pages.size(); i++) {
+    yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue);
+    outStr->printf("%d 0 obj\n", rootNum + i + 2);
+    outStr->printf("<< ");
+    Dict *pageDict = pages[i].getDict();
+    for (j = 0; j < pageDict->getLength(); j++) {
+      if (j > 0)
+        outStr->printf(" ");
+      const char *key = pageDict->getKey(j);
+      Object value;
+      pageDict->getValNF(j, &value);
+      if (strcmp(key, "Parent") == 0) {
+        outStr->printf("/Parent %d 0 R", rootNum + 1);
+      } else {
+        outStr->printf("/%s ", key);
+        PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]);
+      }
+      value.free();
+    }
+    outStr->printf(" >>\nendobj\n");
+    objectsCount++;
+  }
+
+  // Cross-reference table and trailer.
+  Guint uxrefOffset = outStr->getPos();
+  Ref ref;
+  ref.num = rootNum;
+  ref.gen = 0;
+  Dict *trailerDict = PDFDoc::createTrailerDict(objectsCount, gFalse, 0, &ref, yRef,
+                                                fileName, outStr->getPos());
+  PDFDoc::writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */,
+                                uxrefOffset, outStr, yRef);
+  delete trailerDict;
+
+  outStr->close();
+  // The stream object was allocated above and was previously leaked.
+  delete outStr;
+  fclose(f);
+  delete yRef;
+  delete countRef;
+  for (j = 0; j < (int) pages.size (); j++) pages[j].free();
+  for (i = 0; i < (int) docs.size (); i++) delete docs[i];
+  delete globalParams;
+  return exitCode;
+}
diff --git a/utils/printencodings.cc b/utils/printencodings.cc
new file mode 100644
index 00000000..dec6f98f
--- /dev/null
+++ b/utils/printencodings.cc
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2008, Albert Astals Cid <aacid@kde.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "printencodings.h"
+
+#include "GlobalParams.h"
+#include "goo/GooList.h"
+#include "goo/GooString.h"
+
+// Prints a heading line followed by the name of every encoding that
+// globalParams reports, one name per line, to stdout.  The list object
+// returned by getEncodingNames() is deleted afterwards; its elements
+// are left alone.
+void printEncodings()
+{
+  GooList *names = globalParams->getEncodingNames();
+  const int count = names->getLength();
+
+  printf("Available encodings are:\n");
+  int idx = 0;
+  while (idx < count) {
+    GooString *name = (GooString*)names->get(idx);
+    printf("%s\n", name->getCString());
+    ++idx;
+  }
+  delete names;
+}
diff --git a/utils/printencodings.h b/utils/printencodings.h
new file mode 100644
index 00000000..5be2819f
--- /dev/null
+++ b/utils/printencodings.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2008, Albert Astals Cid <aacid@kde.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef PRINTENCODINGS_H
+#define PRINTENCODINGS_H
+
+// Prints "Available encodings are:" followed by one encoding name per
+// line to stdout.  The names come from globalParams, so that object
+// has to exist before the call (see printencodings.cc).
+void printEncodings();
+
+#endif