diff options
author | Anas Nashif <anas.nashif@intel.com> | 2012-11-10 08:38:23 -0800 |
---|---|---|
committer | Anas Nashif <anas.nashif@intel.com> | 2012-11-10 08:38:23 -0800 |
commit | 65619a8a518ba0f513e57429e461d199264a9929 (patch) | |
tree | d87b5dd33429139a1900fb37928466df569cad3f /utils | |
download | poppler-65619a8a518ba0f513e57429e461d199264a9929.tar.gz poppler-65619a8a518ba0f513e57429e461d199264a9929.tar.bz2 poppler-65619a8a518ba0f513e57429e461d199264a9929.zip |
Imported Upstream version 0.20.4 upstream/0.20.4
Diffstat (limited to 'utils')
39 files changed, 11317 insertions, 0 deletions
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt new file mode 100644 index 00000000..4a29841b --- /dev/null +++ b/utils/CMakeLists.txt @@ -0,0 +1,133 @@

# Build and install rules for the poppler command-line utilities.
#
# Every utility is built from the shared argument parser (common_srcs) plus
# its own sources, linked against common_libs, installed into bin/ and
# shipped with a man page named <tool>.1.

set(common_srcs
  parseargs.cc
)
set(common_libs
  poppler
)
if (FONTCONFIG_FOUND)
  list(APPEND common_libs ${FONTCONFIG_LIBRARIES})
endif ()

# poppler_add_util(<name> <source>...)
#
# Declares the executable <name> from ${common_srcs} plus the given sources,
# links it against ${common_libs}, and installs the binary and <name>.1.
# <name>_SOURCES is also set in the caller's scope, as the hand-written rules
# used to do.
#
# Example: poppler_add_util(pdfinfo pdfinfo.cc printencodings.cc)
function(poppler_add_util name)
  set(util_sources ${common_srcs} ${ARGN})
  set(${name}_SOURCES ${util_sources} PARENT_SCOPE)
  add_executable(${name} ${util_sources})
  target_link_libraries(${name} ${common_libs})
  install(TARGETS ${name} DESTINATION bin)
  install(FILES ${name}.1 DESTINATION share/man/man1)
endfunction()

if (ENABLE_SPLASH)
  # pdftoppm
  poppler_add_util(pdftoppm
    pdftoppm.cc
  )
endif ()

if (HAVE_CAIRO)
  # pdftocairo
  #
  # Kept as explicit commands rather than poppler_add_util() so that the
  # link order (cairo, freetype, then the common libraries) is unchanged.
  set(pdftocairo_SOURCES ${common_srcs}
    pdftocairo.cc
    ${CMAKE_SOURCE_DIR}/poppler/CairoFontEngine.cc
    ${CMAKE_SOURCE_DIR}/poppler/CairoOutputDev.cc
    ${CMAKE_SOURCE_DIR}/poppler/CairoRescaleBox.cc
  )
  # NOTE(review): include_directories() and add_definitions() are
  # directory-scoped, so these cairo settings also reach the other utilities
  # declared in this file. Scoping them to pdftocairo needs target_* commands
  # from a newer CMake than this file otherwise relies on.
  include_directories(
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CAIRO_INCLUDE_DIRS}
  )
  add_definitions(${CAIRO_CFLAGS})
  add_executable(pdftocairo ${pdftocairo_SOURCES})
  target_link_libraries(pdftocairo ${CAIRO_LIBRARIES} ${FREETYPE_LIBRARIES} ${common_libs})
  if (LCMS_FOUND)
    target_link_libraries(pdftocairo ${LCMS_LIBRARIES})
  endif ()
  if (LCMS2_FOUND)
    target_link_libraries(pdftocairo ${LCMS2_LIBRARIES})
  endif ()
  install(TARGETS pdftocairo DESTINATION bin)
  install(FILES pdftocairo.1 DESTINATION share/man/man1)
endif ()

# pdfdetach
poppler_add_util(pdfdetach
  pdfdetach.cc
)

# pdffonts
poppler_add_util(pdffonts
  pdffonts.cc
)

# pdfimages
poppler_add_util(pdfimages
  pdfimages.cc
  ImageOutputDev.cc
  ImageOutputDev.h
)

# pdfinfo
poppler_add_util(pdfinfo
  pdfinfo.cc
  printencodings.cc
)

# pdftops
poppler_add_util(pdftops
  pdftops.cc
)

# pdftotext
poppler_add_util(pdftotext
  pdftotext.cc
  printencodings.cc
)

# pdftohtml
poppler_add_util(pdftohtml
  pdftohtml.cc
  HtmlFonts.cc
  HtmlLinks.cc
  HtmlOutputDev.cc
)

# pdfseparate
poppler_add_util(pdfseparate
  pdfseparate.cc
)

# pdfunite
poppler_add_util(pdfunite
  pdfunite.cc
)
diff --git 
a/utils/HtmlFonts.cc b/utils/HtmlFonts.cc new file mode 100644 index 00000000..be02c5f3 --- /dev/null +++ b/utils/HtmlFonts.cc @@ -0,0 +1,386 @@ +//======================================================================== +// +// This file comes from pdftohtml project +// http://pdftohtml.sourceforge.net +// +// Copyright from: +// Gueorgui Ovtcharov +// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/> +// Mikhail Kruk <meshko@cs.brandeis.edu> +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2007, 2010 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru> +// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com> +// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> +// Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "HtmlFonts.h" +#include "HtmlUtils.h" +#include "GlobalParams.h" +#include "UnicodeMap.h" +#include "GfxFont.h" +#include <stdio.h> + + struct Fonts{ + const char *Fontname; + const char *name; + }; + +const int font_num=13; + +static Fonts fonts[font_num+1]={ + {"Courier", "Courier" }, + {"Courier-Bold", "Courier"}, + {"Courier-BoldOblique", "Courier"}, + {"Courier-Oblique", "Courier"}, + {"Helvetica", "Helvetica"}, + {"Helvetica-Bold", 
"Helvetica"}, + {"Helvetica-BoldOblique", "Helvetica"}, + {"Helvetica-Oblique", "Helvetica"}, + {"Symbol", "Symbol" }, + {"Times-Bold", "Times" }, + {"Times-BoldItalic", "Times" }, + {"Times-Italic", "Times" }, + {"Times-Roman", "Times" }, + {" " , "Times" }, +}; + +#define xoutRound(x) ((int)(x + 0.5)) +extern GBool xml; + +GooString* HtmlFont::DefaultFont=new GooString("Times"); // Arial,Helvetica,sans-serif + +HtmlFontColor::HtmlFontColor(GfxRGB rgb){ + r=static_cast<int>(rgb.r/65535.0*255.0); + g=static_cast<int>(rgb.g/65535.0*255.0); + b=static_cast<int>(rgb.b/65535.0*255.0); + if (!(Ok(r)&&Ok(b)&&Ok(g))) { + if (!globalParams->getErrQuiet()) fprintf(stderr, "Error : Bad color (%d,%d,%d) reset to (0,0,0)\n", r, g, b); + r=0;g=0;b=0; + } +} + +GooString *HtmlFontColor::convtoX(unsigned int xcol) const{ + GooString *xret=new GooString(); + char tmp; + unsigned int k; + k = (xcol/16); + if ((k>=0)&&(k<10)) tmp=(char) ('0'+k); else tmp=(char)('a'+k-10); + xret->append(tmp); + k = (xcol%16); + if ((k>=0)&&(k<10)) tmp=(char) ('0'+k); else tmp=(char)('a'+k-10); + xret->append(tmp); + return xret; +} + +GooString *HtmlFontColor::toString() const{ + GooString *tmp=new GooString("#"); + GooString *tmpr=convtoX(r); + GooString *tmpg=convtoX(g); + GooString *tmpb=convtoX(b); + tmp->append(tmpr); + tmp->append(tmpg); + tmp->append(tmpb); + delete tmpr; + delete tmpg; + delete tmpb; + return tmp; +} + +HtmlFont::HtmlFont(GfxFont *font, int _size, GfxRGB rgb){ + //if (col) color=HtmlFontColor(col); + //else color=HtmlFontColor(); + color=HtmlFontColor(rgb); + GooString* ftname=font->getName(); + if (!ftname) ftname = getDefaultFont(); + + GooString *fontname = NULL; + + if( ftname ){ + fontname = new GooString(ftname); + FontName=new GooString(ftname); + } + else { + fontname = NULL; + FontName = NULL; + } + + lineSize = -1; + + size=(_size-1); + italic = gFalse; + bold = gFalse; + rotOrSkewed = gFalse; + + if (font->isBold() || font->getWeight() >= GfxFont::W700) 
bold=gTrue; + if (font->isItalic()) italic=gTrue; + + if (fontname){ + if (!bold && strstr(fontname->lowerCase()->getCString(),"bold")) { + bold=gTrue; + } + + if (!italic && + (strstr(fontname->lowerCase()->getCString(),"italic")|| + strstr(fontname->lowerCase()->getCString(),"oblique"))) { + italic=gTrue; + } + + int i=0; + while (strcmp(ftname->getCString(),fonts[i].Fontname)&&(i<font_num)) + { + i++; + } + pos=i; + delete fontname; + } else + pos = font_num; + if (!DefaultFont) DefaultFont=new GooString(fonts[font_num].name); + +} + +HtmlFont::HtmlFont(const HtmlFont& x){ + size=x.size; + lineSize=x.lineSize; + italic=x.italic; + bold=x.bold; + pos=x.pos; + color=x.color; + if (x.FontName) FontName=new GooString(x.FontName); + rotOrSkewed = x.rotOrSkewed; + memcpy(rotSkewMat, x.rotSkewMat, sizeof(rotSkewMat)); + } + + +HtmlFont::~HtmlFont(){ + if (FontName) delete FontName; +} + +HtmlFont& HtmlFont::operator=(const HtmlFont& x){ + if (this==&x) return *this; + size=x.size; + lineSize=x.lineSize; + italic=x.italic; + bold=x.bold; + pos=x.pos; + color=x.color; + if (FontName) delete FontName; + if (x.FontName) FontName=new GooString(x.FontName); + return *this; +} + +void HtmlFont::clear(){ + if(DefaultFont) delete DefaultFont; + DefaultFont = NULL; +} + + + +/* + This function is used to compare font uniquely for insertion into + the list of all encountered fonts +*/ +GBool HtmlFont::isEqual(const HtmlFont& x) const{ + return (size==x.size) && + (lineSize==x.lineSize) && + (pos==x.pos) && (bold==x.bold) && (italic==x.italic) && + (color.isEqual(x.getColor())) && isRotOrSkewed() == x.isRotOrSkewed() && + (!isRotOrSkewed() || rot_matrices_equal(getRotMat(), x.getRotMat())); +} + +/* + This one is used to decide whether two pieces of text can be joined together + and therefore we don't care about bold/italics properties +*/ +GBool HtmlFont::isEqualIgnoreBold(const HtmlFont& x) const{ + return ((size==x.size) && + (!strcmp(fonts[pos].name, fonts[x.pos].name)) && + 
(color.isEqual(x.getColor()))); +} + +GooString* HtmlFont::getFontName(){ + if (pos!=font_num) return new GooString(fonts[pos].name); + else return new GooString(DefaultFont); +} + +GooString* HtmlFont::getFullName(){ + if (FontName) + return new GooString(FontName); + else return new GooString(DefaultFont); +} + +void HtmlFont::setDefaultFont(GooString* defaultFont){ + if (DefaultFont) delete DefaultFont; + DefaultFont=new GooString(defaultFont); +} + + +GooString* HtmlFont::getDefaultFont(){ + return DefaultFont; +} + +// this method if plain wrong todo +GooString* HtmlFont::HtmlFilter(Unicode* u, int uLen) { + GooString *tmp = new GooString(); + UnicodeMap *uMap; + char buf[8]; + int n; + + // get the output encoding + if (!(uMap = globalParams->getTextEncoding())) { + return tmp; + } + + for (int i = 0; i < uLen; ++i) { + switch (u[i]) + { + case '"': tmp->append("""); break; + case '&': tmp->append("&"); break; + case '<': tmp->append("<"); break; + case '>': tmp->append(">"); break; + case ' ': tmp->append( !xml && ( i+1 >= uLen || !tmp->getLength() || tmp->getChar( tmp->getLength()-1 ) == ' ' ) ? 
" " : " " ); + break; + default: + { + // convert unicode to string + if ((n = uMap->mapUnicode(u[i], buf, sizeof(buf))) > 0) { + tmp->append(buf, n); + } + } + } + } + + uMap->decRefCnt(); + return tmp; +} + +GooString* HtmlFont::simple(HtmlFont* font, Unicode* content, int uLen){ + GooString *cont=HtmlFilter (content, uLen); + + /*if (font.isBold()) { + cont->insert(0,"<b>",3); + cont->append("</b>",4); + } + if (font.isItalic()) { + cont->insert(0,"<i>",3); + cont->append("</i>",4); + } */ + + return cont; +} + +HtmlFontAccu::HtmlFontAccu(){ + accu=new std::vector<HtmlFont>(); +} + +HtmlFontAccu::~HtmlFontAccu(){ + if (accu) delete accu; +} + +int HtmlFontAccu::AddFont(const HtmlFont& font){ + std::vector<HtmlFont>::iterator i; + for (i=accu->begin();i!=accu->end();i++) + { + if (font.isEqual(*i)) + { + return (int)(i-(accu->begin())); + } + } + + accu->push_back(font); + return (accu->size()-1); +} + +// get CSS font definition for font #i +GooString* HtmlFontAccu::CSStyle(int i, int j){ + GooString *tmp=new GooString(); + GooString *iStr=GooString::fromInt(i); + GooString *jStr=GooString::fromInt(j); + + std::vector<HtmlFont>::iterator g=accu->begin(); + g+=i; + HtmlFont font=*g; + GooString *Size=GooString::fromInt(font.getSize()); + GooString *colorStr=font.getColor().toString(); + GooString *fontName=font.getFontName(); + GooString *lSize; + + if(!xml){ + tmp->append(".ft"); + tmp->append(jStr); + tmp->append(iStr); + tmp->append("{font-size:"); + tmp->append(Size); + if( font.getLineSize() != -1 && font.getLineSize() != 0 ) + { + lSize = GooString::fromInt(font.getLineSize()); + tmp->append("px;line-height:"); + tmp->append(lSize); + delete lSize; + } + tmp->append("px;font-family:"); + tmp->append(fontName); //font.getFontName()); + tmp->append(";color:"); + tmp->append(colorStr); + // if there is rotation or skew, include the matrix + if (font.isRotOrSkewed()) { + const double * const text_mat = font.getRotMat(); + GooString matrix_str(" matrix("); + 
matrix_str.appendf("{0:10.10g}, {1:10.10g}, {2:10.10g}, {3:10.10g}, 0, 0)", + text_mat[0], text_mat[1], text_mat[2], text_mat[3]); + tmp->append(";-moz-transform:"); + tmp->append(&matrix_str); + tmp->append(";-webkit-transform:"); + tmp->append(&matrix_str); + tmp->append(";-o-transform:"); + tmp->append(&matrix_str); + tmp->append(";-ms-transform:"); + tmp->append(&matrix_str); + // Todo: 75% is a wild guess that seems to work pretty well; + // We probably need to calculate the real percentage + // Based on the characteristic baseline and bounding box of current font + // PDF origin is at baseline + tmp->append(";-moz-transform-origin: left 75%"); + tmp->append(";-webkit-transform-origin: left 75%"); + tmp->append(";-o-transform-origin: left 75%"); + tmp->append(";-ms-transform-origin: left 75%"); + } + tmp->append(";}"); + } + if (xml) { + tmp->append("<fontspec id=\""); + tmp->append(iStr); + tmp->append("\" size=\""); + tmp->append(Size); + tmp->append("\" family=\""); + tmp->append(fontName); //font.getFontName()); + tmp->append("\" color=\""); + tmp->append(colorStr); + tmp->append("\"/>"); + } + + delete fontName; + delete colorStr; + delete jStr; + delete iStr; + delete Size; + return tmp; +} + + diff --git a/utils/HtmlFonts.h b/utils/HtmlFonts.h new file mode 100644 index 00000000..22368b28 --- /dev/null +++ b/utils/HtmlFonts.h @@ -0,0 +1,117 @@ +//======================================================================== +// +// This file comes from pdftohtml project +// http://pdftohtml.sourceforge.net +// +// Copyright from: +// Gueorgui Ovtcharov +// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/> +// Mikhail Kruk <meshko@cs.brandeis.edu> +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are 
licensed +// under GPL version 2 or later +// +// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) +// Copyright (C) 2010 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk> +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2012 Igor Slepchin <igor.slepchin@gmail.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef _HTML_FONTS_H +#define _HTML_FONTS_H +#include "goo/GooString.h" +#include "GfxState.h" +#include "CharTypes.h" +#include <vector> + +class HtmlFontColor{ + private: + unsigned int r; + unsigned int g; + unsigned int b; + GBool Ok(unsigned int xcol){ return ((xcol<=255)&&(xcol>=0));} + GooString *convtoX(unsigned int xcol) const; + public: + HtmlFontColor():r(0),g(0),b(0){} + HtmlFontColor(GfxRGB rgb); + HtmlFontColor(const HtmlFontColor& x){r=x.r;g=x.g;b=x.b;} + HtmlFontColor& operator=(const HtmlFontColor &x){ + r=x.r;g=x.g;b=x.b; + return *this; + } + ~HtmlFontColor(){}; + GooString* toString() const; + GBool isEqual(const HtmlFontColor& col) const{ + return ((r==col.r)&&(g==col.g)&&(b==col.b)); + } +} ; + + +class HtmlFont{ + private: + unsigned int size; + int lineSize; + GBool italic; + GBool bold; + GBool rotOrSkewed; + int pos; // position of the font name in the fonts array + static GooString *DefaultFont; + GooString *FontName; + HtmlFontColor color; + double rotSkewMat[4]; // only four values needed for rotation and skew +public: + + HtmlFont(){FontName=NULL; rotOrSkewed = gFalse;} + HtmlFont(GfxFont *font,int _size, GfxRGB rgb); + HtmlFont(const HtmlFont& x); + HtmlFont& operator=(const HtmlFont& x); + HtmlFontColor getColor() const {return color;} + ~HtmlFont(); + static void clear(); + GooString* 
getFullName(); + GBool isItalic() const {return italic;} + GBool isBold() const {return bold;} + GBool isRotOrSkewed() const { return rotOrSkewed; } + unsigned int getSize() const {return size;} + int getLineSize() const {return lineSize;} + void setLineSize(int _lineSize) { lineSize = _lineSize; } + void setRotMat(const double * const mat) + { rotOrSkewed = gTrue; memcpy(rotSkewMat, mat, sizeof(rotSkewMat)); } + const double *getRotMat() const { return rotSkewMat; } + GooString* getFontName(); + static GooString* getDefaultFont(); + static void setDefaultFont(GooString* defaultFont); + static GooString* HtmlFilter(Unicode* u, int uLen); //char* s); + GBool isEqual(const HtmlFont& x) const; + GBool isEqualIgnoreBold(const HtmlFont& x) const; + static GooString* simple(HtmlFont *font, Unicode *content, int uLen); + void print() const {printf("font: %s %d %s%spos: %d\n", FontName->getCString(), size, bold ? "bold " : "", italic ? "italic " : "", pos);}; +}; + +class HtmlFontAccu{ +private: + std::vector<HtmlFont> *accu; + +public: + HtmlFontAccu(); + ~HtmlFontAccu(); + int AddFont(const HtmlFont& font); + HtmlFont *Get(int i){ + return &(*accu)[i]; + } + GooString* CSStyle(int i, int j = 0); + int size() const {return accu->size();} + +}; +#endif diff --git a/utils/HtmlLinks.cc b/utils/HtmlLinks.cc new file mode 100644 index 00000000..1d609f67 --- /dev/null +++ b/utils/HtmlLinks.cc @@ -0,0 +1,143 @@ +//======================================================================== +// +// This file comes from pdftohtml project +// http://pdftohtml.sourceforge.net +// +// Copyright from: +// Gueorgui Ovtcharov +// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/> +// Mikhail Kruk <meshko@cs.brandeis.edu> +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made 
under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru> +// Copyright (C) 2010 Albert Astals Cid <aacid@kde.org> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "HtmlLinks.h" + +extern GBool xml; + +HtmlLink::HtmlLink(const HtmlLink& x){ + Xmin=x.Xmin; + Ymin=x.Ymin; + Xmax=x.Xmax; + Ymax=x.Ymax; + dest=new GooString(x.dest); +} + +HtmlLink::HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString * _dest) +{ + if (xmin < xmax) { + Xmin=xmin; + Xmax=xmax; + } else { + Xmin=xmax; + Xmax=xmin; + } + if (ymin < ymax) { + Ymin=ymin; + Ymax=ymax; + } else { + Ymin=ymax; + Ymax=ymin; + } + dest=new GooString(_dest); +} + +HtmlLink::~HtmlLink(){ + delete dest; +} + +GBool HtmlLink::isEqualDest(const HtmlLink& x) const{ + return (!strcmp(dest->getCString(), x.dest->getCString())); +} + +GBool HtmlLink::inLink(double xmin,double ymin,double xmax,double ymax) const { + double y=(ymin+ymax)/2; + if (y>Ymax) return gFalse; + return (y>Ymin)&&(xmin<Xmax)&&(xmax>Xmin); + } + +static GooString* EscapeSpecialChars( GooString* s ) +{ + GooString* tmp = NULL; + for( int i = 0, j = 0; i < s->getLength(); i++, j++ ){ + const char *replace = NULL; + switch ( s->getChar(i) ){ + case '"': replace = """; break; + case '&': replace = "&"; break; + case '<': replace = "<"; break; + case '>': replace = ">"; break; + default: continue; + } + if( replace ){ + if( !tmp ) tmp = new GooString( s ); + if( tmp ){ + tmp->del( j, 1 ); + int l = strlen( replace ); + tmp->insert( j, replace, l ); + j += l - 1; + } + } + } + return tmp ? tmp : s; +} + +GooString* HtmlLink::getLinkStart() { + GooString *res = new GooString("<a href=\""); + GooString *d = xml ? 
EscapeSpecialChars(dest) : dest; + res->append( d ); + if( d != dest ) delete d; + res->append("\">"); + return res; +} + +/*GooString* HtmlLink::Link(GooString* content){ + //GooString* _dest=new GooString(dest); + GooString *tmp=new GooString("<a href=\""); + tmp->append(dest); + tmp->append("\">"); + tmp->append(content); + tmp->append("</a>"); + //delete _dest; + return tmp; + }*/ + + + +HtmlLinks::HtmlLinks(){ + accu=new std::vector<HtmlLink>(); +} + +HtmlLinks::~HtmlLinks(){ + delete accu; + accu=NULL; +} + +GBool HtmlLinks::inLink(double xmin,double ymin,double xmax,double ymax,int& p)const { + + for(std::vector<HtmlLink>::iterator i=accu->begin();i!=accu->end();i++){ + if (i->inLink(xmin,ymin,xmax,ymax)) { + p=(i - accu->begin()); + return 1; + } + } + return 0; +} + +HtmlLink* HtmlLinks::getLink(int i) const{ + return &(*accu)[i]; +} + diff --git a/utils/HtmlLinks.h b/utils/HtmlLinks.h new file mode 100644 index 00000000..4a48dfa9 --- /dev/null +++ b/utils/HtmlLinks.h @@ -0,0 +1,73 @@ +//======================================================================== +// +// This file comes from pdftohtml project +// http://pdftohtml.sourceforge.net +// +// Copyright from: +// Gueorgui Ovtcharov +// Rainer Dorsch <http://www.ra.informatik.uni-stuttgart.de/~rainer/> +// Mikhail Kruk <meshko@cs.brandeis.edu> +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2010 Albert Astals Cid <aacid@kde.org> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef _HTML_LINKS 
+#define _HTML_LINKS + +#include <stdlib.h> +#include <string.h> +#include <vector> +#include "goo/GooString.h" + +class HtmlLink{ + +private: + double Xmin; + double Ymin; + double Xmax; + double Ymax; + GooString* dest; + +public: + HtmlLink(const HtmlLink& x); + HtmlLink(double xmin,double ymin,double xmax,double ymax,GooString *_dest); + ~HtmlLink(); + GBool isEqualDest(const HtmlLink& x) const; + GooString *getDest(){return new GooString(dest);} + double getX1() const {return Xmin;} + double getX2() const {return Xmax;} + double getY1() const {return Ymin;} + double getY2() const {return Ymax;} + GBool inLink(double xmin,double ymin,double xmax,double ymax) const ; + //GooString *Link(GooString *content); + GooString* getLinkStart(); + +}; + +class HtmlLinks{ +private: + std::vector<HtmlLink> *accu; +public: + HtmlLinks(); + ~HtmlLinks(); + void AddLink(const HtmlLink& x) {accu->push_back(x);} + GBool inLink(double xmin,double ymin,double xmax,double ymax,int& p) const; + HtmlLink* getLink(int i) const; + +}; + +#endif + diff --git a/utils/HtmlOutputDev.cc b/utils/HtmlOutputDev.cc new file mode 100644 index 00000000..1d1b6285 --- /dev/null +++ b/utils/HtmlOutputDev.cc @@ -0,0 +1,1886 @@ +//======================================================================== +// +// HtmlOutputDev.cc +// +// Copyright 1997-2002 Glyph & Cog, LLC +// +// Changed 1999-2000 by G.Ovtcharov +// +// Changed 2002 by Mikhail Kruk +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2005-2012 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2008 Kjartan Maraas <kmaraas@gnome.org> +// Copyright (C) 2008 Boris Toloknov <tlknv@yandex.ru> +// Copyright (C) 2008 Haruyuki Kawabe 
<Haruyuki.Kawabe@unisys.co.jp> +// Copyright (C) 2008 Tomas Are Haavet <tomasare@gmail.com> +// Copyright (C) 2009 Warren Toomey <wkt@tuhs.org> +// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org> +// Copyright (C) 2009 Reece Dunn <msclrhd@gmail.com> +// Copyright (C) 2010 Adrian Johnson <ajohnson@redneon.com> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> +// Copyright (C) 2011, 2012 Igor Slepchin <igor.slepchin@gmail.com> +// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifdef __GNUC__ +#pragma implementation +#endif + +#include "config.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stddef.h> +#include <ctype.h> +#include <math.h> +#include <iostream> +#include "goo/GooString.h" +#include "goo/GooList.h" +#include "UnicodeMap.h" +#include "goo/gmem.h" +#include "Error.h" +#include "GfxState.h" +#include "Page.h" +#include "Annot.h" +#include "PNGWriter.h" +#include "GlobalParams.h" +#include "HtmlOutputDev.h" +#include "HtmlFonts.h" +#include "HtmlUtils.h" +#include "Outline.h" +#include "PDFDoc.h" + +#define DEBUG __FILE__ << ": " << __LINE__ << ": DEBUG: " + +class HtmlImage +{ +public: + HtmlImage(GooString *_fName, GfxState *state) + : fName(_fName) { + state->transform(0, 0, &xMin, &yMax); + state->transform(1, 1, &xMax, &yMin); + } + ~HtmlImage() { delete fName; } + + double xMin, xMax; // image x coordinates + double yMin, yMax; // image y coordinates + GooString *fName; // image file name +}; + +// returns true if x is 
closer to y than x is to z +static inline bool IS_CLOSER(float x, float y, float z) { return fabs((x)-(y)) < fabs((x)-(z)); } + +extern GBool complexMode; +extern GBool singleHtml; +extern GBool ignore; +extern GBool printCommands; +extern GBool printHtml; +extern GBool noframes; +extern GBool stout; +extern GBool xml; +extern GBool showHidden; +extern GBool noMerge; + +extern double wordBreakThreshold; + +static GBool debug = gFalse; +static GooString *gstr_buff0 = NULL; // a workspace in which I format strings + +static GooString* basename(GooString* str){ + + char *p=str->getCString(); + int len=str->getLength(); + for (int i=len-1;i>=0;i--) + if (*(p+i)==SLASH) + return new GooString((p+i+1),len-i-1); + return new GooString(str); +} + +#if 0 +static GooString* Dirname(GooString* str){ + + char *p=str->getCString(); + int len=str->getLength(); + for (int i=len-1;i>=0;i--) + if (*(p+i)==SLASH) + return new GooString(p,i+1); + return new GooString(); +} +#endif + +static const char *print_matrix(const double *mat) { + delete gstr_buff0; + + gstr_buff0 = GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]", + *mat, mat[1], mat[2], mat[3], mat[4], mat[5]); + return gstr_buff0->getCString(); +} + +static const char *print_uni_str(const Unicode *u, const unsigned uLen) { + GooString *gstr_buff1 = NULL; + + delete gstr_buff0; + + if (!uLen) return ""; + gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?')); + for (unsigned i = 1; i < uLen; i++) { + if (u[i] < 0x7F) { + gstr_buff1 = gstr_buff0->append(u[i] < 0x7F ? 
static_cast<char>(u[i]) & 0xFF : '?'); + delete gstr_buff0; + gstr_buff0 = gstr_buff1; + } + } + + return gstr_buff0->getCString(); +} + +//------------------------------------------------------------------------ +// HtmlString +//------------------------------------------------------------------------ + +HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu* _fonts) : fonts(_fonts) { + GfxFont *font; + double x, y; + + state->transform(state->getCurX(), state->getCurY(), &x, &y); + if ((font = state->getFont())) { + double ascent = font->getAscent(); + double descent = font->getDescent(); + if( ascent > 1.05 ){ + //printf( "ascent=%.15g is too high, descent=%.15g\n", ascent, descent ); + ascent = 1.05; + } + if( descent < -0.4 ){ + //printf( "descent %.15g is too low, ascent=%.15g\n", descent, ascent ); + descent = -0.4; + } + yMin = y - ascent * fontSize; + yMax = y - descent * fontSize; + GfxRGB rgb; + state->getFillRGB(&rgb); + HtmlFont hfont=HtmlFont(font, static_cast<int>(fontSize-1), rgb); + if (isMatRotOrSkew(state->getTextMat())) { + double normalizedMatrix[4]; + memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix)); + // browser rotates the opposite way + // so flip the sign of the angle -> sin() components change sign + if (debug) + std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix) << std::endl; + normalizedMatrix[1] *= -1; + normalizedMatrix[2] *= -1; + if (debug) + std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix) << std::endl; + normalizeRotMat(normalizedMatrix); + if (debug) + std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix) << std::endl; + hfont.setRotMat(normalizedMatrix); + } + fontpos = fonts->AddFont(hfont); + } else { + // this means that the PDF file draws text without a current font, + // which should never happen + yMin = y - 0.95 * fontSize; + yMax = y + 0.35 * fontSize; + fontpos=0; + } + if (yMin == yMax) { + // this is a 
sanity check for a case that shouldn't happen -- but + // if it does happen, we want to avoid dividing by zero later + yMin = y; + yMax = y + 1; + } + col = 0; + text = NULL; + xRight = NULL; + link = NULL; + len = size = 0; + yxNext = NULL; + xyNext = NULL; + htext=new GooString(); + dir = textDirUnknown; +} + + +HtmlString::~HtmlString() { + gfree(text); + delete htext; + gfree(xRight); +} + +void HtmlString::addChar(GfxState *state, double x, double y, + double dx, double dy, Unicode u) { + if (dir == textDirUnknown) { + //dir = UnicodeMap::getDirection(u); + dir = textDirLeftRight; + } + + if (len == size) { + size += 16; + text = (Unicode *)grealloc(text, size * sizeof(Unicode)); + xRight = (double *)grealloc(xRight, size * sizeof(double)); + } + text[len] = u; + if (len == 0) { + xMin = x; + } + xMax = xRight[len] = x + dx; +//printf("added char: %f %f xright = %f\n", x, dx, x+dx); + ++len; +} + +void HtmlString::endString() +{ + if( dir == textDirRightLeft && len > 1 ) + { + //printf("will reverse!\n"); + for (int i = 0; i < len / 2; i++) + { + Unicode ch = text[i]; + text[i] = text[len - i - 1]; + text[len - i - 1] = ch; + } + } +} + +//------------------------------------------------------------------------ +// HtmlPage +//------------------------------------------------------------------------ + +HtmlPage::HtmlPage(GBool rawOrder, char *imgExtVal) { + this->rawOrder = rawOrder; + curStr = NULL; + yxStrings = NULL; + xyStrings = NULL; + yxCur1 = yxCur2 = NULL; + fonts=new HtmlFontAccu(); + links=new HtmlLinks(); + imgList=new GooList(); + pageWidth=0; + pageHeight=0; + fontsPageMarker = 0; + DocName=NULL; + firstPage = -1; + imgExt = new GooString(imgExtVal); +} + +HtmlPage::~HtmlPage() { + clear(); + delete DocName; + delete fonts; + delete links; + delete imgExt; + deleteGooList(imgList, HtmlImage); +} + +void HtmlPage::updateFont(GfxState *state) { + GfxFont *font; + double *fm; + char *name; + int code; + double w; + + // adjust the font size + 
fontSize = state->getTransformedFontSize(); + if ((font = state->getFont()) && font->getType() == fontType3) { + // This is a hack which makes it possible to deal with some Type 3 + // fonts. The problem is that it's impossible to know what the + // base coordinate system used in the font is without actually + // rendering the font. This code tries to guess by looking at the + // width of the character 'm' (which breaks if the font is a + // subset that doesn't contain 'm'). + for (code = 0; code < 256; ++code) { + if ((name = ((Gfx8BitFont *)font)->getCharName(code)) && + name[0] == 'm' && name[1] == '\0') { + break; + } + } + if (code < 256) { + w = ((Gfx8BitFont *)font)->getWidth(code); + if (w != 0) { + // 600 is a generic average 'm' width -- yes, this is a hack + fontSize *= w / 0.6; + } + } + fm = font->getFontMatrix(); + if (fm[0] != 0) { + fontSize *= fabs(fm[3] / fm[0]); + } + } +} + +void HtmlPage::beginString(GfxState *state, GooString *s) { + curStr = new HtmlString(state, fontSize, fonts); +} + + +void HtmlPage::conv(){ + HtmlString *tmp; + + int linkIndex = 0; + HtmlFont* h; + for(tmp=yxStrings;tmp;tmp=tmp->yxNext){ + int pos=tmp->fontpos; + // printf("%d\n",pos); + h=fonts->Get(pos); + + if (tmp->htext) delete tmp->htext; + tmp->htext=HtmlFont::simple(h,tmp->text,tmp->len); + + if (links->inLink(tmp->xMin,tmp->yMin,tmp->xMax,tmp->yMax, linkIndex)){ + tmp->link = links->getLink(linkIndex); + /*GooString *t=tmp->htext; + tmp->htext=links->getLink(k)->Link(tmp->htext); + delete t;*/ + } + } + +} + + +void HtmlPage::addChar(GfxState *state, double x, double y, + double dx, double dy, + double ox, double oy, Unicode *u, int uLen) { + double x1, y1, w1, h1, dx2, dy2; + int n, i; + state->transform(x, y, &x1, &y1); + n = curStr->len; + + // check that new character is in the same direction as current string + // and is not too far away from it before adding + //if ((UnicodeMap::getDirection(u[0]) != curStr->dir) || + // XXX + if (debug) { + double 
*text_mat = state->getTextMat(); + // rotation is (cos q, sin q, -sin q, cos q, 0, 0) + // sin q is zero iff there is no rotation, or 180 deg. rotation; + // for 180 rotation, cos q will be negative + if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) { + std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen) << '"' << std::endl; + std::cerr << "text " << print_matrix(state->getTextMat()); + } + } + if (n > 0 && // don't start a new string, unless there is already a string + // TODO: the following line assumes that text is flowing left to + // right, which will not necessarily be the case, e.g. if rotated; + // It assesses whether or not two characters are close enough to + // be part of the same string + fabs(x1 - curStr->xRight[n-1]) > wordBreakThreshold * (curStr->yMax - curStr->yMin) && + // rotation is (cos q, sin q, -sin q, cos q, 0, 0) + // sin q is zero iff there is no rotation, or 180 deg. rotation; + // for 180 rotation, cos q will be negative + !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat())) + { + endString(); + beginString(state, NULL); + } + state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), + 0, &dx2, &dy2); + dx -= dx2; + dy -= dy2; + state->transformDelta(dx, dy, &w1, &h1); + if (uLen != 0) { + w1 /= uLen; + h1 /= uLen; + } + for (i = 0; i < uLen; ++i) { + Unicode u1 = u[i]; + if (u1 >= 0xd800 && u1 <= 0xdbff && i < uLen) { + // surrogate pair + const Unicode u2 = u[i + 1]; + if (u2 >= 0xdc00 && u2 <= 0xdfff) { + u1 = 0x10000 + ((u1 - 0xd800) << 10) + (u2 - 0xdc00); + + curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u1); + } + ++i; + } else { + curStr->addChar(state, x1 + i*w1, y1 + i*h1, w1, h1, u1); + } + } +} + +void HtmlPage::endString() { + HtmlString *p1, *p2; + double h, y1, y2; + + // throw away zero-length strings -- they don't have valid xMin/xMax + // values, and they're useless anyway + if (curStr->len == 0) { + delete curStr; + curStr = NULL; + return; + } + + 
curStr->endString(); + +#if 0 //~tmp + if (curStr->yMax - curStr->yMin > 20) { + delete curStr; + curStr = NULL; + return; + } +#endif + + // insert string in y-major list + h = curStr->yMax - curStr->yMin; + y1 = curStr->yMin + 0.5 * h; + y2 = curStr->yMin + 0.8 * h; + if (rawOrder) { + p1 = yxCur1; + p2 = NULL; + } else if ((!yxCur1 || + (y1 >= yxCur1->yMin && + (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && + (!yxCur2 || + (y1 < yxCur2->yMin || + (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) { + p1 = yxCur1; + p2 = yxCur2; + } else { + for (p1 = NULL, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) { + if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) + break; + } + yxCur2 = p2; + } + yxCur1 = curStr; + if (p1) + p1->yxNext = curStr; + else + yxStrings = curStr; + curStr->yxNext = p2; + curStr = NULL; +} + +static const char *strrstr( const char *s, const char *ss ) +{ + const char *p = strstr( s, ss ); + for( const char *pp = p; pp != NULL; pp = strstr( p+1, ss ) ){ + p = pp; + } + return p; +} + +static void CloseTags( GooString *htext, GBool &finish_a, GBool &finish_italic, GBool &finish_bold ) +{ + const char *last_italic = finish_italic && ( finish_bold || finish_a ) ? strrstr( htext->getCString(), "<i>" ) : NULL; + const char *last_bold = finish_bold && ( finish_italic || finish_a ) ? strrstr( htext->getCString(), "<b>" ) : NULL; + const char *last_a = finish_a && ( finish_italic || finish_bold ) ? strrstr( htext->getCString(), "<a " ) : NULL; + if( finish_a && ( finish_italic || finish_bold ) && last_a > ( last_italic > last_bold ? 
last_italic : last_bold ) ){ + htext->append("</a>", 4); + finish_a = false; + } + if( finish_italic && finish_bold && last_italic > last_bold ){ + htext->append("</i>", 4); + finish_italic = false; + } + if( finish_bold ) + htext->append("</b>", 4); + if( finish_italic ) + htext->append("</i>", 4); + if( finish_a ) + htext->append("</a>"); +} + +// Strings are lines of text; +// This function aims to combine strings into lines and paragraphs if !noMerge +// It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect +void HtmlPage::coalesce() { + HtmlString *str1, *str2; + HtmlFont *hfont1, *hfont2; + double space, horSpace, vertSpace, vertOverlap; + GBool addSpace, addLineBreak; + int n, i; + double curX, curY; + +#if 0 //~ for debugging + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + printf("x=%f..%f y=%f..%f size=%2d '", + str1->xMin, str1->xMax, str1->yMin, str1->yMax, + (int)(str1->yMax - str1->yMin)); + for (i = 0; i < str1->len; ++i) { + fputc(str1->text[i] & 0xff, stdout); + } + printf("'\n"); + } + printf("\n------------------------------------------------------------\n\n"); +#endif + str1 = yxStrings; + + if( !str1 ) return; + + //----- discard duplicated text (fake boldface, drop shadows) + if( !complexMode ) + { /* if not in complex mode get rid of duplicate strings */ + HtmlString *str3; + GBool found; + while (str1) + { + double size = str1->yMax - str1->yMin; + double xLimit = str1->xMin + size * 0.2; + found = gFalse; + for (str2 = str1, str3 = str1->yxNext; + str3 && str3->xMin < xLimit; + str2 = str3, str3 = str2->yxNext) + { + if (str3->len == str1->len && + !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && + fabs(str3->yMin - str1->yMin) < size * 0.2 && + fabs(str3->yMax - str1->yMax) < size * 0.2 && + fabs(str3->xMax - str1->xMax) < size * 0.2) + { + found = gTrue; + //printf("found duplicate!\n"); + break; + } + } + if (found) + { + str2->xyNext = str3->xyNext; + 
str2->yxNext = str3->yxNext; + delete str3; + } + else + { + str1 = str1->yxNext; + } + } + } /*- !complexMode */ + + str1 = yxStrings; + + hfont1 = getFont(str1); + if( hfont1->isBold() ) + str1->htext->insert(0,"<b>",3); + if( hfont1->isItalic() ) + str1->htext->insert(0,"<i>",3); + if( str1->getLink() != NULL ) { + GooString *ls = str1->getLink()->getLinkStart(); + str1->htext->insert(0, ls); + delete ls; + } + curX = str1->xMin; curY = str1->yMin; + + while (str1 && (str2 = str1->yxNext)) { + hfont2 = getFont(str2); + space = str1->yMax - str1->yMin; // the height of the font's bounding box + horSpace = str2->xMin - str1->xMax; + // if strings line up on left-hand side AND they are on subsequent lines, we need a line break + addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax); + vertSpace = str2->yMin - str1->yMax; + +//printf("coalesce %d %d %f? ", str1->dir, str2->dir, d); + + if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) + { + vertOverlap = str1->yMax - str2->yMin; + } else + if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) + { + vertOverlap = str2->yMax - str1->yMin; + } else + { + vertOverlap = 0; + } + + // Combine strings if: + // They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following: + // 1. They appear to be part of the same line of text + // 2. 
They appear to be subsequent lines of a paragraph + // We assume (1) or (2) above, respectively, based on: + // (1) strings overlap vertically AND + // horizontal space between end of str1 and start of str2 is consistent with a single space or less; + // when rawOrder, the strings have to overlap vertically by at least 50% + // (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left + if ( + ( + ( + ( + (rawOrder && vertOverlap > 0.5 * space) + || + (!rawOrder && str2->yMin < str1->yMax) + ) && + (horSpace > -0.5 * space && horSpace < space) + ) || + (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak) + ) && + (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter + str1->dir == str2->dir // text direction the same + ) + { +// printf("yes\n"); + n = str1->len + str2->len; + if ((addSpace = horSpace > wordBreakThreshold * space)) { + ++n; + } + if (addLineBreak) { + ++n; + } + + str1->size = (n + 15) & ~15; + str1->text = (Unicode *)grealloc(str1->text, + str1->size * sizeof(Unicode)); + str1->xRight = (double *)grealloc(str1->xRight, + str1->size * sizeof(double)); + if (addSpace) { + str1->text[str1->len] = 0x20; + str1->htext->append(xml?" 
":" "); + str1->xRight[str1->len] = str2->xMin; + ++str1->len; + } + if (addLineBreak) { + str1->text[str1->len] = '\n'; + str1->htext->append("<br/>"); + str1->xRight[str1->len] = str2->xMin; + ++str1->len; + str1->yMin = str2->yMin; + str1->yMax = str2->yMax; + str1->xMax = str2->xMax; + int fontLineSize = hfont1->getLineSize(); + int curLineSize = (int)(vertSpace + space); + if( curLineSize != fontLineSize ) + { + HtmlFont *newfnt = new HtmlFont(*hfont1); + newfnt->setLineSize(curLineSize); + str1->fontpos = fonts->AddFont(*newfnt); + delete newfnt; + hfont1 = getFont(str1); + // we have to reget hfont2 because it's location could have + // changed on resize + hfont2 = getFont(str2); + } + } + for (i = 0; i < str2->len; ++i) { + str1->text[str1->len] = str2->text[i]; + str1->xRight[str1->len] = str2->xRight[i]; + ++str1->len; + } + + /* fix <i>, <b> if str1 and str2 differ and handle switch of links */ + HtmlLink *hlink1 = str1->getLink(); + HtmlLink *hlink2 = str2->getLink(); + bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2); + GBool finish_a = switch_links && hlink1 != NULL; + GBool finish_italic = hfont1->isItalic() && ( !hfont2->isItalic() || finish_a ); + GBool finish_bold = hfont1->isBold() && ( !hfont2->isBold() || finish_a || finish_italic ); + CloseTags( str1->htext, finish_a, finish_italic, finish_bold ); + if( switch_links && hlink2 != NULL ) { + GooString *ls = hlink2->getLinkStart(); + str1->htext->append(ls); + delete ls; + } + if( ( !hfont1->isItalic() || finish_italic ) && hfont2->isItalic() ) + str1->htext->append("<i>", 3); + if( ( !hfont1->isBold() || finish_bold ) && hfont2->isBold() ) + str1->htext->append("<b>", 3); + + + str1->htext->append(str2->htext); + // str1 now contains href for link of str2 (if it is defined) + str1->link = str2->link; + hfont1 = hfont2; + if (str2->xMax > str1->xMax) { + str1->xMax = str2->xMax; + } + if (str2->yMax > str1->yMax) { + str1->yMax = str2->yMax; + } + str1->yxNext = 
str2->yxNext; + delete str2; + } else { // keep strings separate +// printf("no\n"); + GBool finish_a = str1->getLink() != NULL; + GBool finish_bold = hfont1->isBold(); + GBool finish_italic = hfont1->isItalic(); + CloseTags( str1->htext, finish_a, finish_italic, finish_bold ); + + str1->xMin = curX; str1->yMin = curY; + str1 = str2; + curX = str1->xMin; curY = str1->yMin; + hfont1 = hfont2; + if( hfont1->isBold() ) + str1->htext->insert(0,"<b>",3); + if( hfont1->isItalic() ) + str1->htext->insert(0,"<i>",3); + if( str1->getLink() != NULL ) { + GooString *ls = str1->getLink()->getLinkStart(); + str1->htext->insert(0, ls); + delete ls; + } + } + } + str1->xMin = curX; str1->yMin = curY; + + GBool finish_bold = hfont1->isBold(); + GBool finish_italic = hfont1->isItalic(); + GBool finish_a = str1->getLink() != NULL; + CloseTags( str1->htext, finish_a, finish_italic, finish_bold ); + +#if 0 //~ for debugging + for (str1 = yxStrings; str1; str1 = str1->yxNext) { + printf("x=%3d..%3d y=%3d..%3d size=%2d ", + (int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax, + (int)(str1->yMax - str1->yMin)); + printf("'%s'\n", str1->htext->getCString()); + } + printf("\n------------------------------------------------------------\n\n"); +#endif + +} + +void HtmlPage::dumpAsXML(FILE* f,int page){ + fprintf(f, "<page number=\"%d\" position=\"absolute\"", page); + fprintf(f," top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight,pageWidth); + + for(int i=fontsPageMarker;i < fonts->size();i++) { + GooString *fontCSStyle = fonts->CSStyle(i); + fprintf(f,"\t%s\n",fontCSStyle->getCString()); + delete fontCSStyle; + } + + int listlen=imgList->getLength(); + for (int i = 0; i < listlen; i++) { + HtmlImage *img = (HtmlImage*)imgList->del(0); + fprintf(f,"<image top=\"%d\" left=\"%d\" ",xoutRound(img->yMin),xoutRound(img->xMin)); + fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(img->xMax-img->xMin),xoutRound(img->yMax-img->yMin)); + 
fprintf(f,"src=\"%s\"/>\n",img->fName->getCString()); + delete img; + } + + for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ + if (tmp->htext){ + fprintf(f,"<text top=\"%d\" left=\"%d\" ",xoutRound(tmp->yMin),xoutRound(tmp->xMin)); + fprintf(f,"width=\"%d\" height=\"%d\" ",xoutRound(tmp->xMax-tmp->xMin),xoutRound(tmp->yMax-tmp->yMin)); + fprintf(f,"font=\"%d\">", tmp->fontpos); + fputs(tmp->htext->getCString(),f); + fputs("</text>\n",f); + } + } + fputs("</page>\n",f); +} + +static void printCSS(FILE *f) +{ + // Image flip/flop CSS + // Source: + // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css + // tested in Chrome, Fx (Linux) and IE9 (W7) + static const char css[] = + "<STYLE type=\"text/css\">" "\n" + "<!--" "\n" + ".xflip {" "\n" + " -moz-transform: scaleX(-1);" "\n" + " -webkit-transform: scaleX(-1);" "\n" + " -o-transform: scaleX(-1);" "\n" + " transform: scaleX(-1);" "\n" + " filter: fliph;" "\n" + "}" "\n" + ".yflip {" "\n" + " -moz-transform: scaleY(-1);" "\n" + " -webkit-transform: scaleY(-1);" "\n" + " -o-transform: scaleY(-1);" "\n" + " transform: scaleY(-1);" "\n" + " filter: flipv;" "\n" + "}" "\n" + ".xyflip {" "\n" + " -moz-transform: scaleX(-1) scaleY(-1);" "\n" + " -webkit-transform: scaleX(-1) scaleY(-1);" "\n" + " -o-transform: scaleX(-1) scaleY(-1);" "\n" + " transform: scaleX(-1) scaleY(-1);" "\n" + " filter: fliph + flipv;" "\n" + "}" "\n" + "-->" "\n" + "</STYLE>" "\n"; + + fwrite( css, sizeof(css)-1, 1, f ); +} + +int HtmlPage::dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page) { + GooString* tmp; + + if( !noframes ) + { + GooString* pgNum=GooString::fromInt(page); + tmp = new GooString(DocName); + if (!singleHtml){ + tmp->append('-')->append(pgNum)->append(".html"); + pageFile = fopen(tmp->getCString(), "w"); + } else { + tmp->append("-html")->append(".html"); + pageFile = fopen(tmp->getCString(), "a"); + } + delete pgNum; + if (!pageFile) { + error(errIO, -1, "Couldn't 
open html file '{0:t}'", tmp); + delete tmp; + return 1; + } + + if (!singleHtml) + fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>Page %d</TITLE>\n\n", DOCTYPE, page); + else + fprintf(pageFile,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n\n", DOCTYPE, tmp->getCString()); + + delete tmp; + + GooString *htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName()); + if (!singleHtml) + fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString()); + else + fprintf(pageFile, "<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding->getCString()); + delete htmlEncoding; + } + else + { + pageFile = file; + fprintf(pageFile,"<!-- Page %d -->\n", page); + fprintf(pageFile,"<a name=\"%d\"></a>\n", page); + } + + return 0; +} + +void HtmlPage::dumpComplex(FILE *file, int page){ + FILE* pageFile; + GooString* tmp; + + if( firstPage == -1 ) firstPage = page; + + if (dumpComplexHeaders(file, pageFile, page)) { error(errIO, -1, "Couldn't write headers."); return; } + + tmp=basename(DocName); + + fputs("<STYLE type=\"text/css\">\n<!--\n",pageFile); + fputs("\tp {margin: 0; padding: 0;}",pageFile); + for(int i=fontsPageMarker;i!=fonts->size();i++) { + GooString *fontCSStyle; + if (!singleHtml) + fontCSStyle = fonts->CSStyle(i); + else + fontCSStyle = fonts->CSStyle(i,page); + fprintf(pageFile,"\t%s\n",fontCSStyle->getCString()); + delete fontCSStyle; + } + + fputs("-->\n</STYLE>\n",pageFile); + + if( !noframes ) + { + fputs("</HEAD>\n<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n",pageFile); + } + + fprintf(pageFile,"<DIV id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n", + page, pageWidth, pageHeight); + + if( !ignore ) + { + fprintf(pageFile, + "<IMG width=\"%d\" height=\"%d\" src=\"%s%03d.%s\" 
alt=\"background image\"/>\n", + pageWidth, pageHeight, tmp->getCString(), + (page-firstPage+1), imgExt->getCString()); + } + + delete tmp; + + for(HtmlString *tmp1=yxStrings;tmp1;tmp1=tmp1->yxNext){ + if (tmp1->htext){ + fprintf(pageFile, + "<P style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft", + xoutRound(tmp1->yMin), + xoutRound(tmp1->xMin)); + if (!singleHtml) { + fputc('0', pageFile); + } else { + fprintf(pageFile, "%d", page); + } + fprintf(pageFile,"%d\">", tmp1->fontpos); + fputs(tmp1->htext->getCString(), pageFile); + fputs("</P>\n", pageFile); + } + } + + fputs("</DIV>\n", pageFile); + + if( !noframes ) + { + fputs("</BODY>\n</HTML>\n",pageFile); + fclose(pageFile); + } +} + + +void HtmlPage::dump(FILE *f, int pageNum) +{ + if (complexMode || singleHtml) + { + if (xml) dumpAsXML(f, pageNum); + if (!xml) dumpComplex(f, pageNum); + } + else + { + fprintf(f,"<A name=%d></a>",pageNum); + // Loop over the list of image names on this page + int listlen=imgList->getLength(); + for (int i = 0; i < listlen; i++) { + HtmlImage *img = (HtmlImage*)imgList->del(0); + + // see printCSS() for class names + const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" }; + int style_index=0; + if (img->xMin > img->xMax) style_index += 1; // xFlip + if (img->yMin > img->yMax) style_index += 2; // yFlip + + fprintf(f,"<IMG%s src=\"%s\"/><br/>\n",styles[style_index],img->fName->getCString()); + delete img; + } + + GooString* str; + for(HtmlString *tmp=yxStrings;tmp;tmp=tmp->yxNext){ + if (tmp->htext){ + str=new GooString(tmp->htext); + fputs(str->getCString(),f); + delete str; + fputs("<br/>\n",f); + } + } + fputs("<hr>\n",f); + } +} + + + +void HtmlPage::clear() { + HtmlString *p1, *p2; + + if (curStr) { + delete curStr; + curStr = NULL; + } + for (p1 = yxStrings; p1; p1 = p2) { + p2 = p1->yxNext; + delete p1; + } + yxStrings = NULL; + xyStrings = NULL; + yxCur1 = yxCur2 = NULL; + + if( !noframes ) + { + delete fonts; 
+ fonts=new HtmlFontAccu(); + fontsPageMarker = 0; + } + else + { + fontsPageMarker = fonts->size(); + } + + delete links; + links=new HtmlLinks(); + + +} + +void HtmlPage::setDocName(char *fname){ + DocName=new GooString(fname); +} + +void HtmlPage::addImage(GooString *fname, GfxState *state) { + HtmlImage *img = new HtmlImage(fname, state); + imgList->append(img); +} + +//------------------------------------------------------------------------ +// HtmlMetaVar +//------------------------------------------------------------------------ + +HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content) +{ + name = new GooString(_name); + content = new GooString(_content); +} + +HtmlMetaVar::~HtmlMetaVar() +{ + delete name; + delete content; +} + +GooString* HtmlMetaVar::toString() +{ + GooString *result = new GooString("<META name=\""); + result->append(name); + result->append("\" content=\""); + result->append(content); + result->append("\"/>"); + return result; +} + +//------------------------------------------------------------------------ +// HtmlOutputDev +//------------------------------------------------------------------------ + +static const char* HtmlEncodings[][2] = { + {"Latin1", "ISO-8859-1"}, + {NULL, NULL} +}; + +GooString* HtmlOutputDev::mapEncodingToHtml(GooString* encoding) +{ + GooString* enc = encoding; + for(int i = 0; HtmlEncodings[i][0] != NULL; i++) + { + if( enc->cmp(HtmlEncodings[i][0]) == 0 ) + { + delete enc; + return new GooString(HtmlEncodings[i][1]); + } + } + return enc; +} + +void HtmlOutputDev::doFrame(int firstPage){ + GooString* fName=new GooString(Docname); + GooString* htmlEncoding; + fName->append(".html"); + + if (!(fContentsFrame = fopen(fName->getCString(), "w"))){ + error(errIO, -1, "Couldn't open html file '{0:t}'", fName); + delete fName; + return; + } + + delete fName; + + fName=basename(Docname); + fputs(DOCTYPE, fContentsFrame); + fputs("\n<HTML>",fContentsFrame); + fputs("\n<HEAD>",fContentsFrame); + 
fprintf(fContentsFrame,"\n<TITLE>%s</TITLE>",docTitle->getCString()); + htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); + fprintf(fContentsFrame, "\n<META http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString()); + dumpMetaVars(fContentsFrame); + fprintf(fContentsFrame, "</HEAD>\n"); + fputs("<FRAMESET cols=\"100,*\">\n",fContentsFrame); + fprintf(fContentsFrame,"<FRAME name=\"links\" src=\"%s_ind.html\">\n",fName->getCString()); + fputs("<FRAME name=\"contents\" src=",fContentsFrame); + if (complexMode) + fprintf(fContentsFrame,"\"%s-%d.html\"",fName->getCString(), firstPage); + else + fprintf(fContentsFrame,"\"%ss.html\"",fName->getCString()); + + fputs(">\n</FRAMESET>\n</HTML>\n",fContentsFrame); + + delete fName; + delete htmlEncoding; + fclose(fContentsFrame); +} + +HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, char *fileName, char *title, + char *author, char *keywords, char *subject, char *date, + char *extension, + GBool rawOrder, int firstPage, GBool outline) +{ + catalog = catalogA; + fContentsFrame = NULL; + docTitle = new GooString(title); + pages = NULL; + dumpJPEG=gTrue; + //write = gTrue; + this->rawOrder = rawOrder; + this->doOutline = outline; + ok = gFalse; + //this->firstPage = firstPage; + //pageNum=firstPage; + // open file + needClose = gFalse; + pages = new HtmlPage(rawOrder, extension); + + glMetaVars = new GooList(); + glMetaVars->append(new HtmlMetaVar("generator", "pdftohtml 0.36")); + if( author ) glMetaVars->append(new HtmlMetaVar("author", author)); + if( keywords ) glMetaVars->append(new HtmlMetaVar("keywords", keywords)); + if( date ) glMetaVars->append(new HtmlMetaVar("date", date)); + if( subject ) glMetaVars->append(new HtmlMetaVar("subject", subject)); + + maxPageWidth = 0; + maxPageHeight = 0; + + pages->setDocName(fileName); + Docname=new GooString (fileName); + + // for non-xml output (complex or simple) with frames generate the left frame + if(!xml && !noframes) 
+ { + if (!singleHtml) + { + GooString* left=new GooString(fileName); + left->append("_ind.html"); + + doFrame(firstPage); + + if (!(fContentsFrame = fopen(left->getCString(), "w"))) + { + error(errIO, -1, "Couldn't open html file '{0:t}'", left); + delete left; + return; + } + delete left; + fputs(DOCTYPE, fContentsFrame); + fputs("<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE></TITLE>\n</HEAD>\n<BODY>\n", fContentsFrame); + + if (doOutline) + { + GooString *str = basename(Docname); + fprintf(fContentsFrame, "<A href=\"%s%s\" target=\"contents\">Outline</a><br/>", str->getCString(), complexMode ? "-outline.html" : "s.html#outline"); + delete str; + } + } + if (!complexMode) + { /* not in complex mode */ + + GooString* right=new GooString(fileName); + right->append("s.html"); + + if (!(page=fopen(right->getCString(),"w"))){ + error(errIO, -1, "Couldn't open html file '{0:t}'", right); + delete right; + return; + } + delete right; + fputs(DOCTYPE, page); + fputs("<HTML>\n<HEAD>\n<TITLE></TITLE>\n",page); + printCSS(page); + fputs("</HEAD>\n<BODY>\n",page); + } + } + + if (noframes) { + if (stout) page=stdout; + else { + GooString* right=new GooString(fileName); + if (!xml) right->append(".html"); + if (xml) right->append(".xml"); + if (!(page=fopen(right->getCString(),"w"))){ + error(errIO, -1, "Couldn't open html file '{0:t}'", right); + delete right; + return; + } + delete right; + } + + GooString *htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); + if (xml) + { + fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding->getCString()); + fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page); + fprintf(page,"<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION); + } + else + { + fprintf(page,"%s\n<HTML xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<HEAD>\n<TITLE>%s</TITLE>\n", DOCTYPE, docTitle->getCString()); + + fprintf(page, "<META 
http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding->getCString()); + + dumpMetaVars(page); + printCSS(page); + fprintf(page,"</HEAD>\n"); + fprintf(page,"<BODY bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n"); + } + delete htmlEncoding; + } + ok = gTrue; +} + +HtmlOutputDev::~HtmlOutputDev() { + HtmlFont::clear(); + + delete Docname; + delete docTitle; + + deleteGooList(glMetaVars, HtmlMetaVar); + + if (fContentsFrame){ + fputs("</BODY>\n</HTML>\n",fContentsFrame); + fclose(fContentsFrame); + } + if (page != NULL) { + if (xml) { + fputs("</pdf2xml>\n",page); + fclose(page); + } else + if ( !complexMode || xml || noframes ) + { + fputs("</BODY>\n</HTML>\n",page); + fclose(page); + } + } + if (pages) + delete pages; +} + +void HtmlOutputDev::startPage(int pageNum, GfxState *state) { +#if 0 + if (mode&&!xml){ + if (write){ + write=gFalse; + GooString* fname=Dirname(Docname); + fname->append("image.log"); + if((tin=fopen(getFileNameFromPath(fname->getCString(),fname->getLength()),"w"))==NULL){ + printf("Error : can not open %s",fname); + exit(1); + } + delete fname; + // if(state->getRotation()!=0) + // fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1()); + // else + fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1()); + } + } +#endif + + this->pageNum = pageNum; + GooString *str=basename(Docname); + pages->clear(); + if(!noframes) + { + if (fContentsFrame) + { + if (complexMode) + fprintf(fContentsFrame,"<A href=\"%s-%d.html\"",str->getCString(),pageNum); + else + fprintf(fContentsFrame,"<A href=\"%ss.html#%d\"",str->getCString(),pageNum); + fprintf(fContentsFrame," target=\"contents\" >Page %d</a><br/>\n",pageNum); + } + } + + pages->pageWidth=static_cast<int>(state->getPageWidth()); + pages->pageHeight=static_cast<int>(state->getPageHeight()); + + delete str; +} + + +void HtmlOutputDev::endPage() { + Links *linksList = docPage->getLinks(); + for 
(int i = 0; i < linksList->getNumLinks(); ++i) + { + doProcessLink(linksList->getLink(i)); + } + delete linksList; + + pages->conv(); + pages->coalesce(); + pages->dump(page, pageNum); + + // I don't yet know what to do in the case when there are pages of different + // sizes and we want complex output: running ghostscript many times + // seems very inefficient. So for now I'll just use last page's size + maxPageWidth = pages->pageWidth; + maxPageHeight = pages->pageHeight; + + //if(!noframes&&!xml) fputs("<br>\n", fContentsFrame); + if(!stout && !globalParams->getErrQuiet()) printf("Page-%d\n",(pageNum)); +} + +void HtmlOutputDev::updateFont(GfxState *state) { + pages->updateFont(state); +} + +void HtmlOutputDev::beginString(GfxState *state, GooString *s) { + pages->beginString(state, s); +} + +void HtmlOutputDev::endString(GfxState *state) { + pages->endString(); +} + +void HtmlOutputDev::drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int /*nBytes*/, Unicode *u, int uLen) +{ + if ( !showHidden && (state->getRender() & 3) == 3) { + return; + } + pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen); +} + +void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str) +{ + FILE *f1; + int c; + + // open the image file + GooString *fName=createImageFileName("jpg"); + if (!(f1 = fopen(fName->getCString(), "wb"))) { + error(errIO, -1, "Couldn't open image file '%s'", fName->getCString()); + delete fName; + return; + } + + // initialize stream + str = str->getNextStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f1); + + fclose(f1); + + if (fName) { + pages->addImage(fName, state); + } +} + +void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height, + GfxImageColorMap *colorMap, GBool isMask) +{ +#ifdef ENABLE_LIBPNG + FILE *f1; + + if (!colorMap && !isMask) { + error(errInternal, -1, "Can't have color image 
without a color map"); + return; + } + + // open the image file + GooString *fName=createImageFileName("png"); + if (!(f1 = fopen(fName->getCString(), "wb"))) { + error(errIO, -1, "Couldn't open image file '%s'", fName->getCString()); + delete fName; + return; + } + + PNGWriter *writer = new PNGWriter( isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB ); + // TODO can we calculate the resolution of the image? + if (!writer->init(f1, width, height, 72, 72)) { + error(errInternal, -1, "Can't init PNG for image '%s'", fName->getCString()); + delete writer; + fclose(f1); + return; + } + + if (!isMask) { + Guchar *p; + GfxRGB rgb; + png_byte *row = (png_byte *) gmalloc(3 * width); // 3 bytes/pixel: RGB + png_bytep *row_pointer= &row; + + // Initialize the image stream + ImageStream *imgStr = new ImageStream(str, width, + colorMap->getNumPixelComps(), colorMap->getBits()); + imgStr->reset(); + + // For each line... + for (int y = 0; y < height; y++) { + + // Convert into a PNG row + p = imgStr->getLine(); + for (int x = 0; x < width; x++) { + colorMap->getRGB(p, &rgb); + // Write the RGB pixels into the row + row[3*x]= colToByte(rgb.r); + row[3*x+1]= colToByte(rgb.g); + row[3*x+2]= colToByte(rgb.b); + p += colorMap->getNumPixelComps(); + } + + if (!writer->writeRow(row_pointer)) { + error(errIO, -1, "Failed to write into PNG '%s'", fName->getCString()); + delete writer; + delete imgStr; + fclose(f1); + return; + } + } + gfree(row); + imgStr->close(); + delete imgStr; + } + else { // isMask == true + ImageStream *imgStr = new ImageStream(str, width, 1, 1); + imgStr->reset(); + + Guchar *png_row = (Guchar *)gmalloc( width ); + + for (int ri = 0; ri < height; ++ri) + { + // read the row of the mask + Guchar *bit_row = imgStr->getLine(); + + // invert for PNG + for(int i = 0; i < width; i++) + png_row[i] = bit_row[i] ? 
0xff : 0x00; + + if (!writer->writeRow( &png_row )) + { + error(errIO, -1, "Failed to write into PNG '%s'", fName->getCString()); + delete writer; + fclose(f1); + delete imgStr; + gfree(png_row); + return; + } + } + imgStr->close(); + delete imgStr; + gfree(png_row); + } + + str->close(); + + writer->close(); + delete writer; + fclose(f1); + + pages->addImage(fName, state); +#else + return; +#endif +} + +GooString *HtmlOutputDev::createImageFileName(const char *ext) +{ + GooString *fName=new GooString(Docname); + fName->append("-"); + GooString *pgNum= GooString::fromInt(pageNum); + GooString *imgnum= GooString::fromInt(pages->getNumImages()+1); + + fName->append(pgNum)->append("_")->append(imgnum)->append(".")->append(ext); + delete pgNum; + delete imgnum; + + return fName; +} + +void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg) { + + if (ignore||(complexMode && !xml)) { + OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); + return; + } + + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT) { + drawJpegImage(state, str); + } + else { +#ifdef ENABLE_LIBPNG + drawPngImage(state, str, width, height, NULL, gTrue); +#else + OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg); +#endif + } +} + +void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg) { + + if (ignore||(complexMode && !xml)) { + OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, + maskColors, inlineImg); + return; + } + + /*if( !globalParams->getErrQuiet() ) + printf("image stream of kind %d\n", str->getKind());*/ + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT) { + drawJpegImage(state, str); + } + else { +#ifdef ENABLE_LIBPNG + drawPngImage(state, str, 
width, height, colorMap ); +#else + OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, + maskColors, inlineImg); +#endif + } +} + + + +void HtmlOutputDev::doProcessLink(AnnotLink* link){ + double _x1,_y1,_x2,_y2; + int x1,y1,x2,y2; + + link->getRect(&_x1,&_y1,&_x2,&_y2); + cvtUserToDev(_x1,_y1,&x1,&y1); + + cvtUserToDev(_x2,_y2,&x2,&y2); + + + GooString* _dest=getLinkDest(link); + HtmlLink t((double) x1,(double) y2,(double) x2,(double) y1,_dest); + pages->AddLink(t); + delete _dest; +} + +GooString* HtmlOutputDev::getLinkDest(AnnotLink *link){ + char *p; + if (!link->getAction()) + return new GooString(); + switch(link->getAction()->getKind()) + { + case actionGoTo: + { + GooString* file=basename(Docname); + int page=1; + LinkGoTo *ha=(LinkGoTo *)link->getAction(); + LinkDest *dest=NULL; + if (ha->getDest()!=NULL) + dest=ha->getDest()->copy(); + else if (ha->getNamedDest()!=NULL) + dest=catalog->findDest(ha->getNamedDest()); + + if (dest){ + if (dest->isPageRef()){ + Ref pageref=dest->getPageRef(); + page=catalog->findPage(pageref.num,pageref.gen); + } + else { + page=dest->getPageNum(); + } + + delete dest; + + GooString *str=GooString::fromInt(page); + /* complex simple + frames file-4.html files.html#4 + noframes file.html#4 file.html#4 + */ + if (noframes) + { + file->append(".html#"); + file->append(str); + } + else + { + if( complexMode ) + { + file->append("-"); + file->append(str); + file->append(".html"); + } + else + { + file->append("s.html#"); + file->append(str); + } + } + + if (printCommands) printf(" link to page %d ",page); + delete str; + return file; + } + else + { + return new GooString(); + } + } + case actionGoToR: + { + LinkGoToR *ha=(LinkGoToR *) link->getAction(); + LinkDest *dest=NULL; + int page=1; + GooString *file=new GooString(); + if (ha->getFileName()){ + delete file; + file=new GooString(ha->getFileName()->getCString()); + } + if (ha->getDest()!=NULL) dest=ha->getDest()->copy(); + if (dest&&file){ + if 
(!(dest->isPageRef())) page=dest->getPageNum(); + delete dest; + + if (printCommands) printf(" link to page %d ",page); + if (printHtml){ + /* only inspect the last four characters when the name is at least that long */ + if (file->getLength()>=4){ + p=file->getCString()+file->getLength()-4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){ + file->del(file->getLength()-4,4); + file->append(".html"); + } + } + file->append('#'); + /* free the temporary page-number string after appending, as the actionGoTo branch does */ + GooString *pageStr=GooString::fromInt(page); + file->append(pageStr); + delete pageStr; + } + } + if (printCommands && file) printf("filename %s\n",file->getCString()); + return file; + } + case actionURI: + { + LinkURI *ha=(LinkURI *) link->getAction(); + GooString* file=new GooString(ha->getURI()->getCString()); + // printf("uri : %s\n",file->getCString()); + return file; + } + case actionLaunch: + { + LinkLaunch *ha=(LinkLaunch *) link->getAction(); + GooString* file=new GooString(ha->getFileName()->getCString()); + if (printHtml) { + /* same length guard as the actionGoToR branch */ + if (file->getLength()>=4){ + p=file->getCString()+file->getLength()-4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")){ + file->del(file->getLength()-4,4); + file->append(".html"); + } + } + if (printCommands) printf("filename %s",file->getCString()); + + return file; + + } + /* not producing HTML: release the unused name before falling through to the empty default */ + delete file; + } + default: + return new GooString(); + } +} + +void HtmlOutputDev::dumpMetaVars(FILE *file) +{ + GooString *var; + + for(int i = 0; i < glMetaVars->getLength(); i++) + { + HtmlMetaVar *t = (HtmlMetaVar*)glMetaVars->get(i); + var = t->toString(); + fprintf(file, "%s\n", var->getCString()); + delete var; + } +} + +GBool HtmlOutputDev::dumpDocOutline(PDFDoc* doc) +{ +#ifdef DISABLE_OUTLINE + return gFalse; +#else + FILE * output = NULL; + GBool bClose = gFalse; + Catalog *catalog = doc->getCatalog(); + + if (!ok) + return gFalse; + + Outline *outline = doc->getOutline(); + if (!outline) + return gFalse; + + GooList *outlines = outline->getItems(); + if (!outlines) + return gFalse; + + if (!complexMode || xml) + { + output = page; + } + else if (complexMode && !xml) + { + if (noframes) + { + output = page; + fputs("<hr>\n", output); + } + else + { + GooString *str = Docname->copy(); + str->append("-outline.html"); + output = 
fopen(str->getCString(), "w"); + /* the path string is only needed for fopen(); free it before the early return so it cannot leak */ + delete str; + if (output == NULL) + return gFalse; + bClose = gTrue; + + GooString *htmlEncoding = + HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName()); + + fprintf(output, "<HTML xmlns=\"http://www.w3.org/1999/xhtml\" " \ + "lang=\"\" xml:lang=\"\">\n" \ + "<HEAD>\n" \ + "<TITLE>Document Outline</TITLE>\n" \ + "<META http-equiv=\"Content-Type\" content=\"text/html; " \ + "charset=%s\"/>\n" \ + "</HEAD>\n<BODY>\n", htmlEncoding->getCString()); + delete htmlEncoding; + } + } + + if (!xml) + { + GBool done = newHtmlOutlineLevel(output, outlines, catalog); + if (done && !complexMode) + fputs("<hr>\n", output); + + if (bClose) + { + fputs("</BODY>\n</HTML>\n", output); + fclose(output); + } + } + else + newXmlOutlineLevel(output, outlines, catalog); + + return gTrue; +#endif +} + +GBool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level) +{ +#ifdef DISABLE_OUTLINE + return gFalse; +#else + GBool atLeastOne = gFalse; + + if (level == 1) + { + fputs("<A name=\"outline\"></a>", output); + fputs("<h1>Document Outline</h1>\n", output); + } + fputs("<ul>\n",output); + + for (int i = 0; i < outlines->getLength(); i++) + { + OutlineItem *item = (OutlineItem*)outlines->get(i); + GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(), + item->getTitleLength()); + + GooString *linkName = NULL; + int page = getOutlinePageNum(item); + if (page > 0) + { + /* complex simple + frames file-4.html files.html#4 + noframes file.html#4 file.html#4 + */ + linkName=basename(Docname); + GooString *str=GooString::fromInt(page); + if (noframes) { + linkName->append(".html#"); + linkName->append(str); + } else { + if( complexMode ) { + linkName->append("-"); + linkName->append(str); + linkName->append(".html"); + } else { + linkName->append("s.html#"); + linkName->append(str); + } + } + delete str; + } + + fputs("<li>",output); + if (linkName) + fprintf(output,"<A href=\"%s\">", 
linkName->getCString()); + fputs(titleStr->getCString(),output); + if (linkName) { + fputs("</A>",output); + delete linkName; + } + delete titleStr; + atLeastOne = gTrue; + + item->open(); + if (item->hasKids()) + { + fputs("\n",output); + newHtmlOutlineLevel(output, item->getKids(), catalog, level+1); + } + item->close(); + fputs("</li>\n",output); + } + fputs("</ul>\n",output); + + return atLeastOne; +#endif +} + +void HtmlOutputDev::newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog) +{ +#ifndef DISABLE_OUTLINE + fputs("<outline>\n", output); + + for (int i = 0; i < outlines->getLength(); i++) + { + OutlineItem *item = (OutlineItem*)outlines->get(i); + GooString *titleStr = HtmlFont::HtmlFilter(item->getTitle(), + item->getTitleLength()); + int page = getOutlinePageNum(item); + if (page > 0) + { + fprintf(output, "<item page=\"%d\">%s</item>\n", + page, titleStr->getCString()); + } + else + { + fprintf(output, "<item>%s</item>\n", titleStr->getCString()); + } + delete titleStr; + + item->open(); + if (item->hasKids()) + { + newXmlOutlineLevel(output, item->getKids(), catalog); + } + item->close(); + } + + fputs("</outline>\n", output); +#endif +} + +#ifndef DISABLE_OUTLINE +int HtmlOutputDev::getOutlinePageNum(OutlineItem *item) +{ + LinkAction *action = item->getAction(); + LinkGoTo *link = NULL; + LinkDest *linkdest = NULL; + int pagenum = -1; + + if (!action || action->getKind() != actionGoTo) + return pagenum; + + link = dynamic_cast<LinkGoTo*>(action); + + if (!link || !link->isOk()) + return pagenum; + + if (link->getDest()) + linkdest = link->getDest()->copy(); + else if (link->getNamedDest()) + linkdest = catalog->findDest(link->getNamedDest()); + + if (!linkdest) + return pagenum; + + if (linkdest->isPageRef()) { + Ref pageref = linkdest->getPageRef(); + pagenum = catalog->findPage(pageref.num, pageref.gen); + } else { + pagenum = linkdest->getPageNum(); + } + + delete linkdest; + return pagenum; +} +#endif diff --git 
a/utils/HtmlOutputDev.h b/utils/HtmlOutputDev.h new file mode 100644 index 00000000..12b16bff --- /dev/null +++ b/utils/HtmlOutputDev.h @@ -0,0 +1,354 @@ +//======================================================================== +// +// HtmlOutputDev.h +// +// Copyright 1997 Derek B. Noonburg +// +// Changed 1999 by G.Ovtcharov +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2006, 2007, 2009, 2012 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2008, 2009 Warren Toomey <wkt@tuhs.org> +// Copyright (C) 2009, 2011 Carlos Garcia Campos <carlosgc@gnome.org> +// Copyright (C) 2009 Kovid Goyal <kovid@kovidgoyal.net> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// Copyright (C) 2011 Stephen Reichling <sreichling@chegg.com> +// Copyright (C) 2012 Igor Slepchin <igor.redhat@gmail.com> +// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef HTMLOUTPUTDEV_H +#define HTMLOUTPUTDEV_H + +#ifdef __GNUC__ +#pragma interface +#endif + +#include <stdio.h> +#include "goo/gtypes.h" +#include "goo/GooList.h" +#include "GfxFont.h" +#include "OutputDev.h" +#include "HtmlLinks.h" +#include "HtmlFonts.h" +#include "Link.h" +#include "Catalog.h" +#include "UnicodeMap.h" + + +#ifdef _WIN32 +# define SLASH '\\' +#else +# define SLASH '/' +#endif + +#define xoutRound(x) ((int)(x + 0.5)) + +#define DOCTYPE "<!DOCTYPE html>" + +class GfxState; +class GooString; +class 
PDFDoc; +class OutlineItem; +//------------------------------------------------------------------------ +// HtmlString +//------------------------------------------------------------------------ + +enum UnicodeTextDirection { + textDirUnknown, + textDirLeftRight, + textDirRightLeft, + textDirTopBottom +}; + + +class HtmlString { +public: + + // Constructor. + HtmlString(GfxState *state, double fontSize, HtmlFontAccu* fonts); + + // Destructor. + ~HtmlString(); + + // Add a character to the string. + void addChar(GfxState *state, double x, double y, + double dx, double dy, + Unicode u); + HtmlLink* getLink() { return link; } + const HtmlFont &getFont() const { return *fonts->Get(fontpos); } + void endString(); // postprocessing + +private: +// aender die text variable + HtmlLink *link; + double xMin, xMax; // bounding box x coordinates + double yMin, yMax; // bounding box y coordinates + int col; // starting column + Unicode *text; // the text + double *xRight; // right-hand x coord of each char + HtmlString *yxNext; // next string in y-major order + HtmlString *xyNext; // next string in x-major order + int fontpos; + GooString* htext; + int len; // length of text and xRight + int size; // size of text and xRight arrays + UnicodeTextDirection dir; // direction (left to right/right to left) + HtmlFontAccu *fonts; + + friend class HtmlPage; + +}; + + +//------------------------------------------------------------------------ +// HtmlPage +//------------------------------------------------------------------------ + + + +class HtmlPage { +public: + + // Constructor. + HtmlPage(GBool rawOrder, char *imgExtVal); + + // Destructor. + ~HtmlPage(); + + // Begin a new string. + void beginString(GfxState *state, GooString *s); + + // Add a character to the current string. 
+ void addChar(GfxState *state, double x, double y, + double dx, double dy, + double ox, double oy, + Unicode *u, int uLen); //Guchar c); + + void updateFont(GfxState *state); + + // End the current string, sorting it into the list of strings. + void endString(); + + // Coalesce strings that look like parts of the same line. + void coalesce(); + + // Find a string. If <top> is true, starts looking at top of page; + // otherwise starts looking at <xMin>,<yMin>. If <bottom> is true, + // stops looking at bottom of page; otherwise stops looking at + // <xMax>,<yMax>. If found, sets the text bounding rectange and + // returns true; otherwise returns false. + + + // new functions + void AddLink(const HtmlLink& x){ + links->AddLink(x); + } + + // add an image to the current page + void addImage(GooString *fname, GfxState *state); + + // number of images on the current page + int getNumImages() { return imgList->getLength(); } + + void dump(FILE *f, int pageNum); + + // Clear the page. + void clear(); + + void conv(); +private: + HtmlFont* getFont(HtmlString *hStr) { return fonts->Get(hStr->fontpos); } + + double fontSize; // current font size + GBool rawOrder; // keep strings in content stream order + + HtmlString *curStr; // currently active string + + HtmlString *yxStrings; // strings in y-major order + HtmlString *xyStrings; // strings in x-major order + HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list + + void setDocName(char* fname); + void dumpAsXML(FILE* f,int page); + void dumpComplex(FILE* f, int page); + int dumpComplexHeaders(FILE * const file, FILE *& pageFile, int page); + + // marks the position of the fonts that belong to current page (for noframes) + int fontsPageMarker; + HtmlFontAccu *fonts; + HtmlLinks *links; + GooList *imgList; + + GooString *DocName; + GooString *imgExt; + int pageWidth; + int pageHeight; + int firstPage; // used to begin the numeration of pages + + friend class HtmlOutputDev; +}; + 
+//------------------------------------------------------------------------ +// HtmlMetaVar +//------------------------------------------------------------------------ +class HtmlMetaVar { +public: + HtmlMetaVar(const char *_name, const char *_content); + ~HtmlMetaVar(); + + GooString* toString(); + +private: + + GooString *name; + GooString *content; +}; + +//------------------------------------------------------------------------ +// HtmlOutputDev +//------------------------------------------------------------------------ + +class HtmlOutputDev: public OutputDev { +public: + + // Open a text output file. If <fileName> is NULL, no file is written + // (this is useful, e.g., for searching text). If <useASCII7> is true, + // text is converted to 7-bit ASCII; otherwise, text is converted to + // 8-bit ISO Latin-1. <useASCII7> should also be set for Japanese + // (EUC-JP) text. If <rawOrder> is true, the text is kept in content + // stream order. + HtmlOutputDev(Catalog *catalogA, char *fileName, char *title, + char *author, + char *keywords, + char *subject, + char *date, + char *extension, + GBool rawOrder, + int firstPage = 1, + GBool outline = 0); + + // Destructor. + virtual ~HtmlOutputDev(); + + // Check if file was successfully created. + virtual GBool isOk() { return ok; } + + //---- get info about output device + + // Does this device use upside-down coordinates? + // (Upside-down means (0,0) is the top left corner of the page.) + virtual GBool upsideDown() { return gTrue; } + + // Does this device use drawChar() or drawString()? + virtual GBool useDrawChar() { return gTrue; } + + // Does this device use beginType3Char/endType3Char? Otherwise, + // text in Type 3 fonts will be drawn with drawChar/drawString. + virtual GBool interpretType3Chars() { return gFalse; } + + // Does this device need non-text content? 
+ virtual GBool needNonText() { return gTrue; } + + //----- initialization and control + + virtual GBool checkPageSlice(Page *page, double hDPI, double vDPI, + int rotate, GBool useMediaBox, GBool crop, + int sliceX, int sliceY, int sliceW, int sliceH, + GBool printing, + GBool (* abortCheckCbk)(void *data) = NULL, + void * abortCheckCbkData = NULL, + GBool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = NULL, + void *annotDisplayDecideCbkData = NULL) + { + docPage = page; + return gTrue; + } + + + // Start a page. + virtual void startPage(int pageNum, GfxState *state); + + // End a page. + virtual void endPage(); + + //----- update text state + virtual void updateFont(GfxState *state); + + //----- text drawing + virtual void beginString(GfxState *state, GooString *s); + virtual void endString(GfxState *state); + virtual void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen); + + virtual void drawImageMask(GfxState *state, Object *ref, + Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg); + virtual void drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg); + + //new feature + virtual int DevType() {return 1234;} + + int getPageWidth() { return maxPageWidth; } + int getPageHeight() { return maxPageHeight; } + + GBool dumpDocOutline(PDFDoc* doc); + +private: + // convert encoding into a HTML standard, or encoding->getCString if not + // recognized. 
Will delete encoding for you and return a new one + // that you have to delete + static GooString* mapEncodingToHtml(GooString* encoding); + void doProcessLink(AnnotLink *link); + GooString* getLinkDest(AnnotLink *link); + void dumpMetaVars(FILE *); + void doFrame(int firstPage); + GBool newHtmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog, int level = 1); + void newXmlOutlineLevel(FILE *output, GooList *outlines, Catalog* catalog); +#ifndef DISABLE_OUTLINE + int getOutlinePageNum(OutlineItem *item); +#endif + void drawJpegImage(GfxState *state, Stream *str); + void drawPngImage(GfxState *state, Stream *str, int width, int height, + GfxImageColorMap *colorMap, GBool isMask = gFalse); + GooString *createImageFileName(const char *ext); + + FILE *fContentsFrame; + FILE *page; // html file + //FILE *tin; // image log file + //GBool write; + GBool needClose; // need to close the file? + HtmlPage *pages; // text for the current page + GBool rawOrder; // keep text in content stream order + GBool doOutline; // output document outline + GBool ok; // set up ok? 
+ GBool dumpJPEG; + int pageNum; + int maxPageWidth; + int maxPageHeight; + GooString *Docname; + GooString *docTitle; + GooList *glMetaVars; + Catalog *catalog; + Page *docPage; + friend class HtmlPage; +}; + +#endif diff --git a/utils/HtmlUtils.h b/utils/HtmlUtils.h new file mode 100644 index 00000000..bdb89b9a --- /dev/null +++ b/utils/HtmlUtils.h @@ -0,0 +1,51 @@ +// +// HtmlUtils.h +// +// Created on: Jun 8, 2011 +// Author: Joshua Richardson <jric@chegg.com> +// Copyright 2011 +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2011 Joshua Richardson <jric@chegg.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef HTMLUTILS_H_ +#define HTMLUTILS_H_ + +#include <math.h> // fabs +#include "goo/gtypes.h" // GBool + +// Returns true iff the difference between a and b is less than the threshold +// We always use fuzzy math when comparing decimal numbers due to imprecision +inline GBool is_within(double a, double thresh, double b) { + return fabs(a-b) < thresh; +} + +inline GBool rot_matrices_equal(const double * const mat0, const double * const mat1) { + return is_within(mat0[0], .1, mat1[0]) && is_within(mat0[1], .1, mat1[1]) && + is_within(mat0[2], .1, mat1[2]) && is_within(mat0[3], .1, mat1[3]); +} + +// rotation is (cos q, sin q, -sin q, cos q, 0, 0) +// sin q is zero iff there is no rotation, or 180 deg. rotation; +// for 180 rotation, cos q will be negative +inline GBool isMatRotOrSkew(const double * const mat) { + return mat[0] < 0 || !is_within(mat[1], .1, 0); +} + +// Alters the matrix so that it does not scale a vector's x component; +// If the matrix does not skew, then that will also normalize the y +// component, keeping any rotation, but removing scaling. 
+inline void normalizeRotMat(double *mat) { + double scale = fabs(mat[0] + mat[1]); + if (!scale) return; + for (int i = 0; i < 4; i++) mat[i] /= scale; +} + +#endif /* HTMLUTILS_H_ */ diff --git a/utils/ImageOutputDev.cc b/utils/ImageOutputDev.cc new file mode 100644 index 00000000..0c06513c --- /dev/null +++ b/utils/ImageOutputDev.cc @@ -0,0 +1,425 @@ +//======================================================================== +// +// ImageOutputDev.cc +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2005, 2007, 2011 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2006 Rainer Keller <class321@gmx.de> +// Copyright (C) 2008 Timothy Lee <timothy.lee@siriushk.com> +// Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu> +// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org> +// Copyright (C) 2009 William Bader <williambader@hotmail.com> +// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de> +// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> + +#ifdef USE_GCC_PRAGMAS +#pragma implementation +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <ctype.h> +#include "goo/gmem.h" +#include "Error.h" +#include "GfxState.h" +#include "Object.h" +#include "Stream.h" +#include "ImageOutputDev.h" + +ImageOutputDev::ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, 
GBool listImagesA) { + listImages = listImagesA; + if (!listImages) { + fileRoot = copyString(fileRootA); + fileName = (char *)gmalloc(strlen(fileRoot) + 45); + } + dumpJPEG = dumpJPEGA; + pageNames = pageNamesA; + imgNum = 0; + pageNum = 0; + ok = gTrue; + if (listImages) { + printf("page num type width height color comp bpc enc interp object ID\n"); + printf("---------------------------------------------------------------------\n"); + } +} + + +ImageOutputDev::~ImageOutputDev() { + if (!listImages) { + gfree(fileName); + gfree(fileRoot); + } +} + +void ImageOutputDev::setFilename(const char *fileExt) { + if (pageNames) { + sprintf(fileName, "%s-%03d-%03d.%s", fileRoot, pageNum, imgNum, fileExt); + } else { + sprintf(fileName, "%s-%03d.%s", fileRoot, imgNum, fileExt); + } +} + +void ImageOutputDev::listImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, GBool inlineImg, + ImageType imageType) { + const char *type; + const char *colorspace; + const char *enc; + int components, bpc; + + printf("%4d %5d ", pageNum, imgNum); + type = ""; + switch (imageType) { + case imgImage: + type = "image"; + break; + case imgStencil: + type = "stencil"; + break; + case imgMask: + type = "mask"; + break; + case imgSmask: + type = "smask"; + break; + } + printf("%-7s %5d %5d ", type, width, height); + + colorspace = "-"; + /* masks and stencils default to ncomps = 1 and bpc = 1 */ + components = 1; + bpc = 1; + if (colorMap && colorMap->isOk()) { + switch (colorMap->getColorSpace()->getMode()) { + case csDeviceGray: + case csCalGray: + colorspace = "gray"; + break; + case csDeviceRGB: + case csCalRGB: + colorspace = "rgb"; + break; + case csDeviceCMYK: + colorspace = "cmyk"; + break; + case csLab: + colorspace = "lab"; + break; + case csICCBased: + colorspace = "icc"; + break; + case csIndexed: + colorspace = "index"; + break; + case csSeparation: + colorspace = "sep"; + break; + case csDeviceN: + colorspace = 
"devn"; + break; + case csPattern: + default: + colorspace = "-"; + break; + } + components = colorMap->getNumPixelComps(); + bpc = colorMap->getBits(); + } + printf("%-5s %2d %2d ", colorspace, components, bpc); + + switch (str->getKind()) { + case strCCITTFax: + enc = "ccitt"; + break; + case strDCT: + enc = "jpeg"; + break; + case strJPX: + enc = "jpx"; + break; + case strJBIG2: + enc = "jbig2"; + break; + case strFile: + case strFlate: + case strCachedFile: + case strASCIIHex: + case strASCII85: + case strLZW: + case strRunLength: + case strWeird: + default: + enc = "image"; + break; + } + printf("%-5s ", enc); + + printf("%-3s ", interpolate ? "yes" : "no"); + + if (inlineImg) { + printf("[inline]\n"); + } else if (ref->isRef()) { + const Ref imageRef = ref->getRef(); + if (imageRef.gen >= 100000) { + printf("[none]\n"); + } else { + printf(" %6d %2d\n", imageRef.num, imageRef.gen); + } + } else { + printf("[none]\n"); + } + + ++imgNum; +} + +void ImageOutputDev::writeMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg) { + FILE *f; + int c; + int size, i; + + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT && !inlineImg) { + + // open the image file + setFilename("jpg"); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + + // initialize stream + str = str->getNextStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f); + + str->close(); + fclose(f); + + // dump PBM file + } else { + + // open the image file and write the PBM header + setFilename("pbm"); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + fprintf(f, "P4\n"); + fprintf(f, "%d %d\n", width, height); + + // initialize stream + str->reset(); + + // copy the stream + size = height * ((width + 7) / 8); + for (i = 
0; i < size; ++i) { + fputc(str->getChar(), f); + } + + str->close(); + fclose(f); + } +} + +void ImageOutputDev::writeImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg) { + FILE *f; + ImageStream *imgStr; + Guchar *p; + Guchar zero = 0; + GfxGray gray; + GfxRGB rgb; + int x, y; + int c; + int size, i; + int pbm_mask = 0xff; + + // dump JPEG file + if (dumpJPEG && str->getKind() == strDCT && + (colorMap->getNumPixelComps() == 1 || + colorMap->getNumPixelComps() == 3) && + !inlineImg) { + + // open the image file + setFilename("jpg"); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + + // initialize stream + str = str->getNextStream(); + str->reset(); + + // copy the stream + while ((c = str->getChar()) != EOF) + fputc(c, f); + + str->close(); + fclose(f); + + // dump PBM file + } else if (colorMap->getNumPixelComps() == 1 && + colorMap->getBits() == 1) { + + // open the image file and write the PBM header + setFilename("pbm"); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + fprintf(f, "P4\n"); + fprintf(f, "%d %d\n", width, height); + + // initialize stream + str->reset(); + + // if 0 comes out as 0 in the color map, the we _flip_ stream bits + // otherwise we pass through stream bits unmolested + colorMap->getGray(&zero, &gray); + if(colToByte(gray)) + pbm_mask = 0; + + // copy the stream + size = height * ((width + 7) / 8); + for (i = 0; i < size; ++i) { + fputc(str->getChar() ^ pbm_mask, f); + } + + str->close(); + fclose(f); + + // dump PPM file + } else { + + // open the image file and write the PPM header + setFilename("ppm"); + ++imgNum; + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Couldn't open image file '{0:s}'", fileName); + return; + } + fprintf(f, "P6\n"); + fprintf(f, 
"%d %d\n", width, height); + fprintf(f, "255\n"); + + // initialize stream + imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), + colorMap->getBits()); + imgStr->reset(); + + // for each line... + for (y = 0; y < height; ++y) { + + // write the line + if ((p = imgStr->getLine())) { + for (x = 0; x < width; ++x) { + colorMap->getRGB(p, &rgb); + fputc(colToByte(rgb.r), f); + fputc(colToByte(rgb.g), f); + fputc(colToByte(rgb.b), f); + p += colorMap->getNumPixelComps(); + } + } else { + for (x = 0; x < width; ++x) { + fputc(0, f); + fputc(0, f); + fputc(0, f); + } + } + } + imgStr->close(); + delete imgStr; + + fclose(f); + } +} + +GBool ImageOutputDev::tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str, + double *pmat, int paintType, int tilingType, Dict *resDict, + double *mat, double *bbox, + int x0, int y0, int x1, int y1, + double xStep, double yStep) { + return gTrue; + // do nothing -- this avoids the potentially slow loop in Gfx.cc +} + +void ImageOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg) { + if (listImages) + listImage(state, ref, str, width, height, NULL, interpolate, inlineImg, imgMask); + else + writeMask(state, ref, str, width, height, invert, interpolate, inlineImg); +} + +void ImageOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg) { + if (listImages) + listImage(state, ref, str, width, height, colorMap, interpolate, inlineImg, imgImage); + else + writeImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg); +} + +void ImageOutputDev::drawMaskedImage( + GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, GBool interpolate, + Stream *maskStr, int maskWidth, int maskHeight, GBool maskInvert, GBool maskInterpolate) { + if 
(listImages) { + listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage); + listImage(state, ref, str, maskWidth, maskHeight, NULL, maskInterpolate, gFalse, imgMask); + } else { + drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); + drawImageMask(state, ref, maskStr, maskWidth, maskHeight, maskInvert, + maskInterpolate, gFalse); + } +} + +void ImageOutputDev::drawSoftMaskedImage( + GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, GBool interpolate, + Stream *maskStr, int maskWidth, int maskHeight, + GfxImageColorMap *maskColorMap, GBool maskInterpolate) { + if (listImages) { + listImage(state, ref, str, width, height, colorMap, interpolate, gFalse, imgImage); + listImage(state, ref, maskStr, maskWidth, height, maskColorMap, maskInterpolate, gFalse, imgSmask); + } else { + drawImage(state, ref, str, width, height, colorMap, interpolate, NULL, gFalse); + drawImage(state, ref, maskStr, maskWidth, maskHeight, + maskColorMap, maskInterpolate, NULL, gFalse); + } +} diff --git a/utils/ImageOutputDev.h b/utils/ImageOutputDev.h new file mode 100644 index 00000000..6201a249 --- /dev/null +++ b/utils/ImageOutputDev.h @@ -0,0 +1,148 @@ +//======================================================================== +// +// ImageOutputDev.h +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2006 Rainer Keller <class321@gmx.de> +// Copyright (C) 2008 Timothy Lee <timothy.lee@siriushk.com> +// Copyright (C) 2009 Carlos Garcia Campos <carlosgc@gnome.org> +// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de> +// Copyright (C) 2012 
Adrian Johnson <ajohnson@redneon.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#ifndef IMAGEOUTPUTDEV_H +#define IMAGEOUTPUTDEV_H + +#include "poppler/poppler-config.h" + +#ifdef USE_GCC_PRAGMAS +#pragma interface +#endif + +#include <stdio.h> +#include "goo/gtypes.h" +#include "OutputDev.h" + +class GfxState; + +//------------------------------------------------------------------------ +// ImageOutputDev +//------------------------------------------------------------------------ + +class ImageOutputDev: public OutputDev { +public: + // Kind of image reported through listImage(). + enum ImageType { + imgImage, + imgStencil, + imgMask, + imgSmask + }; + + // Create an OutputDev which will write images to files named + // <fileRoot>-NNN.<type> or <fileRoot>-PPP-NNN.<type>, if + // <pageNames> is set. Normally, all images are written as PBM + // (.pbm) or PPM (.ppm) files. If <dumpJPEG> is set, JPEG images + // are written as JPEG (.jpg) files. If <listImages> is set, images + // are listed instead of being written. + ImageOutputDev(char *fileRootA, GBool pageNamesA, GBool dumpJPEGA, GBool listImagesA); + + // Destructor. + virtual ~ImageOutputDev(); + + // Check if file was successfully created. + virtual GBool isOk() { return ok; } + + // Does this device use tilingPatternFill()? If this returns false, + // tiling pattern fills will be reduced to a series of other drawing + // operations. + virtual GBool useTilingPatternFill() { return gTrue; } + + // Does this device use beginType3Char/endType3Char? Otherwise, + // text in Type 3 fonts will be drawn with drawChar/drawString. + virtual GBool interpretType3Chars() { return gFalse; } + + // Does this device need non-text content? 
+ virtual GBool needNonText() { return gTrue; } + + // Start a page + virtual void startPage(int pageNumA, GfxState *state) + { pageNum = pageNumA; } + + //---- get info about output device + + // Does this device use upside-down coordinates? + // (Upside-down means (0,0) is the top left corner of the page.) + virtual GBool upsideDown() { return gTrue; } + + // Does this device use drawChar() or drawString()? + virtual GBool useDrawChar() { return gFalse; } + + //----- path painting + virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str, + double *pmat, int paintType, int tilingType, Dict *resDict, + double *mat, double *bbox, + int x0, int y0, int x1, int y1, + double xStep, double yStep); + + //----- image drawing + virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool interpolate, GBool inlineImg); + virtual void drawImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg); + virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, + Stream *maskStr, int maskWidth, int maskHeight, + GBool maskInvert, GBool maskInterpolate); + virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, + Stream *maskStr, + int maskWidth, int maskHeight, + GfxImageColorMap *maskColorMap, + GBool maskInterpolate); + +private: + // Sets the output filename with a given file extension + void setFilename(const char *fileExt); + // listImage() reports one image and is used when listImages is set; + // writeMask() and writeImage() dump an image mask / an image to a file. + void listImage(GfxState *state, Object *ref, Stream *str, + int width, int height, + GfxImageColorMap *colorMap, + GBool interpolate, GBool inlineImg, + ImageType imageType); + void writeMask(GfxState *state, Object *ref, Stream *str, + int width, int height, GBool invert, + GBool 
interpolate, GBool inlineImg); + void writeImage(GfxState *state, Object *ref, Stream *str, + int width, int height, GfxImageColorMap *colorMap, + GBool interpolate, int *maskColors, GBool inlineImg); + + + char *fileRoot; // root of output file names + char *fileName; // buffer for output file names + GBool listImages; // list images instead of dumping + GBool dumpJPEG; // set to dump native JPEG files + GBool pageNames; // set to include page number in file names + int pageNum; // current page number (set by startPage()) + int imgNum; // current image number (writeImage() increments it before opening each output file) + GBool ok; // set up ok? +}; + +#endif diff --git a/utils/Makefile.am b/utils/Makefile.am new file mode 100644 index 00000000..ad845c19 --- /dev/null +++ b/utils/Makefile.am @@ -0,0 +1,137 @@ +if BUILD_SPLASH_OUTPUT + +pdftoppm_SOURCES = \ + pdftoppm.cc \ + $(common) + +pdftoppm_binary = pdftoppm + +pdftoppm_manpage = pdftoppm.1 + +endif + +INCLUDES = \ + -I$(top_srcdir) \ + -I$(top_srcdir)/goo \ + -I$(top_srcdir)/utils \ + -I$(top_srcdir)/poppler \ + $(UTILS_CFLAGS) \ + $(FONTCONFIG_CFLAGS) \ + $(PDFTOCAIRO_CFLAGS) + +LDADD = \ + $(top_builddir)/poppler/libpoppler.la \ + $(UTILS_LIBS) \ + $(FONTCONFIG_LIBS) + +if BUILD_CAIRO_OUTPUT + +pdftocairo_SOURCES = \ + pdftocairo.cc \ + $(common) + +if USE_CMS +PDFTOCAIRO_CFLAGS += $(LCMS_CFLAGS) +PDFTOCAIRO_LIBS += $(LCMS_LIBS) +endif + +pdftocairo_LDADD = \ + $(top_builddir)/poppler/libpoppler-cairo.la \ + $(LDADD) $(PDFTOCAIRO_LIBS) + + +pdftocairo_binary = pdftocairo + +pdftocairo_manpage = pdftocairo.1 + +endif + +AM_LDFLAGS = @auto_import_flags@ + +bin_PROGRAMS = \ + pdfdetach \ + pdffonts \ + pdfimages \ + pdfinfo \ + pdftops \ + pdftotext \ + pdftohtml \ + pdfseparate \ + pdfunite \ + $(pdftoppm_binary) \ + $(pdftocairo_binary) + +dist_man1_MANS = \ + pdfdetach.1 \ + pdffonts.1 \ + pdfimages.1 \ + pdfinfo.1 \ + pdftops.1 \ + pdftotext.1 \ + pdftohtml.1 \ + pdfseparate.1 \ + pdfunite.1 \ + $(pdftoppm_manpage) \ + $(pdftocairo_manpage) + +common = parseargs.cc parseargs.h + 
+pdfdetach_SOURCES = \ + pdfdetach.cc \ + $(common) + +pdffonts_SOURCES = \ + pdffonts.cc \ + $(common) + +pdfimages_SOURCES = \ + pdfimages.cc \ + ImageOutputDev.cc \ + ImageOutputDev.h \ + $(common) + +pdfinfo_SOURCES = \ + pdfinfo.cc \ + printencodings.cc \ + printencodings.h \ + $(common) + +pdftops_SOURCES = \ + pdftops.cc \ + $(common) + +pdftotext_SOURCES = \ + pdftotext.cc \ + printencodings.cc \ + printencodings.h \ + $(common) + +pdftohtml_SOURCES = \ + pdftohtml.cc \ + HtmlFonts.cc \ + HtmlFonts.h \ + HtmlLinks.cc \ + HtmlLinks.h \ + HtmlOutputDev.cc \ + HtmlOutputDev.h \ + HtmlUtils.h \ + $(common) + +# HtmlOutputDev uses goo/PNGWriter.h that may depend on libpng header +pdftohtml_CXXFLAGS = $(AM_CXXFLAGS) +if BUILD_LIBPNG +pdftohtml_CXXFLAGS += $(LIBPNG_CFLAGS) +endif + +pdfseparate_SOURCES = \ + pdfseparate.cc \ + $(common) + +pdfunite_SOURCES = \ + pdfunite.cc \ + $(common) + +# Yay, automake! It should be able to figure out that it has to dist +# pdftoppm.1, but nooo. So we just add it here. + +EXTRA_DIST = pdf2xml.dtd pdftoppm.1 diff --git a/utils/Makefile.in b/utils/Makefile.in new file mode 100644 index 00000000..cfda1da2 --- /dev/null +++ b/utils/Makefile.in @@ -0,0 +1,1052 @@ +# Makefile.in generated by automake 1.11.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +VPATH = @srcdir@ +am__make_dryrun = \ + { \ + am__dry=no; \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \ + | grep '^AM OK$$' >/dev/null || am__dry=yes;; \ + *) \ + for am__flg in $$MAKEFLAGS; do \ + case $$am__flg in \ + *=*|--*) ;; \ + *n*) am__dry=yes; break;; \ + esac; \ + done;; \ + esac; \ + test $$am__dry = yes; \ + } +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@am__append_1 = $(LCMS_CFLAGS) +@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@am__append_2 = $(LCMS_LIBS) +bin_PROGRAMS = pdfdetach$(EXEEXT) pdffonts$(EXEEXT) pdfimages$(EXEEXT) \ + pdfinfo$(EXEEXT) pdftops$(EXEEXT) pdftotext$(EXEEXT) \ + pdftohtml$(EXEEXT) pdfseparate$(EXEEXT) pdfunite$(EXEEXT) \ + $(am__EXEEXT_1) $(am__EXEEXT_2) +@BUILD_LIBPNG_TRUE@am__append_3 = $(LIBPNG_CFLAGS) +subdir = utils +DIST_COMMON = $(dist_man1_MANS) $(srcdir)/Makefile.am \ + $(srcdir)/Makefile.in +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/acx_pthread.m4 \ + $(top_srcdir)/m4/define-dir.m4 $(top_srcdir)/m4/gtk-doc.m4 \ + $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/introspection.m4 \ + $(top_srcdir)/m4/libjpeg.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + 
$(ACLOCAL_M4) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h \ + $(top_builddir)/poppler/poppler-config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +@BUILD_SPLASH_OUTPUT_TRUE@am__EXEEXT_1 = pdftoppm$(EXEEXT) +@BUILD_CAIRO_OUTPUT_TRUE@am__EXEEXT_2 = pdftocairo$(EXEEXT) +am__installdirs = "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)" +PROGRAMS = $(bin_PROGRAMS) +am__objects_1 = parseargs.$(OBJEXT) +am_pdfdetach_OBJECTS = pdfdetach.$(OBJEXT) $(am__objects_1) +pdfdetach_OBJECTS = $(am_pdfdetach_OBJECTS) +pdfdetach_LDADD = $(LDADD) +am__DEPENDENCIES_1 = +pdfdetach_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am_pdffonts_OBJECTS = pdffonts.$(OBJEXT) $(am__objects_1) +pdffonts_OBJECTS = $(am_pdffonts_OBJECTS) +pdffonts_LDADD = $(LDADD) +pdffonts_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am_pdfimages_OBJECTS = pdfimages.$(OBJEXT) ImageOutputDev.$(OBJEXT) \ + $(am__objects_1) +pdfimages_OBJECTS = $(am_pdfimages_OBJECTS) +pdfimages_LDADD = $(LDADD) +pdfimages_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am_pdfinfo_OBJECTS = pdfinfo.$(OBJEXT) printencodings.$(OBJEXT) \ + $(am__objects_1) +pdfinfo_OBJECTS = $(am_pdfinfo_OBJECTS) +pdfinfo_LDADD = $(LDADD) +pdfinfo_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am_pdfseparate_OBJECTS = pdfseparate.$(OBJEXT) $(am__objects_1) +pdfseparate_OBJECTS = $(am_pdfseparate_OBJECTS) +pdfseparate_LDADD = $(LDADD) +pdfseparate_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am__pdftocairo_SOURCES_DIST = pdftocairo.cc parseargs.cc parseargs.h +@BUILD_CAIRO_OUTPUT_TRUE@am_pdftocairo_OBJECTS = pdftocairo.$(OBJEXT) \ +@BUILD_CAIRO_OUTPUT_TRUE@ $(am__objects_1) +pdftocairo_OBJECTS = $(am_pdftocairo_OBJECTS) +am__DEPENDENCIES_2 = 
$(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@am__DEPENDENCIES_3 = \ +@BUILD_CAIRO_OUTPUT_TRUE@@USE_CMS_TRUE@ $(am__DEPENDENCIES_1) +am__DEPENDENCIES_4 = $(am__DEPENDENCIES_3) +@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_DEPENDENCIES = $(top_builddir)/poppler/libpoppler-cairo.la \ +@BUILD_CAIRO_OUTPUT_TRUE@ $(am__DEPENDENCIES_2) \ +@BUILD_CAIRO_OUTPUT_TRUE@ $(am__DEPENDENCIES_4) +am__objects_2 = pdftohtml-parseargs.$(OBJEXT) +am_pdftohtml_OBJECTS = pdftohtml-pdftohtml.$(OBJEXT) \ + pdftohtml-HtmlFonts.$(OBJEXT) pdftohtml-HtmlLinks.$(OBJEXT) \ + pdftohtml-HtmlOutputDev.$(OBJEXT) $(am__objects_2) +pdftohtml_OBJECTS = $(am_pdftohtml_OBJECTS) +pdftohtml_LDADD = $(LDADD) +pdftohtml_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +pdftohtml_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(pdftohtml_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +am__pdftoppm_SOURCES_DIST = pdftoppm.cc parseargs.cc parseargs.h +@BUILD_SPLASH_OUTPUT_TRUE@am_pdftoppm_OBJECTS = pdftoppm.$(OBJEXT) \ +@BUILD_SPLASH_OUTPUT_TRUE@ $(am__objects_1) +pdftoppm_OBJECTS = $(am_pdftoppm_OBJECTS) +pdftoppm_LDADD = $(LDADD) +pdftoppm_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am_pdftops_OBJECTS = pdftops.$(OBJEXT) $(am__objects_1) +pdftops_OBJECTS = $(am_pdftops_OBJECTS) +pdftops_LDADD = $(LDADD) +pdftops_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am_pdftotext_OBJECTS = pdftotext.$(OBJEXT) printencodings.$(OBJEXT) \ + $(am__objects_1) +pdftotext_OBJECTS = $(am_pdftotext_OBJECTS) +pdftotext_LDADD = $(LDADD) +pdftotext_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + $(am__DEPENDENCIES_1) +am_pdfunite_OBJECTS = pdfunite.$(OBJEXT) $(am__objects_1) +pdfunite_OBJECTS = $(am_pdfunite_OBJECTS) +pdfunite_LDADD = $(LDADD) +pdfunite_DEPENDENCIES = $(top_builddir)/poppler/libpoppler.la \ + 
$(am__DEPENDENCIES_1) +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) -I$(top_builddir)/poppler +depcomp = $(SHELL) $(top_srcdir)/depcomp +am__depfiles_maybe = depfiles +am__mv = mv -f +CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) +LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CXXFLAGS) $(CXXFLAGS) +AM_V_CXX = $(am__v_CXX_@AM_V@) +am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@) +am__v_CXX_0 = @echo " CXX " $@; +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +CXXLD = $(CXX) +CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ + $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CXXLD = $(am__v_CXXLD_@AM_V@) +am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@) +am__v_CXXLD_0 = @echo " CXXLD " $@; +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +SOURCES = $(pdfdetach_SOURCES) $(pdffonts_SOURCES) \ + $(pdfimages_SOURCES) $(pdfinfo_SOURCES) $(pdfseparate_SOURCES) \ + $(pdftocairo_SOURCES) $(pdftohtml_SOURCES) 
$(pdftoppm_SOURCES) \ + $(pdftops_SOURCES) $(pdftotext_SOURCES) $(pdfunite_SOURCES) +DIST_SOURCES = $(pdfdetach_SOURCES) $(pdffonts_SOURCES) \ + $(pdfimages_SOURCES) $(pdfinfo_SOURCES) $(pdfseparate_SOURCES) \ + $(am__pdftocairo_SOURCES_DIST) $(pdftohtml_SOURCES) \ + $(am__pdftoppm_SOURCES_DIST) $(pdftops_SOURCES) \ + $(pdftotext_SOURCES) $(pdfunite_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! 
-r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +man1dir = $(mandir)/man1 +NROFF = nroff +MANS = $(dist_man1_MANS) +ETAGS = etags +CTAGS = ctags +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CAIRO_CFLAGS = @CAIRO_CFLAGS@ +CAIRO_FEATURE = @CAIRO_FEATURE@ +CAIRO_LIBS = @CAIRO_LIBS@ +CAIRO_REQ = @CAIRO_REQ@ +CAIRO_VERSION = @CAIRO_VERSION@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CXX = @CXX@ +CXXCPP = @CXXCPP@ +CXXDEPMODE = @CXXDEPMODE@ +CXXFLAGS = @CXXFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FONTCONFIG_CFLAGS = @FONTCONFIG_CFLAGS@ +FONTCONFIG_LIBS = @FONTCONFIG_LIBS@ +FREETYPE_CFLAGS = @FREETYPE_CFLAGS@ +FREETYPE_CONFIG = @FREETYPE_CONFIG@ +FREETYPE_LIBS = @FREETYPE_LIBS@ +GLIB_MKENUMS = @GLIB_MKENUMS@ +GLIB_REQ = @GLIB_REQ@ +GLIB_REQUIRED = @GLIB_REQUIRED@ +GREP = @GREP@ +GTKDOC_CHECK = @GTKDOC_CHECK@ +GTKDOC_DEPS_CFLAGS = @GTKDOC_DEPS_CFLAGS@ +GTKDOC_DEPS_LIBS = @GTKDOC_DEPS_LIBS@ +GTKDOC_MKPDF = @GTKDOC_MKPDF@ +GTKDOC_REBASE = @GTKDOC_REBASE@ +GTK_TEST_CFLAGS = @GTK_TEST_CFLAGS@ +GTK_TEST_LIBS = @GTK_TEST_LIBS@ +HTML_DIR = @HTML_DIR@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTROSPECTION_CFLAGS = @INTROSPECTION_CFLAGS@ +INTROSPECTION_COMPILER = @INTROSPECTION_COMPILER@ +INTROSPECTION_GENERATE = @INTROSPECTION_GENERATE@ +INTROSPECTION_GIRDIR = @INTROSPECTION_GIRDIR@ +INTROSPECTION_LIBS = @INTROSPECTION_LIBS@ 
+INTROSPECTION_MAKEFILE = @INTROSPECTION_MAKEFILE@ +INTROSPECTION_SCANNER = @INTROSPECTION_SCANNER@ +INTROSPECTION_TYPELIBDIR = @INTROSPECTION_TYPELIBDIR@ +LCMS_CFLAGS = @LCMS_CFLAGS@ +LCMS_LIBS = @LCMS_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBCURL_CFLAGS = @LIBCURL_CFLAGS@ +LIBCURL_LIBS = @LIBCURL_LIBS@ +LIBICONV = @LIBICONV@ +LIBJPEG_CFLAGS = @LIBJPEG_CFLAGS@ +LIBJPEG_LIBS = @LIBJPEG_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBOPENJPEG_CFLAGS = @LIBOPENJPEG_CFLAGS@ +LIBOPENJPEG_LIBS = @LIBOPENJPEG_LIBS@ +LIBPNG_CFLAGS = @LIBPNG_CFLAGS@ +LIBPNG_LIBS = @LIBPNG_LIBS@ +LIBS = @LIBS@ +LIBTIFF_CFLAGS = @LIBTIFF_CFLAGS@ +LIBTIFF_CFLAGSS = @LIBTIFF_CFLAGSS@ +LIBTIFF_LIBS = @LIBTIFF_LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBOBJS = @LTLIBOBJS@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MOCQT4 = @MOCQT4@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PC_REQUIRES = @PC_REQUIRES@ +PC_REQUIRES_PRIVATE = @PC_REQUIRES_PRIVATE@ +PDFTOCAIRO_CFLAGS = @PDFTOCAIRO_CFLAGS@ $(am__append_1) +PDFTOCAIRO_LIBS = @PDFTOCAIRO_LIBS@ $(am__append_2) +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POPPLER_DATADIR = @POPPLER_DATADIR@ +POPPLER_GLIB_CFLAGS = @POPPLER_GLIB_CFLAGS@ +POPPLER_GLIB_DISABLE_DEPRECATED = @POPPLER_GLIB_DISABLE_DEPRECATED@ +POPPLER_GLIB_DISABLE_SINGLE_INCLUDES = @POPPLER_GLIB_DISABLE_SINGLE_INCLUDES@ +POPPLER_GLIB_LIBS = @POPPLER_GLIB_LIBS@ +POPPLER_MAJOR_VERSION = @POPPLER_MAJOR_VERSION@ +POPPLER_MICRO_VERSION = @POPPLER_MICRO_VERSION@ +POPPLER_MINOR_VERSION = @POPPLER_MINOR_VERSION@ +POPPLER_QT4_CFLAGS = 
@POPPLER_QT4_CFLAGS@ +POPPLER_QT4_CXXFLAGS = @POPPLER_QT4_CXXFLAGS@ +POPPLER_QT4_LIBS = @POPPLER_QT4_LIBS@ +POPPLER_QT4_TEST_CFLAGS = @POPPLER_QT4_TEST_CFLAGS@ +POPPLER_QT4_TEST_LIBS = @POPPLER_QT4_TEST_LIBS@ +POPPLER_VERSION = @POPPLER_VERSION@ +PTHREAD_CC = @PTHREAD_CC@ +PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TESTDATADIR = @TESTDATADIR@ +VERSION = @VERSION@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +ZLIB_LIBS = @ZLIB_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_CXX = @ac_ct_CXX@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +acx_pthread_config = @acx_pthread_config@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +auto_import_flags = @auto_import_flags@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +create_shared_lib = @create_shared_lib@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ 
+target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +win32_libs = @win32_libs@ +@BUILD_SPLASH_OUTPUT_TRUE@pdftoppm_SOURCES = \ +@BUILD_SPLASH_OUTPUT_TRUE@ pdftoppm.cc \ +@BUILD_SPLASH_OUTPUT_TRUE@ $(common) + +@BUILD_SPLASH_OUTPUT_TRUE@pdftoppm_binary = pdftoppm +@BUILD_SPLASH_OUTPUT_TRUE@pdftoppm_manpage = pdftoppm.1 +INCLUDES = \ + -I$(top_srcdir) \ + -I$(top_srcdir)/goo \ + -I$(top_srcdir)/utils \ + -I$(top_srcdir)/poppler \ + $(UTILS_CFLAGS) \ + $(FONTCONFIG_CFLAGS) \ + $(PDFTOCAIRO_CFLAGS) + +LDADD = \ + $(top_builddir)/poppler/libpoppler.la \ + $(UTILS_LIBS) \ + $(FONTCONFIG_LIBS) + +@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_SOURCES = \ +@BUILD_CAIRO_OUTPUT_TRUE@ pdftocairo.cc \ +@BUILD_CAIRO_OUTPUT_TRUE@ $(common) + +@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_LDADD = \ +@BUILD_CAIRO_OUTPUT_TRUE@ $(top_builddir)/poppler/libpoppler-cairo.la \ +@BUILD_CAIRO_OUTPUT_TRUE@ $(LDADD) $(PDFTOCAIRO_LIBS) + +@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_binary = pdftocairo +@BUILD_CAIRO_OUTPUT_TRUE@pdftocairo_manpage = pdftocairo.1 +AM_LDFLAGS = @auto_import_flags@ +dist_man1_MANS = \ + pdfdetach.1 \ + pdffonts.1 \ + pdfimages.1 \ + pdfinfo.1 \ + pdftops.1 \ + pdftotext.1 \ + pdftohtml.1 \ + pdfseparate.1 \ + pdfunite.1 \ + $(pdftoppm_manpage) \ + $(pdftocairo_manpage) + +common = parseargs.cc parseargs.h +pdfdetach_SOURCES = \ + pdfdetach.cc \ + $(common) + +pdffonts_SOURCES = \ + pdffonts.cc \ + $(common) + +pdfimages_SOURCES = \ + pdfimages.cc \ + ImageOutputDev.cc \ + ImageOutputDev.h \ + $(common) + +pdfinfo_SOURCES = \ + pdfinfo.cc \ + printencodings.cc \ + printencodings.h \ + $(common) + +pdftops_SOURCES = \ + pdftops.cc \ + $(common) + +pdftotext_SOURCES = \ + pdftotext.cc \ + printencodings.cc \ + printencodings.h \ + $(common) + +pdftohtml_SOURCES = \ + pdftohtml.cc \ + HtmlFonts.cc \ + HtmlFonts.h \ + HtmlLinks.cc \ + HtmlLinks.h \ + HtmlOutputDev.cc \ + HtmlOutputDev.h \ + HtmlUtils.h \ + $(common) + + 
+# HtmlOutputDev uses goo/PNGWriter.h that may depend on libpng header +pdftohtml_CXXFLAGS = $(AM_CXXFLAGS) $(am__append_3) +pdfseparate_SOURCES = \ + pdfseparate.cc \ + $(common) + +pdfunite_SOURCES = \ + pdfunite.cc \ + $(common) + + +# Yay, automake! It should be able to figure out that it has to dist +# pdftoppm.1, but nooo. So we just add it here. +EXTRA_DIST = pdf2xml.dtd pdftoppm.1 +all: all-am + +.SUFFIXES: +.SUFFIXES: .cc .lo .o .obj +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign utils/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign utils/Makefile +.PRECIOUS: Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): +install-binPROGRAMS: $(bin_PROGRAMS) + @$(NORMAL_INSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \ + fi; \ + for p in $$list; do echo "$$p $$p"; done | \ + sed 's/$(EXEEXT)$$//' | \ + while read p p1; do if 
test -f $$p || test -f $$p1; \ + then echo "$$p"; echo "$$p"; else :; fi; \ + done | \ + sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ + -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ + sed 'N;N;N;s,\n, ,g' | \ + $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ + { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ + if ($$2 == $$4) files[d] = files[d] " " $$1; \ + else { print "f", $$3 "/" $$4, $$1; } } \ + END { for (d in files) print "f", d, files[d] }' | \ + while read type dir files; do \ + if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ + test -z "$$files" || { \ + echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ + } \ + ; done + +uninstall-binPROGRAMS: + @$(NORMAL_UNINSTALL) + @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ + files=`for p in $$list; do echo "$$p"; done | \ + sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ + -e 's/$$/$(EXEEXT)/' `; \ + test -n "$$list" || exit 0; \ + echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ + cd "$(DESTDIR)$(bindir)" && rm -f $$files + +clean-binPROGRAMS: + @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ + echo " rm -f" $$list; \ + rm -f $$list || exit $$?; \ + test -n "$(EXEEXT)" || exit 0; \ + list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \ + echo " rm -f" $$list; \ + rm -f $$list +pdfdetach$(EXEEXT): $(pdfdetach_OBJECTS) $(pdfdetach_DEPENDENCIES) $(EXTRA_pdfdetach_DEPENDENCIES) + @rm -f pdfdetach$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdfdetach_OBJECTS) $(pdfdetach_LDADD) $(LIBS) +pdffonts$(EXEEXT): $(pdffonts_OBJECTS) $(pdffonts_DEPENDENCIES) $(EXTRA_pdffonts_DEPENDENCIES) + @rm -f pdffonts$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdffonts_OBJECTS) $(pdffonts_LDADD) $(LIBS) 
+pdfimages$(EXEEXT): $(pdfimages_OBJECTS) $(pdfimages_DEPENDENCIES) $(EXTRA_pdfimages_DEPENDENCIES) + @rm -f pdfimages$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdfimages_OBJECTS) $(pdfimages_LDADD) $(LIBS) +pdfinfo$(EXEEXT): $(pdfinfo_OBJECTS) $(pdfinfo_DEPENDENCIES) $(EXTRA_pdfinfo_DEPENDENCIES) + @rm -f pdfinfo$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdfinfo_OBJECTS) $(pdfinfo_LDADD) $(LIBS) +pdfseparate$(EXEEXT): $(pdfseparate_OBJECTS) $(pdfseparate_DEPENDENCIES) $(EXTRA_pdfseparate_DEPENDENCIES) + @rm -f pdfseparate$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdfseparate_OBJECTS) $(pdfseparate_LDADD) $(LIBS) +pdftocairo$(EXEEXT): $(pdftocairo_OBJECTS) $(pdftocairo_DEPENDENCIES) $(EXTRA_pdftocairo_DEPENDENCIES) + @rm -f pdftocairo$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdftocairo_OBJECTS) $(pdftocairo_LDADD) $(LIBS) +pdftohtml$(EXEEXT): $(pdftohtml_OBJECTS) $(pdftohtml_DEPENDENCIES) $(EXTRA_pdftohtml_DEPENDENCIES) + @rm -f pdftohtml$(EXEEXT) + $(AM_V_CXXLD)$(pdftohtml_LINK) $(pdftohtml_OBJECTS) $(pdftohtml_LDADD) $(LIBS) +pdftoppm$(EXEEXT): $(pdftoppm_OBJECTS) $(pdftoppm_DEPENDENCIES) $(EXTRA_pdftoppm_DEPENDENCIES) + @rm -f pdftoppm$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdftoppm_OBJECTS) $(pdftoppm_LDADD) $(LIBS) +pdftops$(EXEEXT): $(pdftops_OBJECTS) $(pdftops_DEPENDENCIES) $(EXTRA_pdftops_DEPENDENCIES) + @rm -f pdftops$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdftops_OBJECTS) $(pdftops_LDADD) $(LIBS) +pdftotext$(EXEEXT): $(pdftotext_OBJECTS) $(pdftotext_DEPENDENCIES) $(EXTRA_pdftotext_DEPENDENCIES) + @rm -f pdftotext$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdftotext_OBJECTS) $(pdftotext_LDADD) $(LIBS) +pdfunite$(EXEEXT): $(pdfunite_OBJECTS) $(pdfunite_DEPENDENCIES) $(EXTRA_pdfunite_DEPENDENCIES) + @rm -f pdfunite$(EXEEXT) + $(AM_V_CXXLD)$(CXXLINK) $(pdfunite_OBJECTS) $(pdfunite_LDADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ImageOutputDev.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/parseargs.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfdetach.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdffonts.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfimages.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfinfo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfseparate.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftocairo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-HtmlFonts.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-HtmlLinks.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-HtmlOutputDev.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-parseargs.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftohtml-pdftohtml.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftoppm.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftops.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdftotext.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pdfunite.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/printencodings.Po@am__quote@ + +.cc.o: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $< + +.cc.obj: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' 
object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.cc.lo: +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(LTCXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $< + +pdftohtml-pdftohtml.o: pdftohtml.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-pdftohtml.o -MD -MP -MF $(DEPDIR)/pdftohtml-pdftohtml.Tpo -c -o pdftohtml-pdftohtml.o `test -f 'pdftohtml.cc' || echo '$(srcdir)/'`pdftohtml.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-pdftohtml.Tpo $(DEPDIR)/pdftohtml-pdftohtml.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='pdftohtml.cc' object='pdftohtml-pdftohtml.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-pdftohtml.o `test -f 'pdftohtml.cc' || echo '$(srcdir)/'`pdftohtml.cc + +pdftohtml-pdftohtml.obj: pdftohtml.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-pdftohtml.obj -MD -MP -MF $(DEPDIR)/pdftohtml-pdftohtml.Tpo -c -o pdftohtml-pdftohtml.obj `if test -f 'pdftohtml.cc'; then $(CYGPATH_W) 'pdftohtml.cc'; else $(CYGPATH_W) '$(srcdir)/pdftohtml.cc'; fi` 
+@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-pdftohtml.Tpo $(DEPDIR)/pdftohtml-pdftohtml.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='pdftohtml.cc' object='pdftohtml-pdftohtml.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-pdftohtml.obj `if test -f 'pdftohtml.cc'; then $(CYGPATH_W) 'pdftohtml.cc'; else $(CYGPATH_W) '$(srcdir)/pdftohtml.cc'; fi` + +pdftohtml-HtmlFonts.o: HtmlFonts.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlFonts.o -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlFonts.Tpo -c -o pdftohtml-HtmlFonts.o `test -f 'HtmlFonts.cc' || echo '$(srcdir)/'`HtmlFonts.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlFonts.Tpo $(DEPDIR)/pdftohtml-HtmlFonts.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlFonts.cc' object='pdftohtml-HtmlFonts.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlFonts.o `test -f 'HtmlFonts.cc' || echo '$(srcdir)/'`HtmlFonts.cc + +pdftohtml-HtmlFonts.obj: HtmlFonts.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlFonts.obj -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlFonts.Tpo -c -o pdftohtml-HtmlFonts.obj `if test -f 'HtmlFonts.cc'; then $(CYGPATH_W) 'HtmlFonts.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlFonts.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/pdftohtml-HtmlFonts.Tpo $(DEPDIR)/pdftohtml-HtmlFonts.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlFonts.cc' object='pdftohtml-HtmlFonts.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlFonts.obj `if test -f 'HtmlFonts.cc'; then $(CYGPATH_W) 'HtmlFonts.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlFonts.cc'; fi` + +pdftohtml-HtmlLinks.o: HtmlLinks.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlLinks.o -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlLinks.Tpo -c -o pdftohtml-HtmlLinks.o `test -f 'HtmlLinks.cc' || echo '$(srcdir)/'`HtmlLinks.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlLinks.Tpo $(DEPDIR)/pdftohtml-HtmlLinks.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlLinks.cc' object='pdftohtml-HtmlLinks.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlLinks.o `test -f 'HtmlLinks.cc' || echo '$(srcdir)/'`HtmlLinks.cc + +pdftohtml-HtmlLinks.obj: HtmlLinks.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlLinks.obj -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlLinks.Tpo -c -o pdftohtml-HtmlLinks.obj `if test -f 'HtmlLinks.cc'; then $(CYGPATH_W) 'HtmlLinks.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlLinks.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlLinks.Tpo 
$(DEPDIR)/pdftohtml-HtmlLinks.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlLinks.cc' object='pdftohtml-HtmlLinks.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlLinks.obj `if test -f 'HtmlLinks.cc'; then $(CYGPATH_W) 'HtmlLinks.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlLinks.cc'; fi` + +pdftohtml-HtmlOutputDev.o: HtmlOutputDev.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlOutputDev.o -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo -c -o pdftohtml-HtmlOutputDev.o `test -f 'HtmlOutputDev.cc' || echo '$(srcdir)/'`HtmlOutputDev.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo $(DEPDIR)/pdftohtml-HtmlOutputDev.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlOutputDev.cc' object='pdftohtml-HtmlOutputDev.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlOutputDev.o `test -f 'HtmlOutputDev.cc' || echo '$(srcdir)/'`HtmlOutputDev.cc + +pdftohtml-HtmlOutputDev.obj: HtmlOutputDev.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-HtmlOutputDev.obj -MD -MP -MF $(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo -c -o pdftohtml-HtmlOutputDev.obj `if test -f 'HtmlOutputDev.cc'; then $(CYGPATH_W) 'HtmlOutputDev.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlOutputDev.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/pdftohtml-HtmlOutputDev.Tpo $(DEPDIR)/pdftohtml-HtmlOutputDev.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='HtmlOutputDev.cc' object='pdftohtml-HtmlOutputDev.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-HtmlOutputDev.obj `if test -f 'HtmlOutputDev.cc'; then $(CYGPATH_W) 'HtmlOutputDev.cc'; else $(CYGPATH_W) '$(srcdir)/HtmlOutputDev.cc'; fi` + +pdftohtml-parseargs.o: parseargs.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-parseargs.o -MD -MP -MF $(DEPDIR)/pdftohtml-parseargs.Tpo -c -o pdftohtml-parseargs.o `test -f 'parseargs.cc' || echo '$(srcdir)/'`parseargs.cc +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/pdftohtml-parseargs.Tpo $(DEPDIR)/pdftohtml-parseargs.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='parseargs.cc' object='pdftohtml-parseargs.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-parseargs.o `test -f 'parseargs.cc' || echo '$(srcdir)/'`parseargs.cc + +pdftohtml-parseargs.obj: parseargs.cc +@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -MT pdftohtml-parseargs.obj -MD -MP -MF $(DEPDIR)/pdftohtml-parseargs.Tpo -c -o pdftohtml-parseargs.obj `if test -f 'parseargs.cc'; then $(CYGPATH_W) 'parseargs.cc'; else $(CYGPATH_W) '$(srcdir)/parseargs.cc'; fi` +@am__fastdepCXX_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/pdftohtml-parseargs.Tpo $(DEPDIR)/pdftohtml-parseargs.Po +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='parseargs.cc' object='pdftohtml-parseargs.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(pdftohtml_CXXFLAGS) $(CXXFLAGS) -c -o pdftohtml-parseargs.obj `if test -f 'parseargs.cc'; then $(CYGPATH_W) 'parseargs.cc'; else $(CYGPATH_W) '$(srcdir)/parseargs.cc'; fi` + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-man1: $(dist_man1_MANS) + @$(NORMAL_INSTALL) + @list1='$(dist_man1_MANS)'; \ + list2=''; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list='$(dist_man1_MANS)'; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in 
$$list; do echo "$$i"; done; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) + +ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + mkid -fID $$unique +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + set x; \ + here=`pwd`; \ + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: CTAGS +CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ + $(TAGS_FILES) $(LISP) + list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | \ + $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in files) print i; }; }'`; \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(DISTFILES) + @list='$(MANS)'; if test -n "$$list"; then \ + list=`for p in 
$$list; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \ + if test -n "$$list" && \ + grep 'ab help2man is required to generate this page' $$list >/dev/null; then \ + echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \ + grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \ + echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \ + echo " typically \`make maintainer-clean' will remove them" >&2; \ + exit 1; \ + else :; fi; \ + else :; fi + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(PROGRAMS) $(MANS) +installdirs: + for dir in "$(DESTDIR)$(bindir)" "$(DESTDIR)$(man1dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-man + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-binPROGRAMS + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -rf ./$(DEPDIR) + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-binPROGRAMS uninstall-man + +uninstall-man: uninstall-man1 + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS all all-am check check-am clean clean-binPROGRAMS \ + clean-generic clean-libtool ctags distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-binPROGRAMS install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-man1 install-pdf install-pdf-am install-ps \ + install-ps-am install-strip installcheck installcheck-am \ + installdirs maintainer-clean maintainer-clean-generic \ + mostlyclean mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ + uninstall-am uninstall-binPROGRAMS uninstall-man \ + uninstall-man1 + + +# Tell versions [3.59,3.63) of GNU make to not export all 
variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/utils/parseargs.cc b/utils/parseargs.cc new file mode 100644 index 00000000..a457a885 --- /dev/null +++ b/utils/parseargs.cc @@ -0,0 +1,220 @@ +/* + * parseargs.h + * + * Command line argument parser. + * + * Copyright 1996-2003 Glyph & Cog, LLC + */ + +/*======================================================================== + + Modified under the Poppler project - http://poppler.freedesktop.org + + Poppler project changes to this file are under the GPLv2 or later license + + All changes made under the Poppler project to this file are licensed + under GPL version 2 or later + + Copyright (C) 2008, 2009 Albert Astals Cid <aacid@kde.org> + Copyright (C) 2011, 2012 Adrian Johnson <ajohnson@redneon.com> + + To see a description of the changes please see the Changelog file that + came with your tarball or type make ChangeLog if you are building from git + +========================================================================*/ + +#include <stdio.h> +#include <stddef.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> +#include "parseargs.h" + +#include "goo/gstrtod.h" +#include "goo/GooString.h" + +static const ArgDesc *findArg(const ArgDesc *args, char *arg); +static GBool grabArg(const ArgDesc *arg, int i, int *argc, char *argv[]); + +GBool parseArgs(const ArgDesc *args, int *argc, char *argv[]) { + const ArgDesc *arg; + int i, j; + GBool ok; + + ok = gTrue; + i = 1; + while (i < *argc) { + if (!strcmp(argv[i], "--")) { + --*argc; + for (j = i; j < *argc; ++j) + argv[j] = argv[j+1]; + break; + } else if ((arg = findArg(args, argv[i]))) { + if (!grabArg(arg, i, argc, argv)) + ok = gFalse; + } else { + ++i; + } + } + return ok; +} + +void printUsage(const char *program, const char *otherArgs, const ArgDesc *args) { + const ArgDesc *arg; + const char *typ; + int w, w1; + + w = 0; + for (arg = args; arg->arg; ++arg) { + if ((w1 = strlen(arg->arg)) > w) + 
w = w1; + } + + fprintf(stderr, "Usage: %s [options]", program); + if (otherArgs) + fprintf(stderr, " %s", otherArgs); + fprintf(stderr, "\n"); + + for (arg = args; arg->arg; ++arg) { + fprintf(stderr, " %s", arg->arg); + w1 = 9 + w - strlen(arg->arg); + switch (arg->kind) { + case argInt: + case argIntDummy: + typ = " <int>"; + break; + case argFP: + case argFPDummy: + typ = " <fp>"; + break; + case argString: + case argStringDummy: + case argGooString: + typ = " <string>"; + break; + case argFlag: + case argFlagDummy: + default: + typ = ""; + break; + } + fprintf(stderr, "%-*s", w1, typ); + if (arg->usage) + fprintf(stderr, ": %s", arg->usage); + fprintf(stderr, "\n"); + } +} + +static const ArgDesc *findArg(const ArgDesc *args, char *arg) { + const ArgDesc *p; + + for (p = args; p->arg; ++p) { + if (p->kind < argFlagDummy && !strcmp(p->arg, arg)) + return p; + } + return NULL; +} + +static GBool grabArg(const ArgDesc *arg, int i, int *argc, char *argv[]) { + int n; + int j; + GBool ok; + + ok = gTrue; + n = 0; + switch (arg->kind) { + case argFlag: + *(GBool *)arg->val = gTrue; + n = 1; + break; + case argInt: + if (i + 1 < *argc && isInt(argv[i+1])) { + *(int *)arg->val = atoi(argv[i+1]); + n = 2; + } else { + ok = gFalse; + n = 1; + } + break; + case argFP: + if (i + 1 < *argc && isFP(argv[i+1])) { + *(double *)arg->val = gatof(argv[i+1]); + n = 2; + } else { + ok = gFalse; + n = 1; + } + break; + case argString: + if (i + 1 < *argc) { + strncpy((char *)arg->val, argv[i+1], arg->size - 1); + ((char *)arg->val)[arg->size - 1] = '\0'; + n = 2; + } else { + ok = gFalse; + n = 1; + } + break; + case argGooString: + if (i + 1 < *argc) { + ((GooString*)arg->val)->Set(argv[i+1]); + n = 2; + } else { + ok = gFalse; + n = 1; + } + break; + default: + fprintf(stderr, "Internal error in arg table\n"); + n = 1; + break; + } + if (n > 0) { + *argc -= n; + for (j = i; j < *argc; ++j) + argv[j] = argv[j+n]; + } + return ok; +} + +GBool isInt(char *s) { + if (*s == '-' || *s 
== '+') + ++s; + while (isdigit(*s)) + ++s; + if (*s) + return gFalse; + return gTrue; +} + +GBool isFP(char *s) { + int n; + + if (*s == '-' || *s == '+') + ++s; + n = 0; + while (isdigit(*s)) { + ++s; + ++n; + } + if (*s == '.') + ++s; + while (isdigit(*s)) { + ++s; + ++n; + } + if (n > 0 && (*s == 'e' || *s == 'E')) { + ++s; + if (*s == '-' || *s == '+') + ++s; + n = 0; + if (!isdigit(*s)) + return gFalse; + do { + ++s; + } while (isdigit(*s)); + } + if (*s) + return gFalse; + return gTrue; +} diff --git a/utils/parseargs.h b/utils/parseargs.h new file mode 100644 index 00000000..f035fa14 --- /dev/null +++ b/utils/parseargs.h @@ -0,0 +1,88 @@ +/* + * parseargs.h + * + * Command line argument parser. + * + * Copyright 1996-2003 Glyph & Cog, LLC + */ + +/*======================================================================== + + Modified under the Poppler project - http://poppler.freedesktop.org + + All changes made under the Poppler project to this file are licensed + under GPL version 2 or later + + Copyright (C) 2008 Albert Astals Cid <aacid@kde.org> + Copyright (C) 2011 Adrian Johnson <ajohnson@redneon.com> + + To see a description of the changes please see the Changelog file that + came with your tarball or type make ChangeLog if you are building from git + +========================================================================*/ + +#ifndef PARSEARGS_H +#define PARSEARGS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "goo/gtypes.h" + +/* + * Argument kinds. 
+ */ +typedef enum { + argFlag, /* flag (present / not-present) */ + /* [val: GBool *] */ + argInt, /* integer arg */ + /* [val: int *] */ + argFP, /* floating point arg */ + /* [val: double *] */ + argString, /* string arg */ + /* [val: char *] */ + argGooString, /* string arg */ + /* [val: GooString *] */ + /* dummy entries -- these show up in the usage listing only; */ + /* useful for X args, for example */ + argFlagDummy, + argIntDummy, + argFPDummy, + argStringDummy +} ArgKind; + +/* + * Argument descriptor. + */ +typedef struct { + const char *arg; /* the command line switch */ + ArgKind kind; /* kind of arg */ + void *val; /* place to store value */ + int size; /* for argString: size of string */ + const char *usage; /* usage string */ +} ArgDesc; + +/* + * Parse command line. Removes all args which are found in the arg + * descriptor list <args>. Stops parsing if "--" is found (and removes + * it). Returns gFalse if there was an error. + */ +extern GBool parseArgs(const ArgDesc *args, int *argc, char *argv[]); + +/* + * Print usage message, based on arg descriptor list. + */ +extern void printUsage(const char *program, const char *otherArgs, const ArgDesc *args); + +/* + * Check if a string is a valid integer or floating point number. 
+ */ +extern GBool isInt(char *s); +extern GBool isFP(char *s); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/utils/pdf2xml.dtd b/utils/pdf2xml.dtd new file mode 100644 index 00000000..bf7f14f6 --- /dev/null +++ b/utils/pdf2xml.dtd @@ -0,0 +1,49 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!ELEMENT pdf2xml (page+, outline?)> +<!ATTLIST pdf2xml + producer CDATA #REQUIRED + version CDATA #REQUIRED +> +<!ELEMENT page (fontspec*, image*, text*)> +<!ATTLIST page + number CDATA #REQUIRED + position CDATA #REQUIRED + top CDATA #REQUIRED + left CDATA #REQUIRED + height CDATA #REQUIRED + width CDATA #REQUIRED +> +<!ELEMENT fontspec EMPTY> +<!ATTLIST fontspec + id CDATA #REQUIRED + size CDATA #REQUIRED + family CDATA #REQUIRED + color CDATA #REQUIRED +> +<!ELEMENT text (#PCDATA | b | i | a)*> +<!ATTLIST text + top CDATA #REQUIRED + left CDATA #REQUIRED + width CDATA #REQUIRED + height CDATA #REQUIRED + font CDATA #REQUIRED +> +<!ELEMENT b (#PCDATA | i)*> +<!ELEMENT i (#PCDATA | b)*> +<!ELEMENT a (#PCDATA)> +<!ATTLIST a + href CDATA #REQUIRED +> +<!ELEMENT image EMPTY> +<!ATTLIST image + top CDATA #REQUIRED + left CDATA #REQUIRED + width CDATA #REQUIRED + height CDATA #REQUIRED + src CDATA #REQUIRED +> +<!ELEMENT outline (item | outline)*> +<!ELEMENT item (#PCDATA)> +<!ATTLIST item + page CDATA #IMPLIED +> diff --git a/utils/pdfdetach.1 b/utils/pdfdetach.1 new file mode 100644 index 00000000..c80dde72 --- /dev/null +++ b/utils/pdfdetach.1 @@ -0,0 +1,103 @@ +.\" Copyright 2011 Glyph & Cog, LLC +.TH pdfdetach 1 "15 August 2011" +.SH NAME +pdfdetach \- Portable Document Format (PDF) document embedded file +extractor (version 3.03) +.SH SYNOPSIS +.B pdfdetach +[options] +.RI [ PDF-file ] +.SH DESCRIPTION +.B Pdfdetach +lists or extracts embedded files (attachments) from a Portable +Document Format (PDF) file. +.SH CONFIGURATION FILE +Pdfdetach reads a configuration file at startup. It first tries to +find the user's private config file, ~/.xpdfrc. 
If that doesn't +exist, it looks for a system-wide config file, typically +/usr/local/etc/xpdfrc (but this location can be changed when pdfdetach +is built). See the +.BR xpdfrc (5) +man page for details. +.SH OPTIONS +Some of the following options can be set with configuration file +commands. These are listed in square brackets with the description of +the corresponding command line option. +.TP +.B \-list +List all of the embedded files in the PDF file. File names are +converted to the text encoding specified by the "\-enc" switch. +.TP +.BI \-save " number" +Save the specified embedded file. By default, this uses the file name +associated with the embedded file (as printed by the "\-list" switch); +the file name can be changed with the "\-o" switch. +.TP +.BI \-saveall +Save all of the embedded files. This uses the file names associated +with the embedded files (as printed by the "\-list" switch). By +default, the files are saved in the current directory; this can be +changed with the "\-o" switch. +.TP +.BI \-o " path" +Set the file name used when saving an embedded file with the "\-save" +switch, or the directory used by "\-saveall". +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output (embedded file names). The +.I encoding\-name +must be defined with the unicodeMap command (see +.BR xpdfrc (5)). +This defaults to "Latin1" (which is a built-in encoding). +.RB "[config file: " textEncoding ] +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.BI \-cfg " config-file" +Read +.I config-file +in place of ~/.xpdfrc or the system-wide config file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. 
+.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdfdetach software and documentation are copyright 1996-2011 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR pdfimages (1), +.BR pdffonts (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1)
\ No newline at end of file
diff --git a/utils/pdfdetach.cc b/utils/pdfdetach.cc
new file mode 100644
index 00000000..3fbdfb7e
--- /dev/null
+++ b/utils/pdfdetach.cc
@@ -0,0 +1,318 @@
+//========================================================================
+//
+// pdfdetach.cc
+//
+// Copyright 2010 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <string.h>
+#include "goo/gtypes.h"
+#include "goo/gmem.h"
+#include "goo/GooList.h"
+#include "parseargs.h"
+#include "Annot.h"
+#include "GlobalParams.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "FileSpec.h"
+#include "CharTypes.h"
+#include "Catalog.h"
+#include "UnicodeMap.h"
+#include "PDFDocEncoding.h"
+#include "Error.h"
+
+static GBool doList = gFalse;
+static int saveNum = 0;
+static GBool saveAll = gFalse;
+static char savePath[1024] = "";
+static char textEncName[128] = "";
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static char cfgFileName[256] = "";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static ArgDesc argDesc[] = {
+  {"-list", argFlag, &doList, 0,
+   "list all embedded files"},
+  {"-save", argInt, &saveNum, 0,
+   "save the specified embedded file"},
+  {"-saveall", argFlag, &saveAll, 0,
+   "save all embedded files"},
+  {"-o", argString, savePath, sizeof(savePath),
+   "file name for the saved embedded file"},
+  {"-enc", argString, textEncName, sizeof(textEncName),
+   "output text encoding name"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-cfg", argString, cfgFileName, sizeof(cfgFileName),
+   "configuration file to use in place of .xpdfrc"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// Entry point for pdfdetach: lists (-list) or saves (-save N / -saveall)
+// the files embedded in a PDF.  Embedded files are collected from the
+// catalog's embedded-file table and from the file-attachment annotations
+// found on each page.
+//
+// Exactly one of -list, -save and -saveall must be given, followed by a
+// single PDF file name.  The process exit code is 0 on success (and for
+// -v / -h), 1 if the PDF cannot be opened, 2 if an embedded file cannot
+// be saved, and 99 for any other error.
+int main(int argc, char *argv[]) {
+  GooString *fileName;
+  UnicodeMap *uMap;
+  GooString *ownerPW, *userPW;
+  PDFDoc *doc;
+  char uBuf[8];
+  char path[1024];
+  char *p;
+  GBool ok;
+  int exitCode;
+  GooList *embeddedFiles = NULL;
+  int nFiles, nPages, n, i, j;
+  FileSpec *fileSpec;
+  Page *page;
+  Annots *annots;
+  Annot *annot;
+  GooString *s1;
+  Unicode u;
+  GBool isUnicode;
+
+  exitCode = 99;
+
+  // parse args
+  ok = parseArgs(argDesc, &argc, argv);
+  // exactly one operating mode must be selected
+  if ((doList ? 1 : 0) +
+      ((saveNum != 0) ? 1 : 0) +
+      (saveAll ? 1 : 0) != 1) {
+    ok = gFalse;
+  }
+  if (!ok || argc != 2 || printVersion || printHelp) {
+    fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfdetach", "<PDF-file>", argDesc);
+    }
+    // an explicit -v / -h request is not an error; this matches what
+    // pdffonts, pdfimages and pdfinfo do
+    if (printVersion || printHelp)
+      exitCode = 0;
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+
+  // read config file
+  globalParams = new GlobalParams(cfgFileName);
+  if (textEncName[0]) {
+    globalParams->setTextEncoding(textEncName);
+  }
+
+  // get mapping to output encoding
+  if (!(uMap = globalParams->getTextEncoding())) {
+    error(errConfig, -1, "Couldn't get text encoding");
+    delete fileName;
+    goto err1;
+  }
+
+  // open PDF file
+  // ("\001" in the first byte means the password option was not given)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+  // the name is not used again below; release it here (as pdffonts and
+  // pdfimages do) instead of leaking it on every path
+  delete fileName;
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err2;
+  }
+
+  // collect the embedded files listed in the catalog ...
+  embeddedFiles = new GooList();
+  for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i)
+    embeddedFiles->append(doc->getCatalog()->embeddedFile(i));
+
+  // ... and those attached through file-attachment annotations
+  nPages = doc->getCatalog()->getNumPages();
+  for (i = 0; i < nPages; ++i) {
+    page = doc->getCatalog()->getPage(i + 1);
+    annots = page->getAnnots();
+    // a page without annotations must not end the scan: later pages
+    // may still carry attachments
+    if (!annots)
+      continue;
+
+    for (j = 0; j < annots->getNumAnnots(); ++j) {
+      annot = annots->getAnnot(j);
+      if (annot->getType() != Annot::typeFileAttachment)
+        continue;
+      embeddedFiles->append(new FileSpec(static_cast<AnnotFileAttachment *>(annot)->getFile()));
+    }
+  }
+
+  nFiles = embeddedFiles->getLength();
+
+  // list embedded files
+  if (doList) {
+    printf("%d embedded files\n", nFiles);
+    for (i = 0; i < nFiles; ++i) {
+      fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
+      printf("%d: ", i+1);
+      s1 = fileSpec->getFileName();
+      // a leading 0xFE 0xFF byte-order mark selects 16-bit big-endian
+      // decoding; otherwise each byte goes through pdfDocEncoding
+      if ((s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
+        isUnicode = gTrue;
+        j = 2;
+      } else {
+        isUnicode = gFalse;
+        j = 0;
+      }
+      while (j < fileSpec->getFileName()->getLength()) {
+        if (isUnicode) {
+          u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
+          j += 2;
+        } else {
+          u = pdfDocEncoding[s1->getChar(j) & 0xff];
+          ++j;
+        }
+        n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
+        fwrite(uBuf, 1, n, stdout);
+      }
+      fputc('\n', stdout);
+    }
+
+  // save all embedded files
+  } else if (saveAll) {
+    for (i = 0; i < nFiles; ++i) {
+      fileSpec = static_cast<FileSpec *>(embeddedFiles->get(i));
+      // with -o, the option value is used as a directory prefix
+      // (truncated so that the '/' and a terminating NUL still fit)
+      if (savePath[0]) {
+        n = strlen(savePath);
+        if (n > (int)sizeof(path) - 2) {
+          n = sizeof(path) - 2;
+        }
+        memcpy(path, savePath, n);
+        path[n] = '/';
+        p = path + n + 1;
+      } else {
+        p = path;
+      }
+      s1 = fileSpec->getFileName();
+      if ((s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
+        isUnicode = gTrue;
+        j = 2;
+      } else {
+        isUnicode = gFalse;
+        j = 0;
+      }
+      while (j < fileSpec->getFileName()->getLength()) {
+        if (isUnicode) {
+          u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
+          j += 2;
+        } else {
+          u = pdfDocEncoding[s1->getChar(j) & 0xff];
+          ++j;
+        }
+        n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
+        // stop before the name would overflow path (room for the NUL)
+        if (p + n >= path + sizeof(path))
+          break;
+        memcpy(p, uBuf, n);
+        p += n;
+      }
+      *p = '\0';
+
+      if (!fileSpec->getEmbeddedFile()->save(path)) {
+        // report the full path; p points at the terminating NUL here
+        error(errIO, -1, "Error saving embedded file as '{0:s}'", path);
+        exitCode = 2;
+        goto err2;
+      }
+    }
+
+  // save an embedded file
+  } else {
+    if (saveNum < 1 || saveNum > nFiles) {
+      error(errCommandLine, -1, "Invalid file number");
+      goto err2;
+    }
+
+    fileSpec = static_cast<FileSpec *>(embeddedFiles->get(saveNum - 1));
+    // with -o, the option value is the complete output file name;
+    // otherwise the name stored in the PDF is decoded into path
+    if (savePath[0]) {
+      p = savePath;
+    } else {
+      p = path;
+      s1 = fileSpec->getFileName();
+      if ((s1->getChar(0) & 0xff) == 0xfe && (s1->getChar(1) & 0xff) == 0xff) {
+        isUnicode = gTrue;
+        j = 2;
+      } else {
+        isUnicode = gFalse;
+        j = 0;
+      }
+      while (j < fileSpec->getFileName()->getLength()) {
+        if (isUnicode) {
+          u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j+1) & 0xff);
+          j += 2;
+        } else {
+          u = pdfDocEncoding[s1->getChar(j) & 0xff];
+          ++j;
+        }
+        n = uMap->mapUnicode(u, uBuf, sizeof(uBuf));
+        if (p + n >= path + sizeof(path))
+          break;
+        memcpy(p, uBuf, n);
+        p += n;
+      }
+      *p = '\0';
+      p = path;
+    }
+
+    if (!fileSpec->getEmbeddedFile()->save(p)) {
+      error(errIO, -1, "Error saving embedded file as '{0:s}'", p);
+      exitCode = 2;
+      goto err2;
+    }
+  }
+
+  exitCode = 0;
+
+  // clean up
+ err2:
+  if (embeddedFiles)
+    deleteGooList(embeddedFiles, FileSpec);
+  uMap->decRefCnt();
+  delete doc;
+ err1:
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdffonts.1 b/utils/pdffonts.1
new file mode 100644
index 00000000..4afc3956
--- /dev/null
+++ b/utils/pdffonts.1
@@ -0,0 +1,119 @@
+.\" Copyright 1999-2011 Glyph & Cog, LLC
+.TH pdffonts 1 "15 August 2011"
+.SH NAME
+pdffonts \- Portable Document Format (PDF) font analyzer (version
+3.03)
+.SH SYNOPSIS
+.B pdffonts
+[options]
+.RI [ PDF-file ]
+.SH DESCRIPTION
+.B Pdffonts
+lists the fonts used in a Portable Document Format (PDF) file along
+with various information for each font.
+.PP +The following information is listed for each font: +.TP +.B name +the font name, exactly as given in the PDF file (potentially including +a subset prefix) +.TP +.B type +the font type -- see below for details +.TP +.B encoding +the font encoding +.TP +.B emb +"yes" if the font is embedded in the PDF file +.TP +.B sub +"yes" if the font is a subset +.TP +.B uni +"yes" if there is an explicit "ToUnicode" map in the PDF file (the +absence of a ToUnicode map doesn't necessarily mean that the text +can't be converted to Unicode) +.TP +.B object ID +the font dictionary object ID (number and generation) +.PP +PDF files can contain the following types of fonts: +.PP +.RS +Type 1 +.RE +.RS +Type 1C -- aka Compact Font Format (CFF) +.RE +.RS +Type 3 +.RE +.RS +TrueType +.RE +.RS +CID Type 0 -- 16-bit font with no specified type +.RE +.RS +CID Type 0C -- 16-bit PostScript CFF font +.RE +.RS +CID TrueType -- 16-bit TrueType font +.RE +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to analyze. +.TP +.BI \-l " number" +Specifies the last page to analyze. +.TP +.BI \-subst +List the substitute fonts that poppler will use for non embedded fonts. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdffonts software and documentation are copyright 1996-2011 Glyph +& Cog, LLC. 
+.SH "SEE ALSO"
+.BR pdfdetach (1),
+.BR pdfimages (1),
+.BR pdfinfo (1),
+.BR pdftocairo (1),
+.BR pdftohtml (1),
+.BR pdftoppm (1),
+.BR pdftops (1),
+.BR pdftotext (1)
diff --git a/utils/pdffonts.cc b/utils/pdffonts.cc
new file mode 100644
index 00000000..820abbf7
--- /dev/null
+++ b/utils/pdffonts.cc
@@ -0,0 +1,224 @@
+//========================================================================
+//
+// pdffonts.cc
+//
+// Copyright 2001-2007 Glyph & Cog, LLC
+//
+//========================================================================
+
+//========================================================================
+//
+// Modified under the Poppler project - http://poppler.freedesktop.org
+//
+// All changes made under the Poppler project to this file are licensed
+// under GPL version 2 or later
+//
+// Copyright (C) 2006 Dominic Lachowicz <cinamod@hotmail.com>
+// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org>
+// Copyright (C) 2010 Hib Eris <hib@hiberis.nl>
+// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
+//
+// To see a description of the changes please see the Changelog file that
+// came with your tarball or type make ChangeLog if you are building from git
+//
+//========================================================================
+
+#include "config.h"
+#include <poppler-config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include "parseargs.h"
+#include "goo/GooString.h"
+#include "goo/gmem.h"
+#include "GlobalParams.h"
+#include "Object.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "FontInfo.h"
+
+static const char *fontTypeNames[] = {
+  "unknown",
+  "Type 1",
+  "Type 1C",
+  "Type 1C (OT)",
+  "Type 3",
+  "TrueType",
+  "TrueType (OT)",
+  "CID Type 0",
+  "CID Type 0C",
+  "CID Type 0C (OT)",
+  "CID TrueType",
+  "CID TrueType (OT)"
+};
+
+static int firstPage = 1;
+static int lastPage = 0;
+static GBool showSubst = gFalse;
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to examine"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to examine"},
+  {"-subst", argFlag, &showSubst, 0,
+   "show font substitutions"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// pdffonts entry point.  Scans the requested page range of one PDF and
+// prints a table with one row per font: either the font's properties
+// or, with -subst, the substitute font and font file in use for it.
+//
+// Exit code: 0 on success (and for -v / -h), 1 if the document cannot
+// be opened, 99 otherwise.
+int main(int argc, char *argv[]) {
+  PDFDoc *pdf;
+  GooString *inputName;
+  GooString *ownerPass, *userPass;
+  int status = 99;
+
+  // command line
+  const GBool argsOk = parseArgs(argDesc, &argc, argv);
+  const GBool infoOnly = printVersion || printHelp;
+  if (infoOnly || !argsOk || argc != 2) {
+    fprintf(stderr, "pdffonts version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion)
+      printUsage("pdffonts", "<PDF-file>", argDesc);
+    if (infoOnly)
+      status = 0;
+    goto err0;
+  }
+  inputName = new GooString(argv[1]);
+
+  // configuration
+  globalParams = new GlobalParams();
+
+  // optional passwords; a leading '\001' means "option not given"
+  ownerPass = NULL;
+  if (ownerPassword[0] != '\001')
+    ownerPass = new GooString(ownerPassword);
+  userPass = NULL;
+  if (userPassword[0] != '\001')
+    userPass = new GooString(userPassword);
+
+  // "-" is rewritten to the "fd://0" URI (presumably standard input)
+  if (inputName->cmp("-") == 0) {
+    delete inputName;
+    inputName = new GooString("fd://0");
+  }
+
+  // open the document, then drop the temporaries
+  pdf = PDFDocFactory().createPDFDoc(*inputName, ownerPass, userPass);
+  delete inputName;
+  delete userPass;   // deleting a null pointer is a no-op
+  delete ownerPass;
+  if (!pdf->isOk()) {
+    status = 1;
+    goto err1;
+  }
+
+  // clamp the page range to the document
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > pdf->getNumPages())
+    lastPage = pdf->getNumPages();
+
+  // scan and report
+  {
+    FontInfoScanner scanner(pdf, firstPage - 1);
+    GooList *fontList = scanner.scan(lastPage - firstPage + 1);
+
+    // table header for the selected report
+    if (showSubst) {
+      printf("name object ID substitute font substitute font file\n");
+      printf("------------------------------------ --------- ------------------------------------ ------------------------------------\n");
+    } else {
+      printf("name type encoding emb sub uni object ID\n");
+      printf("------------------------------------ ----------------- ---------------- --- --- --- ---------\n");
+    }
+
+    if (fontList) {
+      for (int idx = 0; idx < fontList->getLength(); ++idx) {
+        FontInfo *info = (FontInfo *)fontList->get(idx);
+
+        if (showSubst) {
+          // substitution report: only fonts that have a font file
+          if (info->getFile()) {
+            printf("%-36s",
+                   info->getName() ? info->getName()->getCString() : "[none]");
+            const Ref ref = info->getRef();
+            if (ref.gen < 100000) {
+              printf(" %6d %2d", ref.num, ref.gen);
+            } else {
+              printf(" [none]");
+            }
+            printf(" %-36s %s\n",
+                   info->getSubstituteName() ? info->getSubstituteName()->getCString() : "[none]",
+                   info->getFile()->getCString());
+          }
+        } else {
+          // property report: one row for every font
+          printf("%-36s %-17s %-16s %-3s %-3s %-3s",
+                 info->getName() ? info->getName()->getCString() : "[none]",
+                 fontTypeNames[info->getType()],
+                 info->getEncoding()->getCString(),
+                 info->getEmbedded() ? "yes" : "no",
+                 info->getSubset() ? "yes" : "no",
+                 info->getToUnicode() ? "yes" : "no");
+          const Ref ref = info->getRef();
+          if (ref.gen < 100000) {
+            printf(" %6d %2d\n", ref.num, ref.gen);
+          } else {
+            printf(" [none]\n");
+          }
+        }
+
+        // the list hands out owned entries; free each after printing
+        delete info;
+      }
+      delete fontList;
+    }
+  }
+
+  status = 0;
+
+ err1:
+  delete pdf;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return status;
+}
+
+
diff --git a/utils/pdfimages.1 b/utils/pdfimages.1
new file mode 100644
index 00000000..955d8b3c
--- /dev/null
+++ b/utils/pdfimages.1
@@ -0,0 +1,189 @@
+.\" Copyright 1998-2011 Glyph & Cog, LLC
+.TH pdfimages 1 "15 August 2011"
+.SH NAME
+pdfimages \- Portable Document Format (PDF) image extractor
+(version 3.03)
+.SH SYNOPSIS
+.B pdfimages
+[options]
+.I PDF-file image-root
+.SH DESCRIPTION
+.B Pdfimages
+saves images from a Portable Document Format (PDF) file as Portable
+Pixmap (PPM), Portable Bitmap (PBM), or JPEG files.
+.PP
+Pdfimages reads the PDF file
+.IR PDF-file ,
+scans one or more pages, and writes one PPM, PBM, or JPEG file for each image,
+.IR image-root - nnn . xxx ,
+where
+.I nnn
+is the image number and
+.I xxx
+is the image type (.ppm, .pbm, .jpg).
+.SH OPTIONS
+.TP
+.BI \-f " number"
+Specifies the first page to scan.
+.TP
+.BI \-l " number"
+Specifies the last page to scan.
+.TP
+.B \-j
+Normally, all images are written as PBM (for monochrome images) or PPM
+(for non-monochrome images) files. With this option, images in DCT
+format are saved as JPEG files. All non-DCT images are saved in
+PBM/PPM format as usual.
+.TP
+.B \-list
+Instead of writing the images, list the images along with various information for each image. Do not specify an
+.IR image-root
+with this option.
+.IP +The following information is listed for each image: +.RS +.TP +.B page +the page number containing the image +.TP +.B num +the image number +.TP +.B type +the image type: +.PP +.RS +image - an opaque image +.RE +.RS +mask - a monochrome mask image +.RE +.RS +smask - a soft-mask image +.RE +.RS +stencil - a monochrome mask image used for painting a color or pattern +.RE +.PP +Note: Transparency in images is represented in PDF using a separate image for the image and the mask/smask. +The mask/smask used as part of a transparent image always immediately follows the image in the image list. +.TP +.B width +image width (in pixels) +.TP +.B height +image height (in pixels) +.PP +Note: the image width/height is the size of the embedded image, not the size the image will be rendered at. +.TP +.B color +image color space: +.PP +.RS +gray - Gray +.RE +.RS +rgb - RGB +.RE +.RS +cmyk - CMYK +.RE +.RS +lab - L*a*b +.RE +.RS +icc - ICC Based +.RE +.RS +index - Indexed Color +.RE +.RS +sep - Separation +.RE +.RS +devn - DeviceN +.RE +.TP +.B comp +number of color components +.TP +.B bpc +bits per component +.TP +.B enc +encoding: +.PP +.RS +image - raster image (may be Flate or LZW compressed but does not use an image encoding) +.RE +.RS +jpeg - Joint Photographic Experts Group +.RE +.RS +jp2 - JPEG2000 +.RE +.RS +jbig2 - Joint Bi-Level Image Experts Group +.RE +.RS +ccitt - CCITT Group 3 or Group 4 Fax +.RE +.TP +.B interp +"yes" if the interpolation is to be performed when scaling up the image +.TP +.B object ID +the image dictionary object ID (number and generation) +.RE +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-p +Include page numbers in output file names. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information.
+.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdfimages software and documentation are copyright 1998-2011 Glyph +& Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) diff --git a/utils/pdfimages.cc b/utils/pdfimages.cc new file mode 100644 index 00000000..82c301c7 --- /dev/null +++ b/utils/pdfimages.cc @@ -0,0 +1,191 @@ +//======================================================================== +// +// pdfimages.cc +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +// Modified for Debian by Hamish Moffatt, 22 May 2002. +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2010 Jakob Voss <jakob.voss@gbv.de> +// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include "parseargs.h" +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h"
+#include "Array.h"
+#include "Dict.h"
+#include "XRef.h"
+#include "Catalog.h"
+#include "Page.h"
+#include "PDFDoc.h"
+#include "PDFDocFactory.h"
+#include "ImageOutputDev.h"
+#include "Error.h"
+
+static int firstPage = 1;
+static int lastPage = 0;
+static GBool listImages = gFalse;
+static GBool dumpJPEG = gFalse;
+static GBool pageNames = gFalse;
+static char ownerPassword[33] = "\001";
+static char userPassword[33] = "\001";
+static GBool quiet = gFalse;
+static GBool printVersion = gFalse;
+static GBool printHelp = gFalse;
+
+static const ArgDesc argDesc[] = {
+  {"-f", argInt, &firstPage, 0,
+   "first page to convert"},
+  {"-l", argInt, &lastPage, 0,
+   "last page to convert"},
+  {"-j", argFlag, &dumpJPEG, 0,
+   "write JPEG images as JPEG files"},
+  {"-list", argFlag, &listImages, 0,
+   "print list of images instead of saving"},
+  {"-opw", argString, ownerPassword, sizeof(ownerPassword),
+   "owner password (for encrypted files)"},
+  {"-upw", argString, userPassword, sizeof(userPassword),
+   "user password (for encrypted files)"},
+  {"-p", argFlag, &pageNames, 0,
+   "include page numbers in output file names"},
+  {"-q", argFlag, &quiet, 0,
+   "don't print any messages or errors"},
+  {"-v", argFlag, &printVersion, 0,
+   "print copyright and version info"},
+  {"-h", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"--help", argFlag, &printHelp, 0,
+   "print usage information"},
+  {"-?", argFlag, &printHelp, 0,
+   "print usage information"},
+  {NULL}
+};
+
+// pdfimages entry point.  Renders the requested page range of one PDF
+// through an ImageOutputDev, which either writes the images using the
+// <image-root> prefix or, with -list, lists them (no <image-root> is
+// accepted in that mode).
+//
+// Exit code: 0 on success (and for -v / -h), 1 if the document cannot
+// be opened, 3 if copying is not permitted (only when built with
+// ENFORCE_PERMISSIONS), 99 for any other error.
+int main(int argc, char *argv[]) {
+  PDFDoc *doc;
+  GooString *fileName;
+  char *imgRoot = NULL;
+  GooString *ownerPW, *userPW;
+  ImageOutputDev *imgOut;
+  GBool ok;
+  int exitCode;
+
+  exitCode = 99;
+
+  // parse args
+  // (-list takes only the PDF file; otherwise an image root is required)
+  ok = parseArgs(argDesc, &argc, argv);
+  if (!ok || (listImages && argc != 2) || (!listImages && argc != 3) || printVersion || printHelp) {
+    fprintf(stderr, "pdfimages version %s\n", PACKAGE_VERSION);
+    fprintf(stderr, "%s\n", popplerCopyright);
+    fprintf(stderr, "%s\n", xpdfCopyright);
+    if (!printVersion) {
+      printUsage("pdfimages", "<PDF-file> <image-root>", argDesc);
+    }
+    if (printVersion || printHelp)
+      exitCode = 0;
+    goto err0;
+  }
+  fileName = new GooString(argv[1]);
+  if (!listImages)
+    imgRoot = argv[2];
+
+  // read config file
+  globalParams = new GlobalParams();
+  if (quiet) {
+    globalParams->setErrQuiet(quiet);
+  }
+
+  // open PDF file
+  // ("\001" in the first byte means the password option was not given)
+  if (ownerPassword[0] != '\001') {
+    ownerPW = new GooString(ownerPassword);
+  } else {
+    ownerPW = NULL;
+  }
+  if (userPassword[0] != '\001') {
+    userPW = new GooString(userPassword);
+  } else {
+    userPW = NULL;
+  }
+  // "-" is rewritten to the "fd://0" URI (presumably standard input)
+  if (fileName->cmp("-") == 0) {
+    delete fileName;
+    fileName = new GooString("fd://0");
+  }
+
+  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
+  delete fileName;
+
+  if (userPW) {
+    delete userPW;
+  }
+  if (ownerPW) {
+    delete ownerPW;
+  }
+  if (!doc->isOk()) {
+    exitCode = 1;
+    goto err1;
+  }
+
+  // check for copy permission
+#ifdef ENFORCE_PERMISSIONS
+  if (!doc->okToCopy()) {
+    error(errNotAllowed, -1, "Copying of images from this document is not allowed.");
+    exitCode = 3;
+    goto err1;
+  }
+#endif
+
+  // get page range
+  if (firstPage < 1)
+    firstPage = 1;
+  if (lastPage < 1 || lastPage > doc->getNumPages())
+    lastPage = doc->getNumPages();
+
+  // write image files
+  imgOut = new ImageOutputDev(imgRoot, pageNames, dumpJPEG, listImages);
+  if (imgOut->isOk()) {
+    doc->displayPages(imgOut, firstPage, lastPage, 72, 72, 0,
+                      gTrue, gFalse, gFalse);
+    // success is reported only when the pages were actually processed;
+    // if the output device is not usable, exitCode keeps its initial
+    // value of 99 instead of claiming success
+    exitCode = 0;
+  }
+  delete imgOut;
+
+  // clean up
+ err1:
+  delete doc;
+  delete globalParams;
+ err0:
+
+  // check for memory leaks
+  Object::memCheck(stderr);
+  gMemReport(stderr);
+
+  return exitCode;
+}
diff --git a/utils/pdfinfo.1 b/utils/pdfinfo.1
new file mode 100644
index 00000000..a3ad1c36
--- /dev/null
+++ b/utils/pdfinfo.1
@@ -0,0 +1,146 @@
+.\" Copyright 1999-2011 Glyph & Cog, LLC
+.TH pdfinfo 1 "15 August 2011"
+.SH NAME
+pdfinfo \- Portable
Document Format (PDF) document information +extractor (version 3.03) +.SH SYNOPSIS +.B pdfinfo +[options] +.RI [ PDF-file ] +.SH DESCRIPTION +.B Pdfinfo +prints the contents of the \'Info' dictionary (plus some other useful +information) from a Portable Document Format (PDF) file. +.PP +The \'Info' dictionary contains the following values: +.PP +.RS +title +.RE +.RS +subject +.RE +.RS +keywords +.RE +.RS +author +.RE +.RS +creator +.RE +.RS +producer +.RE +.RS +creation date +.RE +.RS +modification date +.RE +.PP +In addition, the following information is printed: +.PP +.RS +tagged (yes/no) +.RE +.RS +form (AcroForm / XFA / none) +.RE +.RS +page count +.RE +.RS +encrypted flag (yes/no) +.RE +.RS +print and copy permissions (if encrypted) +.RE +.RS +page size +.RE +.RS +file size +.RE +.RS +linearized (yes/no) +.RE +.RS +PDF version +.RE +.RS +metadata (only if requested) +.RE +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to examine. If multiple pages are requested +using the "\-f" and "\-l" options, the size of each requested page (and, +optionally, the bounding boxes for each requested page) are printed. +Otherwise, only page one is examined. +.TP +.BI \-l " number" +Specifies the last page to examine. +.TP +.B \-box +Prints the page box bounding boxes: MediaBox, CropBox, BleedBox, +TrimBox, and ArtBox. +.TP +.B \-meta +Prints document-level metadata. (This is the "Metadata" stream from +the PDF file's Catalog object.) +.TP +.B \-rawdates +Prints the raw (undecoded) date strings, directly from the PDF file. +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output. This defaults to "UTF-8". +.TP +.B \-listenc +Lists the available encodings. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information.
+.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdfinfo software and documentation are copyright 1996-2011 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) diff --git a/utils/pdfinfo.cc b/utils/pdfinfo.cc new file mode 100644 index 00000000..cdc5375d --- /dev/null +++ b/utils/pdfinfo.cc @@ -0,0 +1,464 @@ +//======================================================================== +// +// pdfinfo.cc +// +// Copyright 1998-2003 Glyph & Cog, LLC +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2006 Dom Lachowicz <cinamod@hotmail.com> +// Copyright (C) 2007-2010, 2012 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2011 Vittal Aithal <vittal.aithal@cognidox.com> +// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <time.h> +#include <math.h> +#include "parseargs.h" +#include "printencodings.h" +#include "goo/GooString.h" +#include "goo/gmem.h" 
+#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "CharTypes.h" +#include "UnicodeMap.h" +#include "PDFDocEncoding.h" +#include "Error.h" +#include "DateInfo.h" + +static void printInfoString(Dict *infoDict, const char *key, const char *text, + UnicodeMap *uMap); +static void printInfoDate(Dict *infoDict, const char *key, const char *text); +static void printBox(const char *text, PDFRectangle *box); + +static int firstPage = 1; +static int lastPage = 0; +static GBool printBoxes = gFalse; +static GBool printMetadata = gFalse; +static GBool rawDates = gFalse; +static char textEncName[128] = ""; +static char ownerPassword[33] = "\001"; +static char userPassword[33] = "\001"; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; +static GBool printEnc = gFalse; + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to convert"}, + {"-l", argInt, &lastPage, 0, + "last page to convert"}, + {"-box", argFlag, &printBoxes, 0, + "print the page bounding boxes"}, + {"-meta", argFlag, &printMetadata, 0, + "print the document metadata (XML)"}, + {"-rawdates", argFlag, &rawDates, 0, + "print the undecoded date strings directly from the PDF file"}, + {"-enc", argString, textEncName, sizeof(textEncName), + "output text encoding name"}, + {"-listenc",argFlag, &printEnc, 0, + "list available encodings"}, + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage 
information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString *fileName; + GooString *ownerPW, *userPW; + UnicodeMap *uMap; + Page *page; + Object info, xfa; + Object *acroForm; + char buf[256]; + double w, h, wISO, hISO; + FILE *f; + GooString *metadata; + GBool ok; + int exitCode; + int pg, i; + GBool multiPage; + int r; + + exitCode = 99; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) { + fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdfinfo", "<PDF-file>", argDesc); + } + if (printVersion || printHelp) + exitCode = 0; + goto err0; + } + + // read config file + globalParams = new GlobalParams(); + + if (printEnc) { + printEncodings(); + delete globalParams; + exitCode = 0; + goto err0; + } + + fileName = new GooString(argv[1]); + + if (textEncName[0]) { + globalParams->setTextEncoding(textEncName); + } + + // get mapping to output encoding + if (!(uMap = globalParams->getTextEncoding())) { + error(errCommandLine, -1, "Couldn't get text encoding"); + delete fileName; + goto err1; + } + + // open PDF file + if (ownerPassword[0] != '\001') { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0] != '\001') { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); + } + + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); + + if (userPW) { + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + exitCode = 1; + goto err2; + } + + // get page range + if (firstPage < 1) { + firstPage = 1; + } + if (lastPage == 0) { + multiPage = gFalse; + lastPage = 1; + } else { + multiPage = 
gTrue; + } + if (lastPage < 1 || lastPage > doc->getNumPages()) { + lastPage = doc->getNumPages(); + } + + // print doc info + doc->getDocInfo(&info); + if (info.isDict()) { + printInfoString(info.getDict(), "Title", "Title: ", uMap); + printInfoString(info.getDict(), "Subject", "Subject: ", uMap); + printInfoString(info.getDict(), "Keywords", "Keywords: ", uMap); + printInfoString(info.getDict(), "Author", "Author: ", uMap); + printInfoString(info.getDict(), "Creator", "Creator: ", uMap); + printInfoString(info.getDict(), "Producer", "Producer: ", uMap); + if (rawDates) { + printInfoString(info.getDict(), "CreationDate", "CreationDate: ", + uMap); + printInfoString(info.getDict(), "ModDate", "ModDate: ", + uMap); + } else { + printInfoDate(info.getDict(), "CreationDate", "CreationDate: "); + printInfoDate(info.getDict(), "ModDate", "ModDate: "); + } + } + info.free(); + + // print tagging info + printf("Tagged: %s\n", + doc->getStructTreeRoot()->isDict() ? "yes" : "no"); + + // print form info + if ((acroForm = doc->getCatalog()->getAcroForm())->isDict()) { + acroForm->dictLookup("XFA", &xfa); + if (xfa.isStream() || xfa.isArray()) { + printf("Form: XFA\n"); + } else { + printf("Form: AcroForm\n"); + } + xfa.free(); + } else { + printf("Form: none\n"); + } + + // print page count + printf("Pages: %d\n", doc->getNumPages()); + + // print encryption info + printf("Encrypted: "); + if (doc->isEncrypted()) { + printf("yes (print:%s copy:%s change:%s addNotes:%s)\n", + doc->okToPrint(gTrue) ? "yes" : "no", + doc->okToCopy(gTrue) ? "yes" : "no", + doc->okToChange(gTrue) ? "yes" : "no", + doc->okToAddNotes(gTrue) ? 
"yes" : "no"); + } else { + printf("no\n"); + } + + // print page size + for (pg = firstPage; pg <= lastPage; ++pg) { + w = doc->getPageCropWidth(pg); + h = doc->getPageCropHeight(pg); + if (multiPage) { + printf("Page %4d size: %g x %g pts", pg, w, h); + } else { + printf("Page size: %g x %g pts", w, h); + } + if ((fabs(w - 612) < 0.1 && fabs(h - 792) < 0.1) || + (fabs(w - 792) < 0.1 && fabs(h - 612) < 0.1)) { + printf(" (letter)"); + } else { + hISO = sqrt(sqrt(2.0)) * 7200 / 2.54; + wISO = hISO / sqrt(2.0); + for (i = 0; i <= 6; ++i) { + if ((fabs(w - wISO) < 1 && fabs(h - hISO) < 1) || + (fabs(w - hISO) < 1 && fabs(h - wISO) < 1)) { + printf(" (A%d)", i); + break; + } + hISO = wISO; + wISO /= sqrt(2.0); + } + } + printf("\n"); + r = doc->getPageRotate(pg); + if (multiPage) { + printf("Page %4d rot: %d\n", pg, r); + } else { + printf("Page rot: %d\n", r); + } + } + + // print the boxes + if (printBoxes) { + if (multiPage) { + for (pg = firstPage; pg <= lastPage; ++pg) { + page = doc->getPage(pg); + if (!page) { + error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg); + continue; + } + sprintf(buf, "Page %4d MediaBox: ", pg); + printBox(buf, page->getMediaBox()); + sprintf(buf, "Page %4d CropBox: ", pg); + printBox(buf, page->getCropBox()); + sprintf(buf, "Page %4d BleedBox: ", pg); + printBox(buf, page->getBleedBox()); + sprintf(buf, "Page %4d TrimBox: ", pg); + printBox(buf, page->getTrimBox()); + sprintf(buf, "Page %4d ArtBox: ", pg); + printBox(buf, page->getArtBox()); + } + } else { + page = doc->getPage(firstPage); + if (!page) { + error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage); + } else { + printBox("MediaBox: ", page->getMediaBox()); + printBox("CropBox: ", page->getCropBox()); + printBox("BleedBox: ", page->getBleedBox()); + printBox("TrimBox: ", page->getTrimBox()); + printBox("ArtBox: ", page->getArtBox()); + } + } + } + + // print file size +#ifdef VMS + f = fopen(fileName->getCString(), "rb", "ctx=stm"); 
+#else + f = fopen(fileName->getCString(), "rb"); +#endif + if (f) { +#if HAVE_FSEEKO + fseeko(f, 0, SEEK_END); + printf("File size: %u bytes\n", (Guint)ftello(f)); +#elif HAVE_FSEEK64 + fseek64(f, 0, SEEK_END); + printf("File size: %u bytes\n", (Guint)ftell64(f)); +#else + fseek(f, 0, SEEK_END); + printf("File size: %d bytes\n", (int)ftell(f)); +#endif + fclose(f); + } + + // print linearization info + printf("Optimized: %s\n", doc->isLinearized() ? "yes" : "no"); + + // print PDF version + printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion()); + + // print the metadata + if (printMetadata && (metadata = doc->readMetadata())) { + fputs("Metadata:\n", stdout); + fputs(metadata->getCString(), stdout); + fputc('\n', stdout); + delete metadata; + } + + exitCode = 0; + + // clean up + err2: + uMap->decRefCnt(); + delete doc; + delete fileName; + err1: + delete globalParams; + err0: + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return exitCode; +} + +static void printInfoString(Dict *infoDict, const char *key, const char *text, + UnicodeMap *uMap) { + Object obj; + GooString *s1; + GBool isUnicode; + Unicode u, u2; + char buf[8]; + int i, n; + + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text, stdout); + s1 = obj.getString(); + if ((s1->getChar(0) & 0xff) == 0xfe && + (s1->getChar(1) & 0xff) == 0xff) { + isUnicode = gTrue; + i = 2; + } else { + isUnicode = gFalse; + i = 0; + } + while (i < obj.getString()->getLength()) { + if (isUnicode) { + u = ((s1->getChar(i) & 0xff) << 8) | + (s1->getChar(i+1) & 0xff); + i += 2; + if (u >= 0xd800 && u <= 0xdbff && i < obj.getString()->getLength()) { + // surrogate pair + u2 = ((s1->getChar(i) & 0xff) << 8) | + (s1->getChar(i+1) & 0xff); + i += 2; + if (u2 >= 0xdc00 && u2 <= 0xdfff) { + u = 0x10000 + ((u - 0xd800) << 10) + (u2 - 0xdc00); + } + } + } else { + u = pdfDocEncoding[s1->getChar(i) & 0xff]; + ++i; + } + n = uMap->mapUnicode(u, buf, 
sizeof(buf)); + fwrite(buf, 1, n, stdout); + } + fputc('\n', stdout); + } + obj.free(); +} + +static void printInfoDate(Dict *infoDict, const char *key, const char *text) { + Object obj; + char *s; + int year, mon, day, hour, min, sec, tz_hour, tz_minute; + char tz; + struct tm tmStruct; + char buf[256]; + + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text, stdout); + s = obj.getString()->getCString(); + // TODO do something with the timezone info + if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) { + tmStruct.tm_year = year - 1900; + tmStruct.tm_mon = mon - 1; + tmStruct.tm_mday = day; + tmStruct.tm_hour = hour; + tmStruct.tm_min = min; + tmStruct.tm_sec = sec; + tmStruct.tm_wday = -1; + tmStruct.tm_yday = -1; + tmStruct.tm_isdst = -1; + // compute the tm_wday and tm_yday fields + if (mktime(&tmStruct) != (time_t)-1 && + strftime(buf, sizeof(buf), "%c", &tmStruct)) { + fputs(buf, stdout); + } else { + fputs(s, stdout); + } + } else { + fputs(s, stdout); + } + fputc('\n', stdout); + } + obj.free(); +} + +static void printBox(const char *text, PDFRectangle *box) { + printf("%s%8.2f %8.2f %8.2f %8.2f\n", + text, box->x1, box->y1, box->x2, box->y2); +} diff --git a/utils/pdfseparate.1 b/utils/pdfseparate.1 new file mode 100644 index 00000000..077faec8 --- /dev/null +++ b/utils/pdfseparate.1 @@ -0,0 +1,49 @@ +.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org +.TH pdfseparate 1 "15 September 2011" +.SH NAME +pdfseparate \- Portable Document Format (PDF) page extractor +.SH SYNOPSIS +.B pdfseparate +[options] +.I PDF-file PDF-page-pattern +.SH DESCRIPTION +.B pdfseparate +extract single pages from a Portable Document Format (PDF). +.PP +pdfseparate reads the PDF file +.IR PDF-file , +extracts one or more pages, and writes one PDF file for each page to +.IR PDF-page-pattern , +PDF-page-pattern should contain +.BR %d . +%d is replaced by the page number. 
+.TP +The PDF-file should not be encrypted. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to extract. If \-f is omitted, extraction starts with page 1. +.TP +.BI \-l " number" +Specifies the last page to extract. If \-l is omitted, extraction ends with the last page. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXAMPLE +pdfseparate sample.pdf sample-%d.pdf +.TP +extracts all pages from sample.pdf; e.g. if sample.pdf has 3 pages, it produces +.TP +sample-1.pdf, sample-2.pdf, sample-3.pdf +.SH AUTHOR +The pdfseparate software and documentation are copyright 1996-2004 Glyph +& Cog, LLC and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org +.SH "SEE ALSO" +.BR pdfunite (1) diff --git a/utils/pdfseparate.cc b/utils/pdfseparate.cc new file mode 100644 index 00000000..25fac5a9 --- /dev/null +++ b/utils/pdfseparate.cc @@ -0,0 +1,122 @@ +//======================================================================== +// +// pdfseparate.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2011, 2012 Thomas Freitag <Thomas.Freitag@alfa.de> +// Copyright (C) 2012 Albert Astals Cid <aacid@kde.org> +// +//======================================================================== +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include "parseargs.h" +#include "goo/GooString.h" +#include "PDFDoc.h" +#include "ErrorCodes.h" +#include "GlobalParams.h" + +static int firstPage = 0; +static int lastPage = 0; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to extract"}, + {"-l", argInt, &lastPage, 0, + "last page to extract"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, 
+ "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +bool extractPages (const char *srcFileName, const char *destFileName) { + char pathName[1024]; + GooString *gfileName = new GooString (srcFileName); + PDFDoc *doc = new PDFDoc (gfileName, NULL, NULL, NULL); + + if (!doc->isOk()) { + error(errSyntaxError, -1, "Could not extract page(s) from damaged file ('{0:s}')", srcFileName); + return false; + } + if (doc->isEncrypted()) { + error(errSyntaxError, -1, "Could not extract page(s) from encrypted file ('{0:s}')", srcFileName); + return false; + } + + if (firstPage == 0 && lastPage == 0) { + firstPage = 1; + lastPage = doc->getNumPages(); + } + if (lastPage == 0) + lastPage = doc->getNumPages(); + if (firstPage == 0) + firstPage = 1; + if (firstPage != lastPage && strstr(destFileName, "%d") == NULL) { + error(errSyntaxError, -1, "'{0:s}' must contain '%%d' if more than one page should be extracted", destFileName); + return false; + } + for (int pageNo = firstPage; pageNo <= lastPage; pageNo++) { + sprintf (pathName, destFileName, pageNo); + GooString *gpageName = new GooString (pathName); + int errCode = doc->savePageAs(gpageName, pageNo); + if ( errCode != errNone) { + delete gpageName; + delete gfileName; + return false; + } + delete gpageName; + } + delete gfileName; + return true; +} + +int +main (int argc, char *argv[]) +{ + Object info; + GBool ok; + int exitCode; + + exitCode = 99; + + // parse args + ok = parseArgs (argDesc, &argc, argv); + if (!ok || argc != 3 || printVersion || printHelp) + { + fprintf (stderr, "pdfseparate version %s\n", PACKAGE_VERSION); + fprintf (stderr, "%s\n", popplerCopyright); + fprintf (stderr, "%s\n", xpdfCopyright); + if (!printVersion) + { + printUsage ("pdfseparate", "<PDF-sourcefile> <PDF-pattern-destfile>", + argDesc); + } + if 
(printVersion || printHelp) + exitCode = 0; + goto err0; + } + globalParams = new GlobalParams(); + ok = extractPages (argv[1], argv[2]); + if (ok) { + exitCode = 0; + } + delete globalParams; + +err0: + + return exitCode; +} diff --git a/utils/pdftocairo.1 b/utils/pdftocairo.1 new file mode 100644 index 00000000..19b62efb --- /dev/null +++ b/utils/pdftocairo.1 @@ -0,0 +1,261 @@ +.TH pdftocairo 1 +.SH NAME +pdftocairo \- Portable Document Format (PDF) to PNG/JPEG/PDF/PS/EPS/SVG using cairo +.SH SYNOPSIS +.B pdftocairo +[options] +.IR PDF-file +.RI [ output-file ] +.SH DESCRIPTION +.B pdftocairo +converts Portable Document Format (PDF) files, using the cairo output device of the poppler PDF library, to any of the following output formats: +.IP \(bu +Portable Network Graphics (PNG) +.IP \(bu +JPEG Interchange Format (JPEG) +.IP \(bu +Portable Document Format (PDF) +.IP \(bu +PostScript (PS) +.IP \(bu +Encapsulated PostScript (EPS) +.IP \(bu +Scalable Vector Graphics (SVG) +.PP +.B pdftocairo +reads the PDF file, +.IR PDF-file , +and writes to +.IR output-file . +The image formats (PNG and JPEG) generate one file per page with the page number and file type appended to +.IR output-file +(except when \-singlefile is used). +When the output format is a vector format (PDF, PS, EPS, and SVG) or when \-singlefile is used, +.IR output-file +is the full filename. + +If the +.IR PDF-file +is \*(lq\-\*(rq , the PDF is read from stdin. +If the +.IR output-file +is \*(lq\-\*(rq , the output file will be written to stdout. Using stdout is not valid with image formats unless \-singlefile is used. +If +.IR output-file +is not used, the output filename will be derived from the +.IR PDF-file +filename. +.PP +Not all options are valid with all output formats. One (and only one) of the output format options (\-png, \-jpeg, \-pdf, \-ps, \-eps, or \-svg) must be used. +.PP +The resolution options (\-r, \-rx, \-ry) set the resolution of the +image output formats. 
The image dimensions will depend on the PDF page +size and the resolution. For the vector outputs, regions of the page +that cannot be represented natively in the output format (e.g. +translucency in PS) will be rasterized at the resolution specified by +the resolution options. +.PP +The \-scale-to options may be used to set a fixed image size. The +image resolution will vary with the page size. +.PP +The cropping options (\-x, \-y, \-W, and \-H) use units of pixels with +the image formats and PostScript points (1/72 inch) with the vector +formats. When cropping is used with vector output the cropped region is +centered unless \-nocenter is used in which case the cropped region is +at the top left (SVG) or bottom left (PDF, PS, EPS). +.PP +.SH OPTIONS +.TP +.BI \-png +Generates PNG file(s) +.TP +.BI \-jpeg +Generates JPEG file(s) +.TP +.BI \-pdf +Generates a PDF file +.TP +.BI \-ps +Generate a PS file +.TP +.BI \-eps +Generate an EPS file. An EPS file contains a single image, so if you +use this option with a multi-page PDF file, you must use \-f and \-l +to specify a single page. The page size options (\-origpagesizes, +\-paper, \-paperw, \-paperh) cannot be used with this option. +.TP +.BI \-svg +Generate an SVG (Scalable Vector Graphics) file +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.B \-o +Generates only the odd numbered pages. +.TP +.B \-e +Generates only the even numbered pages. +.TP +.BI \-singlefile +Writes only the first page and does not add digits. +.TP +.BI \-r " number" +Specifies the X and Y resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI. +.TP +.BI \-rx " number" +Specifies the X resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI. 
+.TP +.BI \-ry " number" +Specifies the Y resolution, in pixels per inch of image files (or rasterized regions in vector output). The default is 150 PPI. +.TP +.BI \-scale-to " number" +Scales the long side of each page (width for landscape pages, height +for portrait pages) to fit in scale-to pixels. The size of the short +side will be determined by the aspect ratio of the page (PNG/JPEG only). +.TP +.BI \-scale-to-x " number" +Scales each page horizontally to fit in scale-to-x pixels. If +scale-to-y is set to -1, the vertical size will be determined by the +aspect ratio of the page (PNG/JPEG only). +.TP +.BI \-scale-to-y " number" +Scales each page vertically to fit in scale-to-y pixels. If scale-to-x +is set to -1, the horizontal size will be determined by the aspect ratio +of the page (PNG/JPEG only). +.TP +.BI \-x " number" +Specifies the x-coordinate of the crop area top left corner in pixels (image output) or points (vector output) +.TP +.BI \-y " number" +Specifies the y-coordinate of the crop area top left corner in pixels (image output) or points (vector output) +.TP +.BI \-W " number" +Specifies the width of crop area in pixels (image output) or points (vector output) (default is 0) +.TP +.BI \-H " number" +Specifies the height of crop area in pixels (image output) or points (vector output) (default is 0) +.TP +.BI \-sz " number" +Specifies the size of crop square in pixels (image output) or points (vector output) (sets \-W and \-H) +.TP +.B \-cropbox +Uses the crop box rather than media box when generating the files +.TP +.B \-mono +Generate a monochrome file (PNG only). +.TP +.B \-gray +Generate a grayscale file (PNG and JPEG only). +.TP +.B \-transp +Use a transparent page color instead of white (PNG only). +.TP +.BI \-icc " icc-file" +Use the specified ICC file as the output profile (PNG only). The profile will be embedded in the PNG file. +.TP +.B \-level2 +Generate Level 2 PostScript (PS only). +.TP +.B \-level3 +Generate Level 3 PostScript (PS only). 
This enables all Level 2 features plus +shading patterns and masked images. This is the default setting. +.TP +.B \-origpagesizes +Generate a file with variable page sizes and orientations (PS and PDF +only). The size of each page will be the original page in the PDF +file. If the output is PS the file will contain %%DocumentMedia and +%%PageMedia DSC comments specifying the size of each page. Any +specification of the page size via \-paper, \-paperw, or \-paperh will +get overridden as long as each page of the PDF file has a defined +paper size. +.TP +.BI \-paper " size" +Set the paper size to one of "letter", "legal", "A4", or "A3" +(PS,PDF,SVG only). This can also be set to "match", which will set +the paper size to match the size specified in the PDF +file. \-origpagesizes overrides this setting if the PDF file has +defined page sizes. +.TP +.BI \-paperw " size" +Set the paper width, in points (PS,PDF,SVG only). \-origpagesizes overrides this setting +if the PDF file has defined page sizes. +.TP +.BI \-paperh " size" +Set the paper height, in points (PS,PDF,SVG only). \-origpagesizes overrides this setting +if the PDF file has defined page sizes. +.TP +.B \-nocrop +By default, output is cropped to the CropBox specified in the PDF +file. This option disables cropping. +.TP +.B \-expand +Expand PDF pages smaller than the paper to fill the paper (PS,PDF,SVG only). By +default, these pages are not scaled. +.TP +.B \-noshrink +Don't scale PDF pages which are larger than the paper (PS,PDF,SVG only). By default, +pages larger than the paper are shrunk to fit. +.TP +.B \-nocenter +By default, PDF pages smaller than the paper (after any scaling) are +centered on the paper. This option causes them to be aligned to the +lower-left corner of the paper instead (PS,PDF,SVG only). +.TP +.B \-duplex +Adds the %%IncludeFeature: *Duplex DuplexNoTumble DSC comment to the +PostScript file (PS only). This tells the print manager to enable duplexing. 
+.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The poppler tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +4 +Error related to ICC profile. +.TP +99 +Other error. +.SH AUTHOR +The pdftocairo software and documentation are copyright 1996-2004 Glyph +& Cog, LLC and copyright 2005-2011 The Poppler Developers. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) diff --git a/utils/pdftocairo.cc b/utils/pdftocairo.cc new file mode 100644 index 00000000..8d13e316 --- /dev/null +++ b/utils/pdftocairo.cc @@ -0,0 +1,1015 @@ +//======================================================================== +// +// pdftocairo.cc +// +// Copyright 2003 Glyph & Cog, LLC +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2007 Ilmari Heikkinen <ilmari.heikkinen@gmail.com> +// Copyright (C) 2008 Richard Airlie <richard.airlie@maglabs.net> +// Copyright (C) 2009 Michael K. 
Johnson <a1237@danlj.org> +// Copyright (C) 2009 Shen Liang <shenzhuxi@gmail.com> +// Copyright (C) 2009 Stefan Thomas <thomas@eload24.com> +// Copyright (C) 2009, 2010 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2010, 2011, 2012 Adrian Johnson <ajohnson@redneon.com> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2010 Jonathan Liu <net147@gmail.com> +// Copyright (C) 2010 William Bader <williambader@hotmail.com> +// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de> +// Copyright (C) 2011 Carlos Garcia Campos <carlosgc@gnome.org> +// Copyright (C) 2012 Koji Otani <sho@bbr.jp> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <math.h> +#include <string.h> +#include "parseargs.h" +#include "goo/gmem.h" +#include "goo/gtypes.h" +#include "goo/gtypes_p.h" +#include "goo/GooString.h" +#include "goo/ImgWriter.h" +#include "goo/JpegWriter.h" +#include "goo/PNGWriter.h" +#include "GlobalParams.h" +#include "Object.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "CairoOutputDev.h" +#if USE_CMS +#ifdef USE_LCMS1 +#include <lcms.h> +#else +#include <lcms2.h> +#endif +#endif +#include <cairo.h> +#if CAIRO_HAS_PS_SURFACE +#include <cairo-ps.h> +#endif +#if CAIRO_HAS_PDF_SURFACE +#include <cairo-pdf.h> +#endif +#if CAIRO_HAS_SVG_SURFACE +#include <cairo-svg.h> +#endif + + +static GBool png = gFalse; +static GBool jpeg = gFalse; +static GBool ps = gFalse; +static GBool eps = gFalse; +static GBool pdf = gFalse; +static GBool svg = gFalse; + +static int firstPage = 1; +static int lastPage = 0; +static GBool printOnlyOdd = gFalse; +static GBool printOnlyEven = gFalse; +static GBool singleFile = gFalse; +static double resolution = 0.0; +static double x_resolution = 
150.0; +static double y_resolution = 150.0; +static int scaleTo = 0; +static int x_scaleTo = 0; +static int y_scaleTo = 0; +static int crop_x = 0; +static int crop_y = 0; +static int crop_w = 0; +static int crop_h = 0; +static int sz = 0; +static GBool useCropBox = gFalse; +static GBool mono = gFalse; +static GBool gray = gFalse; +static GBool transp = gFalse; +static GooString icc; + +static GBool level2 = gFalse; +static GBool level3 = gFalse; +static GBool doOrigPageSizes = gFalse; +static char paperSize[15] = ""; +static int paperWidth = -1; +static int paperHeight = -1; +static GBool noCrop = gFalse; +static GBool expand = gFalse; +static GBool noShrink = gFalse; +static GBool noCenter = gFalse; +static GBool duplex = gFalse; + +static char ownerPassword[33] = ""; +static char userPassword[33] = ""; +static GBool quiet = gFalse; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static const ArgDesc argDesc[] = { +#if ENABLE_LIBPNG + {"-png", argFlag, &png, 0, + "generate a PNG file"}, +#endif +#if ENABLE_LIBJPEG + {"-jpeg", argFlag, &jpeg, 0, + "generate a JPEG file"}, +#endif +#if CAIRO_HAS_PS_SURFACE + {"-ps", argFlag, &ps, 0, + "generate PostScript file"}, + {"-eps", argFlag, &eps, 0, + "generate Encapsulated PostScript (EPS)"}, +#endif +#if CAIRO_HAS_PDF_SURFACE + {"-pdf", argFlag, &pdf, 0, + "generate a PDF file"}, +#endif +#if CAIRO_HAS_SVG_SURFACE + {"-svg", argFlag, &svg, 0, + "generate a Scalable Vector Graphics (SVG) file"}, +#endif + + {"-f", argInt, &firstPage, 0, + "first page to print"}, + {"-l", argInt, &lastPage, 0, + "last page to print"}, + {"-o", argFlag, &printOnlyOdd, 0, + "print only odd pages"}, + {"-e", argFlag, &printOnlyEven, 0, + "print only even pages"}, + {"-singlefile", argFlag, &singleFile, 0, + "write only the first page and do not add digits"}, + + {"-r", argFP, &resolution, 0, + "resolution, in PPI (default is 150)"}, + {"-rx", argFP, &x_resolution, 0, + "X resolution, in PPI (default is 150)"}, + 
{"-ry", argFP, &y_resolution, 0, + "Y resolution, in PPI (default is 150)"}, + {"-scale-to", argInt, &scaleTo, 0, + "scales each page to fit within scale-to*scale-to pixel box"}, + {"-scale-to-x", argInt, &x_scaleTo, 0, + "scales each page horizontally to fit in scale-to-x pixels"}, + {"-scale-to-y", argInt, &y_scaleTo, 0, + "scales each page vertically to fit in scale-to-y pixels"}, + + {"-x", argInt, &crop_x, 0, + "x-coordinate of the crop area top left corner"}, + {"-y", argInt, &crop_y, 0, + "y-coordinate of the crop area top left corner"}, + {"-W", argInt, &crop_w, 0, + "width of crop area in pixels (default is 0)"}, + {"-H", argInt, &crop_h, 0, + "height of crop area in pixels (default is 0)"}, + {"-sz", argInt, &sz, 0, + "size of crop square in pixels (sets W and H)"}, + {"-cropbox",argFlag, &useCropBox, 0, + "use the crop box rather than media box"}, + + {"-mono", argFlag, &mono, 0, + "generate a monochrome image file (PNG, JPEG)"}, + {"-gray", argFlag, &gray, 0, + "generate a grayscale image file (PNG, JPEG)"}, + {"-transp", argFlag, &transp, 0, + "use a transparent background instead of white (PNG)"}, +#if USE_CMS + {"-icc", argGooString, &icc, 0, + "ICC color profile to use"}, +#endif + + {"-level2", argFlag, &level2, 0, + "generate Level 2 PostScript (PS, EPS)"}, + {"-level3", argFlag, &level3, 0, + "generate Level 3 PostScript (PS, EPS)"}, + {"-origpagesizes",argFlag, &doOrigPageSizes,0, + "conserve original page sizes (PS, PDF, SVG)"}, + {"-paper", argString, paperSize, sizeof(paperSize), + "paper size (letter, legal, A4, A3, match)"}, + {"-paperw", argInt, &paperWidth, 0, + "paper width, in points"}, + {"-paperh", argInt, &paperHeight, 0, + "paper height, in points"}, + {"-nocrop", argFlag, &noCrop, 0, + "don't crop pages to CropBox"}, + {"-expand", argFlag, &expand, 0, + "expand pages smaller than the paper size"}, + {"-noshrink", argFlag, &noShrink, 0, + "don't shrink pages larger than the paper size"}, + {"-nocenter", argFlag, &noCenter, 0, + 
"don't center pages smaller than the paper size"}, + {"-duplex", argFlag, &duplex, 0, + "enable duplex printing"}, + + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + + {"-q", argFlag, &quiet, 0, + "don't print any messages or errors"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + + +static cairo_surface_t *surface; +static GBool printing; +static FILE *output_file; + +#if USE_CMS +static unsigned char *icc_data; +static int icc_data_size; +static cmsHPROFILE profile; +#endif + + +void writePageImage(GooString *filename) +{ + ImgWriter *writer = 0; + FILE *file; + int height, width, stride; + unsigned char *data; + + if (png) { +#if ENABLE_LIBPNG + if (transp) + writer = new PNGWriter(PNGWriter::RGBA); + else if (gray) + writer = new PNGWriter(PNGWriter::GRAY); + else if (mono) + writer = new PNGWriter(PNGWriter::MONOCHROME); + else + writer = new PNGWriter(PNGWriter::RGB); + +#if USE_CMS +#ifdef USE_LCMS1 + if (icc_data) + static_cast<PNGWriter*>(writer)->setICCProfile(cmsTakeProductName(profile), icc_data, icc_data_size); + else + static_cast<PNGWriter*>(writer)->setSRGBProfile(); +#else + if (icc_data) { + cmsUInt8Number profileID[17]; + profileID[16] = '\0'; + + cmsGetHeaderProfileID(profile,profileID); + static_cast<PNGWriter*>(writer)->setICCProfile(reinterpret_cast<char *>(profileID), icc_data, icc_data_size); + } else { + static_cast<PNGWriter*>(writer)->setSRGBProfile(); + } +#endif +#endif +#endif + + } else if (jpeg) { +#if ENABLE_LIBJPEG + if (gray) + writer = new JpegWriter(JCS_GRAYSCALE); + else + 
writer = new JpegWriter(JCS_RGB); +#endif + } + if (!writer) + return; + + if (filename->cmp("fd://0") == 0) + file = stdout; + else + file = fopen(filename->getCString(), "wb"); + + if (!file) { + fprintf(stderr, "Error opening output file %s\n", filename->getCString()); + exit(2); + } + + height = cairo_image_surface_get_height(surface); + width = cairo_image_surface_get_width(surface); + stride = cairo_image_surface_get_stride(surface); + data = cairo_image_surface_get_data(surface); + + if (!writer->init(file, width, height, x_resolution, y_resolution)) { + fprintf(stderr, "Error writing %s\n", filename->getCString()); + exit(2); + } + unsigned char *row = (unsigned char *) gmallocn(width, 4); + + for (int y = 0; y < height; y++ ) { + uint32_t *pixel = (uint32_t *) (data + y*stride); + unsigned char *rowp = row; + for (int x = 0; x < width; x++, pixel++) { + if (transp) { + // unpremultiply into RGBA format + uint8_t a; + a = (*pixel & 0xff000000) >> 24; + if (a == 0) { + *rowp++ = 0; + *rowp++ = 0; + *rowp++ = 0; + } else { + *rowp++ = (((*pixel & 0xff0000) >> 16) * 255 + a / 2) / a; + *rowp++ = (((*pixel & 0x00ff00) >> 8) * 255 + a / 2) / a; + *rowp++ = (((*pixel & 0x0000ff) >> 0) * 255 + a / 2) / a; + } + *rowp++ = a; + } else if (gray || mono) { + // convert to gray + // The PDF Reference specifies the DeviceRGB to DeviceGray conversion as + // gray = 0.3*red + 0.59*green + 0.11*blue + int r = (*pixel & 0x00ff0000) >> 16; + int g = (*pixel & 0x0000ff00) >> 8; + int b = (*pixel & 0x000000ff) >> 0; + // an arbitrary integer approximation of .3*r + .59*g + .11*b + int y = (r*19661+g*38666+b*7209 + 32829)>>16; + *rowp++ = y; + } else { + // copy into RGB format + *rowp++ = (*pixel & 0x00ff0000) >> 16; + *rowp++ = (*pixel & 0x0000ff00) >> 8; + *rowp++ = (*pixel & 0x000000ff) >> 0; + } + } + writer->writeRow(&row); + } + gfree(row); + writer->close(); + delete writer; + if (file == stdout) fflush(file); + else fclose(file); +} + +static void getCropSize(double 
page_w, double page_h, double *width, double *height) +{ + int w = crop_w; + int h = crop_h; + + if (w == 0) + w = (int)ceil(page_w); + + if (h == 0) + h = (int)ceil(page_h); + + *width = (crop_x + w > page_w ? (int)ceil(page_w - crop_x) : w); + *height = (crop_y + h > page_h ? (int)ceil(page_h - crop_y) : h); +} + +static void getOutputSize(double page_w, double page_h, double *width, double *height) +{ + + if (printing) { + if (doOrigPageSizes) { + *width = page_w; + *height = page_h; + } else { + *width = paperWidth; + *height = paperHeight; + } + } else { + getCropSize(page_w * (x_resolution / 72.0), + page_h * (y_resolution / 72.0), + width, height); + } +} + +static void getFitToPageTransform(double page_w, double page_h, + double paper_w, double paper_h, + cairo_matrix_t *m) +{ + double x_scale, y_scale, scale; + + x_scale = paper_w / page_w; + y_scale = paper_h / page_h; + if (x_scale < y_scale) + scale = x_scale; + else + scale = y_scale; + + cairo_matrix_init_identity (m); + if (scale > 1.0) { + // page is smaller than paper + if (expand) { + // expand to fit + cairo_matrix_scale (m, scale, scale); + } else if (!noCenter) { + // centre page + cairo_matrix_translate (m, (paper_w - page_w)/2, (paper_h - page_h)/2); + } else { + if (!svg) { + // move to PostScript origin + cairo_matrix_translate (m, 0, (paper_h - page_h)); + } + } + } else if (scale < 1.0) + // page is larger than paper + if (!noShrink) { + // shrink to fit + cairo_matrix_scale (m, scale, scale); + } +} + +static cairo_status_t writeStream(void *closure, const unsigned char *data, unsigned int length) +{ + FILE *file = (FILE *)closure; + + if (fwrite(data, length, 1, file) == 1) + return CAIRO_STATUS_SUCCESS; + else + return CAIRO_STATUS_WRITE_ERROR; +} + +static void beginDocument(GooString *outputFileName, double w, double h) +{ + if (printing) { + if (outputFileName->cmp("fd://0") == 0) + output_file = stdout; + else + output_file = fopen(outputFileName->getCString(), "wb"); + + if (ps || 
eps) { +#if CAIRO_HAS_PS_SURFACE + surface = cairo_ps_surface_create_for_stream(writeStream, output_file, w, h); + if (level2) + cairo_ps_surface_restrict_to_level (surface, CAIRO_PS_LEVEL_2); + if (eps) + cairo_ps_surface_set_eps (surface, 1); + if (duplex) { + cairo_ps_surface_dsc_comment(surface, "%%Requirements: duplex"); + cairo_ps_surface_dsc_begin_setup(surface); + cairo_ps_surface_dsc_comment(surface, "%%IncludeFeature: *Duplex DuplexNoTumble"); + } + cairo_ps_surface_dsc_begin_page_setup (surface); +#endif + } else if (pdf) { +#if CAIRO_HAS_PDF_SURFACE + surface = cairo_pdf_surface_create_for_stream(writeStream, output_file, w, h); +#endif + } else if (svg) { +#if CAIRO_HAS_SVG_SURFACE + surface = cairo_svg_surface_create_for_stream(writeStream, output_file, w, h); + cairo_svg_surface_restrict_to_version (surface, CAIRO_SVG_VERSION_1_2); +#endif + } + } +} + +static void beginPage(double w, double h) +{ + if (printing) { + if (ps || eps) { +#if CAIRO_HAS_PS_SURFACE + if (w > h) { + cairo_ps_surface_dsc_comment (surface, "%%PageOrientation: Landscape"); + cairo_ps_surface_set_size (surface, h, w); + } else { + cairo_ps_surface_dsc_comment (surface, "%%PageOrientation: Portrait"); + cairo_ps_surface_set_size (surface, w, h); + } +#endif + } + +#if CAIRO_HAS_PDF_SURFACE + if (pdf) + cairo_pdf_surface_set_size (surface, w, h); +#endif + + cairo_surface_set_fallback_resolution (surface, x_resolution, y_resolution); + + } else { + surface = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, ceil(w), ceil(h)); + } +} + +static void renderPage(PDFDoc *doc, CairoOutputDev *cairoOut, int pg, + double page_w, double page_h, + double output_w, double output_h) +{ + cairo_t *cr; + cairo_status_t status; + cairo_matrix_t m; + + cr = cairo_create(surface); + cairoOut->setCairo(cr); + cairoOut->setPrinting(printing); + + cairo_save(cr); + if (ps && output_w > output_h) { + // rotate 90 deg for landscape + cairo_translate (cr, 0, output_w); + cairo_matrix_init (&m, 0, -1, 1, 
0, 0, 0); + cairo_transform (cr, &m); + } + cairo_translate (cr, -crop_x, -crop_y); + if (printing) { + double cropped_w, cropped_h; + getCropSize(page_w, page_h, &cropped_w, &cropped_h); + getFitToPageTransform(cropped_w, cropped_h, output_w, output_h, &m); + cairo_transform (cr, &m); + cairo_rectangle(cr, crop_x, crop_y, cropped_w, cropped_h); + cairo_clip(cr); + } else { + cairo_scale (cr, x_resolution/72.0, y_resolution/72.0); + } + doc->displayPageSlice(cairoOut, + pg, + 72.0, 72.0, + 0, /* rotate */ + !useCropBox, /* useMediaBox */ + gFalse, /* Crop */ + printing, + -1, -1, -1, -1); + cairo_restore(cr); + cairoOut->setCairo(NULL); + + // Blend onto white page + if (!printing && !transp) { + cairo_save(cr); + cairo_set_operator(cr, CAIRO_OPERATOR_DEST_OVER); + cairo_set_source_rgb(cr, 1, 1, 1); + cairo_paint(cr); + cairo_restore(cr); + } + + status = cairo_status(cr); + if (status) + error(errInternal, -1, "cairo error: {0:s}\n", cairo_status_to_string(status)); + cairo_destroy (cr); +} + +static void endPage(GooString *imageFileName) +{ + cairo_status_t status; + + if (printing) { + cairo_surface_show_page(surface); + } else { + writePageImage(imageFileName); + cairo_surface_finish(surface); + status = cairo_surface_status(surface); + if (status) + error(errInternal, -1, "cairo error: {0:s}\n", cairo_status_to_string(status)); + cairo_surface_destroy(surface); + } + +} + +static void endDocument() +{ + cairo_status_t status; + + if (printing) { + cairo_surface_finish(surface); + status = cairo_surface_status(surface); + if (status) + error(errInternal, -1, "cairo error: {0:s}\n", cairo_status_to_string(status)); + cairo_surface_destroy(surface); + fclose(output_file); + } +} + +static GBool setPSPaperSize(char *size, int &psPaperWidth, int &psPaperHeight) { + if (!strcmp(size, "match")) { + psPaperWidth = psPaperHeight = -1; + } else if (!strcmp(size, "letter")) { + psPaperWidth = 612; + psPaperHeight = 792; + } else if (!strcmp(size, "legal")) { + 
psPaperWidth = 612; + psPaperHeight = 1008; + } else if (!strcmp(size, "A4")) { + psPaperWidth = 595; + psPaperHeight = 842; + } else if (!strcmp(size, "A3")) { + psPaperWidth = 842; + psPaperHeight = 1190; + } else { + return gFalse; + } + return gTrue; +} + +static int numberOfCharacters(unsigned int n) +{ + int charNum = 0; + while (n >= 10) + { + n = n / 10; + charNum++; + } + charNum++; + return charNum; +} + +static GooString *getImageFileName(GooString *outputFileName, int numDigits, int page) +{ + char buf[10]; + GooString *imageName = new GooString(outputFileName); + if (!singleFile) { + snprintf(buf, sizeof(buf), "-%0*d", numDigits, page); + imageName->appendf(buf); + } + if (png) + imageName->append(".png"); + else if (jpeg) + imageName->append(".jpg"); + + return imageName; +} + +// If (printing || singleFile) the output file name includes the +// extension. Otherwise it is the file name base. +static GooString *getOutputFileName(GooString *fileName, GooString *outputName) +{ + GooString *name; + char *s; + char *p; + + if (outputName) { + if (outputName->cmp("-") == 0) { + if (!printing && !singleFile) { + fprintf(stderr, "Error: stdout may only be used with the ps, eps, pdf, svg output options or if -singlefile is used.\n"); + exit(99); + } + return new GooString("fd://0"); + } + return new GooString(outputName); + } + + if (fileName->cmp("fd://0") == 0) { + fprintf(stderr, "Error: an output filename or '-' must be supplied when the PDF file is stdin.\n"); + exit(99); + } + + // be careful not to overwrite the input file when the output format is PDF + if (pdf && fileName->cmpN("http://", 7) != 0 && fileName->cmpN("https://", 8) != 0) { + fprintf(stderr, "Error: an output filename or '-' must be supplied when the output format is PDF and input PDF file is a local file.\n"); + exit(99); + } + + // strip everything up to last '/' + s = fileName->getCString(); + p = strrchr(s, '/'); + if (p) { + p++; + if (*p == 0) { + fprintf(stderr, "Error: invalid 
output filename.\n"); + exit(99); + } + name = new GooString(p); + } else { + name = new GooString(s); + } + + // remove .pdf extension + p = strrchr(name->getCString(), '.'); + if (p && strcasecmp(p, ".pdf") == 0) { + GooString *name2 = new GooString(name->getCString(), name->getLength() - 4); + delete name; + name = name2; + } + + // append new extension + if (ps) + name->append(".ps"); + else if (eps) + name->append(".eps"); + else if (pdf) + name->append(".pdf"); + else if (svg) + name->append(".svg"); + + return name; +} + +static void checkInvalidPrintOption(GBool option, const char *option_name) +{ + if (option) { + fprintf(stderr, "Error: %s may only be used with the -png or -jpeg output options.\n", option_name); + exit(99); + } +} + +static void checkInvalidImageOption(GBool option, const char *option_name) +{ + if (option) { + fprintf(stderr, "Error: %s may only be used with the -ps, -eps, -pdf, or -svg output options.\n", option_name); + exit(99); + } +} + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString *fileName = NULL; + GooString *outputName = NULL; + GooString *outputFileName = NULL; + GooString *imageFileName = NULL; + GooString *ownerPW, *userPW; + CairoOutputDev *cairoOut; + int pg, pg_num_len; + double pg_w, pg_h, tmp, output_w, output_h; + int num_outputs; + + // parse args + if (!parseArgs(argDesc, &argc, argv)) + exit(99); + + if ( resolution != 0.0 && + (x_resolution == 150.0 || + y_resolution == 150.0)) { + x_resolution = resolution; + y_resolution = resolution; + } + if (argc < 2 || argc > 3 || printVersion || printHelp) { + fprintf(stderr, "pdftocairo version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdftocairo", "<PDF-file> [<output-file>]", argDesc); + } + if (printVersion || printHelp) + exit(0); + else + exit(99); + } + + num_outputs = (png ? 1 : 0) + + (jpeg ? 1 : 0) + + (ps ? 1 : 0) + + (eps ? 1 : 0) + + (pdf ? 
1 : 0) + + (svg ? 1 : 0); + if (num_outputs == 0) { + fprintf(stderr, "Error: one of the output format options (-png, -jpeg, -ps, -eps, -pdf, -svg) must be used.\n"); + exit(99); + } + if (num_outputs > 1) { + fprintf(stderr, "Error: use only one of the output format options (-png, -jpeg, -ps, -eps, -pdf, -svg).\n"); + exit(99); + } + if (png || jpeg) + printing = gFalse; + else + printing = gTrue; + + if (printing) { + checkInvalidPrintOption(mono, "-mono"); + checkInvalidPrintOption(gray, "-gray"); + checkInvalidPrintOption(transp, "-transp"); + checkInvalidPrintOption(icc.getCString()[0], "-icc"); + checkInvalidPrintOption(singleFile, "-singlefile"); + } else { + checkInvalidImageOption(level2, "-level2"); + checkInvalidImageOption(level3, "-level3"); + checkInvalidImageOption(doOrigPageSizes, "-origpagesizes"); + checkInvalidImageOption(paperSize[0], "-paper"); + checkInvalidImageOption(paperWidth > 0, "-paperw"); + checkInvalidImageOption(paperHeight > 0, "-paperh"); + checkInvalidImageOption(noCrop, "-nocrop"); + checkInvalidImageOption(expand, "-expand"); + checkInvalidImageOption(noShrink, "-noshrink"); + checkInvalidImageOption(noCenter, "-nocenter"); + checkInvalidImageOption(duplex, "-duplex"); + } + + if (icc.getCString()[0] && !png) { + fprintf(stderr, "Error: -icc may only be used with png output.\n"); + exit(99); + } + + if (transp && !png) { + fprintf(stderr, "Error: -transp may only be used with png output.\n"); + exit(99); + } + + if (mono && gray) { + fprintf(stderr, "Error: -mono and -gray may not be used together.\n"); + exit(99); + } + + if (mono && !png) { + fprintf(stderr, "Error: -mono may only be used with png output.\n"); + exit(99); + } + + if (level2 && level3) { + fprintf(stderr, "Error: use only one of the 'level' options.\n"); + exit(99); + } + if (!level2 && !level3) + level3 = gTrue; + + if (eps && (doOrigPageSizes || paperSize[0] || paperWidth > 0 || paperHeight > 0)) { + fprintf(stderr, "Error: page size options may not be used 
with eps output.\n"); + exit(99); + } + + if (paperSize[0]) { + if (!setPSPaperSize(paperSize, paperWidth, paperHeight)) { + fprintf(stderr, "Invalid paper size\n"); + exit(99); + } + } + + globalParams = new GlobalParams(); + if (quiet) { + globalParams->setErrQuiet(quiet); + } + + // open PDF file + if (ownerPassword[0]) { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0]) { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + + fileName = new GooString(argv[1]); + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); + } + if (argc == 3) + outputName = new GooString(argv[2]); + else + outputName = NULL; + + outputFileName = getOutputFileName(fileName, outputName); + +#if USE_CMS + icc_data = NULL; + if (icc.getCString()[0]) { + FILE *file = fopen(icc.getCString(), "rb"); + if (!file) { + fprintf(stderr, "Error: unable to open icc profile %s\n", icc.getCString()); + exit(4); + } + fseek (file, 0, SEEK_END); + icc_data_size = ftell(file); + fseek (file, 0, SEEK_SET); + icc_data = (unsigned char*)gmalloc(icc_data_size); + if (fread(icc_data, icc_data_size, 1, file) != 1) { + fprintf(stderr, "Error: unable to read icc profile %s\n", icc.getCString()); + exit(4); + } + fclose(file); + profile = cmsOpenProfileFromMem(icc_data, icc_data_size); + if (!profile) { + fprintf(stderr, "Error: lcms error opening profile\n"); + exit(4); + } + } else { + profile = cmsCreate_sRGBProfile(); + } + GfxColorSpace::setDisplayProfile(profile); +#endif + + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); + if (!doc->isOk()) { + fprintf(stderr, "Error opening PDF file.\n"); + exit(1); + } + +#ifdef ENFORCE_PERMISSIONS + // check for print permission + if (printing && !doc->okToPrint()) { + fprintf(stderr, "Printing this document is not allowed.\n"); + exit(3); + } +#endif + + // get page range + if (firstPage < 1) + firstPage = 1; + if (singleFile && lastPage < 1) + 
lastPage = firstPage; + if (lastPage < 1 || lastPage > doc->getNumPages()) + lastPage = doc->getNumPages(); + + if (eps && firstPage != lastPage) { + fprintf(stderr, "EPS files can only contain one page.\n"); + exit(99); + } + + if (singleFile && firstPage < lastPage) { + if (!quiet) { + fprintf(stderr, + "Warning: Single file will write only the first of the %d pages.\n", + lastPage + 1 - firstPage); + } + lastPage = firstPage; + } + + // Make sure firstPage is always used so that beginDocument() is called + if ((printOnlyEven && firstPage % 2 == 0) || (printOnlyOdd && firstPage % 2 == 1)) + firstPage++; + + cairoOut = new CairoOutputDev(); + cairoOut->startDoc(doc); + if (sz != 0) + crop_w = crop_h = sz; + pg_num_len = numberOfCharacters(doc->getNumPages()); + for (pg = firstPage; pg <= lastPage; ++pg) { + if (printOnlyEven && pg % 2 == 0) continue; + if (printOnlyOdd && pg % 2 == 1) continue; + if (useCropBox) { + pg_w = doc->getPageCropWidth(pg); + pg_h = doc->getPageCropHeight(pg); + } else { + pg_w = doc->getPageMediaWidth(pg); + pg_h = doc->getPageMediaHeight(pg); + } + + if (printing && pg == firstPage) { + if (paperWidth < 0 || paperHeight < 0) { + paperWidth = (int)ceil(pg_w); + paperHeight = (int)ceil(pg_h); + } + } + + if (scaleTo != 0) { + resolution = (72.0 * scaleTo) / (pg_w > pg_h ? 
pg_w : pg_h); + x_resolution = y_resolution = resolution; + } else { + if (x_scaleTo > 0) { + x_resolution = (72.0 * x_scaleTo) / pg_w; + if (y_scaleTo == -1) + y_resolution = x_resolution; + } + if (y_scaleTo > 0) { + y_resolution = (72.0 * y_scaleTo) / pg_h; + if (x_scaleTo == -1) + x_resolution = y_resolution; + } + } + if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) { + tmp = pg_w; + pg_w = pg_h; + pg_h = tmp; + } + if (imageFileName) { + delete imageFileName; + imageFileName = NULL; + } + if (!printing) + imageFileName = getImageFileName(outputFileName, pg_num_len, pg); + getOutputSize(pg_w, pg_h, &output_w, &output_h); + + if (pg == firstPage) + beginDocument(outputFileName, output_w, output_h); + beginPage(output_w, output_h); + renderPage(doc, cairoOut, pg, pg_w, pg_h, output_w, output_h); + endPage(imageFileName); + } + endDocument(); + + // clean up + delete cairoOut; + delete doc; + delete globalParams; + if (fileName) + delete fileName; + if (outputName) + delete outputName; + if (outputFileName) + delete outputFileName; + if (imageFileName) + delete imageFileName; + if (ownerPW) + delete ownerPW; + if (userPW) + delete userPW; // free userPW itself (ownerPW is released above) + +#if USE_CMS + cmsCloseProfile(profile); + if (icc_data) + gfree(icc_data); +#endif + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return 0; +} diff --git a/utils/pdftohtml.1 b/utils/pdftohtml.1 new file mode 100644 index 00000000..44137e4d --- /dev/null +++ b/utils/pdftohtml.1 @@ -0,0 +1,108 @@ +.TH PDFTOHTML 1 +.\" NAME should be all caps, SECTION should be 1-8, maybe w/ subsection +.\" other parms are allowed: see man(7), man(1) +.SH NAME +pdftohtml \- program to convert PDF files into HTML, XML and PNG images +.SH SYNOPSIS +.B pdftohtml +.I "[options] <PDF-file> [<HTML-file> <XML-file>]" +.SH "DESCRIPTION" +This manual page documents briefly the +.BR pdftohtml +command. 
+This manual page was written for the Debian GNU/Linux distribution +because the original program does not have a manual page. +.PP +.B pdftohtml +is a program that converts PDF documents into HTML. It generates its output in +the current working directory. +.SH OPTIONS +A summary of options is included below. +.TP +.B \-h, \-help +Show summary of options. +.TP +.B \-f <int> +first page to print +.TP +.B \-l <int> +last page to print +.TP +.B \-q +do not print any messages or errors +.TP +.B \-v +print copyright and version info +.TP +.B \-p +exchange .pdf links with .html +.TP +.B \-c +generate complex output +.TP +.B \-s +generate single HTML that includes all pages +.TP +.B \-i +ignore images +.TP +.B \-noframes +generate no frames. Not supported in complex output mode. +.TP +.B \-stdout +use standard output +.TP +.B \-zoom <fp> +zoom the PDF document (default 1.5) +.TP +.B \-xml +output for XML post-processing +.TP +.B \-enc <string> +output text encoding name +.TP +.B \-opw <string> +owner password (for encrypted files) +.TP +.B \-upw <string> +user password (for encrypted files) +.TP +.B \-hidden +force hidden text extraction +.TP +.B \-dev +output device name for Ghostscript (png16m, jpeg etc). +Unless this option is specified, Splash will be used +.TP +.B \-fmt +image file format for Splash output (png or jpg). +If complex is selected, but neither \-fmt nor \-dev is specified, +\-fmt png will be assumed +.TP +.B \-nomerge +do not merge paragraphs +.TP +.B \-nodrm +override document DRM settings +.TP +.B \-wbt <fp> +adjust the word break threshold percent. Default is 10. +Word break occurs when distance between two adjacent characters is +greater than this percent of character height. + +.SH AUTHOR + +Pdftohtml was developed by Gueorgui Ovtcharov and Rainer Dorsch. It is +based on, and benefits a lot from, Derek Noonburg's xpdf package. 
+ +This manual page was written by Søren Boll Overgaard <boll@debian.org>, +for the Debian GNU/Linux system (but may be used by others). +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftoppm (1), +.BR pdftops (1), +.BR pdftotext (1) diff --git a/utils/pdftohtml.cc b/utils/pdftohtml.cc new file mode 100644 index 00000000..d6475b01 --- /dev/null +++ b/utils/pdftohtml.cc @@ -0,0 +1,603 @@ +//======================================================================== +// +// pdftohtml.cc +// +// +// Copyright 1999-2000 G. Ovtcharov +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2007-2008, 2010, 2012 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2010 Mike Slegeir <tehpola@yahoo.com> +// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> +// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey (leenac@cdacmumbai.in) and Onkar Potdar (onkar@cdacmumbai.in) +// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk> +// Copyright (C) 2012 Igor Slepchin <igor.redhat@gmail.com> +// Copyright (C) 2012 Ihar Filipau <thephilips@gmail.com> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#ifdef HAVE_DIRENT_H +#include <dirent.h> +#endif +#include <time.h> +#include "parseargs.h" +#include "goo/GooString.h" 
+#include "goo/gmem.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "Outline.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "HtmlOutputDev.h" +#ifdef HAVE_SPLASH +#include "SplashOutputDev.h" +#include "splash/SplashBitmap.h" +#endif +#include "PSOutputDev.h" +#include "GlobalParams.h" +#include "PDFDocEncoding.h" +#include "Error.h" +#include "DateInfo.h" +#include "goo/gfile.h" + +#ifndef GHOSTSCRIPT +# define GHOSTSCRIPT "gs" +#endif + +static int firstPage = 1; +static int lastPage = 0; +static GBool rawOrder = gTrue; +GBool printCommands = gTrue; +static GBool printHelp = gFalse; +GBool printHtml = gFalse; +GBool complexMode=gFalse; +GBool singleHtml=gFalse; // singleHtml +GBool ignore=gFalse; +static GBool useSplash=gTrue; +static char extension[5]="png"; +static double scale=1.5; +GBool noframes=gFalse; +GBool stout=gFalse; +GBool xml=gFalse; +static GBool errQuiet=gFalse; +static GBool noDrm=gFalse; +double wordBreakThreshold=10; // 10%, below converted into a coefficient - 0.1 + +GBool showHidden = gFalse; +GBool noMerge = gFalse; +static char ownerPassword[33] = ""; +static char userPassword[33] = ""; +static char gsDevice[33] = "none"; +static GBool printVersion = gFalse; + +static GooString* getInfoString(Dict *infoDict, const char *key); +static GooString* getInfoDate(Dict *infoDict, const char *key); + +static char textEncName[128] = ""; + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to convert"}, + {"-l", argInt, &lastPage, 0, + "last page to convert"}, + /*{"-raw", argFlag, &rawOrder, 0, + "keep strings in content stream order"},*/ + {"-q", argFlag, &errQuiet, 0, + "don't print any messages or errors"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"-p", argFlag, &printHtml, 0, + "exchange .pdf links by 
.html"}, + {"-c", argFlag, &complexMode, 0, + "generate complex document"}, + {"-s", argFlag, &singleHtml, 0, + "generate single document that includes all pages"}, + {"-i", argFlag, &ignore, 0, + "ignore images"}, + {"-noframes", argFlag, &noframes, 0, + "generate no frames"}, + {"-stdout" ,argFlag, &stout, 0, + "use standard output"}, + {"-zoom", argFP, &scale, 0, + "zoom the pdf document (default 1.5)"}, + {"-xml", argFlag, &xml, 0, + "output for XML post-processing"}, + {"-hidden", argFlag, &showHidden, 0, + "output hidden text"}, + {"-nomerge", argFlag, &noMerge, 0, + "do not merge paragraphs"}, + {"-enc", argString, textEncName, sizeof(textEncName), + "output text encoding name"}, + {"-dev", argString, gsDevice, sizeof(gsDevice), + "output device name for Ghostscript (png16m, jpeg etc)"}, + {"-fmt", argString, extension, sizeof(extension), + "image file format for Splash output (png or jpg)"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + {"-nodrm", argFlag, &noDrm, 0, + "override document DRM settings"}, + {"-wbt", argFP, &wordBreakThreshold, 0, + "word break threshold (default 10 percent)"}, + {NULL} +}; + +#ifdef HAVE_SPLASH +class SplashOutputDevNoText : public SplashOutputDev { +public: + SplashOutputDevNoText(SplashColorMode colorModeA, int bitmapRowPadA, + GBool reverseVideoA, SplashColorPtr paperColorA, + GBool bitmapTopDownA = gTrue, + GBool allowAntialiasA = gTrue) : SplashOutputDev(colorModeA, + bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA, + allowAntialiasA) { } + virtual ~SplashOutputDevNoText() { } + + void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen) { } + GBool beginType3Char(GfxState *state, 
double x, double y, + double dx, double dy, + CharCode code, Unicode *u, int uLen) { return false; } + void endType3Char(GfxState *state) { } + void beginTextObject(GfxState *state) { } + GBool deviceHasTextClip(GfxState *state) { return false; } + void endTextObject(GfxState *state) { } + GBool interpretType3Chars() { return gFalse; } +}; +#endif + +int main(int argc, char *argv[]) { + PDFDoc *doc = NULL; + GooString *fileName = NULL; + GooString *docTitle = NULL; + GooString *author = NULL, *keywords = NULL, *subject = NULL, *date = NULL; + GooString *htmlFileName = NULL; + GooString *psFileName = NULL; + HtmlOutputDev *htmlOut = NULL; +#ifdef HAVE_SPLASH + SplashOutputDev *splashOut = NULL; +#endif + PSOutputDev *psOut = NULL; + GBool doOutline; + GBool ok; + char *p; + GooString *ownerPW, *userPW; + Object info; + const char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff", "pbm", NULL}; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (!ok || argc < 2 || argc > 3 || printHelp || printVersion) { + fprintf(stderr, "pdftohtml version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", "Copyright 1999-2003 Gueorgui Ovtcharov and Rainer Dorsch"); + fprintf(stderr, "%s\n\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdftohtml", "<PDF-file> [<html-file> <xml-file>]", argDesc); + } + exit(1); + } + + // init error file + //errorInit(); + + // read config file + globalParams = new GlobalParams(); + + if (errQuiet) { + globalParams->setErrQuiet(errQuiet); + printCommands = gFalse; // I'm not 100% what is the differecne between them + } + + if (textEncName[0]) { + globalParams->setTextEncoding(textEncName); + if( !globalParams->getTextEncoding() ) { + goto error; + } + } + + // convert from user-friendly percents into a coefficient + wordBreakThreshold /= 100.0; + + // open PDF file + if (ownerPassword[0]) { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if 
(userPassword[0]) { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + + fileName = new GooString(argv[1]); + + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); + } + + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); + + if (userPW) { + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + goto error; + } + + // check for copy permission + if (!doc->okToCopy()) { + if (!noDrm) { + error(errNotAllowed, -1, "Copying of text from this document is not allowed."); + goto error; + } + fprintf(stderr, "Document has copy-protection bit set.\n"); + } + + // construct text file name + if (argc == 3) { + GooString* tmp = new GooString(argv[2]); + if (!xml) { + if (tmp->getLength() >= 5) { + p = tmp->getCString() + tmp->getLength() - 5; + if (!strcmp(p, ".html") || !strcmp(p, ".HTML")) { + htmlFileName = new GooString(tmp->getCString(), tmp->getLength() - 5); + } + } + } else { + if (tmp->getLength() >= 4) { + p = tmp->getCString() + tmp->getLength() - 4; + if (!strcmp(p, ".xml") || !strcmp(p, ".XML")) { + htmlFileName = new GooString(tmp->getCString(), tmp->getLength() - 4); + } + } + } + if (!htmlFileName) { + htmlFileName =new GooString(tmp); + } + delete tmp; + } else if (fileName->cmp("fd://0") == 0) { + error(errCommandLine, -1, "You have to provide an output filename when reading form stdin."); + goto error; + } else { + p = fileName->getCString() + fileName->getLength() - 4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) + htmlFileName = new GooString(fileName->getCString(), + fileName->getLength() - 4); + else + htmlFileName = fileName->copy(); + // htmlFileName->append(".html"); + } + + if (scale>3.0) scale=3.0; + if (scale<0.5) scale=0.5; + + if (complexMode || singleHtml) { + //noframes=gFalse; + stout=gFalse; + } + + if (stout) { + noframes=gTrue; + complexMode=gFalse; + singleHtml=gFalse; + } + + if (xml) + { + complexMode = gTrue; + singleHtml = gFalse; + 
noframes = gTrue; + noMerge = gTrue; + } + + // get page range + if (firstPage < 1) + firstPage = 1; + if (lastPage < 1 || lastPage > doc->getNumPages()) + lastPage = doc->getNumPages(); + + doc->getDocInfo(&info); + if (info.isDict()) { + docTitle = getInfoString(info.getDict(), "Title"); + author = getInfoString(info.getDict(), "Author"); + keywords = getInfoString(info.getDict(), "Keywords"); + subject = getInfoString(info.getDict(), "Subject"); + date = getInfoDate(info.getDict(), "ModDate"); + if( !date ) + date = getInfoDate(info.getDict(), "CreationDate"); + } + info.free(); + if( !docTitle ) docTitle = new GooString(htmlFileName); + + if( strcmp("none", gsDevice) ) { + useSplash = gFalse; + /* determine extensions of output background images */ + int i; + for(i = 0; extsList[i]; i++) + { + if( strstr(gsDevice, extsList[i]) != (char *) NULL ) + { + strncpy(extension, extsList[i], sizeof(extension)); + break; + } + } + } + +#ifndef HAVE_SPLASH + if( useSplash ) { + fprintf(stderr, "You are trying to use the -fmt option but your pdftohtml was built without support for it. Please use the -dev option\n"); + delete docTitle; + delete author; + delete keywords; + delete subject; + delete date; + delete htmlFileName; + delete globalParams; + delete fileName; + delete doc; + return -1; + } +#endif + + if (!singleHtml) + rawOrder = complexMode; // todo: figure out what exactly rawOrder do :) + else + rawOrder = singleHtml; + +#ifdef DISABLE_OUTLINE + doOutline = gFalse; +#else + doOutline = doc->getOutline()->getItems() != NULL; +#endif + // write text file + htmlOut = new HtmlOutputDev(doc->getCatalog(), htmlFileName->getCString(), + docTitle->getCString(), + author ? author->getCString() : NULL, + keywords ? keywords->getCString() : NULL, + subject ? subject->getCString() : NULL, + date ? 
date->getCString() : NULL, + extension, + rawOrder, + firstPage, + doOutline); + delete docTitle; + if( author ) + { + delete author; + } + if( keywords ) + { + delete keywords; + } + if( subject ) + { + delete subject; + } + if( date ) + { + delete date; + } + + if (htmlOut->isOk()) + { + doc->displayPages(htmlOut, firstPage, lastPage, 72 * scale, 72 * scale, 0, + gTrue, gFalse, gFalse); + htmlOut->dumpDocOutline(doc); + } + + if ((complexMode || singleHtml) && !xml && !ignore) { + if(useSplash) { +#ifdef HAVE_SPLASH + GooString *imgFileName = NULL; + // White paper color + SplashColor color; + color[0] = color[1] = color[2] = 255; + // If the user specified "jpg" use JPEG, otherwise PNG + SplashImageFileFormat format = strcmp(extension, "jpg") ? + splashFormatPng : splashFormatJpeg; + + splashOut = new SplashOutputDevNoText(splashModeRGB8, 4, gFalse, color); + splashOut->startDoc(doc); + + for (int pg = firstPage; pg <= lastPage; ++pg) { + doc->displayPage(splashOut, pg, + 72 * scale, 72 * scale, + 0, gTrue, gFalse, gFalse); + SplashBitmap *bitmap = splashOut->getBitmap(); + + imgFileName = GooString::format("{0:s}{1:03d}.{2:s}", + htmlFileName->getCString(), pg, extension); + + bitmap->writeImgFile(format, imgFileName->getCString(), + 72 * scale, 72 * scale); + + delete imgFileName; + } + + delete splashOut; +#endif + } else { + int h=xoutRound(htmlOut->getPageHeight()/scale); + int w=xoutRound(htmlOut->getPageWidth()/scale); + //int h=xoutRound(doc->getPageHeight(1)/scale); + //int w=xoutRound(doc->getPageWidth(1)/scale); + + psFileName = new GooString(htmlFileName->getCString()); + psFileName->append(".ps"); + + psOut = new PSOutputDev(psFileName->getCString(), doc, + NULL, firstPage, lastPage, psModePS, w, h); + psOut->setDisplayText(gFalse); + doc->displayPages(psOut, firstPage, lastPage, 72, 72, 0, + gTrue, gFalse, gFalse); + delete psOut; + + /*sprintf(buf, "%s -sDEVICE=png16m -dBATCH -dNOPROMPT -dNOPAUSE -r%d -sOutputFile=%s%%03d.png -g%dx%d -q %s", 
GHOSTSCRIPT, resolution, htmlFileName->getCString(), w, h, + psFileName->getCString());*/ + + GooString *gsCmd = new GooString(GHOSTSCRIPT); + GooString *tw, *th, *sc; + gsCmd->append(" -sDEVICE="); + gsCmd->append(gsDevice); + gsCmd->append(" -dBATCH -dNOPROMPT -dNOPAUSE -r"); + sc = GooString::fromInt(static_cast<int>(72*scale)); + gsCmd->append(sc); + gsCmd->append(" -sOutputFile="); + gsCmd->append("\""); + gsCmd->append(htmlFileName); + gsCmd->append("%03d."); + gsCmd->append(extension); + gsCmd->append("\" -g"); + tw = GooString::fromInt(static_cast<int>(scale*w)); + gsCmd->append(tw); + gsCmd->append("x"); + th = GooString::fromInt(static_cast<int>(scale*h)); + gsCmd->append(th); + gsCmd->append(" -q \""); + gsCmd->append(psFileName); + gsCmd->append("\""); + // printf("running: %s\n", gsCmd->getCString()); + if( !executeCommand(gsCmd->getCString()) && !errQuiet) { + error(errIO, -1, "Failed to launch Ghostscript!\n"); + } + unlink(psFileName->getCString()); + delete tw; + delete th; + delete sc; + delete gsCmd; + delete psFileName; + } + } + + delete htmlOut; + + // clean up + error: + if(doc) delete doc; + delete fileName; + if(globalParams) delete globalParams; + + if(htmlFileName) delete htmlFileName; + HtmlFont::clear(); + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return 0; +} + +static GooString* getInfoString(Dict *infoDict, const char *key) { + Object obj; + // Raw value as read from PDF (may be in pdfDocEncoding or UCS2) + GooString *rawString; + // Value converted to unicode + Unicode *unicodeString; + int unicodeLength; + // Value HTML escaped and converted to desired encoding + GooString *encodedString = NULL; + // Is rawString UCS2 (as opposed to pdfDocEncoding) + GBool isUnicode; + + if (infoDict->lookup(key, &obj)->isString()) { + rawString = obj.getString(); + + // Convert rawString to unicode + if (rawString->hasUnicodeMarker()) { + isUnicode = gTrue; + unicodeLength = (obj.getString()->getLength() - 
2) / 2; + } else { + isUnicode = gFalse; + unicodeLength = obj.getString()->getLength(); + } + unicodeString = new Unicode[unicodeLength]; + + for (int i=0; i<unicodeLength; i++) { + if (isUnicode) { + unicodeString[i] = ((rawString->getChar((i+1)*2) & 0xff) << 8) | + (rawString->getChar(((i+1)*2)+1) & 0xff); + } else { + unicodeString[i] = pdfDocEncoding[rawString->getChar(i) & 0xff]; + } + } + + // HTML escape and encode unicode + encodedString = HtmlFont::HtmlFilter(unicodeString, unicodeLength); + delete[] unicodeString; + } + + obj.free(); + return encodedString; +} + +static GooString* getInfoDate(Dict *infoDict, const char *key) { + Object obj; + char *s; + int year, mon, day, hour, min, sec, tz_hour, tz_minute; + char tz; + struct tm tmStruct; + GooString *result = NULL; + char buf[256]; + + if (infoDict->lookup(key, &obj)->isString()) { + s = obj.getString()->getCString(); + // TODO do something with the timezone info + if ( parseDateString( s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute ) ) { + tmStruct.tm_year = year - 1900; + tmStruct.tm_mon = mon - 1; + tmStruct.tm_mday = day; + tmStruct.tm_hour = hour; + tmStruct.tm_min = min; + tmStruct.tm_sec = sec; + tmStruct.tm_wday = -1; + tmStruct.tm_yday = -1; + tmStruct.tm_isdst = -1; + mktime(&tmStruct); // compute the tm_wday and tm_yday fields + if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S+00:00", &tmStruct)) { + result = new GooString(buf); + } else { + result = new GooString(s); + } + } else { + result = new GooString(s); + } + } + obj.free(); + return result; +} + diff --git a/utils/pdftoppm.1 b/utils/pdftoppm.1 new file mode 100644 index 00000000..2321d6d0 --- /dev/null +++ b/utils/pdftoppm.1 @@ -0,0 +1,158 @@ +.\" Copyright 2005-2011 Glyph & Cog, LLC +.TH pdftoppm 1 "15 August 2011" +.SH NAME +pdftoppm \- Portable Document Format (PDF) to Portable Pixmap (PPM) +converter (version 3.03) +.SH SYNOPSIS +.B pdftoppm +[options] +.I PDF-file PPM-root +.SH DESCRIPTION +.B Pdftoppm 
+converts Portable Document Format (PDF) files to color image files in +Portable Pixmap (PPM) format, grayscale image files in Portable +Graymap (PGM) format, or monochrome image files in Portable Bitmap +(PBM) format. +.PP +Pdftoppm reads the PDF file, +.IR PDF-file , +and writes one PPM file for each page, +.IR PPM-root - number .ppm, +where +.I number +is the page number. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.B \-o +Generates only the odd numbered pages. +.TP +.B \-e +Generates only the even numbered pages. +.TP +.BI \-singlefile +Writes only the first page and does not add digits. +.TP +.BI \-r " number" +Specifies the X and Y resolution, in DPI. The default is 150 DPI. +.TP +.BI \-rx " number" +Specifies the X resolution, in DPI. The default is 150 DPI. +.TP +.BI \-ry " number" +Specifies the Y resolution, in DPI. The default is 150 DPI. +.TP +.BI \-scale-to " number" +Scales the long side of each page (width for landscape pages, height +for portrait pages) to fit in scale-to pixels. The size of the short +side will be determined by the aspect ratio of the page. +.TP +.BI \-scale-to-x " number" +Scales each page horizontally to fit in scale-to-x pixels. If +scale-to-y is set to -1, the vertical size will be determined by the +aspect ratio of the page. +.TP +.BI \-scale-to-y " number" +Scales each page vertically to fit in scale-to-y pixels. If scale-to-x +is set to -1, the horizontal size will be determined by the aspect ratio +of the page. 
+.TP +.BI \-x " number" +Specifies the x-coordinate of the crop area top left corner +.TP +.BI \-y " number" +Specifies the y-coordinate of the crop area top left corner +.TP +.BI \-W " number" +Specifies the width of crop area in pixels (default is 0) +.TP +.BI \-H " number" +Specifies the height of crop area in pixels (default is 0) +.TP +.BI \-sz " number" +Specifies the size of crop square in pixels (sets W and H) +.TP +.B \-cropbox +Uses the crop box rather than media box when generating the files +.TP +.B \-mono +Generate a monochrome PBM file (instead of a color PPM file). +.TP +.B \-gray +Generate a grayscale PGM file (instead of a color PPM file). +.TP +.B \-png +Generates a PNG file instead of a PPM file. +.TP +.B \-jpeg +Generates a JPEG file instead of a PPM file. +.TP +.B \-tiff +Generates a TIFF file instead of a PPM file. +.TP +.BI \-tiffcompression " none | packbits | jpeg | lzw | deflate" +Specifies the TIFF compression type. This defaults to "none". +.TP +.BI \-freetype " yes | no" +Enable or disable FreeType (a TrueType / Type 1 font rasterizer). +This defaults to "yes". +.TP +.BI \-aa " yes | no" +Enable or disable font anti-aliasing. This defaults to "yes". +.TP +.BI \-aaVector " yes | no" +Enable or disable vector anti-aliasing. This defaults to "yes". +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftoppm software and documentation are copyright 1996-2011 Glyph +& Cog, LLC. 
+.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftops (1), +.BR pdftotext (1) diff --git a/utils/pdftoppm.cc b/utils/pdftoppm.cc new file mode 100644 index 00000000..04a0dfba --- /dev/null +++ b/utils/pdftoppm.cc @@ -0,0 +1,440 @@ +//======================================================================== +// +// pdftoppm.cc +// +// Copyright 2003 Glyph & Cog, LLC +// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2007 Ilmari Heikkinen <ilmari.heikkinen@gmail.com> +// Copyright (C) 2008 Richard Airlie <richard.airlie@maglabs.net> +// Copyright (C) 2009 Michael K. Johnson <a1237@danlj.org> +// Copyright (C) 2009 Shen Liang <shenzhuxi@gmail.com> +// Copyright (C) 2009 Stefan Thomas <thomas@eload24.com> +// Copyright (C) 2009-2011 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2010, 2012 Adrian Johnson <ajohnson@redneon.com> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2010 Jonathan Liu <net147@gmail.com> +// Copyright (C) 2010 William Bader <williambader@hotmail.com> +// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#ifdef _WIN32 +#include <fcntl.h> // for O_BINARY +#include <io.h> // for setmode +#endif +#include <stdio.h> +#include <math.h> +#include "parseargs.h" +#include "goo/gmem.h" +#include "goo/GooString.h" +#include "GlobalParams.h" 
+#include "Object.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "splash/SplashBitmap.h" +#include "splash/Splash.h" +#include "SplashOutputDev.h" + +static int firstPage = 1; +static int lastPage = 0; +static GBool printOnlyOdd = gFalse; +static GBool printOnlyEven = gFalse; +static GBool singleFile = gFalse; +static double resolution = 0.0; +static double x_resolution = 150.0; +static double y_resolution = 150.0; +static int scaleTo = 0; +static int x_scaleTo = 0; +static int y_scaleTo = 0; +static int x = 0; +static int y = 0; +static int w = 0; +static int h = 0; +static int sz = 0; +static GBool useCropBox = gFalse; +static GBool mono = gFalse; +static GBool gray = gFalse; +static GBool png = gFalse; +static GBool jpeg = gFalse; +static GBool jpegcmyk = gFalse; +static GBool tiff = gFalse; +#if SPLASH_CMYK +static GBool overprint = gFalse; +#endif +static char enableFreeTypeStr[16] = ""; +static char antialiasStr[16] = ""; +static char vectorAntialiasStr[16] = ""; +static char ownerPassword[33] = ""; +static char userPassword[33] = ""; +static char TiffCompressionStr[16] = ""; +static GBool quiet = gFalse; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to print"}, + {"-l", argInt, &lastPage, 0, + "last page to print"}, + {"-o", argFlag, &printOnlyOdd, 0, + "print only odd pages"}, + {"-e", argFlag, &printOnlyEven, 0, + "print only even pages"}, + {"-singlefile", argFlag, &singleFile, 0, + "write only the first page and do not add digits"}, + + {"-r", argFP, &resolution, 0, + "resolution, in DPI (default is 150)"}, + {"-rx", argFP, &x_resolution, 0, + "X resolution, in DPI (default is 150)"}, + {"-ry", argFP, &y_resolution, 0, + "Y resolution, in DPI (default is 150)"}, + {"-scale-to", argInt, &scaleTo, 0, + "scales each page to fit within scale-to*scale-to pixel box"}, + {"-scale-to-x", argInt, &x_scaleTo, 0, + "scales each page 
horizontally to fit in scale-to-x pixels"}, + {"-scale-to-y", argInt, &y_scaleTo, 0, + "scales each page vertically to fit in scale-to-y pixels"}, + + {"-x", argInt, &x, 0, + "x-coordinate of the crop area top left corner"}, + {"-y", argInt, &y, 0, + "y-coordinate of the crop area top left corner"}, + {"-W", argInt, &w, 0, + "width of crop area in pixels (default is 0)"}, + {"-H", argInt, &h, 0, + "height of crop area in pixels (default is 0)"}, + {"-sz", argInt, &sz, 0, + "size of crop square in pixels (sets W and H)"}, + {"-cropbox",argFlag, &useCropBox, 0, + "use the crop box rather than media box"}, + + {"-mono", argFlag, &mono, 0, + "generate a monochrome PBM file"}, + {"-gray", argFlag, &gray, 0, + "generate a grayscale PGM file"}, +#if ENABLE_LIBPNG + {"-png", argFlag, &png, 0, + "generate a PNG file"}, +#endif +#if ENABLE_LIBJPEG + {"-jpeg", argFlag, &jpeg, 0, + "generate a JPEG file"}, +#if SPLASH_CMYK + {"-jpegcmyk",argFlag, &jpegcmyk, 0, + "generate a CMYK JPEG file"}, +#endif +#endif +#if SPLASH_CMYK + {"-overprint",argFlag, &overprint, 0, + "enable overprint"}, +#endif +#if ENABLE_LIBTIFF + {"-tiff", argFlag, &tiff, 0, + "generate a TIFF file"}, + {"-tiffcompression", argString, TiffCompressionStr, sizeof(TiffCompressionStr), + "set TIFF compression: none, packbits, jpeg, lzw, deflate"}, +#endif +#if HAVE_FREETYPE_FREETYPE_H | HAVE_FREETYPE_H + {"-freetype", argString, enableFreeTypeStr, sizeof(enableFreeTypeStr), + "enable FreeType font rasterizer: yes, no"}, +#endif + + {"-aa", argString, antialiasStr, sizeof(antialiasStr), + "enable font anti-aliasing: yes, no"}, + {"-aaVector", argString, vectorAntialiasStr, sizeof(vectorAntialiasStr), + "enable vector anti-aliasing: yes, no"}, + + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + + {"-q", argFlag, &quiet, 0, + "don't print any messages or 
errors"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +static void savePageSlice(PDFDoc *doc, + SplashOutputDev *splashOut, + int pg, int x, int y, int w, int h, + double pg_w, double pg_h, + char *ppmFile) { + if (w == 0) w = (int)ceil(pg_w); + if (h == 0) h = (int)ceil(pg_h); + w = (x+w > pg_w ? (int)ceil(pg_w-x) : w); + h = (y+h > pg_h ? (int)ceil(pg_h-y) : h); + doc->displayPageSlice(splashOut, + pg, x_resolution, y_resolution, + 0, + !useCropBox, gFalse, gFalse, + x, y, w, h + ); + + SplashBitmap *bitmap = splashOut->getBitmap(); + + if (ppmFile != NULL) { + if (png) { + bitmap->writeImgFile(splashFormatPng, ppmFile, x_resolution, y_resolution); + } else if (jpeg) { + bitmap->writeImgFile(splashFormatJpeg, ppmFile, x_resolution, y_resolution); + } else if (jpegcmyk) { + bitmap->writeImgFile(splashFormatJpegCMYK, ppmFile, x_resolution, y_resolution); + } else if (tiff) { + bitmap->writeImgFile(splashFormatTiff, ppmFile, x_resolution, y_resolution, TiffCompressionStr); + } else { + bitmap->writePNMFile(ppmFile); + } + } else { +#ifdef _WIN32 + setmode(fileno(stdout), O_BINARY); +#endif + + if (png) { + bitmap->writeImgFile(splashFormatPng, stdout, x_resolution, y_resolution); + } else if (jpeg) { + bitmap->writeImgFile(splashFormatJpeg, stdout, x_resolution, y_resolution); + } else if (tiff) { + bitmap->writeImgFile(splashFormatTiff, stdout, x_resolution, y_resolution, TiffCompressionStr); + } else { + bitmap->writePNMFile(stdout); + } + } +} + +static int numberOfCharacters(unsigned int n) +{ + int charNum = 0; + while (n >= 10) + { + n = n / 10; + charNum++; + } + charNum++; + return charNum; +} + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString 
*fileName = NULL; + char *ppmRoot = NULL; + char *ppmFile; + GooString *ownerPW, *userPW; + SplashColor paperColor; + SplashOutputDev *splashOut; + GBool ok; + int exitCode; + int pg, pg_num_len; + double pg_w, pg_h, tmp; + + exitCode = 99; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (mono && gray) { + ok = gFalse; + } + if ( resolution != 0.0 && + (x_resolution == 150.0 || + y_resolution == 150.0)) { + x_resolution = resolution; + y_resolution = resolution; + } + if (!ok || argc > 3 || printVersion || printHelp) { + fprintf(stderr, "pdftoppm version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdftoppm", "[PDF-file [PPM-file-prefix]]", argDesc); + } + if (printVersion || printHelp) + exitCode = 0; + goto err0; + } + if (argc > 1) fileName = new GooString(argv[1]); + if (argc == 3) ppmRoot = argv[2]; + + // read config file + globalParams = new GlobalParams(); + if (enableFreeTypeStr[0]) { + if (!globalParams->setEnableFreeType(enableFreeTypeStr)) { + fprintf(stderr, "Bad '-freetype' value on command line\n"); + } + } + if (antialiasStr[0]) { + if (!globalParams->setAntialias(antialiasStr)) { + fprintf(stderr, "Bad '-aa' value on command line\n"); + } + } + if (vectorAntialiasStr[0]) { + if (!globalParams->setVectorAntialias(vectorAntialiasStr)) { + fprintf(stderr, "Bad '-aaVector' value on command line\n"); + } + } + if (quiet) { + globalParams->setErrQuiet(quiet); + } + + // open PDF file + if (ownerPassword[0]) { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0]) { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + + if (fileName == NULL) { + fileName = new GooString("fd://0"); + } + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); + } + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); + delete fileName; + + if (userPW) 
{ + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + exitCode = 1; + goto err1; + } + + // get page range + if (firstPage < 1) + firstPage = 1; + if (singleFile && lastPage < 1) + lastPage = firstPage; + if (lastPage < 1 || lastPage > doc->getNumPages()) + lastPage = doc->getNumPages(); + + if (singleFile && firstPage < lastPage) { + if (!quiet) { + fprintf(stderr, + "Warning: Single file will write only the first of the %d pages.\n", + lastPage + 1 - firstPage); + } + lastPage = firstPage; + } + + // write PPM files +#if SPLASH_CMYK + if (jpegcmyk || overprint) { + globalParams->setOverprintPreview(gTrue); + paperColor[0] = 0; + paperColor[1] = 0; + paperColor[2] = 0; + paperColor[3] = 0; + } else +#endif + { + paperColor[0] = 255; + paperColor[1] = 255; + paperColor[2] = 255; + } + splashOut = new SplashOutputDev(mono ? splashModeMono1 : + gray ? splashModeMono8 : +#if SPLASH_CMYK + (jpegcmyk || overprint) ? splashModeCMYK8 : +#endif + splashModeRGB8, 4, + gFalse, paperColor); + splashOut->startDoc(doc); + if (sz != 0) w = h = sz; + pg_num_len = numberOfCharacters(doc->getNumPages()); + for (pg = firstPage; pg <= lastPage; ++pg) { + if (printOnlyEven && pg % 2 == 0) continue; + if (printOnlyOdd && pg % 2 == 1) continue; + if (useCropBox) { + pg_w = doc->getPageCropWidth(pg); + pg_h = doc->getPageCropHeight(pg); + } else { + pg_w = doc->getPageMediaWidth(pg); + pg_h = doc->getPageMediaHeight(pg); + } + + if (scaleTo != 0) { + resolution = (72.0 * scaleTo) / (pg_w > pg_h ? 
pg_w : pg_h); + x_resolution = y_resolution = resolution; + } else { + if (x_scaleTo > 0) { + x_resolution = (72.0 * x_scaleTo) / pg_w; + if (y_scaleTo == -1) + y_resolution = x_resolution; + } + if (y_scaleTo > 0) { + y_resolution = (72.0 * y_scaleTo) / pg_h; + if (x_scaleTo == -1) + x_resolution = y_resolution; + } + } + pg_w = pg_w * (x_resolution / 72.0); + pg_h = pg_h * (y_resolution / 72.0); + if ((doc->getPageRotate(pg) == 90) || (doc->getPageRotate(pg) == 270)) { + tmp = pg_w; + pg_w = pg_h; + pg_h = tmp; + } + if (ppmRoot != NULL) { + const char *ext = png ? "png" : (jpeg || jpegcmyk) ? "jpg" : tiff ? "tif" : mono ? "pbm" : gray ? "pgm" : "ppm"; + if (singleFile) { + ppmFile = new char[strlen(ppmRoot) + 1 + strlen(ext) + 1]; + sprintf(ppmFile, "%s.%s", ppmRoot, ext); + } else { + ppmFile = new char[strlen(ppmRoot) + 1 + pg_num_len + 1 + strlen(ext) + 1]; + sprintf(ppmFile, "%s-%0*d.%s", ppmRoot, pg_num_len, pg, ext); + } + savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, ppmFile); + delete[] ppmFile; + } else { + savePageSlice(doc, splashOut, pg, x, y, w, h, pg_w, pg_h, NULL); + } + } + delete splashOut; + + exitCode = 0; + + // clean up + err1: + delete doc; + delete globalParams; + err0: + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return exitCode; +} diff --git a/utils/pdftops.1 b/utils/pdftops.1 new file mode 100644 index 00000000..c7aeaf52 --- /dev/null +++ b/utils/pdftops.1 @@ -0,0 +1,227 @@ +.\" Copyright 1996-2011 Glyph & Cog, LLC +.TH pdftops 1 "15 August 2011" +.SH NAME +pdftops \- Portable Document Format (PDF) to PostScript converter +(version 3.03) +.SH SYNOPSIS +.B pdftops +[options] +.RI <PDF-file> +.RI [<PS-file>] +.SH DESCRIPTION +.B Pdftops +converts Portable Document Format (PDF) files to PostScript so they +can be printed. +.PP +Pdftops reads the PDF file, +.IR PDF-file , +and writes a PostScript file, +.IR PS-file . 
+If +.I PS-file +is not specified, pdftops converts +.I file.pdf +to +.I file.ps +(or +.I file.eps +with the \-eps option). If +.I PS-file +is \'-', the PostScript is sent to stdout. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to print. +.TP +.BI \-l " number" +Specifies the last page to print. +.TP +.B \-level1 +Generate Level 1 PostScript. The resulting PostScript files will be +significantly larger (if they contain images), but will print on Level +1 printers. This also converts all images to black and white. No +more than one of the PostScript level options (\-level1, \-level1sep, +\-level2, \-level2sep, \-level3, \-level3sep) may be given. +.TP +.B \-level1sep +Generate Level 1 separable PostScript. All colors are converted to +CMYK. Images are written with separate stream data for the four +components. +.TP +.B \-level2 +Generate Level 2 PostScript. Level 2 supports color images and image +compression. This is the default setting. +.TP +.B \-level2sep +Generate Level 2 separable PostScript. All colors are converted to +CMYK. The PostScript separation convention operators are used to +handle custom (spot) colors. +.TP +.B \-level3 +Generate Level 3 PostScript. This enables all Level 2 features plus +CID font embedding. +.TP +.B \-level3sep +Generate Level 3 separable PostScript. The separation handling is the +same as for \-level2sep. +.TP +.B \-origpagesizes +Generate a PostScript file with variable page sizes and orientations, +taking for each page the size of the original page in the PDF file. +The PostScript file contains "<</PageSize [WIDTH HEIGHT]>> setpagedevice" +lines in each page header, so that the paper input tray gets correctly +changed on the printer. This option should be used when pdftops is used +as a print filter. Any specification of the page size via \-paper, +\-paperw, or \-paperh will get overridden as long as each page of the +PDF file has a defined paper size. 
No more than one of the mode options +(\-origpagesizes, \-eps, \-form) may be given. +.TP +.B \-eps +Generate an Encapsulated PostScript (EPS) file. An EPS file contains +a single image, so if you use this option with a multi-page PDF file, +you must use \-f and \-l to specify a single page. No more than one of +the mode options (\-origpagesizes, \-eps, \-form) may be given. +.TP +.B \-form +Generate a PostScript form which can be imported by software that +understands forms. A form contains a single page, so if you use this +option with a multi-page PDF file, you must use \-f and \-l to specify a +single page. The \-level1 option cannot be used with \-form. No more +than one of the mode options (\-origpagesizes, \-eps, \-form) may be +given. +.TP +.B \-opi +Generate OPI comments for all images and forms which have OPI +information. (This option is only available if pdftops was compiled +with OPI support.) +.TP +.B \-binary +Write binary data in Level 1 PostScript. By default, pdftops writes +hex-encoded data in Level 1 PostScript. Binary data is non-standard +in Level 1 PostScript but reduces the file size and can be useful +when Level 1 PostScript is required only for its restricted use +of PostScript operators. +.TP +.BI \-r " number" +Set the resolution in DPI when pdftops rasterizes images with +transparencies or, for Level 1 PostScript, when pdftops +rasterizes images with color masks. +By default, pdftops rasterizes images to 300 DPI. +.TP +.B \-noembt1 +By default, any Type 1 fonts which are embedded in the PDF file are +copied into the PostScript file. This option causes pdftops to +substitute base fonts instead. Embedded fonts make PostScript files +larger, but may be necessary for readable output. +.TP +.B \-noembtt +By default, any TrueType fonts which are embedded in the PDF file are +copied into the PostScript file. This option causes pdftops to +substitute base fonts instead. 
Embedded fonts make PostScript files +larger, but may be necessary for readable output. Also, some +PostScript interpreters do not have TrueType rasterizers. +.TP +.B \-noembcidps +By default, any CID PostScript fonts which are embedded in the PDF +file are copied into the PostScript file. This option disables that +embedding. No attempt is made to substitute for non-embedded CID +PostScript fonts. +.TP +.B \-noembcidtt +By default, any CID TrueType fonts which are embedded in the PDF file +are copied into the PostScript file. This option disables that +embedding. No attempt is made to substitute for non-embedded CID +TrueType fonts. +.TP +.B \-passfonts +By default, references to non-embedded 8-bit fonts in the PDF file are +substituted with the closest "Helvetica", "Times-Roman", or "Courier" font. +This option passes references to non-embedded fonts +through to the PostScript file. +.TP +.B \-preload +preload images and forms +.TP +.BI \-paper " size" +Set the paper size to one of "letter", "legal", "A4", or "A3". This +can also be set to "match", which will set the paper size to match the +size specified in the PDF file. \-origpagesizes overrides this setting +if the PDF file has defined page sizes. +.TP +.BI \-paperw " size" +Set the paper width, in points. \-origpagesizes overrides this setting +if the PDF file has defined page sizes. +.TP +.BI \-paperh " size" +Set the paper height, in points. \-origpagesizes overrides this setting +if the PDF file has defined page sizes. +.TP +.B \-nocrop +By default, output is cropped to the CropBox specified in the PDF +file. This option disables cropping. +.TP +.B \-expand +Expand PDF pages smaller than the paper to fill the paper. By +default, these pages are not scaled. +.TP +.B \-noshrink +Don't scale PDF pages which are larger than the paper. By default, +pages larger than the paper are shrunk to fit. +.TP +.B \-nocenter +By default, PDF pages smaller than the paper (after any scaling) are +centered on the paper. 
This option causes them to be aligned to the +lower-left corner of the paper instead. +.TP +.B \-duplex +Set the Duplex pagedevice entry in the PostScript file. This tells +duplex-capable printers to enable duplexing. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftops software and documentation are copyright 1996-2011 Glyph & +Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftotext (1) diff --git a/utils/pdftops.cc b/utils/pdftops.cc new file mode 100644 index 00000000..7f5a0ce3 --- /dev/null +++ b/utils/pdftops.cc @@ -0,0 +1,422 @@ +//======================================================================== +// +// pdftops.cc +// +// Copyright 1996-2003 Glyph & Cog, LLC +// +// Modified for Debian by Hamish Moffatt, 22 May 2002. 
+// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2006 Kristian Høgsberg <krh@redhat.com> +// Copyright (C) 2007-2008, 2010 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2009 Till Kamppeter <till.kamppeter@gmail.com> +// Copyright (C) 2009 Sanjoy Mahajan <sanjoy@mit.edu> +// Copyright (C) 2009, 2011, 2012 William Bader <williambader@hotmail.com> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2012 Thomas Freitag <Thomas.Freitag@alfa.de> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include "parseargs.h" +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "PSOutputDev.h" +#include "Error.h" +
// Translates a paper-size name into a width and height.
// Recognised names are "match" (both set to -1), "letter", "legal", "A4" and
// "A3"; the comparison is case-sensitive. Returns gTrue and fills both
// outputs on a match; returns gFalse and leaves them untouched otherwise.
static GBool setPSPaperSize(char *size, int &psPaperWidth, int &psPaperHeight) {
  static const struct {
    const char *name;
    int width;
    int height;
  } knownSizes[] = {
    {"match",   -1,   -1},
    {"letter", 612,  792},
    {"legal",  612, 1008},
    {"A4",     595,  842},
    {"A3",     842, 1190},
  };
  const size_t count = sizeof(knownSizes) / sizeof(knownSizes[0]);

  for (size_t i = 0; i < count; ++i) {
    if (strcmp(size, knownSizes[i].name) == 0) {
      psPaperWidth = knownSizes[i].width;
      psPaperHeight = knownSizes[i].height;
      return gTrue;
    }
  }
  return gFalse;
}
+ + +static int firstPage = 1; +static int lastPage = 0; +static GBool level1 = gFalse; +static GBool level1Sep = gFalse; +static GBool level2 = gFalse; +static GBool level2Sep = gFalse; +static GBool level3 = gFalse; +static GBool level3Sep = gFalse; +static GBool doOrigPageSizes = gFalse; +static GBool doEPS = gFalse; +static GBool doForm = gFalse; +#if OPI_SUPPORT +static GBool doOPI = gFalse; +#endif +static int splashResolution = 0; +static GBool psBinary = gFalse; +static GBool noEmbedT1Fonts = gFalse; +static GBool noEmbedTTFonts = gFalse; +static GBool noEmbedCIDPSFonts = gFalse; +static GBool noEmbedCIDTTFonts = gFalse; +static GBool fontPassthrough = gFalse; +static GBool preload = gFalse; +static char paperSize[15] = ""; +static int paperWidth = -1; +static int paperHeight = -1; +static GBool noCrop = gFalse; +static GBool expand = gFalse; +static GBool noShrink = gFalse; +static GBool noCenter = gFalse; +static GBool duplex = gFalse; +static char ownerPassword[33] = "\001"; +static char userPassword[33] = "\001"; +static GBool quiet = gFalse; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; +#if SPLASH_CMYK +static GBool overprint = gFalse; +#endif + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to print"}, + {"-l", argInt, &lastPage, 0, + "last page to print"}, + {"-level1", argFlag, &level1, 0, + "generate Level 1 PostScript"}, + {"-level1sep", argFlag, &level1Sep, 0, + "generate Level 1 separable PostScript"}, + {"-level2", argFlag, &level2, 0, + "generate Level 2 PostScript"}, + {"-level2sep", argFlag, &level2Sep, 0, + "generate Level 2 separable PostScript"}, + {"-level3", argFlag, &level3, 0, + "generate Level 3 PostScript"}, + {"-level3sep", argFlag, &level3Sep, 0, + "generate Level 3 separable PostScript"}, + {"-origpagesizes",argFlag, &doOrigPageSizes,0, + "conserve original page sizes"}, + {"-eps", argFlag, &doEPS, 0, + "generate Encapsulated PostScript 
(EPS)"}, + {"-form", argFlag, &doForm, 0, + "generate a PostScript form"}, +#if OPI_SUPPORT + {"-opi", argFlag, &doOPI, 0, + "generate OPI comments"}, +#endif + {"-r", argInt, &splashResolution, 0, + "resolution for rasterization, in DPI (default is 300)"}, + {"-binary", argFlag, &psBinary, 0, + "write binary data in Level 1 PostScript"}, + {"-noembt1", argFlag, &noEmbedT1Fonts, 0, + "don't embed Type 1 fonts"}, + {"-noembtt", argFlag, &noEmbedTTFonts, 0, + "don't embed TrueType fonts"}, + {"-noembcidps", argFlag, &noEmbedCIDPSFonts, 0, + "don't embed CID PostScript fonts"}, + {"-noembcidtt", argFlag, &noEmbedCIDTTFonts, 0, + "don't embed CID TrueType fonts"}, + {"-passfonts", argFlag, &fontPassthrough,0, + "don't substitute missing fonts"}, + {"-preload", argFlag, &preload, 0, + "preload images and forms"}, + {"-paper", argString, paperSize, sizeof(paperSize), + "paper size (letter, legal, A4, A3, match)"}, + {"-paperw", argInt, &paperWidth, 0, + "paper width, in points"}, + {"-paperh", argInt, &paperHeight, 0, + "paper height, in points"}, + {"-nocrop", argFlag, &noCrop, 0, + "don't crop pages to CropBox"}, + {"-expand", argFlag, &expand, 0, + "expand pages smaller than the paper size"}, + {"-noshrink", argFlag, &noShrink, 0, + "don't shrink pages larger than the paper size"}, + {"-nocenter", argFlag, &noCenter, 0, + "don't center pages smaller than the paper size"}, + {"-duplex", argFlag, &duplex, 0, + "enable duplex printing"}, + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, +#if SPLASH_CMYK + {"-overprint",argFlag, &overprint, 0, + "enable overprint"}, +#endif + {"-q", argFlag, &quiet, 0, + "don't print any messages or errors"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print 
usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString *fileName; + GooString *psFileName; + PSLevel level; + PSOutMode mode; + GooString *ownerPW, *userPW; + PSOutputDev *psOut; + GBool ok; + char *p; + int exitCode; + + exitCode = 99; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (!ok || argc < 2 || argc > 3 || printVersion || printHelp) { + fprintf(stderr, "pdftops version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdftops", "<PDF-file> [<PS-file>]", argDesc); + } + if (printVersion || printHelp) + exit(0); + else + exit(1); + } + if ((level1 ? 1 : 0) + + (level1Sep ? 1 : 0) + + (level2 ? 1 : 0) + + (level2Sep ? 1 : 0) + + (level3 ? 1 : 0) + + (level3Sep ? 1 : 0) > 1) { + fprintf(stderr, "Error: use only one of the 'level' options.\n"); + exit(1); + } + if ((doOrigPageSizes ? 1 : 0) + + (doEPS ? 1 : 0) + + (doForm ? 1 : 0) > 1) { + fprintf(stderr, "Error: use only one of -origpagesizes, -eps, and -form\n"); + exit(1); + } + if (level1) { + level = psLevel1; + } else if (level1Sep) { + level = psLevel1Sep; + } else if (level2Sep) { + level = psLevel2Sep; + } else if (level3) { + level = psLevel3; + } else if (level3Sep) { + level = psLevel3Sep; + } else { + level = psLevel2; + } + if (doForm && level < psLevel2) { + fprintf(stderr, "Error: forms are only available with Level 2 output.\n"); + exit(1); + } + mode = doOrigPageSizes ? psModePSOrigPageSizes + : doEPS ? psModeEPS + : doForm ? 
psModeForm + : psModePS; + fileName = new GooString(argv[1]); + + // read config file + globalParams = new GlobalParams(); + if (paperSize[0]) { + if (!setPSPaperSize(paperSize, paperWidth, paperHeight)) { + fprintf(stderr, "Invalid paper size\n"); + delete fileName; + goto err0; + } + } +#if SPLASH_CMYK + if (overprint) { + globalParams->setOverprintPreview(gTrue); + } +#endif + if (expand) { + globalParams->setPSExpandSmaller(gTrue); + } + if (noShrink) { + globalParams->setPSShrinkLarger(gFalse); + } + if (noCenter) { + globalParams->setPSCenter(gFalse); + } + if (level1 || level1Sep || level2 || level2Sep || level3 || level3Sep) { + globalParams->setPSLevel(level); + } + if (splashResolution > 0) { + globalParams->setPSRasterResolution(splashResolution); + } + if (noEmbedT1Fonts) { + globalParams->setPSEmbedType1(!noEmbedT1Fonts); + } + if (noEmbedTTFonts) { + globalParams->setPSEmbedTrueType(!noEmbedTTFonts); + } + if (noEmbedCIDPSFonts) { + globalParams->setPSEmbedCIDPostScript(!noEmbedCIDPSFonts); + } + if (noEmbedCIDTTFonts) { + globalParams->setPSEmbedCIDTrueType(!noEmbedCIDTTFonts); + } + if (fontPassthrough) { + globalParams->setPSFontPassthrough(fontPassthrough); + } + if (preload) { + globalParams->setPSPreload(preload); + } +#if OPI_SUPPORT + if (doOPI) { + globalParams->setPSOPI(doOPI); + } +#endif + if (psBinary) { + globalParams->setPSBinary(psBinary); + } + if (quiet) { + globalParams->setErrQuiet(quiet); + } + + // open PDF file + if (ownerPassword[0] != '\001') { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0] != '\001') { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); + } + + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); + + if (userPW) { + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + exitCode = 1; + goto err1; + } + +#ifdef 
ENFORCE_PERMISSIONS + // check for print permission + if (!doc->okToPrint()) { + error(errNotAllowed, -1, "Printing this document is not allowed."); + exitCode = 3; + goto err1; + } +#endif + + // construct PostScript file name + if (argc == 3) { + psFileName = new GooString(argv[2]); + } else if (fileName->cmp("fd://0") == 0) { + error(errCommandLine, -1, "You have to provide an output filename when reading form stdin."); + goto err1; + } else { + p = fileName->getCString() + fileName->getLength() - 4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { + psFileName = new GooString(fileName->getCString(), + fileName->getLength() - 4); + } else { + psFileName = fileName->copy(); + } + psFileName->append(doEPS ? ".eps" : ".ps"); + } + + // get page range + if (firstPage < 1) { + firstPage = 1; + } + if (lastPage < 1 || lastPage > doc->getNumPages()) { + lastPage = doc->getNumPages(); + } + + // check for multi-page EPS or form + if ((doEPS || doForm) && firstPage != lastPage) { + error(errCommandLine, -1, "EPS and form files can only contain one page."); + goto err2; + } + + // write PostScript file + psOut = new PSOutputDev(psFileName->getCString(), doc, + NULL, firstPage, lastPage, mode, + paperWidth, + paperHeight, + duplex); + if (psOut->isOk()) { + doc->displayPages(psOut, firstPage, lastPage, 72, 72, + 0, noCrop, !noCrop, gTrue); + } else { + delete psOut; + exitCode = 2; + goto err2; + } + delete psOut; + + exitCode = 0; + + // clean up + err2: + delete psFileName; + err1: + delete doc; + delete fileName; + err0: + delete globalParams; + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return exitCode; +} diff --git a/utils/pdftotext.1 b/utils/pdftotext.1 new file mode 100644 index 00000000..0199b03c --- /dev/null +++ b/utils/pdftotext.1 @@ -0,0 +1,137 @@ +.\" Copyright 1997-2011 Glyph & Cog, LLC +.TH pdftotext 1 "15 August 2011" +.SH NAME +pdftotext \- Portable Document Format (PDF) to text converter +(version 3.03) +.SH 
SYNOPSIS +.B pdftotext +[options] +.RI [ PDF-file +.RI [ text-file ]] +.SH DESCRIPTION +.B Pdftotext +converts Portable Document Format (PDF) files to plain text. +.PP +Pdftotext reads the PDF file, +.IR PDF-file , +and writes a text file, +.IR text-file . +If +.I text-file +is not specified, pdftotext converts +.I file.pdf +to +.IR file.txt . +If +.I text-file +is \'-', the text is sent to stdout. +.SH OPTIONS +.TP +.BI \-f " number" +Specifies the first page to convert. +.TP +.BI \-l " number" +Specifies the last page to convert. +.TP +.BI \-r " number" +Specifies the resolution, in DPI. The default is 72 DPI. +.TP +.BI \-x " number" +Specifies the x-coordinate of the crop area top left corner +.TP +.BI \-y " number" +Specifies the y-coordinate of the crop area top left corner +.TP +.BI \-W " number" +Specifies the width of crop area in pixels (default is 0) +.TP +.BI \-H " number" +Specifies the height of crop area in pixels (default is 0) +.TP +.B \-layout +Maintain (as best as possible) the original physical layout of the +text. The default is to \'undo' physical layout (columns, +hyphenation, etc.) and output the text in reading order. +.TP +.BI \-fixed " number" +Assume fixed-pitch (or tabular) text, with the specified character +width (in points). This forces physical layout mode. +.TP +.B \-raw +Keep the text in content stream order. This is a hack which often +"undoes" column formatting, etc. Use of raw mode is no longer +recommended. +.TP +.B \-htmlmeta +Generate a simple HTML file, including the meta information. This +simply wraps the text in <pre> and </pre> and prepends the meta +headers. +.TP +.B \-bbox +Generate an XHTML file containing bounding box information for each +word in the file. +.TP +.BI \-enc " encoding-name" +Sets the encoding to use for text output. This defaults to "UTF-8". +.TP +.B \-listenc +Lists the available encodings +.TP +.BI \-eol " unix | dos | mac" +Sets the end-of-line convention to use for text output. 
+.TP +.B \-nopgbrk +Don't insert page breaks (form feed characters) between pages. +.TP +.BI \-opw " password" +Specify the owner password for the PDF file. Providing this will +bypass all security restrictions. +.TP +.BI \-upw " password" +Specify the user password for the PDF file. +.TP +.B \-q +Don't print any messages or errors. +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH BUGS +Some PDF files contain fonts whose encodings have been mangled beyond +recognition. There is no way (short of OCR) to extract text from +these files. +.SH EXIT CODES +The Xpdf tools use the following exit codes: +.TP +0 +No error. +.TP +1 +Error opening a PDF file. +.TP +2 +Error opening an output file. +.TP +3 +Error related to PDF permissions. +.TP +99 +Other error. +.SH AUTHOR +The pdftotext software and documentation are copyright 1996-2011 Glyph +& Cog, LLC. +.SH "SEE ALSO" +.BR pdfdetach (1), +.BR pdffonts (1), +.BR pdfimages (1), +.BR pdfinfo (1), +.BR pdftocairo (1), +.BR pdftohtml (1), +.BR pdftoppm (1), +.BR pdftops (1) diff --git a/utils/pdftotext.cc b/utils/pdftotext.cc new file mode 100644 index 00000000..a170f1b7 --- /dev/null +++ b/utils/pdftotext.cc @@ -0,0 +1,486 @@ +//======================================================================== +// +// pdftotext.cc +// +// Copyright 1997-2003 Glyph & Cog, LLC +// +// Modified for Debian by Hamish Moffatt, 22 May 2002. 
+// +//======================================================================== + +//======================================================================== +// +// Modified under the Poppler project - http://poppler.freedesktop.org +// +// All changes made under the Poppler project to this file are licensed +// under GPL version 2 or later +// +// Copyright (C) 2006 Dominic Lachowicz <cinamod@hotmail.com> +// Copyright (C) 2007-2008, 2010, 2011 Albert Astals Cid <aacid@kde.org> +// Copyright (C) 2009 Jan Jockusch <jan@jockusch.de> +// Copyright (C) 2010 Hib Eris <hib@hiberis.nl> +// Copyright (C) 2010 Kenneth Berland <ken@hero.com> +// Copyright (C) 2011 Tom Gleason <tom@buildadam.com> +// Copyright (C) 2011 Steven Murdoch <Steven.Murdoch@cl.cam.ac.uk> +// +// To see a description of the changes please see the Changelog file that +// came with your tarball or type make ChangeLog if you are building from git +// +//======================================================================== + +#include "config.h" +#include <poppler-config.h> +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include "parseargs.h" +#include "printencodings.h" +#include "goo/GooString.h" +#include "goo/gmem.h" +#include "GlobalParams.h" +#include "Object.h" +#include "Stream.h" +#include "Array.h" +#include "Dict.h" +#include "XRef.h" +#include "Catalog.h" +#include "Page.h" +#include "PDFDoc.h" +#include "PDFDocFactory.h" +#include "TextOutputDev.h" +#include "CharTypes.h" +#include "UnicodeMap.h" +#include "PDFDocEncoding.h" +#include "Error.h" +#include <string> + +static void printInfoString(FILE *f, Dict *infoDict, const char *key, + const char *text1, const char *text2, UnicodeMap *uMap); +static void printInfoDate(FILE *f, Dict *infoDict, const char *key, const char *fmt); + +static int firstPage = 1; +static int lastPage = 0; +static double resolution = 72.0; +static int x = 0; +static int y = 0; +static int w = 0; +static int h = 0; +static 
GBool bbox = gFalse; +static GBool physLayout = gFalse; +static double fixedPitch = 0; +static GBool rawOrder = gFalse; +static GBool htmlMeta = gFalse; +static char textEncName[128] = ""; +static char textEOL[16] = ""; +static GBool noPageBreaks = gFalse; +static char ownerPassword[33] = "\001"; +static char userPassword[33] = "\001"; +static GBool quiet = gFalse; +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; +static GBool printEnc = gFalse; + +static const ArgDesc argDesc[] = { + {"-f", argInt, &firstPage, 0, + "first page to convert"}, + {"-l", argInt, &lastPage, 0, + "last page to convert"}, + {"-r", argFP, &resolution, 0, + "resolution, in DPI (default is 72)"}, + {"-x", argInt, &x, 0, + "x-coordinate of the crop area top left corner"}, + {"-y", argInt, &y, 0, + "y-coordinate of the crop area top left corner"}, + {"-W", argInt, &w, 0, + "width of crop area in pixels (default is 0)"}, + {"-H", argInt, &h, 0, + "height of crop area in pixels (default is 0)"}, + {"-layout", argFlag, &physLayout, 0, + "maintain original physical layout"}, + {"-fixed", argFP, &fixedPitch, 0, + "assume fixed-pitch (or tabular) text"}, + {"-raw", argFlag, &rawOrder, 0, + "keep strings in content stream order"}, + {"-htmlmeta", argFlag, &htmlMeta, 0, + "generate a simple HTML file, including the meta information"}, + {"-enc", argString, textEncName, sizeof(textEncName), + "output text encoding name"}, + {"-listenc",argFlag, &printEnc, 0, + "list available encodings"}, + {"-eol", argString, textEOL, sizeof(textEOL), + "output end-of-line convention (unix, dos, or mac)"}, + {"-nopgbrk", argFlag, &noPageBreaks, 0, + "don't insert page breaks between pages"}, + {"-bbox", argFlag, &bbox, 0, + "output bounding box for each word and page size to html. 
Sets -htmlmeta"}, + {"-opw", argString, ownerPassword, sizeof(ownerPassword), + "owner password (for encrypted files)"}, + {"-upw", argString, userPassword, sizeof(userPassword), + "user password (for encrypted files)"}, + {"-q", argFlag, &quiet, 0, + "don't print any messages or errors"}, + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + {NULL} +}; + +static std::string myStringReplace(const std::string &inString, const std::string &oldToken, const std::string &newToken) { + std::string result = inString; + size_t foundLoc; + int advance = 0; + do { + foundLoc = result.find(oldToken, advance); + if (foundLoc != std::string::npos){ + result.replace(foundLoc, oldToken.length(), newToken); + advance = foundLoc + newToken.length(); + } + } while (foundLoc != std::string::npos ); + return result; +} + +static std::string myXmlTokenReplace(const char *inString){ + std::string myString(inString); + myString = myStringReplace(myString, "&", "&" ); + myString = myStringReplace(myString, "'", "'" ); + myString = myStringReplace(myString, "\"", """ ); + myString = myStringReplace(myString, "<", "<" ); + myString = myStringReplace(myString, ">", ">" ); + return myString; +} + +int main(int argc, char *argv[]) { + PDFDoc *doc; + GooString *fileName; + GooString *textFileName; + GooString *ownerPW, *userPW; + TextOutputDev *textOut; + FILE *f; + UnicodeMap *uMap; + Object info; + GBool ok; + char *p; + int exitCode; + + exitCode = 99; + + // parse args + ok = parseArgs(argDesc, &argc, argv); + if (bbox) { + htmlMeta = gTrue; + } + if (!ok || (argc < 2 && !printEnc) || argc > 3 || printVersion || printHelp) { + fprintf(stderr, "pdftotext version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", 
popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdftotext", "<PDF-file> [<text-file>]", argDesc); + } + if (printVersion || printHelp) + exitCode = 0; + goto err0; + } + + // read config file + globalParams = new GlobalParams(); + + if (printEnc) { + printEncodings(); + delete globalParams; + exitCode = 0; + goto err0; + } + + fileName = new GooString(argv[1]); + if (fixedPitch) { + physLayout = gTrue; + } + + if (textEncName[0]) { + globalParams->setTextEncoding(textEncName); + } + if (textEOL[0]) { + if (!globalParams->setTextEOL(textEOL)) { + fprintf(stderr, "Bad '-eol' value on command line\n"); + } + } + if (noPageBreaks) { + globalParams->setTextPageBreaks(gFalse); + } + if (quiet) { + globalParams->setErrQuiet(quiet); + } + + // get mapping to output encoding + if (!(uMap = globalParams->getTextEncoding())) { + error(errCommandLine, -1, "Couldn't get text encoding"); + delete fileName; + goto err1; + } + + // open PDF file + if (ownerPassword[0] != '\001') { + ownerPW = new GooString(ownerPassword); + } else { + ownerPW = NULL; + } + if (userPassword[0] != '\001') { + userPW = new GooString(userPassword); + } else { + userPW = NULL; + } + + if (fileName->cmp("-") == 0) { + delete fileName; + fileName = new GooString("fd://0"); + } + + doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); + + if (userPW) { + delete userPW; + } + if (ownerPW) { + delete ownerPW; + } + if (!doc->isOk()) { + exitCode = 1; + goto err2; + } + +#ifdef ENFORCE_PERMISSIONS + // check for copy permission + if (!doc->okToCopy()) { + error(errNotAllowed, -1, "Copying of text from this document is not allowed."); + exitCode = 3; + goto err2; + } +#endif + + // construct text file name + if (argc == 3) { + textFileName = new GooString(argv[2]); + } else if (fileName->cmp("fd://0") == 0) { + error(errCommandLine, -1, "You have to provide an output filename when reading form stdin."); + goto err2; + } else { + p = 
fileName->getCString() + fileName->getLength() - 4; + if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { + textFileName = new GooString(fileName->getCString(), + fileName->getLength() - 4); + } else { + textFileName = fileName->copy(); + } + textFileName->append(htmlMeta ? ".html" : ".txt"); + } + + // get page range + if (firstPage < 1) { + firstPage = 1; + } + if (lastPage < 1 || lastPage > doc->getNumPages()) { + lastPage = doc->getNumPages(); + } + + // write HTML header + if (htmlMeta) { + if (!textFileName->cmp("-")) { + f = stdout; + } else { + if (!(f = fopen(textFileName->getCString(), "wb"))) { + error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName); + exitCode = 2; + goto err3; + } + } + fputs("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">", f); + fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\">\n", f); + fputs("<head>\n", f); + doc->getDocInfo(&info); + if (info.isDict()) { + Object obj; + if (info.getDict()->lookup("Title", &obj)->isString()) { + printInfoString(f, info.getDict(), "Title", "<title>", "</title>\n", uMap); + } else { + fputs("<title></title>\n", f); + } + obj.free(); + printInfoString(f, info.getDict(), "Subject", + "<meta name=\"Subject\" content=\"", "\"/>\n", uMap); + printInfoString(f, info.getDict(), "Keywords", + "<meta name=\"Keywords\" content=\"", "\"/>\n", uMap); + printInfoString(f, info.getDict(), "Author", + "<meta name=\"Author\" content=\"", "\"/>\n", uMap); + printInfoString(f, info.getDict(), "Creator", + "<meta name=\"Creator\" content=\"", "\"/>\n", uMap); + printInfoString(f, info.getDict(), "Producer", + "<meta name=\"Producer\" content=\"", "\"/>\n", uMap); + printInfoDate(f, info.getDict(), "CreationDate", + "<meta name=\"CreationDate\" content=\"\"/>\n"); + printInfoDate(f, info.getDict(), "LastModifiedDate", + "<meta name=\"ModDate\" content=\"\"/>\n"); + } + info.free(); + fputs("</head>\n", f); + fputs("<body>\n", f); + 
if (!bbox) fputs("<pre>\n", f); + if (f != stdout) { + fclose(f); + } + } + + // write text file + if (bbox) { + textOut = new TextOutputDev(NULL, physLayout, fixedPitch, rawOrder, htmlMeta); + if (!(f = fopen(textFileName->getCString(), "ab"))) { + error(errIO, -1, "Couldn't open text file '{0:t}' for append", textFileName); + exitCode = 2; + delete textOut; + goto err3; + } + + if (textOut->isOk()) { + fprintf(f, "<doc>\n"); + for (int page = firstPage; page <= lastPage; ++page) { + fprintf(f, " <page width=\"%f\" height=\"%f\">\n",doc->getPageMediaWidth(page), doc->getPageMediaHeight(page)); + doc->displayPage(textOut, page, resolution, resolution, 0, gTrue, gFalse, gFalse); + TextWordList *wordlist = textOut->makeWordList(); + const int word_length = wordlist != NULL ? wordlist->getLength() : 0; + TextWord *word; + double xMinA, yMinA, xMaxA, yMaxA; + if (word_length == 0) + fprintf(stderr, "no word list\n"); + + for (int i = 0; i < word_length; ++i) { + word = wordlist->get(i); + word->getBBox(&xMinA, &yMinA, &xMaxA, &yMaxA); + const std::string myString = myXmlTokenReplace(word->getText()->getCString()); + fprintf(f," <word xMin=\"%f\" yMin=\"%f\" xMax=\"%f\" yMax=\"%f\">%s</word>\n", xMinA, yMinA, xMaxA, yMaxA, myString.c_str()); + } + fprintf(f, " </page>\n"); + delete wordlist; + } + fprintf(f, "</doc>\n"); + } + fclose(f); + } else { + textOut = new TextOutputDev(textFileName->getCString(), + physLayout, fixedPitch, rawOrder, htmlMeta); + if (textOut->isOk()) { + if ((w==0) && (h==0) && (x==0) && (y==0)) { + doc->displayPages(textOut, firstPage, lastPage, resolution, resolution, 0, + gTrue, gFalse, gFalse); + } else { + + for (int page = firstPage; page <= lastPage; ++page) { + doc->displayPageSlice(textOut, page, resolution, resolution, 0, + gTrue, gFalse, gFalse, + x, y, w, h); + } + } + + } else { + delete textOut; + exitCode = 2; + goto err3; + } + } + delete textOut; + + // write end of HTML file + if (htmlMeta) { + if (!textFileName->cmp("-")) { + f 
= stdout; + } else { + if (!(f = fopen(textFileName->getCString(), "ab"))) { + error(errIO, -1, "Couldn't open text file '{0:t}'", textFileName); + exitCode = 2; + goto err3; + } + } + if (!bbox) fputs("</pre>\n", f); + fputs("</body>\n", f); + fputs("</html>\n", f); + if (f != stdout) { + fclose(f); + } + } + + exitCode = 0; + + // clean up + err3: + delete textFileName; + err2: + delete doc; + delete fileName; + uMap->decRefCnt(); + err1: + delete globalParams; + err0: + + // check for memory leaks + Object::memCheck(stderr); + gMemReport(stderr); + + return exitCode; +} + +static void printInfoString(FILE *f, Dict *infoDict, const char *key, + const char *text1, const char *text2, UnicodeMap *uMap) { + Object obj; + GooString *s1; + GBool isUnicode; + Unicode u; + char buf[8]; + int i, n; + + if (infoDict->lookup(key, &obj)->isString()) { + fputs(text1, f); + s1 = obj.getString(); + if ((s1->getChar(0) & 0xff) == 0xfe && + (s1->getChar(1) & 0xff) == 0xff) { + isUnicode = gTrue; + i = 2; + } else { + isUnicode = gFalse; + i = 0; + } + while (i < obj.getString()->getLength()) { + if (isUnicode) { + u = ((s1->getChar(i) & 0xff) << 8) | + (s1->getChar(i+1) & 0xff); + i += 2; + } else { + u = pdfDocEncoding[s1->getChar(i) & 0xff]; + ++i; + } + n = uMap->mapUnicode(u, buf, sizeof(buf)); + fwrite(buf, 1, n, f); + } + fputs(text2, f); + } + obj.free(); +} + +static void printInfoDate(FILE *f, Dict *infoDict, const char *key, const char *fmt) { + Object obj; + char *s; + + if (infoDict->lookup(key, &obj)->isString()) { + s = obj.getString()->getCString(); + if (s[0] == 'D' && s[1] == ':') { + s += 2; + } + fprintf(f, fmt, s); + } + obj.free(); +} diff --git a/utils/pdfunite.1 b/utils/pdfunite.1 new file mode 100644 index 00000000..9b1f2e8f --- /dev/null +++ b/utils/pdfunite.1 @@ -0,0 +1,33 @@ +.\" Copyright 2011 The Poppler Developers - http://poppler.freedesktop.org +.TH pdfunite 1 "15 September 2011" +.SH NAME +pdfunite \- Portable Document Format (PDF) page merger 
+.SH SYNOPSIS +.B pdfunite +[options] +.I PDF-sourcefile1..PDF-sourcefilen PDF-destfile +.SH DESCRIPTION +.B pdfunite +merges several PDF (Portable Document Format) files in order of their occurrence on the command line into one PDF result file. +.TP +None of the PDF-sourcefile1 to PDF-sourcefilen should be encrypted. +.SH OPTIONS +.TP +.B \-v +Print copyright and version information. +.TP +.B \-h +Print usage information. +.RB ( \-help +and +.B \-\-help +are equivalent.) +.SH EXAMPLE +pdfunite sample1.pdf sample2.pdf sample.pdf +.TP +merges all pages from sample1.pdf and sample2.pdf (in that order) and creates sample.pdf +.SH AUTHOR +The pdfunite software and documentation are copyright 1996-2004 Glyph & Cog, LLC +and copyright 2005-2011 The Poppler Developers - http://poppler.freedesktop.org +.SH "SEE ALSO" +.BR pdfseparate (1) diff --git a/utils/pdfunite.cc b/utils/pdfunite.cc new file mode 100644 index 00000000..212f89be --- /dev/null +++ b/utils/pdfunite.cc @@ -0,0 +1,182 @@ +//======================================================================== +// +// pdfunite.cc +// +// This file is licensed under the GPLv2 or later +// +// Copyright (C) 2011 Thomas Freitag <Thomas.Freitag@alfa.de> +// Copyright (C) 2012 Arseny Solokha <asolokha@gmx.com> +// Copyright (C) 2012 Fabio D'Urso <fabiodurso@hotmail.it> +// +//======================================================================== +#include <PDFDoc.h> +#include <GlobalParams.h> +#include "parseargs.h" +#include "config.h" +#include <poppler-config.h> +#include <vector> + +static GBool printVersion = gFalse; +static GBool printHelp = gFalse; + +static const ArgDesc argDesc[] = { + {"-v", argFlag, &printVersion, 0, + "print copyright and version info"}, + {"-h", argFlag, &printHelp, 0, + "print usage information"}, + {"-help", argFlag, &printHelp, 0, + "print usage information"}, + {"--help", argFlag, &printHelp, 0, + "print usage information"}, + {"-?", argFlag, &printHelp, 0, + "print usage information"}, + 
{NULL} +}; + +/////////////////////////////////////////////////////////////////////////// +int main (int argc, char *argv[]) +/////////////////////////////////////////////////////////////////////////// +// Merge PDF files given by arguments 1 to argc-2 and write the result +// to the file specified by argument argc-1. +/////////////////////////////////////////////////////////////////////////// +{ + int objectsCount = 0; + Guint numOffset = 0; + std::vector<Object> pages; + std::vector<Guint> offsets; + XRef *yRef, *countRef; + FILE *f; + OutStream *outStr; + int i; + int j, rootNum; + std::vector<PDFDoc *>docs; + int majorVersion = 0; + int minorVersion = 0; + char *fileName = argv[argc - 1]; + int exitCode; + + exitCode = 99; + if (argc <= 3 || printVersion || printHelp) { + fprintf(stderr, "pdfunite version %s\n", PACKAGE_VERSION); + fprintf(stderr, "%s\n", popplerCopyright); + fprintf(stderr, "%s\n", xpdfCopyright); + if (!printVersion) { + printUsage("pdfunite", "<PDF-sourcefile-1>..<PDF-sourcefile-n> <PDF-destfile>", + argDesc); + } + if (printVersion || printHelp) + exitCode = 0; + return exitCode; + } + exitCode = 0; + globalParams = new GlobalParams(); + + for (i = 1; i < argc - 1; i++) { + GooString *gfileName = new GooString(argv[i]); + PDFDoc *doc = new PDFDoc(gfileName, NULL, NULL, NULL); + if (doc->isOk() && !doc->isEncrypted()) { + docs.push_back(doc); + if (doc->getPDFMajorVersion() > majorVersion) { + majorVersion = doc->getPDFMajorVersion(); + minorVersion = doc->getPDFMinorVersion(); + } else if (doc->getPDFMajorVersion() == majorVersion) { + if (doc->getPDFMinorVersion() > minorVersion) { + minorVersion = doc->getPDFMinorVersion(); + } + } + } else if (doc->isOk()) { + error(errUnimplemented, -1, "Could not merge encrypted files ('{0:s}')", argv[i]); + return -1; + } else { + error(errSyntaxError, -1, "Could not merge damaged documents ('{0:s}')", argv[i]); + return -1; + } + } + + if (!(f = fopen(fileName, "wb"))) { + error(errIO, -1, "Could not 
open file '{0:s}'", fileName); + return -1; + } + outStr = new FileOutStream(f, 0); + + yRef = new XRef(); + countRef = new XRef(); + yRef->add(0, 65535, 0, gFalse); + PDFDoc::writeHeader(outStr, majorVersion, minorVersion); + + for (i = 0; i < (int) docs.size(); i++) { + for (j = 1; j <= docs[i]->getNumPages(); j++) { + PDFRectangle *cropBox = NULL; + if (docs[i]->getCatalog()->getPage(j)->isCropped()) + cropBox = docs[i]->getCatalog()->getPage(j)->getCropBox(); + docs[i]->replacePageDict(j, + docs[i]->getCatalog()->getPage(j)->getRotate(), + docs[i]->getCatalog()->getPage(j)->getMediaBox(), cropBox, NULL); + Ref *refPage = docs[i]->getCatalog()->getPageRef(j); + Object page; + docs[i]->getXRef()->fetch(refPage->num, refPage->gen, &page); + pages.push_back(page); + offsets.push_back(numOffset); + Dict *pageDict = page.getDict(); + docs[i]->markPageObjects(pageDict, yRef, countRef, numOffset); + } + objectsCount += docs[i]->writePageObjects(outStr, yRef, numOffset); + numOffset = yRef->getNumObjects() + 1; + } + + rootNum = yRef->getNumObjects() + 1; + yRef->add(rootNum, 0, outStr->getPos(), gTrue); + outStr->printf("%d 0 obj\n", rootNum); + outStr->printf("<< /Type /Catalog /Pages %d 0 R", rootNum + 1); + outStr->printf(">>\nendobj\n"); + objectsCount++; + + yRef->add(rootNum + 1, 0, outStr->getPos(), gTrue); + outStr->printf("%d 0 obj\n", rootNum + 1); + outStr->printf("<< /Type /Pages /Kids ["); + for (j = 0; j < (int) pages.size(); j++) + outStr->printf(" %d 0 R", rootNum + j + 2); + outStr->printf(" ] /Count %d >>\nendobj\n", pages.size()); + objectsCount++; + + for (i = 0; i < (int) pages.size(); i++) { + yRef->add(rootNum + i + 2, 0, outStr->getPos(), gTrue); + outStr->printf("%d 0 obj\n", rootNum + i + 2); + outStr->printf("<< "); + Dict *pageDict = pages[i].getDict(); + for (j = 0; j < pageDict->getLength(); j++) { + if (j > 0) + outStr->printf(" "); + const char *key = pageDict->getKey(j); + Object value; + pageDict->getValNF(j, &value); + if (strcmp(key, 
"Parent") == 0) { + outStr->printf("/Parent %d 0 R", rootNum + 1); + } else { + outStr->printf("/%s ", key); + PDFDoc::writeObject(&value, NULL, outStr, yRef, offsets[i]); + } + value.free(); + } + outStr->printf(" >>\nendobj\n"); + objectsCount++; + } + Guint uxrefOffset = outStr->getPos(); + Ref ref; + ref.num = rootNum; + ref.gen = 0; + Dict *trailerDict = PDFDoc::createTrailerDict(objectsCount, gFalse, 0, &ref, yRef, + fileName, outStr->getPos()); + PDFDoc::writeXRefTableTrailer(trailerDict, yRef, gFalse /* do not write unnecessary entries */, + uxrefOffset, outStr, yRef); + delete trailerDict; + + outStr->close(); + fclose(f); + delete yRef; + delete countRef; + for (j = 0; j < (int) pages.size (); j++) pages[j].free(); + for (i = 0; i < (int) docs.size (); i++) delete docs[i]; + delete globalParams; + return exitCode; +} diff --git a/utils/printencodings.cc b/utils/printencodings.cc new file mode 100644 index 00000000..dec6f98f --- /dev/null +++ b/utils/printencodings.cc @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2008, Albert Astals Cid <aacid@kde.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + +#include "printencodings.h" + +#include "GlobalParams.h" +#include "goo/GooList.h" +#include "goo/GooString.h" + +void printEncodings() +{ + GooList *encNames = globalParams->getEncodingNames(); + printf("Available encodings are:\n"); + for (int i = 0; i < encNames->getLength(); ++i) { + GooString *enc = (GooString*)encNames->get(i); + printf("%s\n", enc->getCString()); + } + delete encNames; +} diff --git a/utils/printencodings.h b/utils/printencodings.h new file mode 100644 index 00000000..5be2819f --- /dev/null +++ b/utils/printencodings.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) 2008, Albert Astals Cid <aacid@kde.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef PRINTENCODINGS_H +#define PRINTENCODINGS_H + +void printEncodings(); + +#endif |