![]() |
Leptonica
1.82.0
Image processing and image analysis suite
|
#include <string.h>
#include <math.h>
#include "allheaders.h"
Go to the source code of this file.
Macros | |
#define | L_SMALLBUF 256 |
#define | L_BIGBUF 2048 /* must be able to hold hex colormap */ |
#define | DEBUG_MULTIPAGE 0 |
Functions | |
static L_COMP_DATA * | l_generateJp2kData (const char *fname) |
static L_COMP_DATA * | pixGenerateFlateData (PIX *pixs, l_int32 ascii85flag) |
static L_COMP_DATA * | pixGenerateJpegData (PIX *pixs, l_int32 ascii85flag, l_int32 quality) |
static L_COMP_DATA * | pixGenerateJp2kData (PIX *pixs, l_int32 quality) |
static L_COMP_DATA * | pixGenerateG4Data (PIX *pixs, l_int32 ascii85flag) |
static l_int32 | l_generatePdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd) |
static void | generateFixedStringsPdf (L_PDF_DATA *lpd) |
static char * | generateEscapeString (const char *str) |
static void | generateMediaboxPdf (L_PDF_DATA *lpd) |
static l_int32 | generatePageStringPdf (L_PDF_DATA *lpd) |
static l_int32 | generateContentStringPdf (L_PDF_DATA *lpd) |
static l_int32 | generatePreXStringsPdf (L_PDF_DATA *lpd) |
static l_int32 | generateColormapStringsPdf (L_PDF_DATA *lpd) |
static void | generateTrailerPdf (L_PDF_DATA *lpd) |
static char * | makeTrailerStringPdf (L_DNA *daloc) |
static l_int32 | generateOutputDataPdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd) |
static l_int32 | parseTrailerPdf (L_BYTEA *bas, L_DNA **pda) |
static char * | generatePagesObjStringPdf (NUMA *napage) |
static L_BYTEA * | substituteObjectNumbers (L_BYTEA *bas, NUMA *na_objs) |
static L_PDF_DATA * | pdfdataCreate (const char *title) |
static void | pdfdataDestroy (L_PDF_DATA **plpd) |
static L_COMP_DATA * | pdfdataGetCid (L_PDF_DATA *lpd, l_int32 index) |
l_ok | pixConvertToPdfData (PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
l_ok | ptraConcatenatePdfToData (L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes) |
l_ok | convertTiffMultipageToPdf (const char *filein, const char *fileout) |
l_ok | l_generateCIDataForPdf (const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid) |
l_ok | l_generateCIData (const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid) |
L_COMP_DATA * | l_generateFlateDataPdf (const char *fname, PIX *pixs) |
L_COMP_DATA * | l_generateJpegData (const char *fname, l_int32 ascii85flag) |
L_COMP_DATA * | l_generateJpegDataMem (l_uint8 *data, size_t nbytes, l_int32 ascii85flag) |
L_COMP_DATA * | l_generateG4Data (const char *fname, l_int32 ascii85flag) |
l_ok | pixGenerateCIData (PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid) |
L_COMP_DATA * | l_generateFlateData (const char *fname, l_int32 ascii85flag) |
l_ok | cidConvertToPdfData (L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes) |
void | l_CIDataDestroy (L_COMP_DATA **pcid) |
void | l_pdfSetG4ImageMask (l_int32 flag) |
void | l_pdfSetDateAndVersion (l_int32 flag) |
Variables | |
static const l_int32 | DefaultInputRes = 300 |
static l_int32 | var_WRITE_G4_IMAGE_MASK = 1 |
static l_int32 | var_WRITE_DATE_AND_VERSION = 1 |
Lower-level operations for generating pdf.
Intermediate function for single page, multi-image conversion l_int32 pixConvertToPdfData()
Intermediate function for generating multipage pdf output l_int32 ptraConcatenatePdfToData()
Convert tiff multipage to pdf file l_int32 convertTiffMultipageToPdf()
Generates the CID, transcoding under some conditions l_int32 l_generateCIDataForPdf() l_int32 l_generateCIData()
Lower-level CID generation without transcoding L_COMP_DATA *l_generateFlateDataPdf() L_COMP_DATA *l_generateJpegData() L_COMP_DATA *l_generateJpegDataMem() static L_COMP_DATA *l_generateJp2kData() L_COMP_DATA *l_generateG4Data()
Lower-level CID generation with transcoding l_int32 pixGenerateCIData() L_COMP_DATA *l_generateFlateData() static L_COMP_DATA *pixGenerateFlateData() static L_COMP_DATA *pixGenerateJpegData() static L_COMP_DATA *pixGenerateJp2kData() static L_COMP_DATA *pixGenerateG4Data()
Other CID operations l_int32 cidConvertToPdfData() void l_CIDataDestroy()
Helper functions for generating the output pdf string static l_int32 l_generatePdf() static void generateFixedStringsPdf() static char *generateEscapeString() static void generateMediaboxPdf() static l_int32 generatePageStringPdf() static l_int32 generateContentStringPdf() static l_int32 generatePreXStringsPdf() static l_int32 generateColormapStringsPdf() static void generateTrailerPdf() static char *makeTrailerStringPdf() static l_int32 generateOutputDataPdf()
Helper functions for generating multipage pdf output static l_int32 parseTrailerPdf() static char *generatePagesObjStringPdf() static L_BYTEA *substituteObjectNumbers()
Create/destroy/access pdf data static L_PDF_DATA *pdfdataCreate() static void pdfdataDestroy() static L_COMP_DATA *pdfdataGetCid()
Set flags for special modes void l_pdfSetG4ImageMask() void l_pdfSetDateAndVersion()
Definition in file pdfio2.c.
l_ok cidConvertToPdfData | ( | L_COMP_DATA * | cid, |
const char * | title, | ||
l_uint8 ** | pdata, | ||
size_t * | pnbytes | ||
) |
[in] | cid | compressed image data |
[in] | title | [optional] pdf title; can be NULL |
[out] | pdata | output pdf data for image |
[out] | pnbytes | size of output pdf data |
Notes: (1) Caller must not destroy the cid. It is absorbed in the lpd and destroyed by this function.
Definition at line 1607 of file pdfio2.c.
References L_Compressed_Data::res.
Referenced by pixcompFastConvertToPdfData().
l_ok convertTiffMultipageToPdf | ( | const char * | filein, |
const char * | fileout | ||
) |
[in] | filein | (tiff) |
[in] | fileout | (pdf) |
Notes: (1) A multipage tiff file can also be converted to PS, using convertTiffMultipageToPS()
Definition at line 485 of file pdfio2.c.
References fileFormatIsTiff(), fopenReadStream(), pixaConvertToPdf(), pixaDestroy(), and pixaReadMultipageTiff().
|
static |
[in] | str | input string |
Notes: (1) If the input string is not ascii, returns null. (2) This takes an input ascii string and generates a hex ascii output string with 4 bytes out for each byte in. The feff code at the beginning tells the pdf interpreter that the data is to be interpreted as big-endian, 4 bytes at a time. For ascii, the first two bytes are 0 and the last two bytes are less than 0x80.
Definition at line 1815 of file pdfio2.c.
References stringCat().
|
static |
[out] | pdata | pdf data array |
[out] | pnbytes | size of pdf data array |
[in] | lpd | input data used to make pdf |
Notes: (1) Only called from l_generatePdf(). On error, no data is returned.
Definition at line 2238 of file pdfio2.c.
References L_Pdf_Data::id, l_dnaGetIArray(), L_Pdf_Data::n, L_Pdf_Data::obj1, L_Pdf_Data::obj2, L_Pdf_Data::obj3, L_Pdf_Data::obj4, L_Pdf_Data::obj5, L_Pdf_Data::objloc, L_Pdf_Data::objsize, L_Pdf_Data::trailer, and L_Pdf_Data::xrefloc.
void l_CIDataDestroy | ( | L_COMP_DATA ** | pcid | ) |
[in,out] | pcid | will be set to null before returning |
Definition at line 1656 of file pdfio2.c.
References L_Compressed_Data::cmapdata85, L_Compressed_Data::cmapdatahex, L_Compressed_Data::data85, and L_Compressed_Data::datacomp.
Referenced by convertFlateToPSEmbed(), convertG4ToPSEmbed(), convertG4ToPSString(), convertJpegToPSEmbed(), and l_generateJp2kData().
l_ok l_generateCIData | ( | const char * | fname, |
l_int32 | type, | ||
l_int32 | quality, | ||
l_int32 | ascii85, | ||
L_COMP_DATA ** | pcid | ||
) |
[in] | fname | |
[in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE |
[in] | quality | for jpeg if transcoded: 1-100; 0 for default (75) for jp2k if transcoded: 27-45; 0 for default (34) |
[in] | ascii85 | 0 for binary; 1 for ascii85-encoded |
[out] | pcid | compressed data |
Notes: (1) This can be used for both PostScript and pdf. (2) Set ascii85: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only) (3) This attempts to compress according to the requested type. If this can't be done, it falls back to ordinary flate encoding. (4) This differs from l_generateCIDataForPdf(), which determines the file format and only works for pdf.
Definition at line 626 of file pdfio2.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, and pixReadHeader().
l_ok l_generateCIDataForPdf | ( | const char * | fname, |
PIX * | pix, | ||
l_int32 | quality, | ||
L_COMP_DATA ** | pcid | ||
) |
[in] | fname | [optional] can be null |
[in] | pix | [optional] can be null |
[in] | quality | for jpeg if transcoded: 1-100; 0 for default (75) for jp2k if transcoded: 27-45; 0 for default (34) |
[out] | pcid | compressed data |
Notes: (1) You must set either filename or pix. (2) Given an image file and optionally a pix raster of that data, this provides a CID that is compatible with PDF, preferably without transcoding. (3) The pix is included for efficiency, in case transcoding is required and the pix is available to the caller. (4) We don't try to open files named "stdin" or "-" for Tesseract compatibility reasons. We may remove this restriction in the future. (5) Note that tiff-g4 must be transcoded to properly handle byte order and perhaps photometry (e.g., min-is-black). For a multipage tiff file, data will only be extracted from the first page, so this should not be invoked.
Definition at line 539 of file pdfio2.c.
References findFileFormat().
L_COMP_DATA* l_generateFlateData | ( | const char * | fname, |
l_int32 | ascii85flag | ||
) |
[in] | fname | |
[in] | ascii85flag | 0 for gzipped; 1 for ascii85-encoded gzipped |
Notes: (1) The input image is converted to one of these 4 types: ~ 1 bpp ~ 8 bpp, no colormap ~ 8 bpp, colormap ~ 32 bpp rgb (2) Set ascii85flag: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only) (3) Always transcodes (i.e., first decodes the png file)
Definition at line 1305 of file pdfio2.c.
References pixDestroy(), pixGenerateFlateData(), and pixRead().
Referenced by convertFlateToPSEmbed(), and convertFlateToPSString().
L_COMP_DATA* l_generateFlateDataPdf | ( | const char * | fname, |
PIX * | pixs | ||
) |
[in] | fname | preferably png |
[in] | pixs | [optional] can be null |
Notes: (1) If you hand this a png file, you are going to get png predictors embedded in the flate data. So it has come to this. http://xkcd.com/1022/ (2) Exception: if the png is interlaced or if it is RGBA, it will be transcoded. (3) If transcoding is required, this will not have to read from file if a pix is input.
Definition at line 730 of file pdfio2.c.
References findFileFormat().
L_COMP_DATA* l_generateG4Data | ( | const char * | fname, |
l_int32 | ascii85flag | ||
) |
[in] | fname | of g4 compressed file |
[in] | ascii85flag | 0 for g4 compressed; 1 for ascii85-encoded g4 |
Notes: (1) Set ascii85flag: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only) (2) This does not work for multipage tiff files.
Definition at line 1118 of file pdfio2.c.
References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, extractG4DataFromFile(), fopenReadStream(), getTiffResolution(), L_Compressed_Data::h, L_G4_ENCODE, L_Compressed_Data::minisblack, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, tiffGetCount(), L_Compressed_Data::type, and L_Compressed_Data::w.
Referenced by convertG4ToPSEmbed(), and convertG4ToPSString().
|
static |
[in] | fname | of jp2k file |
Notes: (1) This is only called after the file is verified to be jp2k.
Definition at line 1064 of file pdfio2.c.
References L_Compressed_Data::datacomp, fopenReadStream(), l_binaryRead(), l_CIDataDestroy(), and readHeaderJp2k().
L_COMP_DATA* l_generateJpegData | ( | const char * | fname, |
l_int32 | ascii85flag | ||
) |
[in] | fname | of jpeg file |
[in] | ascii85flag | 0 for jpeg; 1 for ascii85-encoded jpeg |
Notes: (1) Set ascii85flag: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only) (2) Most of this function is repeated in l_generateJpegDataMem(), which is required in pixcompFastConvertToPdfData().
Definition at line 929 of file pdfio2.c.
References fopenReadStream(), and readHeaderJpeg().
Referenced by convertJpegToPSEmbed(), and convertJpegToPSString().
L_COMP_DATA* l_generateJpegDataMem | ( | l_uint8 * | data, |
size_t | nbytes, | ||
l_int32 | ascii85flag | ||
) |
[in] | data | of jpeg-encoded file |
[in] | nbytes | size of jpeg-encoded file |
[in] | ascii85flag | 0 for jpeg; 1 for ascii85-encoded jpeg |
Notes: (1) Set ascii85flag: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
Definition at line 1002 of file pdfio2.c.
References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_JPEG_ENCODE, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, readHeaderMemJpeg(), readResolutionMemJpeg(), L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.
Referenced by pixcompFastConvertToPdfData().
|
static |
[out] | pdata | pdf array |
[out] | pnbytes | number of bytes in pdf array |
[in] | lpd | all the required input image data |
Notes: (1) On error, no data is returned. (2) The objects are: 1: Catalog 2: Info 3: Pages 4: Page 5: Contents (rendering command) 6 to 6+n-1: n XObjects 6+n to 6+n+m-1: m colormaps
void l_pdfSetDateAndVersion | ( | l_int32 | flag | ) |
[in] | flag | 1 for writing date/time and leptonica version; 0 for omitting this from the metadata |
Notes: (1) The default is for writing this data. For regression tests that compare output against golden files, it is useful to omit.
void l_pdfSetG4ImageMask | ( | l_int32 | flag | ) |
[in] | flag | 1 for writing g4 data as fg only through a mask; 0 for writing fg and bg |
Notes: (1) The default is for writing only the fg (through the mask). That way when you write a 1 bpp image, the bg is transparent, so any previously written image remains visible behind it.
[in] | bas | lba of a pdf file |
[out] | pda | byte locations of the beginning of each object |
Definition at line 2314 of file pdfio2.c.
References arrayFindSequence(), l_byteaFindEachSequence(), l_byteaGetData(), l_dnaAddNumber(), l_dnaCreate(), l_dnaDestroy(), l_dnaEmpty(), l_dnaGetCount(), l_dnaGetIValue(), l_dnaWriteStderr(), L_NOCOPY, lept_stderr(), sarrayCreateLinesFromString(), sarrayDestroy(), sarrayGetString(), and sarrayWriteStderr().
Referenced by ptraConcatenatePdfToData().
l_ok pixConvertToPdfData | ( | PIX * | pix, |
l_int32 | type, | ||
l_int32 | quality, | ||
l_uint8 ** | pdata, | ||
size_t * | pnbytes, | ||
l_int32 | x, | ||
l_int32 | y, | ||
l_int32 | res, | ||
const char * | title, | ||
L_PDF_DATA ** | plpd, | ||
l_int32 | position | ||
) |
[in] | pix | all depths; cmap OK |
[in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE |
[in] | quality | for jpeg: 1-100; 0 for default (75) for jp2k: 27-45; 0 for default (34) |
[out] | pdata | pdf array |
[out] | pnbytes | number of bytes in pdf array |
[in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page |
[in] | res | override the resolution of the input image, in ppi; use 0 to respect resolution embedded in the input |
[in] | title | [optional] pdf title; can be null |
[in,out] | plpd | ptr to lpd; created on the first invocation and returned until last image is processed |
[in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
Notes: (1) If res == 0 and the input resolution field is 0, this will use DefaultInputRes. (2) This only writes data if it is the last image to be written on the page. (3) See comments in convertToPdf().
Definition at line 190 of file pdfio2.c.
References L_Compressed_Data::h, L_FIRST_IMAGE, L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, pixGenerateCIData(), L_Compressed_Data::res, selectDefaultPdfEncoding(), and L_Compressed_Data::w.
Referenced by convertImageDataToPdfData(), convertToPdfData(), pixaConvertToPdfData(), pixConvertToPdf(), pixWriteMemPdf(), and saConvertFilesToPdfData().
l_ok pixGenerateCIData | ( | PIX * | pixs, |
l_int32 | type, | ||
l_int32 | quality, | ||
l_int32 | ascii85, | ||
L_COMP_DATA ** | pcid | ||
) |
[in] | pixs | 8 or 32 bpp, no colormap |
[in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or L_JP2K_ENCODE |
[in] | quality | for jpeg if transcoded: 1-100; 0 for default (75) for jp2k if transcoded: 27-45; 0 for default (34) |
[in] | ascii85 | 0 for binary; 1 for ascii85-encoded |
[out] | pcid | compressed data |
Notes: (1) Set ascii85: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
Definition at line 1207 of file pdfio2.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, and selectDefaultPdfEncoding().
Referenced by pixConvertToPdfData().
|
static |
[in] | pixs | |
[in] | ascii85flag | 0 for gzipped; 1 for ascii85-encoded gzipped |
Notes: (1) If called with an RGBA pix (spp == 4), the alpha channel will be removed, projecting a white background through any transparency. (2) If called with a colormapped pix, any transparency in the alpha component in the colormap will be ignored, as it is for all leptonica operations on colormapped pix.
Definition at line 1342 of file pdfio2.c.
References pixGetDimensions().
Referenced by l_generateFlateData().
|
static |
[in] | pixs | 1 bpp, no colormap |
[in] | ascii85flag | 0 for g4 compressed; 1 for ascii85-encoded g4 |
Notes: (1) Set ascii85flag: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
|
static |
[in] | pixs | 8 or 32 bpp, no colormap |
[in] | quality | 0 for default, which is 34 |
Notes: (1) The quality can be set between 27 (very poor) and 45 (nearly perfect). Use 0 for default (34). Use 100 for lossless, but this is very expensive and not recommended.
|
static |
[in] | pixs | 8 or 32 bpp, no colormap |
[in] | ascii85flag | 0 for jpeg; 1 for ascii85-encoded jpeg |
[in] | quality | 0 for default, which is 75 |
Notes: (1) Set ascii85flag: ~ 0 for binary data (PDF only) ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
[in] | pa_data | ptra array of pdf strings, each for a single-page pdf file |
[in] | sa | [optional] string array of pathnames for input pdf files; can be null |
[out] | pdata | concatenated pdf data in memory |
[out] | pnbytes | number of bytes in pdf data |
Notes: (1) This only works with leptonica-formatted single-page pdf files. pdf files generated by other programs will have unpredictable (and usually bad) results. The requirements for each pdf file: (a) The Catalog and Info objects are the first two. (b) Object 3 is Pages (c) Object 4 is Page (d) The remaining objects are Contents, XObjects, and ColorSpace (2) We remove trailers from each page, and append the full trailer for all pages at the end. (3) For all but the first file, remove the ID and the first 3 objects (catalog, info, pages), so that each subsequent file has only objects of these classes: Page, Contents, XObject, ColorSpace (Indexed RGB). For those objects, we substitute these refs to objects in the local file: Page: Parent(object 3), Contents, XObject(typically multiple) XObject: [ColorSpace if indexed] The Pages object on the first page (object 3) has a Kids array of references to all the Page objects, with a Count equal to the number of pages. Each Page object refers back to this parent.
Definition at line 321 of file pdfio2.c.
References l_byteaDestroy(), L_CLONE, l_dnaaAddDna(), l_dnaaCreate(), l_dnaaDestroy(), l_dnaaGetDna(), l_dnaDestroy(), l_dnaGetCount(), L_INSERT, L_NO_COMPACTION, L_NOCOPY, numaaAddNuma(), numaaCreate(), numaAddNumber(), numaCreate(), numaMakeConstant(), numaMakeSequence(), numaReplaceNumber(), numaSetValue(), parseTrailerPdf(), ptraCompactArray(), ptraGetActualCount(), ptraGetPtrToItem(), ptraRemove(), and sarrayGetString().
Referenced by convertSegmentedFilesToPdf(), pixaConvertToPdfData(), ptraConcatenatePdf(), saConcatenatePdfToData(), saConvertFilesToPdfData(), and saConvertUnscaledFilesToPdfData().
[in] | bas | lba of a pdf object |
[in] | na_objs | object number mapping array |
Notes: (1) Interpret the first set of bytes as the object number, map to the new number, and write it out. (2) Find all occurrences of this 4-byte sequence: " 0 R" (3) Find the location and value of the integer preceding this, and map it to the new value. (4) Rewrite the object with new object numbers.
Definition at line 2467 of file pdfio2.c.
References arrayFindEachSequence(), arrayFindSequence(), l_byteaAppendData(), l_byteaAppendString(), l_byteaCreate(), l_byteaGetData(), l_dnaDestroy(), l_dnaGetCount(), l_dnaGetIArray(), numaGetCount(), and numaGetIArray().