Leptonica  1.82.0
Image processing and image analysis suite
parseprotos.c
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * \file parseprotos.c
29  * <pre>
30  *
31  * char *parseForProtos()
32  *
33  * Static helpers
34  * static l_int32 getNextNonCommentLine()
35  * static l_int32 getNextNonBlankLine()
36  * static l_int32 getNextNonDoubleSlashLine()
37  * static l_int32 searchForProtoSignature()
38  * static char *captureProtoSignature()
39  * static char *cleanProtoSignature()
40  * static l_int32 skipToEndOfFunction()
41  * static l_int32 skipToMatchingBrace()
42  * static l_int32 skipToSemicolon()
43  * static l_int32 getOffsetForCharacter()
44  * static l_int32 getOffsetForMatchingRP()
45  * </pre>
46  */
47 
48 #ifdef HAVE_CONFIG_H
49 #include <config_auto.h>
50 #endif /* HAVE_CONFIG_H */
51 
52 #include <string.h>
53 #include "allheaders.h"
54 
55 #define L_BUF_SIZE 2048 /* max token size */
56 
57 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
58 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
59 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
60  l_int32 *pnext);
61 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
62  l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
63  l_int32 *pfound);
64 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
65  l_int32 charindex);
66 static char * cleanProtoSignature(char *str);
67 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
68  l_int32 charindex, l_int32 *pnext);
69 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
70  l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
71 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
72  l_int32 charindex, l_int32 *pnext);
73 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
74  l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
75 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
76  l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
77  l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
78 
79 
80 /*
81  * \brief parseForProtos()
82  *
83  * \param[in] filein output of cpp
84  * \param[in] prestring [optional] string that prefaces each decl;
85  * use NULL to omit
86  * \return parsestr string of function prototypes, or NULL on error
87  *
88  * <pre>
89  * Notes:
90  * (1) We parse the output of cpp:
91  * cpp -ansi <filein>
92  * Three plans were attempted, with success on the third.
93  * (2) Plan 1. A cursory examination of the cpp output indicated that
94  * every function was preceded by a cpp comment statement.
95  * So we just need to look at statements beginning after comments.
96  * Unfortunately, this is NOT the case. Some functions start
97  * without cpp comment lines, typically when there are no
98  * comments in the source that immediately precede the function.
99  * (3) Plan 2. Consider the keywords in the language that start
100  * parts of the cpp file. Some, like 'enum', 'union' and
101  * 'struct', are followed after a while by '{', and eventually
102  * end with '}, plus an optional token and a final ';'.
103  * Others, like 'extern', 'static' and 'typedef', are never
104  * the beginnings of global function definitions. Function
105  * prototypes have one or more sets of '(' followed eventually
106  * by a ')', and end with ';'. But function definitions have
107  * tokens, followed by '(', more tokens, ')' and then
108  * immediately a '{'. We would generate a prototype from this
109  * by adding a ';' to all tokens up to the ')'. So we use
110  * these special tokens to decide what we are parsing. And
111  * whenever a function definition is found and the prototype
112  * extracted, we skip through the rest of the function
113  * past the corresponding '}'. This token ends a line, and
114  * is often on a line of its own. But as it turns out,
115  * the only keyword we need to consider is 'static'.
116  * (4) Plan 3. Consider the parentheses and braces for various
117  * declarations. A struct, enum, or union has a pair of
118  * braces followed by a semicolon. With the exception of an
119  * __attribute__ declaration for a struct, they cannot have parentheses
120  * before the left brace, but a struct can have lots of parentheses
121  * within the brace set. A function prototype has no braces.
122  * A function declaration can have sets of left and right
123  * parentheses, but these are followed by a left brace.
124  * So plan 3 looks at the way parentheses and braces are
125  * organized. Once the beginning of a function definition
126  * is found, the prototype is extracted and we search for
127  * the ending right brace.
128  * (5) To find the ending right brace, it is necessary to do some
129  * careful parsing. For example, in this file, we have
130  * left and right braces as characters, and these must not
131  * be counted. Somewhat more tricky, the file fhmtauto.c
132  * generates code, and includes a right brace in a string.
133  * So we must not include braces that are in strings. But how
134  * do we know if something is inside a string? Keep state,
135  * starting with not-inside, and every time you hit a double quote
136  * that is not escaped, toggle the condition. Any brace
137  * found in the state of being within a string is ignored.
138  * (6) When a prototype is extracted, it is put in a canonical
139  * form (i.e., cleaned up). Finally, we check that it is
140  * not static and save it. (If static, it is ignored).
141  * (7) The %prestring for unix is NULL; it is included here so that
142  * you can use Microsoft's declaration for importing or
143  * exporting to a dll. See environ.h for examples of use.
144  * Here, we set: %prestring = "LEPT_DLL ". Note in particular
145  * the space character that will separate 'LEPT_DLL' from
146  * the standard unix prototype that follows.
147  * </pre>
148  */
149 char *
150 parseForProtos(const char *filein,
151  const char *prestring)
152 {
153 char *strdata, *str, *newstr, *parsestr, *secondword;
154 l_int32 start, next, stop, charindex, found;
155 size_t nbytes;
156 SARRAY *sa, *saout, *satest;
157 
158  PROCNAME("parseForProtos");
159 
160  if (!filein)
161  return (char *)ERROR_PTR("filein not defined", procName, NULL);
162 
163  /* Read in the cpp output into memory, one string for each
164  * line in the file, omitting blank lines. */
165  strdata = (char *)l_binaryRead(filein, &nbytes);
166  sa = sarrayCreateLinesFromString(strdata, 0);
167 
168  saout = sarrayCreate(0);
169  next = 0;
170  while (1) { /* repeat after each non-static prototype is extracted */
171  searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
172  if (!found)
173  break;
174 /* lept_stderr(" start = %d, stop = %d, charindex = %d\n",
175  start, stop, charindex); */
176  str = captureProtoSignature(sa, start, stop, charindex);
177 
178  /* Make sure that the signature found by cpp does not begin with
179  * static, extern or typedef. We get 'extern' declarations
180  * from header files, and with some versions of cpp running on
181  * #include <sys/stat.h> we get something of the form:
182  * extern ... (( ... )) ... ( ... ) { ...
183  * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
184  * and there is a lot of garbage between the rp and the lp.
185  * It is easiest to simply reject any signature that starts
186  * with 'extern'. Note also that an 'extern' token has been
187  * prepended to each prototype, so the 'static' or
188  * 'extern' keywords we are looking for, if they exist,
189  * would be the second word. We also have a typedef in
190  * bmpio.c that has the form:
191  * typedef struct __attribute__((....)) { ...} ... ;
192  * This is avoided by blacklisting 'typedef' along with 'extern'
193  * and 'static'. */
194  satest = sarrayCreateWordsFromString(str);
195  secondword = sarrayGetString(satest, 1, L_NOCOPY);
196  if (strcmp(secondword, "static") && /* not static */
197  strcmp(secondword, "extern") && /* not extern */
198  strcmp(secondword, "typedef")) { /* not typedef */
199  if (prestring) { /* prepend it to the prototype */
200  newstr = stringJoin(prestring, str);
201  sarrayAddString(saout, newstr, L_INSERT);
202  LEPT_FREE(str);
203  } else {
204  sarrayAddString(saout, str, L_INSERT);
205  }
206  } else {
207  LEPT_FREE(str);
208  }
209  sarrayDestroy(&satest);
210 
211  skipToEndOfFunction(sa, stop, charindex, &next);
212  if (next == -1) break;
213  }
214 
215  /* Flatten into a string with newlines between prototypes */
216  parsestr = sarrayToString(saout, 1);
217  LEPT_FREE(strdata);
218  sarrayDestroy(&sa);
219  sarrayDestroy(&saout);
220 
221  return parsestr;
222 }
223 
224 
225 /*
226  * \brief getNextNonCommentLine()
227  *
228  * \param[in] sa output from cpp, by line)
229  * \param[in] start starting index to search)
230  * \param[out] pnext index of first uncommented line after the start line
231  * \return 0 if OK, o on error
232  *
233  * <pre>
234  * Notes:
235  * (1) Skips over all consecutive comment lines, beginning at 'start'
236  * (2) If all lines to the end are '#' comments, return next = -1
237  * </pre>
238  */
239 static l_int32
240 getNextNonCommentLine(SARRAY *sa,
241  l_int32 start,
242  l_int32 *pnext)
243 {
244 char *str;
245 l_int32 i, n;
246 
247  PROCNAME("getNextNonCommentLine");
248 
249  if (!sa)
250  return ERROR_INT("sa not defined", procName, 1);
251  if (!pnext)
252  return ERROR_INT("&pnext not defined", procName, 1);
253 
254  /* Init for situation where this line and all following are comments */
255  *pnext = -1;
256 
257  n = sarrayGetCount(sa);
258  for (i = start; i < n; i++) {
259  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
260  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
261  if (str[0] != '#') {
262  *pnext = i;
263  return 0;
264  }
265  }
266 
267  return 0;
268 }
269 
270 
271 /*
272  * \brief getNextNonBlankLine()
273  *
274  * \param[in] sa output from cpp, by line
275  * \param[in] start starting index to search
276  * \param[out] pnext index of first nonblank line after the start line
277  * \return 0 if OK, 1 on error
278  *
279  * <pre>
280  * Notes:
281  * (1) Skips over all consecutive blank lines, beginning at 'start'
282  * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
283  * (3) If all lines to the end are blank, return next = -1
284  * </pre>
285  */
286 static l_int32
287 getNextNonBlankLine(SARRAY *sa,
288  l_int32 start,
289  l_int32 *pnext)
290 {
291 char *str;
292 l_int32 i, j, n, len;
293 
294  PROCNAME("getNextNonBlankLine");
295 
296  if (!sa)
297  return ERROR_INT("sa not defined", procName, 1);
298  if (!pnext)
299  return ERROR_INT("&pnext not defined", procName, 1);
300 
301  /* Init for situation where this line and all following are blank */
302  *pnext = -1;
303 
304  n = sarrayGetCount(sa);
305  for (i = start; i < n; i++) {
306  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
307  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
308  len = strlen(str);
309  for (j = 0; j < len; j++) {
310  if (str[j] != ' ' && str[j] != '\t'
311  && str[j] != '\n' && str[j] != '\r') { /* non-blank */
312  *pnext = i;
313  return 0;
314  }
315  }
316  }
317 
318  return 0;
319 }
320 
321 
322 /*
323  * \brief getNextNonDoubleSlashLine()
324  *
325  * \param[in] sa output from cpp, by line
326  * \param[in] start starting index to search
327  * \param[out] pnext index of first uncommented line after the start line
328  * \return 0 if OK, 1 on error
329  *
330  * <pre>
331  * Notes:
332  * (1) Skips over all consecutive '//' lines, beginning at 'start'
333  * (2) If all lines to the end start with '//', return next = -1
334  * </pre>
335  */
336 static l_int32
337 getNextNonDoubleSlashLine(SARRAY *sa,
338  l_int32 start,
339  l_int32 *pnext)
340 {
341 char *str;
342 l_int32 i, n, len;
343 
344  PROCNAME("getNextNonDoubleSlashLine");
345 
346  if (!sa)
347  return ERROR_INT("sa not defined", procName, 1);
348  if (!pnext)
349  return ERROR_INT("&pnext not defined", procName, 1);
350 
351  /* Init for situation where this line and all following
352  * start with '//' */
353  *pnext = -1;
354 
355  n = sarrayGetCount(sa);
356  for (i = start; i < n; i++) {
357  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
358  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
359  len = strlen(str);
360  if (len < 2 || str[0] != '/' || str[1] != '/') {
361  *pnext = i;
362  return 0;
363  }
364  }
365 
366  return 0;
367 }
368 
369 
370 /*
371  * \brief searchForProtoSignature()
372  *
373  * \param[in] sa output from cpp, by line
374  * \param[in] begin beginning index to search
375  * \param[out] pstart starting index for function definition
376  * \param[out] pstop index of line on which proto is completed
377  * \param[out] pcharindex char index of completing ')' character
378  * \param[out] pfound 1 if valid signature is found; 0 otherwise
379  * \return 0 if OK, 1 on error
380  *
381  * <pre>
382  * Notes:
383  * (1) If this returns found == 0, it means that there are no
384  * more function definitions in the file. Caller must check
385  * this value and exit the loop over the entire cpp file.
386  * (2) This follows plan 3 (see above). We skip comment and blank
387  * lines at the beginning. Then we don't check for keywords.
388  * Instead, find the relative locations of the first occurrences
389  * of these four tokens: left parenthesis (lp), right
390  * parenthesis (rp), left brace (lb) and semicolon (sc).
391  * (3) The signature of a function definition looks like this:
392  * .... '(' .... ')' '{'
393  * where the lp and rp must both precede the lb, with only
394  * whitespace between the rp and the lb. The '....'
395  * are sets of tokens that have no braces.
396  * (4) If a function definition is found, this returns found = 1,
397  * with 'start' being the first line of the definition and
398  * 'charindex' being the position of the ')' in line 'stop'
399  * at the end of the arg list.
400  * </pre>
401  */
402 static l_int32
403 searchForProtoSignature(SARRAY *sa,
404  l_int32 begin,
405  l_int32 *pstart,
406  l_int32 *pstop,
407  l_int32 *pcharindex,
408  l_int32 *pfound)
409 {
410 l_int32 next, rbline, rbindex, scline;
411 l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc;
412 l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc;
413 l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc;
414 
415  PROCNAME("searchForProtoSignature");
416 
417  if (!sa)
418  return ERROR_INT("sa not defined", procName, 1);
419  if (!pstart)
420  return ERROR_INT("&start not defined", procName, 1);
421  if (!pstop)
422  return ERROR_INT("&stop not defined", procName, 1);
423  if (!pcharindex)
424  return ERROR_INT("&charindex not defined", procName, 1);
425  if (!pfound)
426  return ERROR_INT("&found not defined", procName, 1);
427 
428  *pfound = FALSE;
429 
430  while (1) {
431 
432  /* Skip over sequential '#' comment lines */
433  getNextNonCommentLine(sa, begin, &next);
434  if (next == -1) return 0;
435  if (next != begin) {
436  begin = next;
437  continue;
438  }
439 
440  /* Skip over sequential blank lines */
441  getNextNonBlankLine(sa, begin, &next);
442  if (next == -1) return 0;
443  if (next != begin) {
444  begin = next;
445  continue;
446  }
447 
448  /* Skip over sequential lines starting with '//' */
449  getNextNonDoubleSlashLine(sa, begin, &next);
450  if (next == -1) return 0;
451  if (next != begin) {
452  begin = next;
453  continue;
454  }
455 
456  /* Search for specific character sequence patterns; namely
457  * a lp, a matching rp, a lb and a semicolon.
458  * Abort the search if no lp is found. */
459  getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
460  &toffsetlp);
461  if (soffsetlp == -1)
462  break;
463  getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
464  &soffsetrp, &boffsetrp, &toffsetrp);
465  getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
466  &toffsetlb);
467  getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
468  &toffsetsc);
469 
470  /* We've found a lp. Now weed out the case where a matching
471  * rp and a lb are not both found. */
472  if (soffsetrp == -1 || soffsetlb == -1)
473  break;
474 
475  /* Check if a left brace occurs before a left parenthesis;
476  * if so, skip it */
477  if (toffsetlb < toffsetlp) {
478  skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
479  &rbline, &rbindex);
480  skipToSemicolon(sa, rbline, rbindex, &scline);
481  begin = scline + 1;
482  continue;
483  }
484 
485  /* Check if a semicolon occurs before a left brace or
486  * a left parenthesis; if so, skip it */
487  if ((soffsetsc != -1) &&
488  (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {
489  skipToSemicolon(sa, next, 0, &scline);
490  begin = scline + 1;
491  continue;
492  }
493 
494  /* OK, it should be a function definition. We haven't
495  * checked that there is only white space between the
496  * rp and lb, but we've only seen problems with two
497  * extern inlines in sys/stat.h, and this is handled
498  * later by eliminating any prototype beginning with 'extern'. */
499  *pstart = next;
500  *pstop = next + soffsetrp;
501  *pcharindex = boffsetrp;
502  *pfound = TRUE;
503  break;
504  }
505 
506  return 0;
507 }
508 
509 
510 /*
511  * \brief captureProtoSignature()
512  *
513  * \param[in] sa output from cpp, by line
514  * \param[in] start starting index to search; never a comment line
515  * \param[in] stop index of line on which pattern is completed
516  * \param[in] charindex char index of completing ')' character
517  * \return cleanstr prototype string, or NULL on error
518  *
519  * <pre>
520  * Notes:
521  * (1) Return all characters, ending with a ';' after the ')'
522  * </pre>
523  */
524 static char *
525 captureProtoSignature(SARRAY *sa,
526  l_int32 start,
527  l_int32 stop,
528  l_int32 charindex)
529 {
530 char *str, *newstr, *protostr, *cleanstr;
531 SARRAY *sap;
532 l_int32 i;
533 
534  PROCNAME("captureProtoSignature");
535 
536  if (!sa)
537  return (char *)ERROR_PTR("sa not defined", procName, NULL);
538 
539  sap = sarrayCreate(0);
540  for (i = start; i < stop; i++) {
541  str = sarrayGetString(sa, i, L_COPY);
542  sarrayAddString(sap, str, L_INSERT);
543  }
544  str = sarrayGetString(sa, stop, L_COPY);
545  str[charindex + 1] = '\0';
546  newstr = stringJoin(str, ";");
547  sarrayAddString(sap, newstr, L_INSERT);
548  LEPT_FREE(str);
549  protostr = sarrayToString(sap, 2);
550  sarrayDestroy(&sap);
551  cleanstr = cleanProtoSignature(protostr);
552  LEPT_FREE(protostr);
553 
554  return cleanstr;
555 }
556 
557 
558 /*
559  * \brief cleanProtoSignature()
560  *
561  * \param[in] instr input prototype string
562  * \return cleanstr clean prototype string, or NULL on error
563  *
564  * <pre>
565  * Notes:
566  * (1) Adds 'extern' at beginning and regularizes spaces
567  * between tokens.
568  * </pre>
569  */
570 static char *
571 cleanProtoSignature(char *instr)
572 {
573 char *str, *cleanstr;
574 char buf[L_BUF_SIZE];
575 char externstring[] = "extern";
576 l_int32 i, j, nwords, nchars, index, len;
577 SARRAY *sa, *saout;
578 
579  PROCNAME("cleanProtoSignature");
580 
581  if (!instr)
582  return (char *)ERROR_PTR("instr not defined", procName, NULL);
583 
584  sa = sarrayCreateWordsFromString(instr);
585  nwords = sarrayGetCount(sa);
586  saout = sarrayCreate(0);
587  sarrayAddString(saout, externstring, L_COPY);
588  for (i = 0; i < nwords; i++) {
589  str = sarrayGetString(sa, i, L_NOCOPY);
590  nchars = strlen(str);
591  index = 0;
592  for (j = 0; j < nchars; j++) {
593  if (index > L_BUF_SIZE - 6) {
594  sarrayDestroy(&sa);
595  sarrayDestroy(&saout);
596  return (char *)ERROR_PTR("token too large", procName, NULL);
597  }
598  if (str[j] == '(') {
599  buf[index++] = ' ';
600  buf[index++] = '(';
601  buf[index++] = ' ';
602  } else if (str[j] == ')') {
603  buf[index++] = ' ';
604  buf[index++] = ')';
605  } else {
606  buf[index++] = str[j];
607  }
608  }
609  buf[index] = '\0';
610  sarrayAddString(saout, buf, L_COPY);
611  }
612 
613  /* Flatten to a prototype string with spaces added after
614  * each word, and remove the last space */
615  cleanstr = sarrayToString(saout, 2);
616  len = strlen(cleanstr);
617  cleanstr[len - 1] = '\0';
618 
619  sarrayDestroy(&sa);
620  sarrayDestroy(&saout);
621  return cleanstr;
622 }
623 
624 
625 /*
626  * \brief skipToEndOfFunction()
627  *
628  * \param[in] sa output from cpp, by line
629  * \param[in] start index of starting line with left bracket to search
630  * \param[in] lbindex starting char index for left bracket
631  * \param[out] pnext index of line following the ending '}' for function
632  * \return 0 if OK, 1 on error
633  */
634 static l_int32
635 skipToEndOfFunction(SARRAY *sa,
636  l_int32 start,
637  l_int32 lbindex,
638  l_int32 *pnext)
639 {
640 l_int32 end, rbindex;
641 l_int32 soffsetlb, boffsetlb, toffsetlb;
642 
643  PROCNAME("skipToEndOfFunction");
644 
645  if (!sa)
646  return ERROR_INT("sa not defined", procName, 1);
647  if (!pnext)
648  return ERROR_INT("&next not defined", procName, 1);
649 
650  getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
651  &toffsetlb);
652  skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
653  if (end == -1) { /* shouldn't happen! */
654  *pnext = -1;
655  return 1;
656  }
657 
658  *pnext = end + 1;
659  return 0;
660 }
661 
662 
663 /*
664  * \brief skipToMatchingBrace()
665  *
666  * \param[in] sa output from cpp, by line
667  * \param[in] start index of starting line with left bracket to search
668  * \param[in] lbindex starting char index for left bracket
669  * \param[out] pstop index of line with the matching right bracket
670  * \param[out] prbindex char index of matching right bracket
671  * \return 0 if OK, 1 on error
672  *
673  * <pre>
674  * Notes:
675  * (1) If the matching right brace is not found, returns
676  * stop = -1. This shouldn't happen.
677  * </pre>
678  */
679 static l_int32
680 skipToMatchingBrace(SARRAY *sa,
681  l_int32 start,
682  l_int32 lbindex,
683  l_int32 *pstop,
684  l_int32 *prbindex)
685 {
686 char *str;
687 l_int32 i, j, jstart, n, sumbrace, found, instring, nchars;
688 
689  PROCNAME("skipToMatchingBrace");
690 
691  if (!sa)
692  return ERROR_INT("sa not defined", procName, 1);
693  if (!pstop)
694  return ERROR_INT("&stop not defined", procName, 1);
695  if (!prbindex)
696  return ERROR_INT("&rbindex not defined", procName, 1);
697 
698  instring = 0; /* init to FALSE; toggle on double quotes */
699  *pstop = -1;
700  n = sarrayGetCount(sa);
701  sumbrace = 1;
702  found = FALSE;
703  for (i = start; i < n; i++) {
704  str = sarrayGetString(sa, i, L_NOCOPY);
705  jstart = 0;
706  if (i == start)
707  jstart = lbindex + 1;
708  nchars = strlen(str);
709  for (j = jstart; j < nchars; j++) {
710  /* Toggle the instring state every time you encounter
711  * a double quote that is NOT escaped. */
712  if (j == jstart && str[j] == '\"')
713  instring = 1 - instring;
714  if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
715  instring = 1 - instring;
716  /* Record the braces if they are neither a literal character
717  * nor within a string. */
718  if (str[j] == '{' && str[j+1] != '\'' && !instring) {
719  sumbrace++;
720  } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
721  sumbrace--;
722  if (sumbrace == 0) {
723  found = TRUE;
724  *prbindex = j;
725  break;
726  }
727  }
728  }
729  if (found) {
730  *pstop = i;
731  return 0;
732  }
733  }
734 
735  return ERROR_INT("matching right brace not found", procName, 1);
736 }
737 
738 
739 /*
740  * \brief skipToSemicolon()
741  *
742  * \param[in] sa output from cpp, by line
743  * \param[in] start index of starting line to search
744  * \param[in] charindex starting char index for search
745  * \param[out] pnext index of line containing the next ';'
746  * \return 0 if OK, 1 on error
747  *
748  * <pre>
749  * Notes:
750  * (1) If the semicolon isn't found, returns next = -1.
751  * This shouldn't happen.
752  * (2) This is only used in contexts where the semicolon is
753  * not within a string.
754  * </pre>
755  */
756 static l_int32
757 skipToSemicolon(SARRAY *sa,
758  l_int32 start,
759  l_int32 charindex,
760  l_int32 *pnext)
761 {
762 char *str;
763 l_int32 i, j, n, jstart, nchars, found;
764 
765  PROCNAME("skipToSemicolon");
766 
767  if (!sa)
768  return ERROR_INT("sa not defined", procName, 1);
769  if (!pnext)
770  return ERROR_INT("&next not defined", procName, 1);
771 
772  *pnext = -1;
773  n = sarrayGetCount(sa);
774  found = FALSE;
775  for (i = start; i < n; i++) {
776  str = sarrayGetString(sa, i, L_NOCOPY);
777  jstart = 0;
778  if (i == start)
779  jstart = charindex + 1;
780  nchars = strlen(str);
781  for (j = jstart; j < nchars; j++) {
782  if (str[j] == ';') {
783  found = TRUE;;
784  break;
785  }
786  }
787  if (found) {
788  *pnext = i;
789  return 0;
790  }
791  }
792 
793  return ERROR_INT("semicolon not found", procName, 1);
794 }
795 
796 
797 /*
798  * \brief getOffsetForCharacter()
799  *
800  * \param[in] sa output from cpp, by line
801  * \param[in] start starting index in sa to search;
802  * never a comment line
803  * \param[in] tchar we are searching for the first instance of this
804  * \param[out] psoffset offset in strings from start index
805  * \param[out] pboffset offset in bytes within string in which
806  * the character is first found
807  * \param[out] ptoffset offset in total bytes from beginning of string
808  * indexed by 'start' to the location where
809  * the character is first found
810  * \return 0 if OK, 1 on error
811  *
812  * <pre>
813  * Notes:
814  * (1) We are searching for the first instance of 'tchar', starting
815  * at the beginning of the string indexed by start.
816  * (2) If the character is not found, soffset is returned as -1,
817  * and the other offsets are set to very large numbers. The
818  * caller must check the value of soffset.
819  * (3) This is only used in contexts where it is not necessary to
820  * consider if the character is inside a string.
821  * </pre>
822  */
823 static l_int32
824 getOffsetForCharacter(SARRAY *sa,
825  l_int32 start,
826  char tchar,
827  l_int32 *psoffset,
828  l_int32 *pboffset,
829  l_int32 *ptoffset)
830 {
831 char *str;
832 l_int32 i, j, n, nchars, totchars, found;
833 
834  PROCNAME("getOffsetForCharacter");
835 
836  if (!sa)
837  return ERROR_INT("sa not defined", procName, 1);
838  if (!psoffset)
839  return ERROR_INT("&soffset not defined", procName, 1);
840  if (!pboffset)
841  return ERROR_INT("&boffset not defined", procName, 1);
842  if (!ptoffset)
843  return ERROR_INT("&toffset not defined", procName, 1);
844 
845  *psoffset = -1; /* init to not found */
846  *pboffset = 100000000;
847  *ptoffset = 100000000;
848 
849  n = sarrayGetCount(sa);
850  found = FALSE;
851  totchars = 0;
852  for (i = start; i < n; i++) {
853  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
854  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
855  nchars = strlen(str);
856  for (j = 0; j < nchars; j++) {
857  if (str[j] == tchar) {
858  found = TRUE;
859  break;
860  }
861  }
862  if (found)
863  break;
864  totchars += nchars;
865  }
866 
867  if (found) {
868  *psoffset = i - start;
869  *pboffset = j;
870  *ptoffset = totchars + j;
871  }
872 
873  return 0;
874 }
875 
876 
877 /*
878  * \brief getOffsetForMatchingRP()
879  *
880  * \param[in] sa output from cpp, by line
881  * \param[in] start starting index in sa to search;
882  * never a comment line
883  * \param[in] soffsetlp string offset to first LP
884  * \param[in] boffsetlp byte offset within string to first LP
885  * \param[in] toffsetlp total byte offset to first LP
886  * \param[out] psoffset offset in strings from start index
887  * \param[out] pboffset offset in bytes within string in which
888  * the matching RP is found
889  * \param[out] ptoffset offset in total bytes from beginning of string
890  * indexed by 'start' to the location where
891  * the matching RP is found
892  * \return 0 if OK, 1 on error
893  *
894  * <pre>
895  * Notes:
896  * (1) We are searching for the matching right parenthesis (RP) that
897  * corresponds to the first LP found beginning at the string
898  * indexed by start.
899  * (2) If the matching RP is not found, soffset is returned as -1,
900  * and the other offsets are set to very large numbers. The
901  * caller must check the value of soffset.
902  * (3) This is only used in contexts where it is not necessary to
903  * consider if the character is inside a string.
904  * (4) We must do this because although most arg lists have a single
905  * left and right parenthesis, it is possible to construct
906  * more complicated prototype declarations, such as those
907  * where functions are passed in. The C++ rules for prototypes
908  * are strict, and require that for functions passed in as args,
909  * the function name arg be placed in parenthesis, as well
910  * as its arg list, thus incurring two extra levels of parentheses.
911  * </pre>
912  */
913 static l_int32
914 getOffsetForMatchingRP(SARRAY *sa,
915  l_int32 start,
916  l_int32 soffsetlp,
917  l_int32 boffsetlp,
918  l_int32 toffsetlp,
919  l_int32 *psoffset,
920  l_int32 *pboffset,
921  l_int32 *ptoffset)
922 {
923 char *str;
924 l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
925 
926  PROCNAME("getOffsetForMatchingRP");
927 
928  if (!sa)
929  return ERROR_INT("sa not defined", procName, 1);
930  if (!psoffset)
931  return ERROR_INT("&soffset not defined", procName, 1);
932  if (!pboffset)
933  return ERROR_INT("&boffset not defined", procName, 1);
934  if (!ptoffset)
935  return ERROR_INT("&toffset not defined", procName, 1);
936 
937  *psoffset = -1; /* init to not found */
938  *pboffset = 100000000;
939  *ptoffset = 100000000;
940 
941  n = sarrayGetCount(sa);
942  found = FALSE;
943  totchars = toffsetlp;
944  leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */
945  firstline = start + soffsetlp;
946  for (i = firstline; i < n; i++) {
947  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
948  return ERROR_INT("str not returned; shouldn't happen", procName, 1);
949  nchars = strlen(str);
950  jstart = 0;
951  if (i == firstline)
952  jstart = boffsetlp + 1;
953  for (j = jstart; j < nchars; j++) {
954  if (str[j] == '(')
955  leftmatch++;
956  else if (str[j] == ')')
957  leftmatch--;
958  if (leftmatch == 0) {
959  found = TRUE;
960  break;
961  }
962  }
963  if (found)
964  break;
965  if (i == firstline)
966  totchars += nchars - boffsetlp;
967  else
968  totchars += nchars;
969  }
970 
971  if (found) {
972  *psoffset = i - start;
973  *pboffset = j;
974  *ptoffset = totchars + j;
975  }
976 
977  return 0;
978 }
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
Definition: sarray1.c:785
Definition: pix.h:712
Definition: pix.h:710
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:170
Definition: array.h:126
l_uint8 * l_binaryRead(const char *filename, size_t *pnbytes)
l_binaryRead()
Definition: utils2.c:1352
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:451
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
Definition: pix.h:711
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
Definition: sarray1.c:283
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
char * stringJoin(const char *src1, const char *src2)
stringJoin()
Definition: utils2.c:518
#define L_BUF_SIZE
Definition: classapp.c:59
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
Definition: sarray1.c:233
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362