1 /* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
20 #include <crtdll/errno.h>
22 #include <crtdll/ctype.h>
24 #include <crtdll/stdio.h>
25 #include <crtdll/stdlib.h>
26 #include <crtdll/string.h>
27 #include <crtdll/wchar.h>
28 #include <crtdll/malloc.h>
29 #include <crtdll/internal/file.h>
30 #include <crtdll/mbstring.h>
31 /* The internal entry points for `strtoX' take an extra flag argument
32 saying whether or not to parse locale-dependent number grouping. */
34 double __strtod_internal (const char *__nptr,char **__endptr, int __group);
35 float __strtof_internal (const char *__nptr, char **__endptr,int __group);
36 long double __strtold_internal (const char *__nptr,char **__endptr, int __group);
37 long int __strtol_internal (const char *__nptr, char **__endptr, int __base, int __group);
38 unsigned long int __strtoul_internal (const char *__nptr, char **__endptr, int __base, int __group);
44 #define LONGLONG long long
49 /* Those are flags in the conversion format. */
50 # define LONG 0x001 /* l: long or double */
51 # define LONGDBL 0x002 /* L: long long or long double */
52 # define SHORT 0x004 /* h: short */
53 # define SUPPRESS 0x008 /* *: suppress assignment */
54 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
55 # define NOSKIP 0x020 /* do not skip blanks */
56 # define WIDTH 0x040 /* width was given */
57 # define GROUP 0x080 /* ': group numbers */
58 # define MALLOC 0x100 /* a: malloc strings */
60 # define TYPEMOD (LONG|LONGDBL|SHORT)
63 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
64 # define inchar() ((c = getc (s)), (void) (c != EOF && ++read_in), c)
65 # define encode_error() do { \
67 __set_errno (EILSEQ); \
70 # define conv_error() do { \
74 # define input_error() do { \
76 return done ? 0 : EOF; \
78 # define memory_error() do { \
80 __set_errno (ENOMEM); \
83 # define ARGCHECK(s, format) \
86 /* Check file argument for consistency. */ \
87 if (!__validfp (s) || !s->__mode.__read) \
89 __set_errno (EBADF); \
92 else if (format == NULL) \
94 __set_errno (EINVAL); \
99 # define flockfile(S) /* nothing */
100 # define funlockfile(S) /* nothing */
102 char *wp = NULL; /* Workspace. */
103 size_t wpmax = 0; /* Maximal size of workspace. */
104 size_t wpsize = 0; /* Currently used bytes in workspace. */
112 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax;
113 wp = (char *) malloc (wpmax);
115 memcpy (wp, old, wpsize);
124 int __vfscanf (FILE *s, const char *format, va_list argptr)
127 register const char *f = format;
128 register unsigned char fc; /* Current character of the format. */
129 register size_t done = 0; /* Assignments done. */
130 register size_t read_in = 0; /* Chars read in. */
131 register int c = 0; /* Last char read. */
132 register int width; /* Maximum field width. */
133 register int flags; /* Modifiers for current format element. */
135 /* Status for reading F-P nums. */
136 char got_dot, got_e, negative;
137 /* If a [...] is a [^...]. */
139 /* Base for integral numbers. */
141 /* Signedness for integral numbers. */
143 /* Decimal point character. */
144 wchar_t decimal = '.';
145 /* The thousands character of the current locale. */
146 wchar_t thousands = ',';
147 /* Integral holding variables. */
151 unsigned long long int uq;
153 unsigned long int ul;
155 /* Character-buffer pointer. */
157 wchar_t *wstr = NULL;
158 char **strptr = NULL;
160 /* We must not react to white spaces immediately because they can
161 possibly be matched even if in the input stream no character is
162 available anymore. */
165 char *tw; /* Temporary pointer. */
168 __va_copy (arg, argptr);
170 arg = (va_list) argptr;
175 /* Run through the format string. */
179 /* Extract the next argument, which is of type TYPE.
180 For a %N$... spec, this is the Nth argument from the beginning;
181 otherwise it is the next argument after the state now in ARG. */
182 #define ARG(type) va_arg(argptr,type)
186 /* Non-ASCII, may be a multibyte. */
187 // int len = mblen (f, strlen (f));
210 /* Remember to skip spaces. */
217 /* Read a character. */
220 /* Characters other than format specs must just match. */
224 /* We saw white space char as the last character in the format
225 string. Now it's time to skip all leading white space. */
229 if (inchar () == EOF && errno == EINTR)
243 /* This is the start of the conversion string. */
246 /* Initialize state of modifiers. */
249 /* Prepare temporary buffer. */
252 /* Check for a positional parameter specification. */
257 argpos = argpos * 10 + (*f++ - '0');
262 /* Oops; that was actually the field width. */
270 /* Check for the assignment-suppressing and the number grouping flag. */
271 while (*f == '*' || *f == '\'')
282 /* We have seen width. */
286 /* Find the maximum field width. */
297 /* Check for type modifiers. */
298 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
302 /* int's are short int's. */
304 /* Signal illegal format element. */
309 if (flags & (SHORT|LONGDBL))
311 else if (flags & LONG)
313 /* A double `l' is equivalent to an `L'. */
318 /* int's are long int's. */
323 /* double's are long double's, and int's are long long int's. */
325 /* Signal illegal format element. */
331 /* Signal illegal format element. */
333 /* String conversions (%s, %[) take a `char **'
334 arg and fill it in with a malloc'd pointer. */
339 /* End of the format string? */
343 /* We must take care for EINTR errors. */
344 if (c == EOF && errno == EINTR)
347 /* Find the conversion specifier. */
349 if (skip_space || (fc != '[' && fc != 'c' && fc != 'C' && fc != 'n'))
351 /* Eat whitespace. */
353 if (inchar () == EOF && errno == EINTR)
362 case '%': /* Must match a literal '%'. */
371 case 'n': /* Answer number of assignments done. */
372 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
373 with the 'n' conversion specifier. */
374 if (!(flags & SUPPRESS))
376 /* Don't count the read-ahead. */
378 *ARG (long int *) = read_in;
379 else if (flags & LONG)
380 *ARG (long int *) = read_in;
381 else if (flags & SHORT)
382 *ARG (short int *) = read_in;
384 *ARG (int *) = read_in;
386 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
387 /* We have a severe problem here. The ISO C standard
388 contradicts itself in explaining the effect of the %n
389 format in `scanf'. While in ISO C:1990 and the ISO C
390 Amendment 1:1995 the result is described as
392 Execution of a %n directive does not affect the
393 assignment count returned at the completion of
394 execution of the f(w)scanf function.
396 in ISO C Corrigendum 1:1994 the following was added:
399 Add the following fourth example:
402 int d1, d2, n1, n2, i;
403 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
404 the value 123 is assigned to d1 and the value 3 to n1.
405 Because %n can never get an input failure the value
406 of 3 is also assigned to n2. The value of d2 is not
407 affected. The value 3 is assigned to i.
409 We go for now with the historically correct code from ISO C,
410 i.e., we don't count the %n assignments. Should this ever
411 prove to be wrong, just remove the #ifdef above. */
417 case 'c': /* Match characters. */
418 if ((flags & LONG) == 0)
420 if (!(flags & SUPPRESS))
434 if (!(flags & SUPPRESS))
438 while (--width > 0 && inchar () != EOF);
441 while (--width > 0 && inchar () != EOF);
444 /* I.e., EOF was read. */
447 if (!(flags & SUPPRESS))
454 /* Get UTF-8 encoded wide character. Here we assume (as in
455 other parts of the libc) that we only have to handle
462 if (!(flags & SUPPRESS))
464 wstr = ARG (wchar_t *);
471 #define NEXT_WIDE_CHAR(First) \
474 /* EOF is only an error for the first character. */ \
487 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
489 if ((c & 0xe0) == 0xc0) \
491 /* We expect two bytes. */ \
495 else if ((c & 0xf0) == 0xe0) \
497 /* We expect three bytes. */ \
501 else if ((c & 0xf8) == 0xf0) \
503 /* We expect four bytes. */ \
507 else if ((c & 0xfc) == 0xf8) \
509 /* We expect five bytes. */ \
515 /* We expect six bytes. */ \
524 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
532 if (!(flags & SUPPRESS)) \
536 NEXT_WIDE_CHAR (first);
541 /* I.e., EOF was read. */
544 if (!(flags & SUPPRESS))
549 case 's': /* Read a string. */
551 /* We have to process a wide character string. */
552 goto wide_char_string;
554 #define STRING_ARG(Str, Type) \
555 if (!(flags & SUPPRESS)) \
557 if (flags & MALLOC) \
559 /* The string is to be stored in a malloc'd buffer. */ \
560 strptr = ARG (char **); \
561 if (strptr == NULL) \
563 /* Allocate an initial buffer. */ \
565 *strptr = malloc (strsize * sizeof (Type)); \
566 Str = (Type *) *strptr; \
569 Str = ARG (Type *); \
573 STRING_ARG (str, char);
586 #define STRING_ADD_CHAR(Str, c, Type) \
587 if (!(flags & SUPPRESS)) \
590 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
592 /* Enlarge the buffer. */ \
593 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
596 /* Can't allocate that much. Last-ditch effort. */\
597 Str = realloc (*strptr, \
598 (strsize + 1) * sizeof (Type)); \
601 /* We lose. Oh well. \
602 Terminate the string and stop converting, \
603 so at least we don't skip any input. */ \
604 ((Type *) (*strptr))[strsize] = '\0'; \
610 *strptr = (char *) Str; \
611 Str = ((Type *) *strptr) + strsize; \
617 *strptr = (char *) Str; \
618 Str = ((Type *) *strptr) + strsize; \
623 STRING_ADD_CHAR (str, c, char);
624 } while ((width <= 0 || --width > 0) && inchar () != EOF);
626 if (!(flags & SUPPRESS))
634 /* Wide character string. */
639 STRING_ARG (wstr, wchar_t);
644 NEXT_WIDE_CHAR (first);
648 /* XXX We would have to push back the whole wide char
649 with possibly many bytes. But since scanf does
650 not make a difference for white space characters
651 we can simply push back a simple <SP> which is
652 guaranteed to be in the [:space:] class. */
657 STRING_ADD_CHAR (wstr, val, wchar_t);
660 while (width <= 0 || --width > 0);
662 if (!(flags & SUPPRESS))
670 case 'x': /* Hexadecimal integer. */
671 case 'X': /* Ditto. */
676 case 'o': /* Octal integer. */
681 case 'u': /* Unsigned decimal integer. */
686 case 'd': /* Signed decimal integer. */
691 case 'i': /* Generic number. */
700 /* Check for a sign. */
701 if (c == '-' || c == '+')
709 /* Look for a leading indication of base. */
710 if (width != 0 && c == '0')
718 if (width != 0 && tolower (c) == 'x')
736 /* Read the number into workspace. */
737 while (c != EOF && width != 0)
739 if (base == 16 ? !isxdigit (c) :
740 ((!isdigit (c) || c - '0' >= base) &&
741 !((flags & GROUP) && base == 10 && c == thousands)))
750 /* The just read character is not part of the number anymore. */
754 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
755 /* There was no number. */
758 /* Convert the number. */
762 // if (number_signed)
763 // num.q = __strtoq_internal (wp, &tw, base, flags & GROUP);
765 // num.uq = __strtouq_internal (wp, &tw, base, flags & GROUP);
770 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
772 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
777 if (!(flags & SUPPRESS))
781 if (flags & LONGDBL) {
782 *ARG (unsigned LONGLONG int *) = num.uq;
784 else if (flags & LONG)
785 *ARG (unsigned long int *) = num.ul;
786 else if (flags & SHORT)
787 *ARG (unsigned short int *) = (unsigned short int) num.ul;
789 *ARG (unsigned int *) = (unsigned int) num.ul;
793 if (flags & LONGDBL) {
794 *ARG (LONGLONG int *) = num.q;
796 else if (flags & LONG)
797 *ARG (long int *) = num.l;
798 else if (flags & SHORT)
799 *ARG (short int *) = (short int) num.l;
801 *ARG (int *) = (int) num.l;
807 case 'e': /* Floating-point numbers. */
816 /* Check for a sign. */
817 if (c == '-' || c == '+')
820 if (inchar () == EOF)
821 /* EOF is only an input error before we read any chars. */
834 else if (got_e && wp[wpsize - 1] == 'e'
835 && (c == '-' || c == '+'))
837 else if (wpsize > 0 && !got_e && tolower (c) == 'e')
842 else if (c == decimal && !got_dot)
847 else if ((flags & GROUP) && c == thousands && !got_dot)
851 /* The last read character is not part of the number
859 while (width != 0 && inchar () != EOF);
864 /* Convert the number. */
868 long double d = __strtold_internal (wp, &tw, flags & GROUP);
869 if (!(flags & SUPPRESS) && tw != wp)
870 *ARG (long double *) = negative ? -d : d;
872 else if (flags & LONG)
874 double d = __strtod_internal (wp, &tw, flags & GROUP);
875 if (!(flags & SUPPRESS) && tw != wp)
876 *ARG (double *) = negative ? -d : d;
880 float d = __strtof_internal (wp, &tw, flags & GROUP);
881 if (!(flags & SUPPRESS) && tw != wp)
882 *ARG (float *) = negative ? -d : d;
888 if (!(flags & SUPPRESS))
892 case '[': /* Character class. */
895 STRING_ARG (wstr, wchar_t);
896 c = '\0'; /* This is to keep gcc quiet. */
900 STRING_ARG (str, char);
915 /* Fill WP with byte flags indexed by character.
916 We will use this flag map for matching input characters. */
917 if (wpmax < UCHAR_MAX)
920 wp = (char *) alloca (wpmax);
922 memset (wp, 0, UCHAR_MAX);
925 if (fc == ']' || fc == '-')
927 /* If ] or - appears before any char in the set, it is not
928 the terminator or separator, but the first char in the
934 while ((fc = *f++) != '\0' && fc != ']')
936 if (fc == '-' && *f != '\0' && *f != ']' &&
937 (unsigned char) f[-2] <= (unsigned char) *f)
939 /* Add all characters from the one before the '-'
940 up to (but not including) the next format char. */
941 for (fc = f[-2]; fc < *f; ++fc)
945 /* Add the character to the flag map. */
963 NEXT_WIDE_CHAR (first);
964 if (val > 255 || wp[val] == not_in)
966 /* XXX We have a problem here. We read a wide
967 character and this possibly took several
968 bytes. But we can only push back one single
969 character. To be sure we don't create wrong
970 input we push it back only in case it is
971 representable within one byte. */
976 STRING_ADD_CHAR (wstr, val, wchar_t);
986 if (!(flags & SUPPRESS))
994 num.ul = read_in - 1; /* -1 because we already read one char. */
1002 STRING_ADD_CHAR (str, c, char);
1006 while (width != 0 && inchar () != EOF);
1008 if (read_in == num.ul)
1011 if (!(flags & SUPPRESS))
1019 case 'p': /* Generic pointer. */
1021 /* A PTR must be the same size as a `long int'. */
1022 flags &= ~(SHORT|LONGDBL);
1029 /* The last thing we saw in the format string was a white space.
1030 Consume the last white spaces. */
1035 while (isspace (c));
1046 xfscanf(FILE *f, const char *fmt, ...)
1051 r = __vfscanf(f, fmt, a);
/* Internal entry point for string-to-double conversion.  As visible
   here it simply forwards __nptr and __endptr to strtod (); the __group
   flag is accepted but not used, so locale-dependent number grouping
   is not parsed by this implementation.  */
1057 double __strtod_internal (const char *__nptr,char **__endptr, int __group)
1059 return strtod(__nptr,__endptr);
/* Internal entry point for string-to-float conversion.  The text is
   converted with strtod () and the double result is then narrowed to
   float by a cast; the __group flag is accepted but not used.
   NOTE(review): no range check is visible before the narrowing cast --
   confirm that callers do not rely on float-specific overflow
   reporting.  */
1061 float __strtof_internal (const char *__nptr, char **__endptr,int __group)
1063 return (float)strtod(__nptr,__endptr);
/* Table of powers of ten whose exponents are successive powers of two
   (1e1, 1e2, 1e4, ...); it is indexed by __strtold_internal when
   scaling the result (see the `d *= powten[...]' statement there).
   NOTE(review): the element type is `double', yet the initializers are
   long double literals and the entries from 1e512L upward are larger
   than an IEEE-754 double can represent -- confirm whether
   `long double' was intended for this table.  */
1065 static double powten[] =
1067 1e1L, 1e2L, 1e4L, 1e8L, 1e16L, 1e32L, 1e64L, 1e128L, 1e256L,
1068 1e512L, 1e512L*1e512L, 1e2048L, 1e4096L
1071 long double __strtold_internal (const char *s,char **sret, int __group)
1074 long double r; /* result */
1075 int e, ne; /* exponent */
1076 int sign; /* +- 1.0 */
1086 while(*s && isspace(*s))
1097 while ((*s >= '0') && (*s <= '9'))
1108 while ((*s >= '0') && (*s <= '9'))
1124 if ((*s == 'e') || (*s == 'E'))
1134 while ((*s >= '0') && (*s <= '9'))
1149 /* possibly subnormal number, 10^e would overflow */
1167 d *= powten[l2powm1];
/* Internal entry point for string-to-long conversion.  As visible here
   it forwards __nptr, __endptr and __base to strtol (); the __group
   flag is accepted but not used, so number grouping is not parsed.  */
1182 long int __strtol_internal (const char *__nptr, char **__endptr, int __base, int __group)
1184 return strtol(__nptr,__endptr, __base);
/* Internal entry point for string-to-unsigned-long conversion.  As
   visible here it forwards __nptr, __endptr and __base to strtoul ();
   the __group flag is accepted but not used, so number grouping is
   not parsed.  */
1186 unsigned long int __strtoul_internal (const char *__nptr, char **__endptr, int __base, int __group)
1188 return strtoul(__nptr,__endptr, __base);