ignore non-UTF-8 string constants
author Johan Bilien <jobi@litl.com>
Sat, 11 Oct 2008 23:19:59 +0000 (23:19 +0000)
committer Lucas Almeida Rocha <lucasr@src.gnome.org>
Sat, 11 Oct 2008 23:19:59 +0000 (23:19 +0000)
2008-10-11  Johan Bilien  <jobi@litl.com>

* giscanner/scannerparser.y: ignore non-UTF-8 string constants

2008-10-11  Johan Bilien  <jobi@litl.com>

Bug 552347: Parse #defines constants

* girepository/gtypelib.c: update the list of value_size
with recently defined type tags
* giscanner/scannerparser.y: brought back parsing of #defined, as
present in older version
* giscanner/giscannermodule.c: bind gi_source_scanner_append_filename
* giscanner/girwriter.py: write out constant tags in the gir
* giscanner/sourcescanner.py: add accessor for const_string
* giscanner/transformer.py, giscanner/glibtransformer.py: handle
constant

svn path=/trunk/; revision=673

ChangeLog
girepository/gtypelib.c
giscanner/girwriter.py
giscanner/giscannermodule.c
giscanner/glibtransformer.py
giscanner/scannerparser.y
giscanner/sourcescanner.py
giscanner/transformer.py
tools/g-ir-scanner

index cdc110b..64e9500 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2008-10-11  Johan Bilien  <jobi@litl.com>
+
+       * giscanner/scannerparser.y: ignore non-UTF-8 string constants
+
+2008-10-11  Johan Bilien  <jobi@litl.com>
+
+       Bug 552347: Parse #defines constants
+
+       * girepository/gtypelib.c: update the list of value_size
+       with recently defined type tags
+       * giscanner/scannerparser.y: brought back parsing of #defined, as
+       present in older version
+       * giscanner/giscannermodule.c: bind gi_source_scanner_append_filename
+       * giscanner/girwriter.py: write out constant tags in the gir
+       * giscanner/sourcescanner.py: add accessor for const_string
+       * giscanner/transformer.py, giscanner/glibtransformer.py: handle
+       constant
+
 2008-10-11  Tommi Komulainen  <tommi.komulainen@iki.fi>
 
        Bug 555946: Install a test module exercising all types, transfer
 2008-10-11  Tommi Komulainen  <tommi.komulainen@iki.fi>
 
        Bug 555946: Install a test module exercising all types, transfer
index df99eb4..e3f52aa 100644 (file)
@@ -878,13 +878,35 @@ validate_constant_blob (GTypelib     *typelib,
                        GError       **error)
 {
   gint value_size[] = {
                        GError       **error)
 {
   gint value_size[] = {
-    0, 4, 1, 1, 2, 2, 4, 4, 8, 8, 
-    sizeof (gint), sizeof (guint), 
-    sizeof (glong), sizeof (gulong),
-    sizeof (gssize), sizeof (gsize),
-    sizeof (gfloat), sizeof (gdouble), 
-    0, 0
-  }; 
+    0, /* VOID */
+    4, /* BOOLEAN */
+    1, /* INT8 */
+    1, /* UINT8 */
+    2, /* INT16 */
+    2, /* UINT16 */
+    4, /* INT32 */
+    4, /* UINT32 */
+    8, /* INT64 */
+    8, /* UINT64 */
+    sizeof (gint),
+    sizeof (guint),
+    sizeof (glong),
+    sizeof (gulong),
+    sizeof (gssize),
+    sizeof (gsize),
+    sizeof (gfloat),
+    sizeof (gdouble),
+    sizeof (time_t),
+    0, /* GTYPE */
+    0, /* UTF8 */
+    0, /* FILENAME */
+    0, /* ARRAY */
+    0, /* INTERFACE */
+    0, /* GLIST */
+    0, /* GSLIST */
+    0, /* GHASH */
+    0, /* ERROR */
+  };
   ConstantBlob *blob;
   SimpleTypeBlob *type;
 
   ConstantBlob *blob;
   SimpleTypeBlob *type;
 
index e8faa5d..ede42b4 100644 (file)
@@ -22,7 +22,7 @@ from __future__ import with_statement
 
 import os
 
 
 import os
 
-from .ast import (Callback, Class, Enum, Function, Interface, Member,
+from .ast import (Callback, Class, Constant, Enum, Function, Interface, Member,
                   Array, Struct, Alias, Union, List, Map, Varargs)
 from .glibast import (GLibBoxed, GLibEnum, GLibEnumMember,
                       GLibFlags, GLibObject, GLibInterface)
                   Array, Struct, Alias, Union, List, Map, Varargs)
 from .glibast import (GLibBoxed, GLibEnum, GLibEnumMember,
                       GLibFlags, GLibObject, GLibInterface)
@@ -82,6 +82,8 @@ class GIRWriter(XMLWriter):
             pass
         elif isinstance(node, Alias):
             self._write_alias(node)
             pass
         elif isinstance(node, Alias):
             self._write_alias(node)
+        elif isinstance(node, Constant):
+            self._write_constant(node)
         else:
             print 'WRITER: Unhandled node', node
 
         else:
             print 'WRITER: Unhandled node', node
 
@@ -213,6 +215,12 @@ class GIRWriter(XMLWriter):
             attrs.append(('glib:nick', member.nick))
         self.write_tag('member', attrs)
 
             attrs.append(('glib:nick', member.nick))
         self.write_tag('member', attrs)
 
+    def _write_constant(self, constant):
+        """Write *constant* inside a 'constant' tag context.
+
+        The tag carries the constant's name and its value converted with
+        str(); the constant's type is written by _write_type while the
+        tag context is open.
+        """
+        name_attr = ('name', constant.name)
+        value_attr = ('value', str(constant.value))
+        with self.tagcontext('constant', [name_attr, value_attr]):
+            self._write_type(constant.type)
+
     def _write_class(self, node):
         attrs = [('name', node.name),
                  ('c:type', node.ctype)]
     def _write_class(self, node):
         attrs = [('name', node.name),
                  ('c:type', node.ctype)]
index 8159c59..e33b597 100644 (file)
@@ -391,6 +391,41 @@ pygi_source_scanner_append_filename (PyGISourceScanner *self,
   return Py_None;
 }
 
   return Py_None;
 }
 
+/*
+ * parse_macros (filenames): Python binding for
+ * gi_source_scanner_parse_macros ().
+ *
+ * Expects one argument, a list of filename strings.  Raises RuntimeError
+ * when the argument is missing or is not a list, and propagates the
+ * exception set by PyString_AsString () when an element cannot be
+ * converted.  Returns None on success.
+ */
+static PyObject *
+pygi_source_scanner_parse_macros (PyGISourceScanner *self,
+                                  PyObject          *args)
+{
+  GList *filenames;
+  int i;
+  PyObject *list;
+
+  /* METH_VARARGS may hand us an empty tuple and PyTuple_GET_ITEM does no
+   * bounds checking, so look at the size before touching item 0 */
+  if (PyTuple_GET_SIZE (args) < 1 ||
+      !PyList_Check (PyTuple_GET_ITEM (args, 0)))
+    {
+      PyErr_SetString (PyExc_RuntimeError, "parse macro takes a list of filenames");
+      return NULL;
+    }
+
+  list = PyTuple_GET_ITEM (args, 0);
+
+  filenames = NULL;
+  for (i = 0; i < PyList_Size (list); ++i)
+    {
+      PyObject *obj;
+      char *filename;
+
+      obj = PyList_GetItem (list, i);
+      filename = PyString_AsString (obj);
+      if (filename == NULL)
+        {
+          /* not a string; the Python exception is already set */
+          g_list_free (filenames);
+          return NULL;
+        }
+
+      /* the pointer refers to the string object's own buffer, which the
+       * Python list keeps alive for the duration of this call; prepend
+       * and reverse once below instead of walking the list on each append */
+      filenames = g_list_prepend (filenames, filename);
+    }
+  filenames = g_list_reverse (filenames);
+
+  gi_source_scanner_parse_macros (self->scanner, filenames);
+  /* only the list cells are ours; the strings belong to Python */
+  g_list_free (filenames);
+
+  Py_INCREF (Py_None);
+  return Py_None;
+}
+
 static PyObject *
 pygi_source_scanner_parse_file (PyGISourceScanner *self,
                                PyObject          *args)
 static PyObject *
 pygi_source_scanner_parse_file (PyGISourceScanner *self,
                                PyObject          *args)
@@ -548,6 +583,7 @@ static const PyMethodDef _PyGISourceScanner_methods[] = {
   { "get_symbols", (PyCFunction) pygi_source_scanner_get_symbols, METH_NOARGS },
   { "append_filename", (PyCFunction) pygi_source_scanner_append_filename, METH_VARARGS },
   { "parse_file", (PyCFunction) pygi_source_scanner_parse_file, METH_VARARGS },
   { "get_symbols", (PyCFunction) pygi_source_scanner_get_symbols, METH_NOARGS },
   { "append_filename", (PyCFunction) pygi_source_scanner_append_filename, METH_VARARGS },
   { "parse_file", (PyCFunction) pygi_source_scanner_parse_file, METH_VARARGS },
+  { "parse_macros", (PyCFunction) pygi_source_scanner_parse_macros, METH_VARARGS },
   { "lex_filename", (PyCFunction) pygi_source_scanner_lex_filename, METH_VARARGS },
   { "set_macro_scan", (PyCFunction) pygi_source_scanner_set_macro_scan, METH_VARARGS },
   { NULL, NULL, 0 }
   { "lex_filename", (PyCFunction) pygi_source_scanner_lex_filename, METH_VARARGS },
   { "set_macro_scan", (PyCFunction) pygi_source_scanner_set_macro_scan, METH_VARARGS },
   { NULL, NULL, 0 }
index fdae652..1d9df84 100644 (file)
@@ -24,8 +24,8 @@ import ctypes
 from ctypes.util import find_library
 
 from . import cgobject
 from ctypes.util import find_library
 
 from . import cgobject
-from .ast import (Callback, Enum, Function, Member, Namespace, Parameter,
-                  Property, Return, Struct, Type, Alias, Array,
+from .ast import (Callback, Constant, Enum, Function, Member, Namespace,
+                  Parameter, Property, Return, Struct, Type, Alias, Array,
                   Union, type_name_from_ctype,
                   default_array_types, TYPE_UINT8)
 from .transformer import Names
                   Union, type_name_from_ctype,
                   default_array_types, TYPE_UINT8)
 from .transformer import Names
@@ -228,6 +228,8 @@ class GLibTransformer(object):
             pass
         elif isinstance(node, Union):
             self._parse_union(node)
             pass
         elif isinstance(node, Union):
             self._parse_union(node)
+        elif isinstance(node, Constant):
+            self._parse_constant(node)
         else:
             print 'GLIB Transformer: Unhandled node:', node
 
         else:
             print 'GLIB Transformer: Unhandled node:', node
 
@@ -237,6 +239,9 @@ class GLibTransformer(object):
     def _parse_enum(self, enum):
         self._add_attribute(enum)
 
     def _parse_enum(self, enum):
         self._add_attribute(enum)
 
+    def _parse_constant(self, constant):
+        """Handle a Constant node by passing it to _add_attribute.
+
+        This mirrors _parse_enum: the node is handed on unchanged.
+        """
+        self._add_attribute(constant)
+
     def _parse_function(self, func):
         if func.symbol in SYMBOL_BLACKLIST:
             return
     def _parse_function(self, func):
         if func.symbol in SYMBOL_BLACKLIST:
             return
index 43635bd..69d39a5 100644 (file)
@@ -32,6 +32,7 @@
 #include <string.h>
 #include <errno.h>
 #include <glib.h>
 #include <string.h>
 #include <errno.h>
 #include <glib.h>
+#include <glib/gstdio.h>
 #include "sourcescanner.h"
 #include "scannerparser.h"
 
 #include "sourcescanner.h"
 #include "scannerparser.h"
 
@@ -179,6 +180,13 @@ strings
                $$ = gi_source_symbol_new (CSYMBOL_TYPE_CONST);
                yytext[strlen (yytext) - 1] = '\0';
                $$->const_string = g_strcompress (yytext + 1);
                $$ = gi_source_symbol_new (CSYMBOL_TYPE_CONST);
                yytext[strlen (yytext) - 1] = '\0';
                $$->const_string = g_strcompress (yytext + 1);
+                if (!g_utf8_validate ($$->const_string, -1, NULL))
+                  {
+                    g_warning ("Ignoring non-UTF-8 constant string %s", $$->ident);
+                    g_free($$->const_string);
+                    $$->const_string = NULL;
+                  }
+
          }
        | strings STRING
          {
          }
        | strings STRING
          {
@@ -1253,6 +1261,183 @@ yyerror (GISourceScanner *scanner, const char *s)
     }
 }
 
     }
 }
 
+/*
+ * Read characters from @f until one is neither a space nor a tab.
+ * At least one character is always consumed.  Returns the first
+ * non-blank character read, which may be EOF.
+ */
+static int
+eat_hspace (FILE * f)
+{
+  int ch = fgetc (f);
+
+  while (ch == ' ' || ch == '\t')
+    ch = fgetc (f);
+
+  return ch;
+}
+
+/*
+ * Skip the remainder of the current line of @f, starting from the
+ * already-read character @c, plus any spaces and tabs that open the
+ * following line.  Returns the first significant character of the next
+ * line, or EOF when the input ends first.
+ */
+static int
+eat_line (FILE * f, int c)
+{
+  /* advance to the end of the current line */
+  for (; c != EOF && c != '\n'; c = fgetc (f))
+    ;
+
+  if (c != '\n')
+    return c;
+
+  /* step over the newline, then over leading blanks of the next line */
+  c = fgetc (f);
+  return (c == ' ' || c == '\t') ? eat_hspace (f) : c;
+}
+
+/*
+ * Gather a run of ASCII alphanumerics and underscores, beginning with
+ * the already-read character @c and continuing from @f.
+ *
+ * *@identifier receives a newly allocated string (empty when @c does
+ * not start an identifier) that the caller must g_free ().  Returns the
+ * first character that is not part of the identifier.
+ */
+static int
+read_identifier (FILE * f, int c, char **identifier)
+{
+  GString *buf = g_string_new ("");
+
+  for (; g_ascii_isalnum (c) || c == '_'; c = fgetc (f))
+    g_string_append_c (buf, c);
+
+  /* hand the character data to the caller, dropping only the wrapper */
+  *identifier = g_string_free (buf, FALSE);
+  return c;
+}
+
+/*
+ * Collect the #define directives of every file in @filenames and run
+ * the parser over them.
+ *
+ * @scanner:   scanner passed on to gi_source_scanner_parse_file ()
+ * @filenames: list of char * paths of the headers to read
+ *
+ * Each line starting with '#' is preceded in the temporary file by a
+ * `# <line> "<file>"' marker.  Function-like macros are written as
+ * `#define NAME(params)' with the replacement text dropped.
+ * Object-like macros are written with their replacement text, with
+ * backslash-newline sequences folded.  All other lines are ignored.
+ * The temporary file is then rewound and handed to
+ * gi_source_scanner_parse_file ().
+ *
+ * Headers that cannot be opened are skipped with a warning.  If the
+ * temporary file cannot be created, nothing is parsed.
+ */
+void
+gi_source_scanner_parse_macros (GISourceScanner *scanner, GList *filenames)
+{
+  GError *error = NULL;
+  char *tmp_name = NULL;
+  FILE *fmacros;
+  GList *l;
+  int fd;
+
+  fd = g_file_open_tmp ("gen-introspect-XXXXXX.h", &tmp_name, &error);
+  if (fd == -1)
+    {
+      g_warning ("Failed to create temporary file for macros: %s",
+                 error->message);
+      g_error_free (error);
+      return;
+    }
+
+  fmacros = fdopen (fd, "w+");
+  if (fmacros == NULL)
+    g_warning ("Failed to open temporary file %s: %s",
+               tmp_name, g_strerror (errno));
+
+  /* remove the directory entry right away so nothing is left behind,
+   * and release the name allocated by g_file_open_tmp () */
+  g_unlink (tmp_name);
+  g_free (tmp_name);
+  if (fmacros == NULL)
+    return;
+
+  for (l = filenames; l != NULL; l = l->next)
+    {
+      FILE *f = fopen (l->data, "r");
+      int line = 1;
+
+      GString *define_line;
+      char *str;
+      gboolean error_line;
+      int c;
+
+      if (f == NULL)
+        {
+          g_warning ("Failed to open %s: %s",
+                     (char *) l->data, g_strerror (errno));
+          continue;
+        }
+
+      c = eat_hspace (f);
+      while (c != EOF)
+        {
+          if (c != '#')
+            {
+              /* ignore line */
+              c = eat_line (f, c);
+              line++;
+              continue;
+            }
+
+          /* print current location */
+          str = g_strescape (l->data, "");
+          fprintf (fmacros, "# %d \"%s\"\n", line, str);
+          g_free (str);
+
+          /* directive name: only `define' followed by a blank is kept */
+          c = eat_hspace (f);
+          c = read_identifier (f, c, &str);
+          if (strcmp (str, "define") != 0 || (c != ' ' && c != '\t'))
+            {
+              g_free (str);
+              /* ignore line */
+              c = eat_line (f, c);
+              line++;
+              continue;
+            }
+          g_free (str);
+
+          /* macro name: must be followed by a blank or a parameter list */
+          c = eat_hspace (f);
+          c = read_identifier (f, c, &str);
+          if (strlen (str) == 0 || (c != ' ' && c != '\t' && c != '('))
+            {
+              g_free (str);
+              /* ignore line */
+              c = eat_line (f, c);
+              line++;
+              continue;
+            }
+          define_line = g_string_new ("#define ");
+          g_string_append (define_line, str);
+          g_free (str);
+          if (c == '(')
+            {
+              /* start clean for every macro so one unterminated
+               * parameter list does not discard all later macros */
+              error_line = FALSE;
+              while (c != ')')
+                {
+                  g_string_append_c (define_line, c);
+                  c = fgetc (f);
+                  if (c == EOF || c == '\n')
+                    {
+                      error_line = TRUE;
+                      break;
+                    }
+                }
+              if (error_line)
+                {
+                  g_string_free (define_line, TRUE);
+                  /* ignore line */
+                  c = eat_line (f, c);
+                  line++;
+                  continue;
+                }
+
+              g_assert (c == ')');
+              g_string_append_c (define_line, c);
+              c = fgetc (f);
+
+              /* found function-like macro */
+              fprintf (fmacros, "%s\n", define_line->str);
+
+              g_string_free (define_line, TRUE);
+              /* ignore rest of line */
+              c = eat_line (f, c);
+              line++;
+              continue;
+            }
+
+          /* c is a space or tab here: the check after the macro name
+           * only lets ' ', '\t' and '(' through, and '(' was handled */
+          while (c != EOF && c != '\n')
+            {
+              g_string_append_c (define_line, c);
+              c = fgetc (f);
+              if (c == '\\')
+                {
+                  c = fgetc (f);
+                  if (c == '\n')
+                    {
+                      /* fold lines when seeing backslash new-line sequence;
+                       * the swallowed newline still counts as a source line */
+                      line++;
+                      c = fgetc (f);
+                    }
+                  else
+                    {
+                      g_string_append_c (define_line, '\\');
+                    }
+                }
+            }
+
+          /* found object-like macro */
+          fprintf (fmacros, "%s\n", define_line->str);
+          g_string_free (define_line, TRUE);
+
+          c = eat_line (f, c);
+          line++;
+        }
+
+      fclose (f);
+    }
+
+  rewind (fmacros);
+  gi_source_scanner_parse_file (scanner, fmacros);
+  /* NOTE(review): fmacros is not fclose ()d here.  Whether
+   * gi_source_scanner_parse_file () takes ownership of the stream is
+   * not visible from this function -- confirm, and close it if not. */
+}
+
 gboolean
 gi_source_scanner_parse_file (GISourceScanner *scanner, FILE *file)
 {
 gboolean
 gi_source_scanner_parse_file (GISourceScanner *scanner, FILE *file)
 {
index a4355ca..5c2d704 100644 (file)
@@ -140,7 +140,7 @@ class SourceType(object):
 
 
 class SourceSymbol(object):
 
 
 class SourceSymbol(object):
-    __members__ = ['const_int', 'ident', 'type', 'base_type']
+    __members__ = ['const_int', 'const_string', 'ident', 'type', 'base_type']
 
     def __init__(self, scanner, symbol):
         self._scanner = scanner
 
     def __init__(self, scanner, symbol):
         self._scanner = scanner
@@ -162,6 +162,10 @@ class SourceSymbol(object):
     def const_int(self):
         return self._symbol.const_int
 
     def const_int(self):
         return self._symbol.const_int
 
+    @property
+    def const_string(self):
+        """Return the const_string value of the wrapped symbol object."""
+        return self._symbol.const_string
+
     @property
     def ident(self):
         return self._symbol.ident
     @property
     def ident(self):
         return self._symbol.ident
@@ -210,9 +214,9 @@ class SourceScanner(object):
         self._parse(headers)
         self._filenames.extend(headers)
 
         self._parse(headers)
         self._filenames.extend(headers)
 
-    def parse_macros(self):
+    def parse_macros(self, filenames):
         self._scanner.set_macro_scan(True)
         self._scanner.set_macro_scan(True)
-        self._parse(self._filenames)
+        self._scanner.parse_macros(filenames)
         self._scanner.set_macro_scan(False)
 
     def get_symbols(self):
         self._scanner.set_macro_scan(False)
 
     def get_symbols(self):
index fcab570..024a299 100644 (file)
@@ -23,7 +23,7 @@ import os
 from giscanner.ast import (Callback, Enum, Function, Namespace, Member,
                            Parameter, Return, Array, Struct, Field,
                            Type, Alias, Interface, Class, Node, Union,
 from giscanner.ast import (Callback, Enum, Function, Namespace, Member,
                            Parameter, Return, Array, Struct, Field,
                            Type, Alias, Interface, Class, Node, Union,
-                           List, Map, Varargs, type_name_from_ctype,
+                           List, Map, Varargs, Constant, type_name_from_ctype,
                            type_names, default_array_types)
 from giscanner.config import DATADIR
 from .glibast import GLibBoxed
                            type_names, default_array_types)
 from giscanner.config import DATADIR
 from .glibast import GLibBoxed
@@ -33,7 +33,7 @@ from giscanner.sourcescanner import (
     CTYPE_VOID, CTYPE_ENUM, CTYPE_FUNCTION, CTYPE_STRUCT,
     CSYMBOL_TYPE_FUNCTION, CSYMBOL_TYPE_TYPEDEF, CSYMBOL_TYPE_STRUCT,
     CSYMBOL_TYPE_ENUM, CSYMBOL_TYPE_UNION, CSYMBOL_TYPE_OBJECT,
     CTYPE_VOID, CTYPE_ENUM, CTYPE_FUNCTION, CTYPE_STRUCT,
     CSYMBOL_TYPE_FUNCTION, CSYMBOL_TYPE_TYPEDEF, CSYMBOL_TYPE_STRUCT,
     CSYMBOL_TYPE_ENUM, CSYMBOL_TYPE_UNION, CSYMBOL_TYPE_OBJECT,
-    CSYMBOL_TYPE_MEMBER, CSYMBOL_TYPE_ELLIPSIS,
+    CSYMBOL_TYPE_MEMBER, CSYMBOL_TYPE_ELLIPSIS, CSYMBOL_TYPE_CONST,
     TYPE_QUALIFIER_CONST)
 from .odict import odict
 from .utils import strip_common_prefix, to_underscores
     TYPE_QUALIFIER_CONST)
 from .odict import odict
 from .utils import strip_common_prefix, to_underscores
@@ -201,6 +201,8 @@ class Transformer(object):
             return self._create_member(symbol)
         elif stype == CSYMBOL_TYPE_UNION:
             return self._create_union(symbol)
             return self._create_member(symbol)
         elif stype == CSYMBOL_TYPE_UNION:
             return self._create_union(symbol)
+        elif stype == CSYMBOL_TYPE_CONST:
+            return self._create_const(symbol)
         else:
             raise NotImplementedError(
                 'Transformer: unhandled symbol: %r' % (symbol, ))
         else:
             raise NotImplementedError(
                 'Transformer: unhandled symbol: %r' % (symbol, ))
@@ -424,6 +426,18 @@ class Transformer(object):
                     option, )
         return return_
 
                     option, )
         return return_
 
+    def _create_const(self, symbol):
+        """Build a Constant node from a CSYMBOL_TYPE_CONST symbol.
+
+        The name is symbol.ident passed through _remove_prefix and then
+        strip_namespace_object.  A symbol with a const_string becomes a
+        'utf8' constant holding that string; otherwise it becomes an
+        'int' constant holding const_int.
+        """
+        name = self.strip_namespace_object(self._remove_prefix(symbol.ident))
+        if symbol.const_string is not None:
+            type_name, value = 'utf8', symbol.const_string
+        else:
+            type_name, value = 'int', symbol.const_int
+        return Constant(name, type_name, value)
+
     def _create_typedef_struct(self, symbol):
         name = self.strip_namespace_object(symbol.ident)
         struct = Struct(name, symbol.ident)
     def _create_typedef_struct(self, symbol):
         name = self.strip_namespace_object(symbol.ident)
         struct = Struct(name, symbol.ident)
index 88800d7..1d01bb7 100755 (executable)
@@ -241,7 +241,7 @@ def main(args):
                        options.cpp_defines,
                        options.cpp_undefines)
     ss.parse_files(filenames)
                        options.cpp_defines,
                        options.cpp_undefines)
     ss.parse_files(filenames)
-    ss.parse_macros()
+    ss.parse_macros(filenames)
 
     # Transform the C symbols into AST nodes
     transformer = Transformer(ss, options.namespace_name)
 
     # Transform the C symbols into AST nodes
     transformer = Transformer(ss, options.namespace_name)