Bug 563794 - Redo annotation parsing & applying
[gnome.gobject-introspection] / giscanner / transformer.py
1 # -*- Mode: Python -*-
2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008  Johan Dahlin
4 #
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2 of the License, or (at your option) any later version.
9 #
10 # This library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 # Lesser General Public License for more details.
14 #
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with this library; if not, write to the
17 # Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 # Boston, MA 02111-1307, USA.
19 #
20
21 import os
22
23 from .ast import (Callback, Enum, Function, Namespace, Member,
24                   Parameter, Return, Struct, Field,
25                   Type, Array, Alias, Interface, Class, Node, Union,
26                   Varargs, Constant, type_name_from_ctype,
27                   type_names, TYPE_STRING, BASIC_GIR_TYPES)
28 from .config import DATADIR
29 from .glibast import GLibBoxed
30 from .girparser import GIRParser
31 from .odict import odict
32 from .sourcescanner import (
33     SourceSymbol, ctype_name, CTYPE_POINTER,
34     CTYPE_BASIC_TYPE, CTYPE_UNION, CTYPE_ARRAY, CTYPE_TYPEDEF,
35     CTYPE_VOID, CTYPE_ENUM, CTYPE_FUNCTION, CTYPE_STRUCT,
36     CSYMBOL_TYPE_FUNCTION, CSYMBOL_TYPE_TYPEDEF, CSYMBOL_TYPE_STRUCT,
37     CSYMBOL_TYPE_ENUM, CSYMBOL_TYPE_UNION, CSYMBOL_TYPE_OBJECT,
38     CSYMBOL_TYPE_MEMBER, CSYMBOL_TYPE_ELLIPSIS, CSYMBOL_TYPE_CONST,
39     TYPE_QUALIFIER_CONST)
40 from .utils import strip_common_prefix, to_underscores
41
# Base data directories: every non-empty entry of $XDG_DATA_DIRS, followed
# by the configured DATADIR and '/usr/share'.
_xdg_data_dirs = [
    x for x in (os.environ.get('XDG_DATA_DIRS', '').split(':')
                + [DATADIR, '/usr/share'])
    if x]
44
45
class SkipError(Exception):
    """Raised to signal that the symbol being processed must be skipped."""
48
49
class Names(object):
    """Container for the lookup tables used during name resolution.

    Each table is exposed through a getter-only property named after the
    private attribute without its leading underscore.
    """

    def __init__(self):
        super(Names, self).__init__()
        # Maps from GIName -> (namespace, node)
        self._names = odict()
        # Maps from alias name -> (namespace, node)
        self._aliases = {}
        # Maps from GTName -> (namespace, node)
        self._type_names = {}
        # Maps from CType -> (namespace, node)
        self._ctypes = {}

    @property
    def names(self):
        return self._names

    @property
    def aliases(self):
        return self._aliases

    @property
    def type_names(self):
        return self._type_names

    @property
    def ctypes(self):
        return self._ctypes
62
63
64 class Transformer(object):
65
66     def __init__(self, cachestore, generator,
67                  namespace_name, namespace_version):
68         self._cachestore = cachestore
69         self.generator = generator
70         self._namespace = Namespace(namespace_name, namespace_version)
71         self._names = Names()
72         self._typedefs_ns = {}
73         self._strip_prefix = ''
74         self._includes = set()
75         self._includepaths = []
76
77     def get_names(self):
78         return self._names
79
80     def get_includes(self):
81         return self._includes
82
83     def set_strip_prefix(self, strip_prefix):
84         self._strip_prefix = strip_prefix
85
86     def parse(self):
87         nodes = []
88         for symbol in self.generator.get_symbols():
89             node = self._traverse_one(symbol)
90             self._add_node(node)
91         return self._namespace
92
93     def set_include_paths(self, paths):
94         self._includepaths = list(paths)
95
96     def register_include(self, include):
97         if include in self._includes:
98             return
99         filename = self._find_include(include)
100         self._parse_include(filename)
101         self._includes.add(include)
102
103     # Private
104
105     def _find_include(self, include):
106         searchdirs = self._includepaths[:]
107         for path in _xdg_data_dirs:
108             searchdirs.append(os.path.join(path, 'gir'))
109
110         girname = '%s-%s.gir' % (include.name, include.version)
111         for d in searchdirs:
112             path = os.path.join(d, girname)
113             if os.path.exists(path):
114                 return path
115         else:
116             raise ValueError("Couldn't find include %r (search path: %r)"\
117                              % (girname, searchdirs))
118
119     def _parse_include(self, filename):
120         parser = self._cachestore.load(filename)
121         if parser is None:
122             parser = GIRParser()
123             parser.set_include_parsing(True)
124             parser.parse(filename)
125             self._cachestore.store(filename, parser)
126
127         for include in parser.get_includes():
128             self.register_include(include)
129
130         namespace = parser.get_namespace()
131         nsname = namespace.name
132         for node in namespace.nodes:
133             if isinstance(node, Alias):
134                 self._names.aliases[node.name] = (nsname, node)
135             elif isinstance(node, (GLibBoxed, Interface, Class)):
136                 self._names.type_names[node.type_name] = (nsname, node)
137             giname = '%s.%s' % (nsname, node.name)
138             self._names.names[giname] = (nsname, node)
139             if hasattr(node, 'ctype'):
140                 self._names.ctypes[node.ctype] = (nsname, node)
141             elif hasattr(node, 'symbol'):
142                 self._names.ctypes[node.symbol] = (nsname, node)
143
144     def _add_node(self, node):
145         if node is None:
146             return
147         if node.name.startswith('_'):
148             return
149         self._namespace.nodes.append(node)
150         self._names.names[node.name] = (None, node)
151
152     def _strip_namespace_func(self, name):
153         prefix = self._namespace.name.lower() + '_'
154         if name.lower().startswith(prefix):
155             name = name[len(prefix):]
156         else:
157             prefix = to_underscores(self._namespace.name).lower() + '_'
158             if name.lower().startswith(prefix):
159                 name = name[len(prefix):]
160         return self.remove_prefix(name, isfunction=True)
161
162     def remove_prefix(self, name, isfunction=False):
163         # when --strip-prefix=g:
164         #   GHashTable -> HashTable
165         #   g_hash_table_new -> hash_table_new
166         prefix = self._strip_prefix.lower()
167         if isfunction:
168             prefix += '_'
169         if len(name) > len(prefix) and name.lower().startswith(prefix):
170             name = name[len(prefix):]
171
172         while name.startswith('_'):
173             name = name[1:]
174         return name
175
176     def _traverse_one(self, symbol, stype=None):
177         assert isinstance(symbol, SourceSymbol), symbol
178
179         if stype is None:
180             stype = symbol.type
181         if stype == CSYMBOL_TYPE_FUNCTION:
182             try:
183                 return self._create_function(symbol)
184             except SkipError:
185                 return
186         elif stype == CSYMBOL_TYPE_TYPEDEF:
187             return self._create_typedef(symbol)
188         elif stype == CSYMBOL_TYPE_STRUCT:
189             return self._create_struct(symbol)
190         elif stype == CSYMBOL_TYPE_ENUM:
191             return self._create_enum(symbol)
192         elif stype == CSYMBOL_TYPE_OBJECT:
193             return self._create_object(symbol)
194         elif stype == CSYMBOL_TYPE_MEMBER:
195             return self._create_member(symbol)
196         elif stype == CSYMBOL_TYPE_UNION:
197             return self._create_union(symbol)
198         elif stype == CSYMBOL_TYPE_CONST:
199             return self._create_const(symbol)
200         else:
201             raise NotImplementedError(
202                 'Transformer: unhandled symbol: %r' % (symbol, ))
203
204     def _create_enum(self, symbol):
205         members = []
206         for child in symbol.base_type.child_list:
207             name = strip_common_prefix(symbol.ident, child.ident).lower()
208             members.append(Member(name,
209                                   child.const_int,
210                                   child.ident))
211
212         enum_name = self.remove_prefix(symbol.ident)
213         enum = Enum(enum_name, symbol.ident, members)
214         self._names.type_names[symbol.ident] = (None, enum)
215         return enum
216
217     def _create_object(self, symbol):
218         return Member(symbol.ident, symbol.base_type.name,
219                       symbol.ident)
220
221     def _type_is_callback(self, type):
222         if (isinstance(type, Callback) or
223             isinstance(self._typedefs_ns.get(type.name), Callback)):
224             return True
225         return False
226
227     def _handle_closure(self, param, closure_idx, closure_param):
228         if (closure_param.type.name == 'any' and
229             closure_param.name == 'user_data'):
230             param.closure_name = closure_param.name
231             param.closure_index = closure_idx
232             return True
233         return False
234
235     def _handle_destroy(self, param, destroy_idx, destroy_param):
236         if ((self._namespace.name == 'GLib' and
237              destroy_param.type.name == 'DestroyNotify') or
238             destroy_param.type.name == 'GLib.DestroyNotify'):
239             param.destroy_name = destroy_param.name
240             param.destroy_index = destroy_idx
241             return True
242         return False
243
244     def _augment_callback_params(self, params):
245         for i, param in enumerate(params):
246             if not self._type_is_callback(param.type):
247                 continue
248
249             # j is the index where we look for closure/destroy to
250             # group with the callback param
251             j = i + 1
252             if j == len(params):
253                 continue # no more args -> nothing to group look
254             # at the param directly following for either a closure
255             # or a destroy; only one of these will fire
256             had_closure = self._handle_closure(param, j, params[j])
257             had_destroy = self._handle_destroy(param, j, params[j])
258             j += 1
259             # are we out of params, or did we find neither?
260             if j == len(params) or (not had_closure and not had_destroy):
261                 continue
262             # we found either a closure or a destroy; check the
263             # parameter following for the other
264             if not had_closure:
265                 self._handle_closure(param, j, params[j])
266             if not had_destroy:
267                 self._handle_destroy(param, j, params[j])
268
269     def _create_function(self, symbol):
270         parameters = list(self._create_parameters(symbol.base_type))
271         return_ = self._create_return(symbol.base_type.base_type)
272         self._augment_callback_params(parameters)
273         name = self._strip_namespace_func(symbol.ident)
274         func = Function(name, return_, parameters, symbol.ident)
275         return func
276
277     def _create_source_type(self, source_type):
278         if source_type is None:
279             return 'None'
280         if source_type.type == CTYPE_VOID:
281             value = 'void'
282         elif source_type.type == CTYPE_BASIC_TYPE:
283             value = source_type.name
284         elif source_type.type == CTYPE_TYPEDEF:
285             value = source_type.name
286         elif source_type.type == CTYPE_ARRAY:
287             return self._create_source_type(source_type.base_type)
288         elif source_type.type == CTYPE_POINTER:
289             value = self._create_source_type(source_type.base_type) + '*'
290         else:
291             value = 'any'
292         return value
293
294     def _create_parameters(self, base_type):
295
296         # warn if we see annotations for unknown parameters
297         param_names = set(child.ident for child in base_type.child_list)
298         for child in base_type.child_list:
299             yield self._create_parameter(child)
300
301     def _create_member(self, symbol):
302         source_type = symbol.base_type
303         if (source_type.type == CTYPE_POINTER and
304             symbol.base_type.base_type.type == CTYPE_FUNCTION):
305             node = self._create_callback(symbol)
306         else:
307             # Special handling for fields; we don't have annotations on them
308             # to apply later, yet.
309             if source_type.type == CTYPE_ARRAY:
310                 ctype = self._create_source_type(source_type)
311                 canonical_ctype = self._canonicalize_ctype(ctype)
312                 if canonical_ctype[-1] == '*':
313                     derefed_name = canonical_ctype[:-1]
314                 else:
315                     derefed_name = canonical_ctype
316                 derefed_name = self.resolve_param_type(derefed_name)
317                 ftype = Array(ctype, self.parse_ctype(derefed_name))
318                 child_list = list(symbol.base_type.child_list)
319                 ftype.zeroterminated = False
320                 if child_list:
321                     ftype.size = '%d' % (child_list[0].const_int, )
322             else:
323                 ftype = self._create_type(symbol.base_type,
324                                           is_param=False, is_retval=False)
325             ftype = self.resolve_param_type(ftype)
326             # Fields are assumed to be read-write
327             # (except for Objects, see also glibtransformer.py)
328             node = Field(symbol.ident, ftype, ftype.name,
329                          readable=True, writable=True, bits=symbol.const_int)
330         return node
331
332     def _create_typedef(self, symbol):
333         ctype = symbol.base_type.type
334         if (ctype == CTYPE_POINTER and
335             symbol.base_type.base_type.type == CTYPE_FUNCTION):
336             node = self._create_typedef_callback(symbol)
337         elif (ctype == CTYPE_POINTER and
338             symbol.base_type.base_type.type == CTYPE_STRUCT):
339             node = self._create_typedef_struct(symbol, disguised=True)
340         elif ctype == CTYPE_STRUCT:
341             node = self._create_typedef_struct(symbol)
342         elif ctype == CTYPE_UNION:
343             node = self._create_typedef_union(symbol)
344         elif ctype == CTYPE_ENUM:
345             return self._create_enum(symbol)
346         elif ctype in (CTYPE_TYPEDEF,
347                        CTYPE_POINTER,
348                        CTYPE_BASIC_TYPE,
349                        CTYPE_VOID):
350             name = self.remove_prefix(symbol.ident)
351             if symbol.base_type.name:
352                 target = self.remove_prefix(symbol.base_type.name)
353             else:
354                 target = 'none'
355             if name in type_names:
356                 return None
357             return Alias(name, target, ctype=symbol.ident)
358         else:
359             raise NotImplementedError(
360                 "symbol %r of type %s" % (symbol.ident, ctype_name(ctype)))
361         return node
362
363     def _canonicalize_ctype(self, ctype):
364         # First look up the ctype including any pointers;
365         # a few type names like 'char*' have their own aliases
366         # and we need pointer information for those.
367         firstpass = type_name_from_ctype(ctype)
368
369         # If we have a particular alias for this, skip deep
370         # canonicalization to prevent changing
371         # e.g. char* -> int8*
372         if firstpass != ctype:
373             return firstpass
374
375         # We're also done if the type is already a fundamental
376         # known type, or there are no pointers.
377         if ctype in type_names or not firstpass.endswith('*'):
378             return firstpass
379
380         # We have a pointer type.
381         # Strip the end pointer, canonicalize our base type
382         base = firstpass[:-1]
383         canonical_base = self._canonicalize_ctype(base)
384
385         # Append the pointer again
386         canonical = canonical_base + '*'
387
388         return canonical
389
390     def parse_ctype(self, ctype, is_member=False):
391         canonical = self._canonicalize_ctype(ctype)
392
393         # Remove all pointers - we require standard calling
394         # conventions.  For example, an 'int' is always passed by
395         # value (unless it's out or inout).
396         derefed_typename = canonical.replace('*', '')
397
398         # Preserve "pointerness" of struct/union members
399         if (is_member and canonical.endswith('*') and
400             derefed_typename in BASIC_GIR_TYPES):
401             return 'any'
402         else:
403             return derefed_typename
404
405     def _create_type(self, source_type, is_param, is_retval):
406         ctype = self._create_source_type(source_type)
407         if ctype == 'va_list':
408             raise SkipError()
409         # FIXME: FILE* should not be skipped, it should be handled
410         #        properly instead
411         elif ctype == 'FILE*':
412             raise SkipError
413
414         is_member = not (is_param or is_retval)
415         # Here we handle basic type parsing; most of the heavy lifting
416         # and inference comes in annotationparser.py when we merge
417         # in annotation data.
418         derefed_name = self.parse_ctype(ctype, is_member)
419         rettype = Type(derefed_name, ctype)
420         rettype.canonical = self._canonicalize_ctype(ctype)
421         derefed_ctype = ctype.replace('*', '')
422         rettype.derefed_canonical = self._canonicalize_ctype(derefed_ctype)
423
424         canontype = type_name_from_ctype(ctype)
425         if ((canontype == TYPE_STRING or
426              source_type.type == CTYPE_POINTER) and
427             source_type.base_type.type_qualifier & TYPE_QUALIFIER_CONST):
428             rettype.is_const = True
429         return rettype
430
431     def _create_parameter(self, symbol):
432         if symbol.type == CSYMBOL_TYPE_ELLIPSIS:
433             ptype = Varargs()
434         else:
435             ptype = self._create_type(symbol.base_type,
436                                       is_param=True, is_retval=False)
437             ptype = self.resolve_param_type(ptype)
438         return Parameter(symbol.ident, ptype)
439
440     def _create_return(self, source_type):
441         rtype = self._create_type(source_type,
442                                   is_param=False, is_retval=True)
443         rtype = self.resolve_param_type(rtype)
444         return_ = Return(rtype)
445         return return_
446
447     def _create_const(self, symbol):
448         name = self.remove_prefix(symbol.ident)
449         if symbol.const_string is None:
450             type_name = 'int'
451             value = symbol.const_int
452         else:
453             type_name = 'utf8'
454             value = symbol.const_string
455         const = Constant(name, type_name, value)
456         return const
457
458     def _create_typedef_struct(self, symbol, disguised=False):
459         name = self.remove_prefix(symbol.ident)
460         struct = Struct(name, symbol.ident, disguised)
461         self._typedefs_ns[symbol.ident] = struct
462         self._create_struct(symbol)
463         return struct
464
465     def _create_typedef_union(self, symbol):
466         name = self.remove_prefix(symbol.ident)
467         union = Union(name, symbol.ident)
468         self._typedefs_ns[symbol.ident] = union
469         self._create_union(symbol)
470         return union
471
472     def _create_typedef_callback(self, symbol):
473         callback = self._create_callback(symbol)
474         self._typedefs_ns[callback.name] = callback
475         return callback
476
477     def _create_struct(self, symbol):
478         struct = self._typedefs_ns.get(symbol.ident, None)
479         if struct is None:
480             # This is a bit of a hack; really we should try
481             # to resolve through the typedefs to find the real
482             # name
483             if symbol.ident.startswith('_'):
484                 name = symbol.ident[1:]
485             else:
486                 name = symbol.ident
487             name = self.remove_prefix(name)
488             struct = Struct(name, symbol.ident)
489
490         for child in symbol.base_type.child_list:
491             field = self._traverse_one(child)
492             if field:
493                 struct.fields.append(field)
494
495         return struct
496
497     def _create_union(self, symbol):
498         union = self._typedefs_ns.get(symbol.ident, None)
499         if union is None:
500             # This is a bit of a hack; really we should try
501             # to resolve through the typedefs to find the real
502             # name
503             if symbol.ident.startswith('_'):
504                 name = symbol.ident[1:]
505             else:
506                 name = symbol.ident
507             name = self.remove_prefix(name)
508             union = Union(name, symbol.ident)
509
510         for child in symbol.base_type.child_list:
511             field = self._traverse_one(child)
512             if field:
513                 union.fields.append(field)
514
515         return union
516
517     def _create_callback(self, symbol):
518         parameters = self._create_parameters(symbol.base_type.base_type)
519         retval = self._create_return(symbol.base_type.base_type.base_type)
520         if symbol.ident.find('_') > 0:
521             name = self.remove_prefix(symbol.ident, True)
522         else:
523             name = self.remove_prefix(symbol.ident)
524         callback = Callback(name, retval, list(parameters), symbol.ident)
525
526         return callback
527
528     def _typepair_to_str(self, item):
529         nsname, item = item
530         if nsname is None:
531             return item.name
532         return '%s.%s' % (nsname, item.name)
533
534     def _resolve_type_name_1(self, type_name, ctype, names):
535         # First look using the built-in names
536         if ctype:
537             try:
538                 return type_names[ctype]
539             except KeyError, e:
540                 pass
541         try:
542             return type_names[type_name]
543         except KeyError, e:
544             pass
545
546         if ctype:
547             ctype = ctype.replace('*', '')
548             resolved = names.ctypes.get(ctype)
549             if resolved:
550                 return self._typepair_to_str(resolved)
551         type_name = self.remove_prefix(type_name)
552         resolved = names.aliases.get(type_name)
553         if resolved:
554             return self._typepair_to_str(resolved)
555         resolved = names.names.get(type_name)
556         if resolved:
557             return self._typepair_to_str(resolved)
558         resolved = names.type_names.get(type_name)
559         if resolved:
560             return self._typepair_to_str(resolved)
561         raise KeyError("failed to find %r" % (type_name, ))
562
563     def resolve_type_name_full(self, type_name, ctype,
564                                names, allow_invalid=True):
565         try:
566             return self._resolve_type_name_1(type_name, ctype, names)
567         except KeyError, e:
568             try:
569                 return self._resolve_type_name_1(type_name, ctype, self._names)
570             except KeyError, e:
571                 if not allow_invalid:
572                     raise
573                 return type_name
574
575     def resolve_type_name(self, type_name, ctype=None):
576         try:
577             return self.resolve_type_name_full(type_name, ctype, self._names)
578         except KeyError, e:
579             return type_name
580
581     def gtypename_to_giname(self, gtname, names):
582         resolved = names.type_names.get(gtname)
583         if resolved:
584             return self._typepair_to_str(resolved)
585         resolved = self._names.type_names.get(gtname)
586         if resolved:
587             return self._typepair_to_str(resolved)
588         raise KeyError("Failed to resolve GType name: %r" % (gtname, ))
589
590     def ctype_of(self, obj):
591         if hasattr(obj, 'ctype'):
592             return obj.ctype
593         elif hasattr(obj, 'symbol'):
594             return obj.symbol
595         else:
596             return None
597
598     def resolve_param_type_full(self, ptype, names, **kwargs):
599         if isinstance(ptype, Node):
600             ptype.name = self.resolve_type_name_full(ptype.name,
601                                                      self.ctype_of(ptype),
602                                                      names, **kwargs)
603         elif isinstance(ptype, basestring):
604             return self.resolve_type_name_full(ptype, None, names, **kwargs)
605         else:
606             raise AssertionError("Unhandled param: %r" % (ptype, ))
607         return ptype
608
609     def resolve_param_type(self, ptype):
610         try:
611             return self.resolve_param_type_full(ptype, self._names)
612         except KeyError, e:
613             return ptype
614
615     def follow_aliases(self, type_name, names):
616         while True:
617             resolved = names.aliases.get(type_name)
618             if resolved:
619                 (ns, alias) = resolved
620                 type_name = alias.target
621             else:
622                 break
623         return type_name