Bug 565147 - Add (type) annotation to override the C type definition
[gnome.gobject-introspection] / giscanner / annotationparser.py
1 # -*- Mode: Python -*-
2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008  Johan Dahlin
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 # 02110-1301, USA.
19 #
20
21 # AnnotationParser - parses gtk-doc annotations
22
23 import sys
24
25 from .ast import (Array, Bitfield, Callback, Class, Enum, Field, Function,
26                   Interface, List, Map, Parameter, Record, Return, Type, Union,
27                   Varargs,
28                   default_array_types,
29                   BASIC_GIR_TYPES,
30                   PARAM_DIRECTION_INOUT,
31                   PARAM_DIRECTION_IN,
32                   PARAM_DIRECTION_OUT,
33                   PARAM_TRANSFER_NONE,
34                   PARAM_TRANSFER_CONTAINER,
35                   PARAM_TRANSFER_FULL,
36                   TYPE_ANY, TYPE_NONE)
37 from .odict import odict
38 from .glibast import GLibBoxed
39
# All gtk-doc comments need to start with this:
_COMMENT_HEADER = '*\n '

# Tags - annotations applied to comment blocks
TAG_VFUNC = 'virtual'
TAG_SINCE = 'since'
TAG_DEPRECATED = 'deprecated'
TAG_RETURNS = 'returns'
TAG_RETURNS_ALT = 'return value'
TAG_ATTRIBUTES = 'attributes'

# Options - annotations for parameters and return values
OPT_ALLOW_NONE = 'allow-none'
OPT_ARRAY = 'array'
OPT_ELEMENT_TYPE = 'element-type'
OPT_IN = 'in'
OPT_INOUT = 'inout'
OPT_INOUT_ALT = 'in-out'
OPT_OUT = 'out'
OPT_SCOPE = 'scope'
OPT_TRANSFER = 'transfer'
OPT_TYPE = 'type'

# Specific option values
OPT_VAL_BITFIELD = 'bitfield'

# Array options - array specific annotations
OPT_ARRAY_FIXED_SIZE = 'fixed-size'
OPT_ARRAY_LENGTH = 'length'
OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
70
71
class InvalidAnnotationError(Exception):
    """Raised when an annotation carries a value that is not accepted."""
74
75
class DocBlock(object):
    """One parsed documentation comment block.

    Holds the block name, the options given on the header line, the tags
    found in the body (keyed by lower-cased tag name) and the remaining
    free-form comment text.
    """

    def __init__(self, name, options):
        self.name = name
        self.options = options
        self.value = None
        self.comment = None
        # Ordered, so tags can later be read back in insertion order.
        self.tags = odict()

    def __repr__(self):
        return '<DocBlock %r %r>' % (self.name, self.options)

    def get(self, name):
        """Return the tag stored under ``name``, or None if there is none.

        A lookup of TAG_RETURNS falls back to TAG_RETURNS_ALT when the
        primary spelling is absent.
        """
        found = self.tags.get(name)
        if found is None and name == TAG_RETURNS:
            found = self.tags.get(TAG_RETURNS_ALT)
        return found
97
98
class DocTag(object):
    """A single tag (``@param:``, ``Returns:``, ``Since:`` ...) of a block.

    Attributes:
      name: the tag name as passed to the constructor.
      value: the raw text following the tag name; None until assigned.
      options: mapping of annotation name to its parsed option (or None).
      comment: the text left over after the annotations; None until set.
    """

    def __init__(self, name):
        self.name = name
        # Initialised here so that reading .value on a freshly created tag
        # does not raise AttributeError.
        self.value = None
        self.options = {}
        self.comment = None
105
106
class Option(object):
    """The value part of one annotation.

    The text is split on whitespace into items.  An item of the form
    ``name=value`` is recorded as a ``(name, value)`` pair, any other item
    is recorded as the bare name.
    """

    def __init__(self, option):
        self._array = []
        self._dict = {}
        # split() without an argument collapses runs of whitespace and
        # ignores leading/trailing whitespace, so "a  b" or " full" do not
        # produce empty-string items.
        for p in option.split():
            if '=' in p:
                name, value = p.split('=', 1)
            else:
                name = p
                value = None
            self._dict[name] = value
            if value is None:
                self._array.append(name)
            else:
                self._array.append((name, value))

    def __repr__(self):
        return '<Option %r>' % (self._array, )

    def one(self):
        """Return the only item; asserts that there is exactly one."""
        assert len(self._array) == 1
        return self._array[0]

    def flat(self):
        """Return all items in order (names or ``(name, value)`` pairs)."""
        return self._array

    def all(self):
        """Return a dict mapping each item name to its value (or None)."""
        return self._dict
136
137
class AnnotationParser(object):
    """Collect gtk-doc comment blocks from a source scanner.

    Every comment returned by ``source_scanner.get_comments()`` is parsed
    at construction time; recognised blocks are stored by name in
    ``self._blocks``.  ``parse()`` then hands them to an AnnotationApplier
    which applies them to the nodes of the namespace.
    """

    def __init__(self, namespace, source_scanner, transformer):
        self._blocks = {}
        self._namespace = namespace
        self._transformer = transformer
        for comment in source_scanner.get_comments():
            self._parse_comment(comment)

    def parse(self):
        """Apply the collected blocks to the namespace."""
        aa = AnnotationApplier(self._blocks, self._transformer)
        aa.parse(self._namespace)

    def _parse_comment(self, comment):
        """Parse one comment and, if it is a gtk-doc block, store it.

        Comments that do not have the expected shape are silently ignored.
        """
        # We're looking for gtk-doc comments here, they look like this:
        # /**
        #   * symbol:
        #
        # Or, alternatively, with options:
        # /**
        #   * symbol: (name value) ...
        #
        # symbol is currently one of:
        #  - function: gtk_widget_show
        #  - signal:   GtkWidget::destroy
        #  - property: GtkWidget:visible
        #
        comment = comment.lstrip()
        if not comment.startswith(_COMMENT_HEADER):
            return
        comment = comment[len(_COMMENT_HEADER):]
        comment = comment.strip()
        if not comment.startswith('* '):
            return
        comment = comment[2:]

        # The header is everything up to the first line break.
        pos = comment.find('\n ')
        if pos == -1:
            return
        header = self._parse_block_header(comment[:pos].strip())
        if header is None:
            return
        block_name, block_options = header
        block = DocBlock(block_name, block_options)
        comment_lines = []
        for line in comment[pos+1:].split('\n'):
            line = line.lstrip()
            line = line[2:].strip() # Skip ' *'
            if not line:
                continue
            if line.startswith('@'):
                line = line[1:]
            elif not ': ' in line:
                # Neither "@name" nor "Name: value": plain description text.
                comment_lines.append(line)
                continue
            tag_name, value = self._split_tag_namevalue(line)
            canon_name = tag_name.lower()
            if canon_name in block.tags:
                sys.stderr.write("Multiple definition of tag %r\n"
                                 % (canon_name, ))
            block.tags[canon_name] = self._create_tag(canon_name, value)
        block.comment = '\n'.join(comment_lines)
        self._blocks[block.name] = block

    def _parse_block_header(self, block_header):
        """Split a block header line into ``(name, options)``.

        Returns None when the header has options followed by trailing
        text, in which case the comment is not treated as a block.
        """
        cpos = block_header.find(': ')
        if cpos == -1:
            # str.find() returns -1 (which is truthy) when there is no
            # match, so it must be compared explicitly.  A header without
            # options is just "symbol:"; drop the trailing colon only if it
            # is actually there.
            if block_header.endswith(':'):
                block_header = block_header[:-1]
            return block_header, {}
        block_name = block_header[:cpos]
        block_options, rest = self._parse_options(block_header[cpos+2:])
        if rest:
            return None
        return block_name, block_options

    def _split_tag_namevalue(self, raw):
        """Split a line into tag name and value"""
        parts = raw.split(': ', 1)
        if len(parts) == 1:
            tag_name = parts[0]
            value = ''
        else:
            tag_name, value = parts
        return (tag_name, value)

    def _create_tag(self, tag_name, value):
        """Build a DocTag from a tag name and the raw text following it."""
        # Tag: bar
        # Tag: bar opt1 opt2
        tag = DocTag(tag_name)
        tag.value = value
        options, rest = self._parse_options(tag.value)
        tag.options = options
        tag.comment = rest
        return tag

    def _parse_options(self, value):
        """Extract the parenthesised annotations from ``value``.

        Returns ``(options, rest)``.  ``options`` maps each annotation name
        to an Option built from the text after the name, or to None when
        the annotation is a bare name.  ``rest`` is the stripped text after
        the last annotation, or None when no annotation was found at all.
        """
        # (foo)
        # (bar opt1 opt2...)
        opened = -1
        options = {}
        last = None
        for i, c in enumerate(value):
            if c == '(' and opened == -1:
                opened = i+1
            if c == ')' and opened != -1:
                segment = value[opened:i]
                # The first space separates the annotation name from its
                # option text; without a space there is no option.
                name, sep, option = segment.partition(' ')
                if sep:
                    option = Option(option)
                else:
                    option = None
                options[name] = option
                # Skip the ')' and the character following it.
                last = i + 2
                opened = -1

        if last is not None:
            rest = value[last:].strip()
        else:
            rest = None
        return options, rest
258
259
class AnnotationApplier(object):
    """Apply parsed documentation blocks to the nodes of a namespace.

    ``blocks`` maps a block name (a symbol, ``Type:property`` or
    ``Type::signal``) to its parsed block.  ``transformer`` is used to
    resolve type names given in annotations.
    """

    def __init__(self, blocks, transformer):
        self._blocks = blocks
        self._transformer = transformer

    def _get_tag(self, block, tag_name):
        """Return the named tag of ``block``, or None if block is None."""
        if block is None:
            return None

        return block.get(tag_name)

    def parse(self, namespace):
        """Process every node in ``namespace.nodes``."""
        for node in namespace.nodes:
            self._parse_node(node)

    # Boring parsing boilerplate.

    def _parse_node(self, node):
        # The order of the checks is significant if any of these types
        # derive from one another, so it is kept as is.
        if isinstance(node, Function):
            self._parse_function(node)
        elif isinstance(node, Enum):
            self._parse_enum(node)
        elif isinstance(node, Bitfield):
            self._parse_bitfield(node)
        elif isinstance(node, Class):
            self._parse_class(node)
        elif isinstance(node, Interface):
            self._parse_interface(node)
        elif isinstance(node, Callback):
            self._parse_callback(node)
        elif isinstance(node, Record):
            self._parse_record(node)
        elif isinstance(node, Union):
            self._parse_union(node)
        elif isinstance(node, GLibBoxed):
            self._parse_boxed(node)

    def _parse_class(self, class_):
        block = self._blocks.get(class_.type_name)
        self._parse_node_common(class_, block)
        self._parse_constructors(class_.constructors)
        self._parse_methods(class_, class_.methods)
        self._parse_vfuncs(class_, class_.virtual_methods)
        self._parse_methods(class_, class_.static_methods)
        self._parse_properties(class_, class_.properties)
        self._parse_signals(class_, class_.signals)
        self._parse_fields(class_, class_.fields)
        if block:
            class_.doc = block.comment

    def _parse_interface(self, interface):
        block = self._blocks.get(interface.type_name)
        self._parse_node_common(interface, block)
        self._parse_methods(interface, interface.methods)
        self._parse_vfuncs(interface, interface.virtual_methods)
        self._parse_properties(interface, interface.properties)
        self._parse_signals(interface, interface.signals)
        self._parse_fields(interface, interface.fields)
        if block:
            interface.doc = block.comment

    def _parse_record(self, record):
        block = self._blocks.get(record.symbol)
        self._parse_node_common(record, block)
        self._parse_constructors(record.constructors)
        self._parse_methods(record, record.methods)
        self._parse_fields(record, record.fields)
        if block:
            record.doc = block.comment

    def _parse_boxed(self, boxed):
        block = self._blocks.get(boxed.name)
        self._parse_node_common(boxed, block)
        self._parse_constructors(boxed.constructors)
        self._parse_methods(boxed, boxed.methods)
        if block:
            boxed.doc = block.comment

    def _parse_union(self, union):
        block = self._blocks.get(union.name)
        self._parse_node_common(union, block)
        self._parse_fields(union, union.fields)
        self._parse_constructors(union.constructors)
        self._parse_methods(union, union.methods)
        if block:
            union.doc = block.comment

    def _parse_enum(self, enum):
        block = self._blocks.get(enum.symbol)
        self._parse_node_common(enum, block)
        if block:
            enum.doc = block.comment
            type_opt = block.options.get(OPT_TYPE)
            if type_opt and type_opt.one() == OPT_VAL_BITFIELD:
                # This is hack, but hey, it works :-)
                # A "(type bitfield)" option on the block header turns the
                # enum node into a Bitfield in place.
                enum.__class__ = Bitfield

    def _parse_bitfield(self, bitfield):
        block = self._blocks.get(bitfield.symbol)
        self._parse_node_common(bitfield, block)
        if block:
            bitfield.doc = block.comment

    def _parse_constructors(self, constructors):
        for ctor in constructors:
            self._parse_function(ctor)

    def _parse_fields(self, parent, fields):
        for field in fields:
            self._parse_field(parent, field)

    def _parse_properties(self, parent, properties):
        for prop in properties:
            self._parse_property(parent, prop)

    def _parse_methods(self, parent, methods):
        for method in methods:
            self._parse_method(parent, method)

    def _parse_vfuncs(self, parent, vfuncs):
        for vfunc in vfuncs:
            self._parse_vfunc(parent, vfunc)

    def _parse_signals(self, parent, signals):
        for signal in signals:
            self._parse_signal(parent, signal)

    def _parse_property(self, parent, prop):
        # Property blocks are named "TypeName:property-name".
        block = self._blocks.get('%s:%s' % (parent.type_name, prop.name))
        self._parse_node_common(prop, block)
        if block:
            prop.doc = block.comment

    def _parse_callback(self, callback):
        # Callback blocks are looked up by the callback's C type name.
        block = self._blocks.get(callback.ctype)
        self._parse_callable(callback, block)

    def _parse_callable(self, callable, block):
        """Apply ``block`` to a callable: common tags, params, return, doc."""
        self._parse_node_common(callable, block)
        self._parse_params(callable, callable.parameters, block)
        self._parse_return(callable, callable.retval, block)
        if block:
            callable.doc = block.comment

    def _parse_function(self, func):
        block = self._blocks.get(func.symbol)
        self._parse_callable(func, block)

    def _parse_signal(self, parent, signal):
        # Signal blocks are named "TypeName::signal-name".
        block = self._blocks.get('%s::%s' % (parent.type_name, signal.name))
        self._parse_node_common(signal, block)
        # We're only attempting to name the signal parameters if the block
        # has more tags than the signal has parameters: the first tag is
        # skipped below (names[i+1]), so one extra tag is required.
        resolve = self._transformer.resolve_param_type
        if block and len(block.tags) > len(signal.parameters):
            # list() so the result can be indexed regardless of what
            # items() returns.
            names = list(block.tags.items())
        else:
            names = []
        for i, param in enumerate(signal.parameters):
            if names:
                name, tag = names[i+1]
                param.name = name
                options = getattr(tag, 'options', {})
                param_type = options.get(OPT_TYPE)
                if param_type:
                    param.type.name = resolve(param_type.one())
            else:
                tag = None
            self._parse_param(signal, param, tag)
        self._parse_return(signal, signal.retval, block)
        if block:
            signal.doc = block.comment

    def _parse_method(self, parent, meth):
        block = self._blocks.get(meth.symbol)
        self._parse_function(meth)
        virtual = self._get_tag(block, TAG_VFUNC)
        if virtual:
            # The tag value names the virtual method this method invokes.
            invoker_name = virtual.value
            matched = False
            for vfunc in parent.virtual_methods:
                if vfunc.name == invoker_name:
                    matched = True
                    vfunc.invoker = meth.name
                    break
            if not matched:
                sys.stdout.write(
                    "warning: unmatched virtual invoker %r for method %r\n"
                    % (invoker_name, meth.symbol))

    def _parse_vfunc(self, parent, vfunc):
        key = '%s::%s' % (parent.type_name, vfunc.name)
        self._parse_callable(vfunc, self._blocks.get(key))

    def _parse_field(self, parent, field):
        # Only callback fields carry annotations that are handled here.
        if isinstance(field, Callback):
            self._parse_callback(field)

    def _parse_params(self, parent, params, block):
        for param in params:
            tag = self._get_tag(block, param.name)
            self._parse_param(parent, param, tag)

    def _parse_return(self, parent, return_, block):
        tag = self._get_tag(block, TAG_RETURNS)
        self._parse_param_ret_common(parent, return_, tag)

    def _parse_param(self, parent, param, tag):
        if isinstance(parent, Function):
            options = getattr(tag, 'options', {})
            scope = options.get(OPT_SCOPE)
            if scope:
                param.scope = scope.one()
                param.transfer = PARAM_TRANSFER_NONE
        self._parse_param_ret_common(parent, param, tag)

    def _parse_param_ret_common(self, parent, node, tag):
        """Apply the annotations of ``tag`` to a parameter or return value.

        ``tag`` may be None, in which case only the guessed defaults are
        applied.
        """
        options = getattr(tag, 'options', {})
        node.direction = self._extract_direction(node, options)
        container_type = self._extract_container_type(
            parent, node, options)
        if container_type is not None:
            node.type = container_type
        if node.direction is None:
            node.direction = self._guess_direction(node)
        node.transfer = self._extract_transfer(parent, node, options)
        if OPT_ALLOW_NONE in options:
            node.allow_none = True
        param_type = options.get(OPT_TYPE)
        if param_type:
            resolve = self._transformer.resolve_param_type
            node.type.name = resolve(param_type.one())

        assert node.transfer is not None
        if tag is not None and tag.comment is not None:
            node.doc = tag.comment

    def _extract_direction(self, node, options):
        """Return the annotated direction, else the node's current one."""
        if (OPT_INOUT in options or
            OPT_INOUT_ALT in options):
            direction = PARAM_DIRECTION_INOUT
        elif OPT_OUT in options:
            direction = PARAM_DIRECTION_OUT
        elif OPT_IN in options:
            direction = PARAM_DIRECTION_IN
        else:
            direction = node.direction
        return direction

    def _guess_array(self, node):
        """Return True if the node's C type is a default array type."""
        ctype = node.type.ctype
        if ctype is None:
            return False
        if not ctype.endswith('*'):
            return False
        if node.type.canonical in default_array_types:
            return True
        return False

    def _extract_container_type(self, parent, node, options):
        """Return an Array/List/Map type for the node, or None."""
        has_element_type = OPT_ELEMENT_TYPE in options
        has_array = OPT_ARRAY in options

        # FIXME: This is a hack :-(
        if (not isinstance(node, Field) and
            (not has_element_type and
             (node.direction is None
              or node.direction == PARAM_DIRECTION_IN))):
            if self._guess_array(node):
                has_array = True

        if has_array:
            container_type = self._parse_array(parent, node, options)
        elif has_element_type:
            container_type = self._parse_element_type(parent, node, options)
        else:
            container_type = None

        return container_type

    def _parse_array(self, parent, node, options):
        """Build an Array type from the array/element-type annotations."""
        array_opt = options.get(OPT_ARRAY)
        if array_opt:
            array_values = array_opt.all()
        else:
            array_values = {}

        element_type = options.get(OPT_ELEMENT_TYPE)
        if element_type is not None:
            element_type_name = element_type.one()
        else:
            element_type_name = node.type.name

        container_type = Array(node.type.ctype,
                               element_type_name)
        if OPT_ARRAY_ZERO_TERMINATED in array_values:
            container_type.zeroterminated = array_values.get(
                OPT_ARRAY_ZERO_TERMINATED) == '1'
        length = array_values.get(OPT_ARRAY_LENGTH)
        if length is not None:
            param_index = parent.get_parameter_index(length)
            container_type.length_param_index = param_index
            # For in parameters we're incorrectly deferring
            # char/unsigned char to utf8 when a length annotation
            # is specified.
            if (isinstance(node, Parameter) and
                node.type.name == 'utf8' and
                self._guess_direction(node) == PARAM_DIRECTION_IN):
                # FIXME: unsigned char/guchar should be uint8
                container_type.element_type = 'int8'
        container_type.size = array_values.get(OPT_ARRAY_FIXED_SIZE)
        return container_type

    def _parse_element_type(self, parent, node, options):
        """Build a List or Map type from the element-type annotation.

        Returns None (after printing a FIXME note) when the node's type is
        not one of the handled containers.
        """
        element_type_opt = options.get(OPT_ELEMENT_TYPE)
        element_type = element_type_opt.flat()
        if node.type.name in ['GLib.List', 'GLib.SList']:
            assert len(element_type) == 1
            etype = Type(element_type[0])
            container_type = List(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(etype))
        elif node.type.name in ['GLib.HashTable']:
            assert len(element_type) == 2
            key_type = Type(element_type[0])
            value_type = Type(element_type[1])
            container_type = Map(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(key_type),
                self._transformer.resolve_param_type(value_type))
        else:
            sys.stdout.write(
                'FIXME: unhandled element-type container: %s\n' % (node, ))
            # Without this assignment the return below would raise
            # UnboundLocalError; None means "leave the node type alone".
            container_type = None
        return container_type

    def _extract_transfer(self, parent, node, options):
        """Return the annotated transfer mode, or a guess if not annotated.

        Raises InvalidAnnotationError for an unknown transfer value.
        """
        transfer_opt = options.get(OPT_TRANSFER)
        if transfer_opt is None:
            transfer = self._guess_transfer(node, options)
        else:
            transfer = transfer_opt.one()
            if transfer is None:
                transfer = PARAM_TRANSFER_FULL
            if transfer not in [PARAM_TRANSFER_NONE,
                                PARAM_TRANSFER_CONTAINER,
                                PARAM_TRANSFER_FULL]:
                raise InvalidAnnotationError(
                    "transfer for %s of %r is invalid (%r), must be one of "
                    "none, container, full." % (node, parent.name, transfer))
        return transfer

    def _parse_node_common(self, node, block):
        """Apply the tags shared by all node kinds."""
        self._parse_version(node, block)
        self._parse_deprecated(node, block)
        self._parse_attributes(node, block)

    def _parse_version(self, node, block):
        since_tag = self._get_tag(block, TAG_SINCE)
        if since_tag is None:
            return
        node.version = since_tag.value

    def _split_deprecated(self, value):
        """Split a deprecation value into ``(version, description)``.

        ``"VERSION: text"`` is split at the first ``': '`` only, so the
        description may itself contain ``': '``.  Without a separator the
        version is None and the whole value is the description.
        """
        if ': ' in value:
            version, desc = value.split(': ', 1)
        else:
            version, desc = None, value
        return version, desc

    def _parse_deprecated(self, node, block):
        deprecated_tag = self._get_tag(block, TAG_DEPRECATED)
        if deprecated_tag is None:
            return
        version, desc = self._split_deprecated(deprecated_tag.value)
        node.deprecated = desc
        if version is not None:
            node.deprecated_version = version

    def _parse_attributes(self, node, block):
        annos_tag = self._get_tag(block, TAG_ATTRIBUTES)
        if annos_tag is None:
            return
        for key, value in annos_tag.options.items():
            node.attributes.append((key, value.one()))

    def _guess_direction(self, node):
        """Return the node's direction, guessing one if it has none."""
        if node.direction:
            return node.direction
        is_pointer = False
        if node.type.ctype:
            is_pointer = '*' in node.type.ctype

        # A pointer to a basic type is taken to be an out parameter.
        if is_pointer and node.type.name in BASIC_GIR_TYPES:
            return PARAM_DIRECTION_OUT

        return PARAM_DIRECTION_IN

    def _guess_transfer(self, node, options):
        """Return the node's transfer mode, guessing one if it has none.

        ``options`` is accepted but not used by the guess.
        """
        if node.transfer is not None:
            return node.transfer

        if isinstance(node.type, Array):
            return PARAM_TRANSFER_NONE
        # Anything with 'const' gets none
        if node.type.is_const:
            return PARAM_TRANSFER_NONE

        elif node.type.name in [TYPE_NONE, TYPE_ANY]:
            return PARAM_TRANSFER_NONE
        elif isinstance(node.type, Varargs):
            return PARAM_TRANSFER_NONE
        elif isinstance(node, Parameter):
            if node.direction in [PARAM_DIRECTION_INOUT,
                                  PARAM_DIRECTION_OUT]:
                return PARAM_TRANSFER_FULL
            # This one is a hack for compatibility; the transfer
            # for string parameters really has no defined meaning.
            elif node.type.canonical == 'utf8':
                return PARAM_TRANSFER_FULL
            else:
                return PARAM_TRANSFER_NONE
        elif isinstance(node, Return):
            if (node.type.canonical in BASIC_GIR_TYPES or
                (node.type.canonical in [TYPE_NONE, TYPE_ANY] and
                 node.type.is_const)):
                return PARAM_TRANSFER_NONE
            else:
                return PARAM_TRANSFER_FULL
        elif isinstance(node, Field):
            return PARAM_TRANSFER_NONE
        else:
            raise AssertionError(node)