Bug 556475 – support Shadows: annotation
[gnome.gobject-introspection] / giscanner / annotationparser.py
1 # -*- Mode: Python -*-
2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008  Johan Dahlin
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 # 02110-1301, USA.
19 #
20
21 # AnnotationParser - parses gtk-doc annotations
22
23 import sys
24
25 from .ast import (Array, Bitfield, Callback, Class, Enum, Field, Function,
26                   Interface, List, Map, Parameter, Record, Return, Type, Union,
27                   Varargs,
28                   default_array_types,
29                   BASIC_GIR_TYPES,
30                   PARAM_DIRECTION_INOUT,
31                   PARAM_DIRECTION_IN,
32                   PARAM_DIRECTION_OUT,
33                   PARAM_TRANSFER_NONE,
34                   PARAM_TRANSFER_CONTAINER,
35                   PARAM_TRANSFER_FULL,
36                   TYPE_ANY, TYPE_NONE)
37 from .odict import odict
38 from .glibast import GLibBoxed
39
# Every gtk-doc comment must start with this (the text that follows the
# opening '/*' of a '/**' comment):
_COMMENT_HEADER = '*\n '

# Tags - annotations applied to comment blocks
TAG_VFUNC = 'virtual'
TAG_SINCE = 'since'
TAG_DEPRECATED = 'deprecated'
TAG_RETURNS = 'returns'
TAG_RETURNS_ALT = 'return value'
TAG_ATTRIBUTES = 'attributes'
TAG_RENAME_TO = 'rename to'

# Options - annotations for parameters and return values
OPT_ALLOW_NONE = 'allow-none'
OPT_ARRAY = 'array'
OPT_ELEMENT_TYPE = 'element-type'
OPT_IN = 'in'
OPT_INOUT = 'inout'
OPT_INOUT_ALT = 'in-out'
OPT_OUT = 'out'
OPT_SCOPE = 'scope'
OPT_TRANSFER = 'transfer'
OPT_TYPE = 'type'

# Specific option values
OPT_VAL_BITFIELD = 'bitfield'

# Array options - array-specific annotations (given as values of OPT_ARRAY)
OPT_ARRAY_FIXED_SIZE = 'fixed-size'
OPT_ARRAY_LENGTH = 'length'
OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
71
72
class InvalidAnnotationError(Exception):
    """Raised when an annotation carries a value that is not acceptable."""
75
76
class DocBlock(object):
    """One parsed documentation comment block.

    Holds the block name, the options given on its header line, the tags
    found in the body (keyed by lower-cased tag name) and the free-form
    comment text.
    """

    def __init__(self, name, options):
        self.name = name
        self.options = options
        self.value = None
        # Ordered mapping, so tags keep the order in which they were added.
        self.tags = odict()
        self.comment = None

    def __repr__(self):
        return '<DocBlock %r %r>' % (self.name, self.options)

    def get(self, name):
        """Return the tag stored under *name*, or None.

        A lookup of TAG_RETURNS falls back to the alternative spelling
        TAG_RETURNS_ALT when the primary one is absent.
        """
        found = self.tags.get(name)
        if name != TAG_RETURNS or found is not None:
            return found
        return self.tags.get(TAG_RETURNS_ALT)
98
99
class DocTag(object):
    """A single tag found inside a documentation block.

    Instances start out with a name, an empty option mapping and no
    comment; the remaining fields are filled in by whoever creates the tag.
    """

    def __init__(self, name):
        self.name = name
        self.options = {}
        self.comment = None

    def __repr__(self):
        shown = (self.name, self.options)
        return '<DocTag %r %r>' % shown
109
class Option(object):
    """Parsed value part of an annotation option.

    The raw text is a whitespace-separated list of items, each either a
    bare ``name`` or a ``name=value`` pair.  The items are kept both in
    their original order (see flat()) and as a mapping (see all()).
    """

    def __init__(self, option):
        self._array = []
        self._dict = {}
        # split() without an argument collapses runs of whitespace and
        # ignores leading/trailing blanks, so 'full ' or 'a  b' do not
        # produce spurious empty-named items.
        for p in option.split():
            if '=' in p:
                name, value = p.split('=', 1)
            else:
                name = p
                value = None
            self._dict[name] = value
            if value is None:
                self._array.append(name)
            else:
                self._array.append((name, value))

    def __repr__(self):
        return '<Option %r>' % (self._array, )

    def one(self):
        """Return the only item; asserts that there is exactly one."""
        assert len(self._array) == 1
        return self._array[0]

    def flat(self):
        """Return the items in order: bare names or (name, value) tuples."""
        return self._array

    def all(self):
        """Return a dict mapping each name to its value (None if bare)."""
        return self._dict
139
140
class AnnotationParser(object):
    """Collect gtk-doc comment blocks and apply them to a namespace.

    On construction every comment returned by
    ``source_scanner.get_comments()`` is parsed into a DocBlock and stored
    by block name.  parse() then hands the collected blocks to an
    AnnotationApplier.
    """

    def __init__(self, namespace, source_scanner, transformer):
        self._blocks = {}
        self._namespace = namespace
        self._transformer = transformer
        for comment in source_scanner.get_comments():
            self._parse_comment(comment)

    def parse(self):
        """Apply all collected blocks to the namespace given at creation."""
        aa = AnnotationApplier(self._blocks, self._transformer)
        aa.parse(self._namespace)

    def _parse_comment(self, comment):
        """Parse one raw comment; store a DocBlock if it is a gtk-doc one.

        Comments that do not have the expected shape are silently ignored.
        """
        # We're looking for gtk-doc comments here, they look like this:
        # /**
        #   * symbol:
        #
        # Or, alternatively, with options:
        # /**
        #   * symbol: (name value) ...
        #
        # symbol is currently one of:
        #  - function: gtk_widget_show
        #  - signal:   GtkWidget::destroy
        #  - property: GtkWidget:visible
        #
        comment = comment.lstrip()
        if not comment.startswith(_COMMENT_HEADER):
            return
        comment = comment[len(_COMMENT_HEADER):]
        comment = comment.strip()
        if not comment.startswith('* '):
            return
        comment = comment[2:]

        pos = comment.find('\n ')
        if pos == -1:
            return
        block_header = comment[:pos].strip()
        cpos = block_header.find(': ')
        # find() returns -1 when there is no match, which is truthy, so the
        # result has to be compared explicitly.
        if cpos > 0:
            block_name = block_header[:cpos]
            block_options, rest = self._parse_options(block_header[cpos+2:])
            if rest:
                # Trailing text after the options: not a valid header.
                return
        else:
            block_name, block_options = block_header, {}
            # A plain 'symbol:' header: drop the terminating colon so the
            # block is stored under the bare symbol name.
            if block_name.endswith(':'):
                block_name = block_name[:-1]
        block = DocBlock(block_name, block_options)
        comment_lines = []
        for line in comment[pos+1:].split('\n'):
            line = line.lstrip()
            line = line[2:].strip() # Skip ' *'
            if not line:
                continue
            if line.startswith('@'):
                line = line[1:]
            elif ': ' not in line:
                # Neither '@name' nor 'Name: value': plain description text.
                comment_lines.append(line)
                continue
            tag_name, value = self._split_tag_namevalue(line)
            canon_name = tag_name.lower()
            if canon_name in block.tags:
                sys.stderr.write("Multiple definition of tag %r\n"
                                 % (canon_name, ))
            # The last definition of a tag wins.
            block.tags[canon_name] = self._create_tag(canon_name, value)
        block.comment = '\n'.join(comment_lines)
        self._blocks[block.name] = block

    def _split_tag_namevalue(self, raw):
        """Split a line into tag name and value.

        The split happens at the first ': '.  A line without it yields an
        empty value; a trailing ':' on such a name (as in '@param:') is
        removed so the name matches the identifier it documents.
        """
        parts = raw.split(': ', 1)
        if len(parts) == 1:
            tag_name = parts[0]
            value = ''
            if tag_name.endswith(':'):
                tag_name = tag_name[:-1]
        else:
            tag_name, value = parts
        return (tag_name, value)

    def _create_tag(self, tag_name, value):
        """Build a DocTag from a tag name and the raw text after it."""
        # Tag: bar
        # Tag: bar opt1 opt2
        tag = DocTag(tag_name)
        tag.value = value
        options, rest = self._parse_options(tag.value)
        tag.options = options
        tag.comment = rest
        return tag

    def _parse_options(self, value):
        """Extract parenthesised options from *value*.

        Returns ``(options, rest)``: *options* maps each option name to an
        Option built from the text after the name, or to None when the
        parentheses contain only a name.  *rest* is the stripped text
        following the last option, or None when no option was found.
        """
        # (foo)
        # (bar opt1 opt2...)
        opened = -1
        options = {}
        last = None
        for i, c in enumerate(value):
            if c == '(' and opened == -1:
                opened = i+1
            if c == ')' and opened != -1:
                segment = value[opened:i]
                # split(' ', 1) always yields one or two parts.
                parts = segment.split(' ', 1)
                if len(parts) == 2:
                    name, option = parts
                    option = Option(option)
                else:
                    name = parts[0]
                    option = None
                options[name] = option
                # Skip the ')' and the character following it.
                last = i + 2
                opened = -1

        if last is not None:
            rest = value[last:].strip()
        else:
            rest = None
        return options, rest
261
262
class AnnotationApplier(object):
    """Apply parsed documentation blocks to the nodes of a namespace.

    *blocks* maps block names to DocBlock-like objects (anything with
    ``get``, ``options``, ``tags`` and ``comment``); *transformer* is used
    to resolve type names given in annotations.
    """

    def __init__(self, blocks, transformer):
        self._blocks = blocks
        self._transformer = transformer

    def _get_tag(self, block, tag_name):
        """Return the tag *tag_name* of *block*, or None without a block."""
        if block is None:
            return None

        return block.get(tag_name)

    def parse(self, namespace):
        """Apply annotations to every node of *namespace*."""
        self._namespace = namespace
        # Iterate over a copy: _parse_rename_to_func() may remove nodes.
        for node in namespace.nodes[:]:
            self._parse_node(node)
        del self._namespace

    # Boring parsing boilerplate.

    def _parse_node(self, node):
        """Dispatch *node* to the handler for its type; others are ignored."""
        if isinstance(node, Function):
            self._parse_function(node)
        elif isinstance(node, Enum):
            self._parse_enum(node)
        elif isinstance(node, Bitfield):
            self._parse_bitfield(node)
        elif isinstance(node, Class):
            self._parse_class(node)
        elif isinstance(node, Interface):
            self._parse_interface(node)
        elif isinstance(node, Callback):
            self._parse_callback(node)
        elif isinstance(node, Record):
            self._parse_record(node)
        elif isinstance(node, Union):
            self._parse_union(node)
        elif isinstance(node, GLibBoxed):
            self._parse_boxed(node)

    def _parse_class(self, class_):
        block = self._blocks.get(class_.type_name)
        self._parse_node_common(class_, block)
        self._parse_constructors(class_.constructors)
        self._parse_methods(class_, class_.methods)
        self._parse_vfuncs(class_, class_.virtual_methods)
        self._parse_methods(class_, class_.static_methods)
        self._parse_properties(class_, class_.properties)
        self._parse_signals(class_, class_.signals)
        self._parse_fields(class_, class_.fields)
        if block:
            class_.doc = block.comment

    def _parse_interface(self, interface):
        block = self._blocks.get(interface.type_name)
        self._parse_node_common(interface, block)
        self._parse_methods(interface, interface.methods)
        self._parse_vfuncs(interface, interface.virtual_methods)
        self._parse_properties(interface, interface.properties)
        self._parse_signals(interface, interface.signals)
        self._parse_fields(interface, interface.fields)
        if block:
            interface.doc = block.comment

    def _parse_record(self, record):
        block = self._blocks.get(record.symbol)
        self._parse_node_common(record, block)
        self._parse_constructors(record.constructors)
        self._parse_methods(record, record.methods)
        self._parse_fields(record, record.fields)
        if block:
            record.doc = block.comment

    def _parse_boxed(self, boxed):
        block = self._blocks.get(boxed.name)
        self._parse_node_common(boxed, block)
        self._parse_constructors(boxed.constructors)
        self._parse_methods(boxed, boxed.methods)
        if block:
            boxed.doc = block.comment

    def _parse_union(self, union):
        block = self._blocks.get(union.name)
        self._parse_node_common(union, block)
        self._parse_fields(union, union.fields)
        self._parse_constructors(union.constructors)
        self._parse_methods(union, union.methods)
        if block:
            union.doc = block.comment

    def _parse_enum(self, enum):
        block = self._blocks.get(enum.symbol)
        self._parse_node_common(enum, block)
        if block:
            enum.doc = block.comment
            type_opt = block.options.get(OPT_TYPE)
            if type_opt and type_opt.one() == OPT_VAL_BITFIELD:
                # This is hack, but hey, it works :-)
                # A '(type bitfield)' block option turns the enum node
                # into a Bitfield in place.
                enum.__class__ = Bitfield

    def _parse_bitfield(self, bitfield):
        block = self._blocks.get(bitfield.symbol)
        self._parse_node_common(bitfield, block)
        if block:
            bitfield.doc = block.comment

    def _parse_constructors(self, constructors):
        for ctor in constructors:
            self._parse_function(ctor)

    def _parse_fields(self, parent, fields):
        for field in fields:
            self._parse_field(parent, field)

    def _parse_properties(self, parent, properties):
        for prop in properties:
            self._parse_property(parent, prop)

    def _parse_methods(self, parent, methods):
        for method in methods:
            self._parse_method(parent, method)

    def _parse_vfuncs(self, parent, vfuncs):
        for vfunc in vfuncs:
            self._parse_vfunc(parent, vfunc)

    def _parse_signals(self, parent, signals):
        for signal in signals:
            self._parse_signal(parent, signal)

    def _parse_property(self, parent, prop):
        # Property blocks are named 'TypeName:property-name'.
        block = self._blocks.get('%s:%s' % (parent.type_name, prop.name))
        self._parse_node_common(prop, block)
        if block:
            prop.doc = block.comment

    def _parse_callback(self, callback):
        # Callback blocks are looked up by C type name; the rest is the
        # same treatment every callable gets.
        self._parse_callable(callback, self._blocks.get(callback.ctype))

    def _parse_callable(self, callable, block):
        """Apply common, parameter and return annotations, then the doc."""
        self._parse_node_common(callable, block)
        self._parse_params(callable, callable.parameters, block)
        self._parse_return(callable, callable.retval, block)
        if block:
            callable.doc = block.comment

    def _parse_function(self, func):
        block = self._blocks.get(func.symbol)
        self._parse_callable(func, block)
        self._parse_rename_to_func(func, block)

    def _parse_signal(self, parent, signal):
        # Signal blocks are named 'TypeName::signal-name'.
        block = self._blocks.get('%s::%s' % (parent.type_name, signal.name))
        self._parse_node_common(signal, block)
        # We only attempt to name the signal parameters if the block has
        # more tags than the signal has parameters: parameter i is taken
        # from tag i+1, i.e. the first tag is skipped (presumably it
        # documents the emitting instance -- confirm).
        resolve = self._transformer.resolve_param_type
        if block and len(block.tags) > len(signal.parameters):
            # list() so the pairs can be indexed on any Python version.
            names = list(block.tags.items())
        else:
            names = []
        for i, param in enumerate(signal.parameters):
            if names:
                name, tag = names[i+1]
                param.name = name
                options = getattr(tag, 'options', {})
                param_type = options.get(OPT_TYPE)
                if param_type:
                    param.type.name = resolve(param_type.one())
            else:
                tag = None
            self._parse_param(signal, param, tag)
        self._parse_return(signal, signal.retval, block)
        if block:
            signal.doc = block.comment

    def _parse_method(self, parent, meth):
        """Annotate a method and link it to the vfunc it invokes, if any."""
        block = self._blocks.get(meth.symbol)
        self._parse_function(meth)
        virtual = self._get_tag(block, TAG_VFUNC)
        if virtual:
            invoker_name = virtual.value
            matched = False
            for vfunc in parent.virtual_methods:
                if vfunc.name == invoker_name:
                    matched = True
                    vfunc.invoker = meth.name
                    break
            if not matched:
                print("warning: unmatched virtual invoker %r for method %r"
                      % (invoker_name, meth.symbol))

    def _parse_vfunc(self, parent, vfunc):
        key = '%s::%s' % (parent.type_name, vfunc.name)
        self._parse_callable(vfunc, self._blocks.get(key))

    def _parse_field(self, parent, field):
        # Only callback fields carry annotations that are handled here.
        if isinstance(field, Callback):
            self._parse_callback(field)

    def _parse_params(self, parent, params, block):
        for param in params:
            tag = self._get_tag(block, param.name)
            self._parse_param(parent, param, tag)

    def _parse_return(self, parent, return_, block):
        tag = self._get_tag(block, TAG_RETURNS)
        self._parse_param_ret_common(parent, return_, tag)

    def _parse_param(self, parent, param, tag):
        if isinstance(parent, Function):
            options = getattr(tag, 'options', {})
            scope = options.get(OPT_SCOPE)
            if scope:
                param.scope = scope.one()
                param.transfer = PARAM_TRANSFER_NONE
        self._parse_param_ret_common(parent, param, tag)

    def _parse_param_ret_common(self, parent, node, tag):
        """Apply the annotations shared by parameters and return values.

        Sets direction, container type, transfer, allow-none, an explicit
        type override and the documentation of *node* from *tag* (which
        may be None, in which case everything is guessed).
        """
        options = getattr(tag, 'options', {})
        node.direction = self._extract_direction(node, options)
        container_type = self._extract_container_type(
            parent, node, options)
        if container_type is not None:
            node.type = container_type
        if node.direction is None:
            node.direction = self._guess_direction(node)
        node.transfer = self._extract_transfer(parent, node, options)
        if OPT_ALLOW_NONE in options:
            node.allow_none = True
        param_type = options.get(OPT_TYPE)
        if param_type:
            resolve = self._transformer.resolve_param_type
            node.type.name = resolve(param_type.one())

        assert node.transfer is not None
        if tag is not None and tag.comment is not None:
            node.doc = tag.comment

    def _extract_direction(self, node, options):
        """Return the annotated direction, else the node's current one."""
        if (OPT_INOUT in options or
            OPT_INOUT_ALT in options):
            direction = PARAM_DIRECTION_INOUT
        elif OPT_OUT in options:
            direction = PARAM_DIRECTION_OUT
        elif OPT_IN in options:
            direction = PARAM_DIRECTION_IN
        else:
            direction = node.direction
        return direction

    def _guess_array(self, node):
        """Return True for pointer types listed in default_array_types."""
        ctype = node.type.ctype
        if ctype is None:
            return False
        if not ctype.endswith('*'):
            return False
        if node.type.canonical in default_array_types:
            return True
        return False

    def _extract_container_type(self, parent, node, options):
        """Return an Array/List/Map type for *node*, or None."""
        has_element_type = OPT_ELEMENT_TYPE in options
        has_array = OPT_ARRAY in options

        # FIXME: This is a hack :-(
        if (not isinstance(node, Field) and
            (not has_element_type and
             (node.direction is None
              or node.direction == PARAM_DIRECTION_IN))):
            if self._guess_array(node):
                has_array = True

        if has_array:
            container_type = self._parse_array(parent, node, options)
        elif has_element_type:
            container_type = self._parse_element_type(parent, node, options)
        else:
            container_type = None

        return container_type

    def _parse_array(self, parent, node, options):
        """Build an Array type for *node* from the array annotation."""
        array_opt = options.get(OPT_ARRAY)
        if array_opt:
            array_values = array_opt.all()
        else:
            array_values = {}

        element_type = options.get(OPT_ELEMENT_TYPE)
        if element_type is not None:
            element_type_name = element_type.one()
        else:
            element_type_name = node.type.name

        container_type = Array(node.type.ctype,
                               element_type_name)
        if OPT_ARRAY_ZERO_TERMINATED in array_values:
            container_type.zeroterminated = array_values.get(
                OPT_ARRAY_ZERO_TERMINATED) == '1'
        length = array_values.get(OPT_ARRAY_LENGTH)
        if length is not None:
            param_index = parent.get_parameter_index(length)
            container_type.length_param_index = param_index
            # For in parameters we're incorrectly deferring
            # char/unsigned char to utf8 when a length annotation
            # is specified.
            if (isinstance(node, Parameter) and
                node.type.name == 'utf8' and
                self._guess_direction(node) == PARAM_DIRECTION_IN):
                # FIXME: unsigned char/guchar should be uint8
                container_type.element_type = 'int8'
        container_type.size = array_values.get(OPT_ARRAY_FIXED_SIZE)
        return container_type

    def _parse_element_type(self, parent, node, options):
        """Build a List or Map type from an element-type annotation.

        Returns None (after printing a notice) when the node's type is
        not one of the handled containers.
        """
        element_type_opt = options.get(OPT_ELEMENT_TYPE)
        element_type = element_type_opt.flat()
        if node.type.name in ['GLib.List', 'GLib.SList']:
            assert len(element_type) == 1
            etype = Type(element_type[0])
            container_type = List(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(etype))
        elif node.type.name in ['GLib.HashTable']:
            assert len(element_type) == 2
            key_type = Type(element_type[0])
            value_type = Type(element_type[1])
            container_type = Map(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(key_type),
                self._transformer.resolve_param_type(value_type))
        else:
            print('FIXME: unhandled element-type container: %s' % (node, ))
            # No container type could be built; the caller leaves the
            # node's type untouched when it gets None.
            container_type = None
        return container_type

    def _extract_transfer(self, parent, node, options):
        """Return the annotated transfer mode, or a guess without one.

        Raises InvalidAnnotationError for a value other than none,
        container or full.
        """
        transfer_opt = options.get(OPT_TRANSFER)
        if transfer_opt is None:
            transfer = self._guess_transfer(node, options)
        else:
            transfer = transfer_opt.one()
            if transfer is None:
                transfer = PARAM_TRANSFER_FULL
            if transfer not in [PARAM_TRANSFER_NONE,
                                PARAM_TRANSFER_CONTAINER,
                                PARAM_TRANSFER_FULL]:
                raise InvalidAnnotationError(
                    "transfer for %s of %r is invalid (%r), must be one of "
                    "none, container, full." % (node, parent.name, transfer))
        return transfer

    def _parse_node_common(self, node, block):
        """Apply the since, deprecated and attributes tags to *node*."""
        self._parse_version(node, block)
        self._parse_deprecated(node, block)
        self._parse_attributes(node, block)

    def _parse_version(self, node, block):
        since_tag = self._get_tag(block, TAG_SINCE)
        if since_tag is None:
            return
        node.version = since_tag.value

    def _parse_deprecated(self, node, block):
        """Set node.deprecated (and deprecated_version when one is given).

        The tag value is either 'description' or 'version: description'.
        """
        deprecated_tag = self._get_tag(block, TAG_DEPRECATED)
        if deprecated_tag is None:
            return
        value = deprecated_tag.value
        if ': ' in value:
            # Split once only: the description may itself contain ': '.
            version, desc = value.split(': ', 1)
        else:
            desc = value
            version = None
        node.deprecated = desc
        if version is not None:
            node.deprecated_version = version

    def _parse_attributes(self, node, block):
        annos_tag = self._get_tag(block, TAG_ATTRIBUTES)
        if annos_tag is None:
            return
        for key, value in annos_tag.options.items():
            node.attributes.append((key, value.one()))

    def _parse_rename_to_func(self, node, block):
        """Handle the 'rename to' tag.

        The function whose symbol equals the tag value is removed from the
        namespace and *node* takes over its name.  Exactly one such
        function must exist.
        """
        rename_to_tag = self._get_tag(block, TAG_RENAME_TO)
        if rename_to_tag is None:
            return
        new_name = rename_to_tag.value

        shadowed = []

        def shadowed_filter(n):
            # Collects the matching functions as a side effect; the return
            # value is what remove_matching() acts on.
            if isinstance(n, Function) and n.symbol == new_name:
                shadowed.append(n)
                return False
            return True

        self._namespace.remove_matching(shadowed_filter)
        assert len(shadowed) == 1
        node.name = shadowed[0].name

    def _guess_direction(self, node):
        """Return the node's direction, guessing one if it has none.

        Pointers to basic types are guessed to be 'out'; everything else
        is 'in'.
        """
        if node.direction:
            return node.direction
        is_pointer = False
        if node.type.ctype:
            is_pointer = '*' in node.type.ctype

        if is_pointer and node.type.name in BASIC_GIR_TYPES:
            return PARAM_DIRECTION_OUT

        return PARAM_DIRECTION_IN

    def _guess_transfer(self, node, options):
        """Return the node's transfer mode, guessing one if it has none."""
        if node.transfer is not None:
            return node.transfer

        if isinstance(node.type, Array):
            return PARAM_TRANSFER_NONE
        # Anything with 'const' gets none
        if node.type.is_const:
            return PARAM_TRANSFER_NONE

        elif node.type.name in [TYPE_NONE, TYPE_ANY]:
            return PARAM_TRANSFER_NONE
        elif isinstance(node.type, Varargs):
            return PARAM_TRANSFER_NONE
        elif isinstance(node, Parameter):
            if node.direction in [PARAM_DIRECTION_INOUT,
                                  PARAM_DIRECTION_OUT]:
                return PARAM_TRANSFER_FULL
            # This one is a hack for compatibility; the transfer
            # for string parameters really has no defined meaning.
            elif node.type.canonical == 'utf8':
                return PARAM_TRANSFER_FULL
            else:
                return PARAM_TRANSFER_NONE
        elif isinstance(node, Return):
            if (node.type.canonical in BASIC_GIR_TYPES or
                (node.type.canonical in [TYPE_NONE, TYPE_ANY] and
                 node.type.is_const)):
                return PARAM_TRANSFER_NONE
            else:
                return PARAM_TRANSFER_FULL
        elif isinstance(node, Field):
            return PARAM_TRANSFER_NONE
        else:
            raise AssertionError(node)