Merge branch 'master' of ssh://walters@git.gnome.org/git/gobject-introspection
[gnome.gobject-introspection] / giscanner / annotationparser.py
1 # -*- Mode: Python -*-
2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008  Johan Dahlin
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 # 02110-1301, USA.
19 #
20
21 # AnnotationParser - parses gtk-doc annotations
22
23 from .ast import (Array, Bitfield, Callback, Class, Enum, Field, Function,
24                   Interface, List, Map, Parameter, Record, Return, Type, Union,
25                   Varargs,
26                   default_array_types,
27                   BASIC_GIR_TYPES,
28                   PARAM_DIRECTION_INOUT,
29                   PARAM_DIRECTION_IN,
30                   PARAM_DIRECTION_OUT,
31                   PARAM_TRANSFER_NONE,
32                   PARAM_TRANSFER_CONTAINER,
33                   PARAM_TRANSFER_FULL,
34                   TYPE_ANY, TYPE_NONE)
35 from .odict import odict
36 from .glibast import GLibBoxed
37
# All gtk-doc comments need to start with this:
# (the text checked is the comment with leading whitespace stripped)
_COMMENT_HEADER = '*\n '

# Tags - annotations applied to comment blocks
TAG_SINCE = 'since'
TAG_DEPRECATED = 'deprecated'
TAG_RETURNS = 'returns'
# Alternative spelling accepted as a fallback for TAG_RETURNS
TAG_RETURNS_ALT = 'return value'

# Options - annotations for parameters and return values
OPT_ALLOW_NONE = 'allow-none'
OPT_ARRAY = 'array'
OPT_ELEMENT_TYPE = 'element-type'
OPT_IN = 'in'
OPT_INOUT = 'inout'
# Alternative spelling treated the same as OPT_INOUT
OPT_INOUT_ALT = 'in-out'
OPT_OUT = 'out'
OPT_SCOPE = 'scope'
OPT_TRANSFER = 'transfer'
OPT_TYPE = 'type'

# Array options - array specific annotations
OPT_ARRAY_FIXED_SIZE = 'fixed-size'
OPT_ARRAY_LENGTH = 'length'
OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
63
64
class InvalidAnnotationError(Exception):
    """Raised when an annotation carries a value that is not accepted."""
67
68
class DocBlock(object):
    """One parsed documentation comment block.

    Attributes:
      name:    the symbol name the block was written for
      value:   initialised to None
      tags:    ordered mapping of lower-cased tag name -> tag object
      comment: free text of the block, or None until it is filled in
    """

    def __init__(self, name):
        self.tags = odict()
        self.comment = None
        self.value = None
        self.name = name

    def __repr__(self):
        return '<DocBlock ' + repr(self.name) + '>'

    def get(self, name):
        """Return the tag stored under name, or None.

        A lookup of TAG_RETURNS that finds nothing is retried with
        TAG_RETURNS_ALT.
        """
        found = self.tags.get(name)
        if found is None and name == TAG_RETURNS:
            found = self.tags.get(TAG_RETURNS_ALT)
        return found
89
90
class DocTag(object):
    """A single tag (e.g. a parameter or 'since') found in a doc block.

    Attributes:
      name:    tag name as written in the comment
      options: mapping of option name -> Option instance, or None for
               an option given without arguments
      comment: text following the options, or None
      value:   raw text following the tag name
    """

    def __init__(self, name):
        self.name = name
        # Consumers look options up with .get() / 'in', so the empty
        # default has to be a mapping rather than a list.
        self.options = {}
        self.comment = None
        # Consumers run substring tests on the value, so default to an
        # empty string instead of leaving the attribute undefined.
        self.value = ''
97
98
class Option(object):
    """The arguments of one annotation option.

    The argument string is split on whitespace.  Each piece is either a
    bare name or a 'name=value' pair (split on the first '=').
    """

    def __init__(self, option):
        # Pieces in source order: a bare name is stored as a string,
        # a pair as a (name, value) tuple.
        self._array = []
        # name -> value, with None as the value of a bare name.
        self._dict = {}
        # split() without a separator collapses runs of whitespace, so a
        # doubled or leading space does not produce an empty-named piece.
        for p in option.split():
            if '=' in p:
                name, value = p.split('=', 1)
            else:
                name = p
                value = None
            self._dict[name] = value
            if value is None:
                self._array.append(name)
            else:
                self._array.append((name, value))

    def __repr__(self):
        return '<Option %r>' % (self._array, )

    def one(self):
        """Return the only piece; asserts that there is exactly one."""
        assert len(self._array) == 1
        return self._array[0]

    def flat(self):
        """Return all pieces as a list, in source order."""
        return self._array

    def all(self):
        """Return the name -> value mapping of all pieces."""
        return self._dict
128
129
class AnnotationParser(object):
    """Collects gtk-doc comment blocks and applies them to a namespace.

    All comments returned by source_scanner.get_comments() are parsed
    at construction time; parse() then applies the collected blocks to
    the namespace via an AnnotationApplier.
    """

    def __init__(self, namespace, source_scanner, transformer):
        # Parsed blocks, keyed by block (symbol) name.
        self._blocks = {}
        self._namespace = namespace
        self._transformer = transformer
        for comment in source_scanner.get_comments():
            self._parse_comment(comment)

    def parse(self):
        """Apply the collected blocks to the namespace."""
        aa = AnnotationApplier(self._blocks, self._transformer)
        aa.parse(self._namespace)

    def _parse_comment(self, comment):
        """Parse one comment and store it in self._blocks.

        Comments that do not look like a gtk-doc block are ignored.
        """
        # We're looking for gtk-doc comments here, they look like this:
        # /**
        #   * symbol:
        #
        # symbol is currently one of:
        #  - function: gtk_widget_show
        #  - signal:   GtkWidget::destroy
        #  - property: GtkWidget:visible
        #
        comment = comment.lstrip()
        if not comment.startswith(_COMMENT_HEADER):
            return
        comment = comment[len(_COMMENT_HEADER):]
        comment = comment.strip()
        if not comment.startswith('* '):
            return
        comment = comment[2:]

        # The first line names the block and has to end with a colon.
        pos = comment.find('\n ')
        if pos == -1:
            return
        block_name = comment[:pos]
        block_name = block_name.strip()
        if not block_name.endswith(':'):
            return
        block = DocBlock(block_name[:-1])
        comment_lines = []
        for line in comment[pos+1:].split('\n'):
            line = line.lstrip()
            line = line[2:].strip()  # Skip the leading '* '
            if not line:
                continue
            if line.startswith('@'):
                line = line[1:]
            elif ': ' not in line:
                # Neither '@name' nor 'Name: ...': plain description text
                comment_lines.append(line)
                continue
            tag = self._parse_tag(line)
            # Tags are stored under their lower-cased name
            block.tags[tag.name.lower()] = tag
        block.comment = '\n'.join(comment_lines)
        self._blocks[block.name] = block

    def _parse_tag(self, raw):
        """Build a DocTag from a 'name: value' line (leading '@' removed)."""
        # Tag: bar
        # Tag: bar opt1 opt2
        parts = raw.split(': ', 1)
        if len(parts) == 1:
            tag_name = parts[0]
            value = ''
            # The caller strips the line, so '@foo:' without a
            # description arrives as 'foo:'.  Drop the colon so that it
            # does not end up in the tag name.
            if tag_name.endswith(':'):
                tag_name = tag_name[:-1]
        else:
            tag_name, value = parts
        options, rest = self._parse_options(value)
        tag = DocTag(tag_name)
        tag.value = value
        tag.options = options
        tag.comment = rest
        return tag

    def _parse_options(self, value):
        """Extract the parenthesised options from a tag value.

        Returns (options, rest).  options maps each option name to an
        Option built from its arguments, or to None when the option has
        no arguments.  rest is the stripped text after the last option,
        or None when value contains no option at all.
        """
        # (foo)
        # (bar opt1 opt2...)
        opened = -1
        options = {}
        last = None
        for i, c in enumerate(value):
            if c == '(' and opened == -1:
                opened = i+1
            elif c == ')' and opened != -1:
                segment = value[opened:i]
                # Text up to the first space is the option name, whatever
                # follows is the argument string.
                name, sep, args = segment.partition(' ')
                if sep:
                    option = Option(args)
                else:
                    option = None
                options[name] = option
                # Skip the ')' and the single character following it
                last = i + 2
                opened = -1

        if last is not None:
            rest = value[last:].strip()
        else:
            rest = None
        return options, rest
232
233
class AnnotationApplier(object):
    """Applies parsed documentation blocks to the nodes of a namespace.

    blocks maps block names to DocBlock-like objects; transformer is
    used to resolve element types of containers.
    """

    def __init__(self, blocks, transformer):
        self._blocks = blocks
        self._transformer = transformer

    def _get_tag(self, block, tag_name):
        """Return the tag tag_name of block, or None if block is None."""
        if block is None:
            return None

        return block.get(tag_name)

    def parse(self, namespace):
        """Apply the blocks to every node in namespace.nodes."""
        for node in namespace.nodes:
            self._parse_node(node)

    # Boring parsing boilerplate.

    def _parse_node(self, node):
        """Dispatch node to the handler matching its type.

        Nodes of any other type are left untouched.
        """
        if isinstance(node, Function):
            self._parse_function(node)
        elif isinstance(node, Enum):
            self._parse_enum(node)
        elif isinstance(node, Bitfield):
            self._parse_bitfield(node)
        elif isinstance(node, Class):
            self._parse_class(node)
        elif isinstance(node, Interface):
            self._parse_interface(node)
        elif isinstance(node, Callback):
            self._parse_callback(node)
        elif isinstance(node, Record):
            self._parse_record(node)
        elif isinstance(node, Union):
            self._parse_union(node)
        elif isinstance(node, GLibBoxed):
            self._parse_boxed(node)

    def _parse_class(self, class_):
        # Block is looked up by the class' type_name
        block = self._blocks.get(class_.type_name)
        self._parse_version(class_, block)
        self._parse_constructors(class_.constructors)
        self._parse_methods(class_.methods)
        self._parse_methods(class_.static_methods)
        self._parse_properties(class_, class_.properties)
        self._parse_signals(class_, class_.signals)
        self._parse_fields(class_, class_.fields)
        if block:
            class_.doc = block.comment

    def _parse_interface(self, interface):
        # Block is looked up by the interface's type_name
        block = self._blocks.get(interface.type_name)
        self._parse_version(interface, block)
        self._parse_methods(interface.methods)
        self._parse_properties(interface, interface.properties)
        self._parse_signals(interface, interface.signals)
        self._parse_fields(interface, interface.fields)
        if block:
            interface.doc = block.comment

    def _parse_record(self, record):
        # Block is looked up by the record's symbol
        block = self._blocks.get(record.symbol)
        self._parse_version(record, block)
        self._parse_constructors(record.constructors)
        self._parse_methods(record.methods)
        self._parse_fields(record, record.fields)
        if block:
            record.doc = block.comment

    def _parse_boxed(self, boxed):
        # Block is looked up by the boxed type's name
        block = self._blocks.get(boxed.name)
        self._parse_version(boxed, block)
        self._parse_constructors(boxed.constructors)
        self._parse_methods(boxed.methods)
        if block:
            boxed.doc = block.comment

    def _parse_union(self, union):
        # Block is looked up by the union's name
        block = self._blocks.get(union.name)
        # Pick up the 'since' tag here as well, like every other node
        # handler in this class does.
        self._parse_version(union, block)
        self._parse_fields(union, union.fields)
        self._parse_constructors(union.constructors)
        self._parse_methods(union.methods)
        if block:
            union.doc = block.comment

    def _parse_enum(self, enum):
        block = self._blocks.get(enum.symbol)
        self._parse_version(enum, block)
        if block:
            enum.doc = block.comment

    def _parse_bitfield(self, bitfield):
        block = self._blocks.get(bitfield.symbol)
        self._parse_version(bitfield, block)
        if block:
            bitfield.doc = block.comment

    def _parse_constructors(self, constructors):
        for ctor in constructors:
            self._parse_function(ctor)

    def _parse_fields(self, parent, fields):
        for field in fields:
            self._parse_field(parent, field)

    def _parse_properties(self, parent, properties):
        for prop in properties:
            self._parse_property(parent, prop)

    def _parse_methods(self, methods):
        for method in methods:
            self._parse_function(method)

    def _parse_signals(self, parent, signals):
        for signal in signals:
            self._parse_signal(parent, signal)

    def _parse_property(self, parent, prop):
        # Property blocks are named 'TypeName:property-name'
        block = self._blocks.get('%s:%s' % (parent.type_name, prop.name))
        self._parse_version(prop, block)
        self._parse_deprecated(prop, block)
        if block:
            prop.doc = block.comment

    def _parse_callback(self, callback):
        # Block is looked up by the callback's ctype
        block = self._blocks.get(callback.ctype)
        self._parse_version(callback, block)
        self._parse_params(callback, callback.parameters, block)
        self._parse_return(callback, callback.retval, block)
        if block:
            callback.doc = block.comment

    def _parse_function(self, func):
        # Block is looked up by the function's symbol
        block = self._blocks.get(func.symbol)
        self._parse_version(func, block)
        self._parse_deprecated(func, block)
        self._parse_params(func, func.parameters, block)
        self._parse_return(func, func.retval, block)
        if block:
            func.doc = block.comment

    def _parse_signal(self, parent, signal):
        # Signal blocks are named 'TypeName::signal-name'
        block = self._blocks.get('%s::%s' % (parent.type_name, signal.name))
        self._parse_version(signal, block)
        self._parse_deprecated(signal, block)
        # We're only attempting to name the signal parameters if the
        # block has more tags than the signal has parameters: the first
        # tag is skipped below (presumably it documents the emitting
        # instance - TODO confirm).
        if block and len(block.tags) > len(signal.parameters):
            # list() so the pairs can be indexed whatever items() returns
            names = list(block.tags.items())
        else:
            names = []
        for i, param in enumerate(signal.parameters):
            if names:
                name, tag = names[i+1]
                param.name = name
                options = getattr(tag, 'options', {})
                param_type = options.get(OPT_TYPE)
                if param_type:
                    param.type.name = param_type.one()
            else:
                tag = None
            self._parse_param(signal, param, tag)
        self._parse_return(signal, signal.retval, block)
        if block:
            signal.doc = block.comment

    def _parse_field(self, parent, field):
        # Only callback fields carry annotations that are applied here
        if isinstance(field, Callback):
            self._parse_callback(field)

    def _parse_params(self, parent, params, block):
        # Each parameter is matched to the tag carrying its name
        for param in params:
            tag = self._get_tag(block, param.name)
            self._parse_param(parent, param, tag)

    def _parse_return(self, parent, return_, block):
        tag = self._get_tag(block, TAG_RETURNS)
        self._parse_param_ret_common(parent, return_, tag)

    def _parse_param(self, parent, param, tag):
        """Apply tag to param; the scope option is honoured for Functions."""
        if isinstance(parent, Function):
            options = getattr(tag, 'options', {})
            scope = options.get(OPT_SCOPE)
            if scope:
                param.scope = scope.one()
                param.transfer = PARAM_TRANSFER_NONE
        self._parse_param_ret_common(parent, param, tag)

    def _parse_param_ret_common(self, parent, node, tag):
        """Set direction, container type, transfer, allow_none and doc.

        tag may be None, in which case no options apply and the values
        are guessed from the node itself.
        """
        options = getattr(tag, 'options', {})
        node.direction = self._extract_direction(node, options)
        container_type = self._extract_container_type(
            parent, node, options)
        if container_type is not None:
            node.type = container_type
        if node.direction is None:
            node.direction = self._guess_direction(node)
        node.transfer = self._extract_transfer(parent, node, options)
        if OPT_ALLOW_NONE in options:
            node.allow_none = True

        assert node.transfer is not None
        if tag is not None and tag.comment is not None:
            node.doc = tag.comment

    def _extract_direction(self, node, options):
        """Return the annotated direction, else the node's current one."""
        if (OPT_INOUT in options or
            OPT_INOUT_ALT in options):
            direction = PARAM_DIRECTION_INOUT
        elif OPT_OUT in options:
            direction = PARAM_DIRECTION_OUT
        elif OPT_IN in options:
            direction = PARAM_DIRECTION_IN
        else:
            direction = node.direction
        return direction

    def _guess_array(self, node):
        """Return True for a pointer ctype listed in default_array_types."""
        ctype = node.type.ctype
        if ctype is None:
            return False
        if not ctype.endswith('*'):
            return False
        if node.type.canonical in default_array_types:
            return True
        return False

    def _extract_container_type(self, parent, node, options):
        """Return an Array/List/Map type for node, or None."""
        has_element_type = OPT_ELEMENT_TYPE in options
        has_array = OPT_ARRAY in options

        # FIXME: This is a hack :-(
        if (not isinstance(node, Field) and
            (not has_element_type and
             (node.direction is None
              or node.direction == PARAM_DIRECTION_IN))):
            if self._guess_array(node):
                has_array = True

        if has_array:
            container_type = self._parse_array(parent, node, options)
        elif has_element_type:
            container_type = self._parse_element_type(parent, node, options)
        else:
            container_type = None

        return container_type

    def _parse_array(self, parent, node, options):
        """Build the Array type for node from the array options."""
        array_opt = options.get(OPT_ARRAY)
        if array_opt:
            array_values = array_opt.all()
        else:
            array_values = {}

        element_type = options.get(OPT_ELEMENT_TYPE)
        if element_type is not None:
            element_type_name = element_type.one()
        else:
            element_type_name = node.type.name

        container_type = Array(node.type.ctype,
                               element_type_name)
        if OPT_ARRAY_ZERO_TERMINATED in array_values:
            container_type.zeroterminated = array_values.get(
                OPT_ARRAY_ZERO_TERMINATED) == '1'
        length = array_values.get(OPT_ARRAY_LENGTH)
        if length is not None:
            param_index = parent.get_parameter_index(length)
            container_type.length_param_index = param_index
            # For in parameters we're incorrectly deferring
            # char/unsigned char to utf8 when a length annotation
            # is specified.
            if (isinstance(node, Parameter) and
                node.type.name == 'utf8' and
                self._guess_direction(node) == PARAM_DIRECTION_IN):
                # FIXME: unsigned char/guchar should be uint8
                container_type.element_type = 'int8'
        container_type.size = array_values.get(OPT_ARRAY_FIXED_SIZE)
        return container_type

    def _parse_element_type(self, parent, node, options):
        """Build a List or Map type from the element-type option.

        Returns None (after printing a FIXME note) when the node's type
        is not one of the handled containers.
        """
        element_type_opt = options.get(OPT_ELEMENT_TYPE)
        element_type = element_type_opt.flat()
        if node.type.name in ['GLib.List', 'GLib.SList']:
            assert len(element_type) == 1
            etype = Type(element_type[0])
            container_type = List(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(etype))
        elif node.type.name in ['GLib.HashTable']:
            assert len(element_type) == 2
            key_type = Type(element_type[0])
            value_type = Type(element_type[1])
            container_type = Map(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(key_type),
                self._transformer.resolve_param_type(value_type))
        else:
            # Single pre-formatted argument: prints the same text whether
            # print is a statement or a function.
            print('FIXME: unhandled element-type container: %s' % (node, ))
            # The caller treats None as "keep the node's current type"
            container_type = None
        return container_type

    def _extract_transfer(self, parent, node, options):
        """Return the annotated transfer mode, or a guess if not given.

        Raises InvalidAnnotationError for an unknown transfer value.
        """
        transfer_opt = options.get(OPT_TRANSFER)
        if transfer_opt is None:
            transfer = self._guess_transfer(node, options)
        else:
            transfer = transfer_opt.one()
            if transfer is None:
                transfer = PARAM_TRANSFER_FULL
            if transfer not in [PARAM_TRANSFER_NONE,
                                PARAM_TRANSFER_CONTAINER,
                                PARAM_TRANSFER_FULL]:
                raise InvalidAnnotationError(
                    "transfer for %s of %r is invalid (%r), must be one of "
                    "none, container, full." % (node, parent.name, transfer))
        return transfer

    def _parse_version(self, node, block):
        """Copy the value of the 'since' tag, if any, to node.version."""
        since_tag = self._get_tag(block, TAG_SINCE)
        if since_tag is None:
            return
        node.version = since_tag.value

    def _parse_deprecated(self, node, block):
        """Apply the 'deprecated' tag, if any, to node.

        A value of the form 'version: description' sets both
        node.deprecated_version and node.deprecated; any other value is
        used as the description only.
        """
        deprecated_tag = self._get_tag(block, TAG_DEPRECATED)
        if deprecated_tag is None:
            return
        value = deprecated_tag.value
        if ': ' in value:
            # Split on the first separator only: the description itself
            # may contain further ': ' sequences.
            version, desc = value.split(': ', 1)
        else:
            desc = value
            version = None
        node.deprecated = desc
        if version is not None:
            node.deprecated_version = version

    def _guess_direction(self, node):
        """Return node.direction if set, else guess it from the type.

        A ctype containing '*' whose type name is in BASIC_GIR_TYPES is
        guessed to be an out parameter, everything else an in parameter.
        """
        if node.direction:
            return node.direction
        is_pointer = False
        if node.type.ctype:
            is_pointer = '*' in node.type.ctype

        if is_pointer and node.type.name in BASIC_GIR_TYPES:
            return PARAM_DIRECTION_OUT

        return PARAM_DIRECTION_IN

    def _guess_transfer(self, node, options):
        """Return node.transfer if set, else guess it from the node.

        Raises AssertionError for a node that is neither a Parameter,
        a Return nor a Field and matches none of the earlier rules.
        """
        if node.transfer is not None:
            return node.transfer

        if isinstance(node.type, Array):
            return PARAM_TRANSFER_NONE
        # Anything with 'const' gets none
        if node.type.is_const:
            return PARAM_TRANSFER_NONE

        elif node.type.name in [TYPE_NONE, TYPE_ANY]:
            return PARAM_TRANSFER_NONE
        elif isinstance(node.type, Varargs):
            return PARAM_TRANSFER_NONE
        elif isinstance(node, Parameter):
            if node.direction in [PARAM_DIRECTION_INOUT,
                                  PARAM_DIRECTION_OUT]:
                return PARAM_TRANSFER_FULL
            # This one is a hack for compatibility; the transfer
            # for string parameters really has no defined meaning.
            elif node.type.canonical == 'utf8':
                return PARAM_TRANSFER_FULL
            else:
                return PARAM_TRANSFER_NONE
        elif isinstance(node, Return):
            if (node.type.canonical in BASIC_GIR_TYPES or
                (node.type.canonical in [TYPE_NONE, TYPE_ANY] and
                 node.type.is_const)):
                return PARAM_TRANSFER_NONE
            else:
                return PARAM_TRANSFER_FULL
        elif isinstance(node, Field):
            return PARAM_TRANSFER_NONE
        else:
            raise AssertionError(node)