240b96e8ed7834ab06acdd555f5f40a8b1ed7800
[gnome.gobject-introspection] / giscanner / annotationparser.py
1 # -*- Mode: Python -*-
2 # GObject-Introspection - a framework for introspecting GObject libraries
3 # Copyright (C) 2008  Johan Dahlin
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 # 02110-1301, USA.
19 #
20
21 # AnnotationParser - parses gtk-doc annotations
22
23 from .ast import (Array, Bitfield, Callback, Class, Enum, Field, Function,
24                   Interface, List, Map, Parameter, Record, Return, Type, Union,
25                   Varargs,
26                   default_array_types,
27                   BASIC_GIR_TYPES,
28                   PARAM_DIRECTION_INOUT,
29                   PARAM_DIRECTION_IN,
30                   PARAM_DIRECTION_OUT,
31                   PARAM_TRANSFER_NONE,
32                   PARAM_TRANSFER_CONTAINER,
33                   PARAM_TRANSFER_FULL,
34                   TYPE_ANY, TYPE_NONE)
35 from .odict import odict
36 from .glibast import GLibBoxed
37
38 # All gtk-doc comments need to start with this:
39 _COMMENT_HEADER = '*\n '
40
41 # Tags - annotations applied to comment blocks
42 TAG_SINCE = 'Since'
43 TAG_DEPRECATED = 'Deprecated'
44 TAG_RETURNS = 'Returns'
45 TAG_RETURNS_ALT = 'Return value'
46
47 # Options - annotations for parameters and return values
48 OPT_ALLOW_NONE = 'allow-none'
49 OPT_ARRAY = 'array'
50 OPT_ELEMENT_TYPE = 'element-type'
51 OPT_IN = 'in'
52 OPT_INOUT = 'inout'
53 OPT_INOUT_ALT = 'in-out'
54 OPT_OUT = 'out'
55 OPT_SCOPE = 'scope'
56 OPT_TRANSFER = 'transfer'
57 OPT_TYPE = 'type'
58
59 # Array options - array specific annotations
60 OPT_ARRAY_FIXED_SIZE = 'fixed-size'
61 OPT_ARRAY_LENGTH = 'length'
62 OPT_ARRAY_ZERO_TERMINATED = 'zero-terminated'
63
64
class InvalidAnnotationError(Exception):
    """Raised when an annotation carries a value that is not accepted."""
67
68
class DocBlock(object):
    """One parsed gtk-doc comment block.

    Holds the block name (the symbol it documents), the tags found in
    the block keyed by tag name, and the remaining free-form comment
    text.
    """

    def __init__(self, name):
        self.comment = None
        self.tags = odict()
        self.value = None
        self.name = name

    def __repr__(self):
        return '<DocBlock ' + repr(self.name) + '>'

    def get(self, name):
        """Return the tag stored under name, or None when there is none.

        A lookup for TAG_RETURNS falls back to the alternative spelling
        TAG_RETURNS_ALT when the primary one is not present.
        """
        found = self.tags.get(name)
        if found is None and name == TAG_RETURNS:
            found = self.tags.get(TAG_RETURNS_ALT)
        return found
89
90
class DocTag(object):
    """A single tag of a gtk-doc block (a '@param' or a 'Name: value' line).

    Attributes:
      name:    the tag name.
      value:   the raw text following 'name: ', or None until it is set.
      options: mapping of annotation name to an Option (or None when the
               annotation has no arguments).
      comment: the text left after the annotations, or None.
    """

    def __init__(self, name):
        self.name = name
        # Initialised here so that readers of .value never hit an
        # AttributeError on a tag that was built by hand.
        self.value = None
        # A mapping, not a list: consumers look annotations up with
        # options.get(...), which is also what the parser stores here.
        self.options = {}
        self.comment = None
97
98
class Option(object):
    """The arguments of one annotation, e.g. 'length=n zero-terminated=1'.

    The argument string is split on whitespace; each piece is either a
    bare name or a 'name=value' pair (split on the first '=').
    """

    def __init__(self, option):
        self._array = []
        self._dict = {}
        # split() without an argument collapses runs of whitespace and
        # drops leading/trailing blanks, so stray spaces never produce
        # empty-string entries.
        for p in option.split():
            if '=' in p:
                name, value = p.split('=', 1)
            else:
                name = p
                value = None
            self._dict[name] = value
            if value is None:
                self._array.append(name)
            else:
                self._array.append((name, value))

    def __repr__(self):
        return '<Option %r>' % (self._array, )

    def one(self):
        """Return the only argument; asserts that there is exactly one."""
        assert len(self._array) == 1
        return self._array[0]

    def flat(self):
        """Return the arguments in order: names, or (name, value) tuples."""
        return self._array

    def all(self):
        """Return the arguments as a name -> value dict (value may be None)."""
        return self._dict
128
129
class AnnotationParser(object):
    """Reads gtk-doc comment blocks from a source scanner.

    Every comment returned by source_scanner.get_comments() is parsed at
    construction time; parse() then applies the collected blocks to the
    namespace through an AnnotationApplier.
    """

    def __init__(self, namespace, source_scanner, transformer):
        self._blocks = {}
        self._namespace = namespace
        self._transformer = transformer
        for raw_comment in source_scanner.get_comments():
            self._parse_comment(raw_comment)

    def parse(self):
        """Apply the collected blocks to the namespace."""
        AnnotationApplier(self._blocks, self._transformer).parse(
            self._namespace)

    def _parse_comment(self, comment):
        """Parse one comment and, if it is a gtk-doc block, store it.

        gtk-doc comments look like this:
          /**
            * symbol:

        where symbol is currently one of:
          - function: gtk_widget_show
          - signal:   GtkWidget::destroy
          - property: GtkWidget:visible

        Comments that do not have this shape are silently ignored.
        """
        text = comment.lstrip()
        if not text.startswith(_COMMENT_HEADER):
            return
        text = text[len(_COMMENT_HEADER):].strip()
        if not text.startswith('* '):
            return
        text = text[2:]

        name_end = text.find('\n ')
        if name_end == -1:
            return
        symbol = text[:name_end].strip()
        if not symbol.endswith(':'):
            return

        block = DocBlock(symbol[:-1])
        description = []
        for raw_line in text[name_end + 1:].split('\n'):
            # Drop the indentation and the two leading characters of the
            # line (the '* ' prefix of a comment line).
            line = raw_line.lstrip()[2:].strip()
            if not line:
                continue
            is_param = line.startswith('@')
            if not is_param and ': ' not in line:
                # Neither '@param' nor 'Tag: value': plain description.
                description.append(line)
                continue
            if is_param:
                line = line[1:]
            tag = self._parse_tag(line)
            block.tags[tag.name] = tag
        block.comment = '\n'.join(description)
        self._blocks[block.name] = block

    def _parse_tag(self, raw):
        """Build a DocTag from a line of the form:

          Tag: bar
          Tag: (opt1) (opt2 arg) bar

        A line without ': ' yields a tag with an empty value.
        """
        tag_name, _, value = raw.partition(': ')
        options, rest = self._parse_options(value)
        tag = DocTag(tag_name)
        tag.value = value
        tag.options = options
        tag.comment = rest
        return tag

    def _parse_options(self, value):
        """Extract the parenthesised annotations from value.

          (foo)
          (bar opt1 opt2...)

        Returns (options, rest): options maps each annotation name to an
        Option built from its arguments, or to None when it has none;
        rest is the stripped text starting two characters after the last
        closing parenthesis, or None if no annotation was found.
        """
        options = {}
        start = -1       # index just past the open '(', -1 when outside
        resume = None    # where the trailing text begins
        for index, char in enumerate(value):
            if start == -1:
                if char == '(':
                    start = index + 1
            elif char == ')':
                name, sep, arguments = value[start:index].partition(' ')
                if sep:
                    options[name] = Option(arguments)
                else:
                    options[name] = None
                resume = index + 2
                start = -1

        if resume is None:
            return options, None
        return options, value[resume:].strip()
232
233
class AnnotationApplier(object):
    """Applies parsed gtk-doc blocks to the nodes of a namespace.

    blocks maps a block name (symbol, 'Type:property', 'Type::signal',
    ...) to its DocBlock.  For every supported node the matching block
    is looked up and used to fill in documentation, version/deprecation
    information, parameter direction, transfer and container types.
    """

    def __init__(self, blocks, transformer):
        self._blocks = blocks
        self._transformer = transformer

    def _get_tag(self, block, tag_name):
        """Return block's tag named tag_name; None if block is None."""
        if block is None:
            return None

        return block.get(tag_name)

    def parse(self, namespace):
        """Apply annotations to every node in namespace.nodes."""
        for node in namespace.nodes:
            self._parse_node(node)

    # Boring parsing boilerplate.

    def _parse_node(self, node):
        """Dispatch node to the handler for its type.

        The order of the checks is significant: the first matching
        isinstance() wins.  Unknown node types are ignored.
        """
        if isinstance(node, Function):
            self._parse_function(node)
        elif isinstance(node, Enum):
            self._parse_enum(node)
        elif isinstance(node, Bitfield):
            self._parse_bitfield(node)
        elif isinstance(node, Class):
            self._parse_class(node)
        elif isinstance(node, Interface):
            self._parse_interface(node)
        elif isinstance(node, Callback):
            self._parse_callback(node)
        elif isinstance(node, Record):
            self._parse_record(node)
        elif isinstance(node, Union):
            self._parse_union(node)
        elif isinstance(node, GLibBoxed):
            self._parse_boxed(node)

    def _parse_class(self, class_):
        """Annotate a class and its members; block is keyed by type_name."""
        block = self._blocks.get(class_.type_name)
        self._parse_version(class_, block)
        self._parse_constructors(class_.constructors)
        self._parse_methods(class_.methods)
        self._parse_methods(class_.static_methods)
        self._parse_properties(class_, class_.properties)
        self._parse_signals(class_, class_.signals)
        self._parse_fields(class_, class_.fields)
        if block:
            class_.doc = block.comment

    def _parse_interface(self, interface):
        """Annotate an interface and its members; keyed by type_name."""
        block = self._blocks.get(interface.type_name)
        self._parse_version(interface, block)
        self._parse_methods(interface.methods)
        self._parse_properties(interface, interface.properties)
        self._parse_signals(interface, interface.signals)
        self._parse_fields(interface, interface.fields)
        if block:
            interface.doc = block.comment

    def _parse_record(self, record):
        """Annotate a record; methods are handled only for GLibBoxed."""
        block = self._blocks.get(record.symbol)
        self._parse_version(record, block)
        self._parse_constructors(record.constructors)
        self._parse_fields(record, record.fields)
        if isinstance(record, GLibBoxed):
            self._parse_methods(record.methods)
        if block:
            record.doc = block.comment

    def _parse_boxed(self, boxed):
        """Annotate a boxed type; block is keyed by boxed.name."""
        block = self._blocks.get(boxed.name)
        self._parse_version(boxed, block)
        self._parse_constructors(boxed.constructors)
        self._parse_methods(boxed.methods)
        if block:
            boxed.doc = block.comment

    def _parse_union(self, union):
        """Annotate a union; methods are handled only for GLibBoxed.

        NOTE(review): unlike the record/boxed handlers no 'Since' version
        is applied here -- confirm whether that is intentional.
        """
        block = self._blocks.get(union.name)
        self._parse_fields(union, union.fields)
        self._parse_constructors(union.constructors)
        if isinstance(union, GLibBoxed):
            self._parse_methods(union.methods)
        if block:
            union.doc = block.comment

    def _parse_enum(self, enum):
        """Apply version and documentation to an enum (keyed by symbol)."""
        block = self._blocks.get(enum.symbol)
        self._parse_version(enum, block)
        if block:
            enum.doc = block.comment

    def _parse_bitfield(self, bitfield):
        """Apply version and documentation to a bitfield (keyed by symbol)."""
        block = self._blocks.get(bitfield.symbol)
        self._parse_version(bitfield, block)
        if block:
            bitfield.doc = block.comment

    def _parse_constructors(self, constructors):
        for ctor in constructors:
            self._parse_function(ctor)

    def _parse_fields(self, parent, fields):
        for field in fields:
            self._parse_field(parent, field)

    def _parse_properties(self, parent, properties):
        for prop in properties:
            self._parse_property(parent, prop)

    def _parse_methods(self, methods):
        for method in methods:
            self._parse_function(method)

    def _parse_signals(self, parent, signals):
        for signal in signals:
            self._parse_signal(parent, signal)

    def _parse_property(self, parent, prop):
        """Annotate a property; block is keyed 'TypeName:property-name'."""
        block = self._blocks.get('%s:%s' % (parent.type_name, prop.name))
        self._parse_version(prop, block)
        self._parse_deprecated(prop, block)
        if block:
            prop.doc = block.comment

    def _parse_callback(self, callback):
        """Annotate a callback; block is keyed by the callback's ctype."""
        block = self._blocks.get(callback.ctype)
        self._parse_version(callback, block)
        self._parse_params(callback, callback.parameters, block)
        self._parse_return(callback, callback.retval, block)
        if block:
            callback.doc = block.comment

    def _parse_function(self, func):
        """Annotate a function/method/constructor; keyed by its symbol."""
        block = self._blocks.get(func.symbol)
        self._parse_version(func, block)
        self._parse_deprecated(func, block)
        self._parse_params(func, func.parameters, block)
        self._parse_return(func, func.retval, block)
        if block:
            func.doc = block.comment

    def _parse_signal(self, parent, signal):
        """Annotate a signal; block is keyed 'TypeName::signal-name'.

        Signal parameters are matched to tags by position rather than by
        name: parameter i takes the name (and optional 'type' option) of
        tag i+1, the first tag being skipped.
        """
        block = self._blocks.get('%s::%s' % (parent.type_name, signal.name))
        self._parse_version(signal, block)
        self._parse_deprecated(signal, block)
        # We're only attempting to name the signal parameters if
        # the number of tags in the block is greater than the number
        # of signal parameters, so that names[i+1] always exists.
        if block and len(block.tags) > len(signal.parameters):
            # list() so the pairs can be indexed even when items()
            # returns a view instead of a list.
            names = list(block.tags.items())
        else:
            names = []
        for i, param in enumerate(signal.parameters):
            if names:
                name, tag = names[i+1]
                param.name = name
                options = getattr(tag, 'options', {})
                param_type = options.get(OPT_TYPE)
                if param_type:
                    param.type.name = param_type.one()
            else:
                tag = None
            self._parse_param(signal, param, tag)
        self._parse_return(signal, signal.retval, block)
        if block:
            signal.doc = block.comment

    def _parse_field(self, parent, field):
        """Only callback fields carry annotations; others are skipped."""
        if isinstance(field, Callback):
            self._parse_callback(field)

    def _parse_params(self, parent, params, block):
        """Annotate each parameter from the tag bearing its name."""
        for param in params:
            tag = self._get_tag(block, param.name)
            self._parse_param(parent, param, tag)

    def _parse_return(self, parent, return_, block):
        """Annotate the return value from the block's 'Returns' tag."""
        tag = self._get_tag(block, TAG_RETURNS)
        self._parse_param_ret_common(parent, return_, tag)

    def _parse_param(self, parent, param, tag):
        """Annotate one parameter.

        For Function parents a 'scope' option sets param.scope and a
        transfer of none before the common handling runs.
        """
        if isinstance(parent, Function):
            options = getattr(tag, 'options', {})
            scope = options.get(OPT_SCOPE)
            if scope:
                param.scope = scope.one()
                param.transfer = PARAM_TRANSFER_NONE
        self._parse_param_ret_common(parent, param, tag)

    def _parse_param_ret_common(self, parent, node, tag):
        """Apply direction, container type, transfer, allow-none and doc.

        tag may be None, in which case everything is guessed.
        """
        options = getattr(tag, 'options', {})
        node.direction = self._extract_direction(node, options)
        container_type = self._extract_container_type(
            parent, node, options)
        if container_type is not None:
            node.type = container_type
        # The direction guess depends on node.type, so it has to come
        # after the container type has been applied.
        if node.direction is None:
            node.direction = self._guess_direction(node)
        node.transfer = self._extract_transfer(parent, node, options)
        if OPT_ALLOW_NONE in options:
            node.allow_none = True

        assert node.transfer is not None
        if tag is not None and tag.comment is not None:
            node.doc = tag.comment

    def _extract_direction(self, node, options):
        """Return the annotated direction, else node's current one."""
        if (OPT_INOUT in options or
            OPT_INOUT_ALT in options):
            direction = PARAM_DIRECTION_INOUT
        elif OPT_OUT in options:
            direction = PARAM_DIRECTION_OUT
        elif OPT_IN in options:
            direction = PARAM_DIRECTION_IN
        else:
            direction = node.direction
        return direction

    def _guess_array(self, node):
        """True if node's C type is a pointer and its canonical type is
        one of default_array_types."""
        ctype = node.type.ctype
        if ctype is None:
            return False
        if not ctype.endswith('*'):
            return False
        if node.type.canonical in default_array_types:
            return True
        return False

    def _extract_container_type(self, parent, node, options):
        """Return an Array/List/Map type for node, or None if it is not
        a container."""
        has_element_type = OPT_ELEMENT_TYPE in options
        has_array = OPT_ARRAY in options

        # FIXME: This is a hack :-(
        # Non-field nodes without an element-type whose direction is
        # unset or 'in' are treated as arrays when the type looks like one.
        if (not isinstance(node, Field) and
            (not has_element_type and
             (node.direction is None
              or node.direction == PARAM_DIRECTION_IN))):
            if self._guess_array(node):
                has_array = True

        if has_array:
            container_type = self._parse_array(parent, node, options)
        elif has_element_type:
            container_type = self._parse_element_type(parent, node, options)
        else:
            container_type = None

        return container_type

    def _parse_array(self, parent, node, options):
        """Build an Array type from the 'array' and 'element-type' options."""
        array_opt = options.get(OPT_ARRAY)
        if array_opt:
            array_values = array_opt.all()
        else:
            array_values = {}

        element_type = options.get(OPT_ELEMENT_TYPE)
        if element_type is not None:
            element_type_name = element_type.one()
        else:
            element_type_name = node.type.name

        container_type = Array(node.type.ctype,
                               element_type_name)
        if OPT_ARRAY_ZERO_TERMINATED in array_values:
            container_type.zeroterminated = array_values.get(
                OPT_ARRAY_ZERO_TERMINATED) == '1'
        length = array_values.get(OPT_ARRAY_LENGTH)
        if length is not None:
            param_index = parent.get_parameter_index(length)
            container_type.length_param_index = param_index
            # For in parameters we're incorrectly deferring
            # char/unsigned char to utf8 when a length annotation
            # is specified.
            if (isinstance(node, Parameter) and
                node.type.name == 'utf8' and
                self._guess_direction(node) == PARAM_DIRECTION_IN):
                # FIXME: unsigned char/guchar should be uint8
                container_type.element_type = 'int8'
        container_type.size = array_values.get(OPT_ARRAY_FIXED_SIZE)
        return container_type

    def _parse_element_type(self, parent, node, options):
        """Build a List or Map type from the 'element-type' option.

        Returns None (after printing a FIXME notice) when node's type is
        not one of the handled GLib containers.
        """
        element_type_opt = options.get(OPT_ELEMENT_TYPE)
        element_type = element_type_opt.flat()
        if node.type.name in ['GLib.List', 'GLib.SList']:
            assert len(element_type) == 1
            etype = Type(element_type[0])
            container_type = List(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(etype))
        elif node.type.name in ['GLib.HashTable']:
            assert len(element_type) == 2
            key_type = Type(element_type[0])
            value_type = Type(element_type[1])
            container_type = Map(
                node.type.name,
                node.type.ctype,
                self._transformer.resolve_param_type(key_type),
                self._transformer.resolve_param_type(value_type))
        else:
            # Single pre-formatted argument: prints the same text whether
            # print is a statement or a function.
            print('FIXME: unhandled element-type container: %s' % (node, ))
            # No container could be built; the caller treats None as
            # "leave the node's type alone".
            container_type = None
        return container_type

    def _extract_transfer(self, parent, node, options):
        """Return the annotated transfer mode, or a guess if there is none.

        Raises InvalidAnnotationError if the annotated value is not one
        of none, container, full.
        """
        transfer_opt = options.get(OPT_TRANSFER)
        if transfer_opt is None:
            transfer = self._guess_transfer(node, options)
        else:
            transfer = transfer_opt.one()
            if transfer is None:
                transfer = PARAM_TRANSFER_FULL
            if transfer not in [PARAM_TRANSFER_NONE,
                                PARAM_TRANSFER_CONTAINER,
                                PARAM_TRANSFER_FULL]:
                raise InvalidAnnotationError(
                    "transfer for %s of %r is invalid (%r), must be one of "
                    "none, container, full." % (node, parent.name, transfer))
        return transfer

    def _parse_version(self, node, block):
        """Set node.version from the block's 'Since' tag, if present."""
        since_tag = self._get_tag(block, TAG_SINCE)
        if since_tag is None:
            return
        node.version = since_tag.value

    def _parse_deprecated(self, node, block):
        """Set node.deprecated (and deprecated_version) from 'Deprecated'.

        The tag value is either 'description' or 'version: description'.
        """
        deprecated_tag = self._get_tag(block, TAG_DEPRECATED)
        if deprecated_tag is None:
            return
        value = deprecated_tag.value
        if ': ' in value:
            # Split on the first separator only: the description itself
            # may contain ': '.
            version, desc = value.split(': ', 1)
        else:
            desc = value
            version = None
        node.deprecated = desc
        if version is not None:
            node.deprecated_version = version

    def _guess_direction(self, node):
        """Return node's direction, guessing one when it is unset.

        A pointer to a basic GIR type is taken to be an out parameter;
        everything else is in.
        """
        if node.direction:
            return node.direction
        is_pointer = False
        if node.type.ctype:
            is_pointer = '*' in node.type.ctype

        if is_pointer and node.type.name in BASIC_GIR_TYPES:
            return PARAM_DIRECTION_OUT

        return PARAM_DIRECTION_IN

    def _guess_transfer(self, node, options):
        """Return node's transfer mode, guessing one when it is unset.

        Raises AssertionError for a node that is neither a Parameter, a
        Return nor a Field and is not covered by the type-based rules.
        """
        if node.transfer is not None:
            return node.transfer

        if isinstance(node.type, Array):
            return PARAM_TRANSFER_NONE
        # Anything with 'const' gets none
        if node.type.is_const:
            return PARAM_TRANSFER_NONE

        elif node.type.name in [TYPE_NONE, TYPE_ANY]:
            return PARAM_TRANSFER_NONE
        elif isinstance(node.type, Varargs):
            return PARAM_TRANSFER_NONE
        elif isinstance(node, Parameter):
            if node.direction in [PARAM_DIRECTION_INOUT,
                                  PARAM_DIRECTION_OUT]:
                return PARAM_TRANSFER_FULL
            # This one is a hack for compatibility; the transfer
            # for string parameters really has no defined meaning.
            elif node.type.canonical == 'utf8':
                return PARAM_TRANSFER_FULL
            else:
                return PARAM_TRANSFER_NONE
        elif isinstance(node, Return):
            if (node.type.canonical in BASIC_GIR_TYPES or
                (node.type.canonical in [TYPE_NONE, TYPE_ANY] and
                 node.type.is_const)):
                return PARAM_TRANSFER_NONE
            else:
                return PARAM_TRANSFER_FULL
        elif isinstance(node, Field):
            return PARAM_TRANSFER_NONE
        else:
            raise AssertionError(node)