import sys
IS_PYTHON3 = sys.version_info[0] >= 3

if IS_PYTHON3:
    exec('from ._lexer import Token')
    exec('from ._edit_descriptors import *')
    exec('from ._exceptions import *')
    exec('from . import config')
else:
    exec('from _lexer import Token')
    exec('from _edit_descriptors import *')
    exec('from _exceptions import *')
    exec('import config')

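
# Example usage (a sketch; assumes the package's companion _lexer module
# provides a `lexer` function that turns a format string into the Token
# list this parser consumes -- that name is an assumption, not confirmed
# by this module):
#
#   tokens = lexer('(2I5,F8.2)')
#   eds, reversion_eds = parser(tokens)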
def parser(tokens, version=None):
    # Parse the tokens twice: once for the full set of edit descriptors,
    # and once for the subset that takes over when format control reverts
    eds = _parse_tokens(tokens, reversion=False)
    reversion_eds = _parse_tokens(tokens, reversion=True)
    return eds, reversion_eds


def _parse_tokens(tokens, reversion=False):
    # Remove the outer parens if there are any
    tokens = _remove_outer_parens(tokens)
    # When parsing the reversion format, keep only the tokens of the
    # rightmost paren group, plus any repeat count that precedes it
    if reversion:
        tokens = _get_reversion_tokens(tokens)
    # Expand repeated paren groups into a flat stream of tokens
    tokens = _expand_parens(tokens)
    # Split the stream into one token set per edit descriptor
    token_sets = _split_on_commas(tokens)
    token_sets = _split_on_ed9(token_sets)
    token_sets = _split_on_ed10(token_sets)
    token_sets = _split_on_ed8(token_sets)
    # Parse each token set into an edit descriptor object
    eds = []
    for token_set in token_sets:
        # Find the token that identifies the edit descriptor
        ed_type = None
        ed_value = None
        for token in token_set:
            if token.type in ['ED1', 'ED2', 'ED3', 'ED4', 'ED5', 'ED6',
                              'ED7', 'ED8', 'ED9', 'ED10', 'QUOTED_STRING']:
                ed_type = token.type
                ed_value = token.value
                break
        # Empty token sets (e.g. from a trailing comma) are skipped
        if ed_type is None:
            continue
        # Strip off a leading repeat count if the descriptor permits one
        repeat = None
        if (ed_value in REPEATABLE_EDS) and (token_set[0].type in ['NZUINT', 'UINT']):
            repeat = token_set[0].value
            token_set = token_set[1:]
        # Dispatch to the reader for this descriptor type
        if ed_type == 'QUOTED_STRING':
            ed = _read_quoted_string(token_set)
        elif ed_type == 'ED1':
            ed = _read_ed1(token_set)
        elif ed_type == 'ED2':
            ed = _read_ed2(token_set)
        elif ed_type == 'ED3':
            ed = _read_ed3(token_set)
        elif ed_type == 'ED4':
            ed = _read_ed4(token_set)
        elif ed_type == 'ED5':
            ed = _read_ed5(token_set)
        elif ed_type == 'ED6':
            ed = _read_ed6(token_set)
        elif ed_type == 'ED7':
            ed = _read_ed7(token_set)
        elif ed_type == 'ED8':
            ed = _read_ed8(token_set)
        elif ed_type == 'ED9':
            ed = _read_ed9(token_set)
        elif ed_type == 'ED10':
            ed = _read_ed10(token_set)
        else:
            raise InvalidFormat('Could not identify edit descriptor in sequence %s' % str(token_set))
        if repeat is not None:
            ed.repeat = repeat
        eds.append(ed)
    return eds


def _expand_parens(tokens):
    '''Recursively expands parenthesised groups, honouring any repeat
    counts, into a flat list of tokens'''
    new_tokens = []
    get_tokens = iter(tokens)
    for t0 in get_tokens:
        if t0.type != 'LEFT_PARENS':
            new_tokens.append(t0)
        else:
            # Consume tokens up to and including the matching right paren
            paren_tokens = []
            nesting = 1
            while nesting > 0:
                try:
                    if IS_PYTHON3:
                        t1 = next(get_tokens)
                    else:
                        t1 = get_tokens.next()
                except StopIteration:
                    raise InvalidFormat('Open parens in format')
                if t1.type == 'LEFT_PARENS':
                    nesting = nesting + 1
                elif t1.type == 'RIGHT_PARENS':
                    nesting = nesting - 1
                paren_tokens.append(t1)
            # Drop the closing right paren
            paren_tokens = paren_tokens[:-1]
            # An integer immediately before the group is a repeat count
            if (len(new_tokens) > 0) and (new_tokens[-1].type in ['NZUINT', 'UINT']):
                repeat = new_tokens[-1].value
                # Remove the repeat count and write the expanded group
                # out that many times, COMMA-separated
                new_tokens = new_tokens[:-1]
                new_tokens.extend(repeat * (_expand_parens(paren_tokens) + [Token('COMMA', None)]))
            else:
                new_tokens.extend(_expand_parens(paren_tokens))
    return new_tokens
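
# For example (a sketch; Token values are illustrative), the stream lexed
# from '2(I5)' --
#
#   NZUINT(2), LEFT_PARENS, <I5 tokens>, RIGHT_PARENS
#
# -- has its repeat count removed and the group written out twice, each
# copy followed by a COMMA token:
#
#   <I5 tokens>, COMMA, <I5 tokens>, COMMA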


def _split_on_commas(tokens):
    '''Splits the token stream into a list of token sets, one per
    comma-separated item'''
    token_sets = []
    set_buff = []
    for t0 in tokens:
        if t0.type == 'COMMA':
            token_sets.append(set_buff)
            set_buff = []
        else:
            set_buff.append(t0)
    token_sets.append(set_buff)
    return token_sets
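
# E.g. token types [ED1, COMMA, NZUINT, ED2] are grouped into
# [[ED1], [NZUINT, ED2]]; a trailing comma leaves an empty trailing set,
# which _parse_tokens skips later on.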


def _split_on_ed9(token_sets):
    '''Splits token sets on : (colon) edit descriptors'''
    new_token_sets = []
    for token_set in token_sets:
        if 'ED9' not in [t.type for t in token_set]:
            new_token_sets.append(token_set)
        else:
            buff = []
            for token in token_set:
                if token.type == 'ED9':
                    if len(buff) > 0:
                        new_token_sets.append(buff)
                        buff = []
                    new_token_sets.append([token])
                else:
                    buff.append(token)
            if len(buff) > 0:
                new_token_sets.append(buff)
    return new_token_sets
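
# Each ':' becomes a singleton set so it can be parsed as an edit
# descriptor in its own right: [<tokens>, ED9, <tokens>] becomes
# [[<tokens>], [ED9], [<tokens>]].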


def _split_on_ed10(token_sets):
    '''Splits token sets on / (slash) edit descriptors'''
    new_token_sets = []
    for token_set in token_sets:
        # A repeat count immediately before a slash stays attached to it
        if (len(token_set) > 2) and (token_set[0].type in ['UINT', 'NZUINT']) and \
                (token_set[1].type == 'ED10'):
            new_token_sets.append(token_set[:2])
            token_set = token_set[2:]
        buff = []
        for token in token_set:
            if token.type == 'ED10':
                if len(buff) > 0:
                    new_token_sets.append(buff)
                    buff = []
                new_token_sets.append([token])
            else:
                buff.append(token)
        if len(buff) > 0:
            new_token_sets.append(buff)
    return new_token_sets
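
# As above for '/', except that a repeat count may precede the slash and
# stays attached to it: [NZUINT, ED10, <tokens>] becomes
# [[NZUINT, ED10], [<tokens>]].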


def _split_on_ed8(token_sets):
    '''Splits on ED8 (i.e. P edit descriptors)'''
    new_token_sets = []
    for token_set in token_sets:
        if 'ED8' not in [t.type for t in token_set]:
            new_token_sets.append(token_set)
        # The scale factor and its P stay together; anything that follows
        # becomes a separate set
        elif (len(token_set) > 1) and (token_set[0].type in ['INT', 'UINT', 'NZUINT']) and \
                (token_set[1].type == 'ED8'):
            new_token_sets.append(token_set[:2])
            new_token_sets.append(token_set[2:])
        else:
            raise InvalidFormat('P edit descriptor in invalid position')
    return new_token_sets
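
# A scale factor binds to the descriptor that follows it without a
# separating comma; e.g. '-2PF8.2' (a sketch of assumed lexer output)
# gives [INT, ED8, <F8.2 tokens>], which splits into
# [[INT, ED8], [<F8.2 tokens>]].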


def _get_reversion_tokens(tokens):
    '''Returns the tokens that are reused when format control reverts:
    everything from the group opened by the rightmost unnested left paren
    (including its repeat count) to the end of the format. If the format
    contains no parens, all tokens are returned.'''
    reversion_tokens = []
    nesting = None
    for token in tokens[::-1]:
        # Once past the matching left paren, pick up an optional repeat
        # count and stop
        if (nesting is not None) and (nesting < 1):
            if token.type in ['UINT', 'NZUINT']:
                reversion_tokens.append(token)
            break
        if token.type == 'RIGHT_PARENS':
            if nesting is None:
                nesting = 1
            else:
                nesting = nesting + 1
        elif token.type == 'LEFT_PARENS':
            if nesting is None:
                raise InvalidFormat('Unbalanced parens in format')
            else:
                nesting = nesting - 1
        reversion_tokens.append(token)
    # Tokens were collected in reverse order
    reversion_tokens.reverse()
    return reversion_tokens
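
# E.g. for the tokens of 'I2,2(I3,I4)' (outer parens already removed), the
# right-to-left scan collects the final paren group plus its repeat count,
# returning the tokens of '2(I3,I4)' as the reversion format.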


def _read_quoted_string(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string != "QUOTED_STRING":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = QuotedString()
    ed.char_string = tokens[0].value
    return ed


def _read_ed1(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string != "ED1":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = get_edit_descriptor_obj(tokens[0].value)
    return ed


def _read_ed2(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string != "NZUINT,ED2":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = get_edit_descriptor_obj(tokens[1].value)
    ed.num_chars = tokens[0].value
    return ed


def _read_ed3(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string != "ED3,NZUINT":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = get_edit_descriptor_obj(tokens[0].value)
    # Some ED3 descriptors take a width, others a number of characters
    if hasattr(ed, 'width'):
        ed.width = tokens[1].value
    else:
        ed.num_chars = tokens[1].value
    return ed


def _read_ed4(tokens):
    type_string = ",".join([t.type for t in tokens])
    # The width is optional (and may be zero if the config allows it)
    if type_string in ["ED4", "ED4,NZUINT"] or \
            (config.ALLOW_ZERO_WIDTH_EDS and (type_string == "ED4,UINT")):
        ed = get_edit_descriptor_obj(tokens[0].value)
        if len(tokens) > 1:
            ed.width = tokens[1].value
    else:
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    return ed


def _read_ed5(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string in ["ED5,NZUINT,DOT,UINT", "ED5,NZUINT,DOT,NZUINT"] or \
            (config.ALLOW_ZERO_WIDTH_EDS and (type_string in
                ["ED5,UINT,DOT,UINT", "ED5,UINT,DOT,NZUINT"])):
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.decimal_places = tokens[3].value
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed
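
# E.g. a token set with types ED5,NZUINT,DOT,NZUINT and values
# ('F', 8, '.', 2) -- assuming, for illustration, that 'F' is lexed as an
# ED5 descriptor -- yields an object with width=8 and decimal_places=2.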


def _read_ed6(tokens):
    type_string = ",".join([t.type for t in tokens])
    # The minimum digits specifier is optional
    if type_string == "ED6,NZUINT" or \
            (config.ALLOW_ZERO_WIDTH_EDS and (type_string == "ED6,UINT")):
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.min_digits = None
    elif type_string in ["ED6,NZUINT,DOT,UINT", "ED6,NZUINT,DOT,NZUINT"] or \
            (config.ALLOW_ZERO_WIDTH_EDS and (type_string in
                ["ED6,UINT,DOT,UINT", "ED6,UINT,DOT,NZUINT"])):
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.min_digits = tokens[3].value
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed


def _read_ed7(tokens):
    type_string = ",".join([t.type for t in tokens])
    # The exponent specifier is optional
    if type_string in ["ED7,NZUINT,DOT,UINT", "ED7,NZUINT,DOT,NZUINT"] or \
            (config.ALLOW_ZERO_WIDTH_EDS and (type_string in
                ["ED7,UINT,DOT,UINT", "ED7,UINT,DOT,NZUINT"])):
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.decimal_places = tokens[3].value
        ed.exponent = None
    elif type_string in ['ED7,NZUINT,DOT,NZUINT,ED7,NZUINT',
                         'ED7,NZUINT,DOT,NZUINT,ED7,UINT',
                         'ED7,NZUINT,DOT,NZUINT,ED7,INT',
                         'ED7,NZUINT,DOT,UINT,ED7,NZUINT',
                         'ED7,NZUINT,DOT,UINT,ED7,UINT',
                         'ED7,NZUINT,DOT,UINT,ED7,INT'] or \
            (config.ALLOW_ZERO_WIDTH_EDS and (type_string in
                ['ED7,UINT,DOT,NZUINT,ED7,NZUINT',
                 'ED7,UINT,DOT,NZUINT,ED7,UINT',
                 'ED7,UINT,DOT,NZUINT,ED7,INT',
                 'ED7,UINT,DOT,UINT,ED7,NZUINT',
                 'ED7,UINT,DOT,UINT,ED7,UINT',
                 'ED7,UINT,DOT,UINT,ED7,INT'])):
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.decimal_places = tokens[3].value
        ed.exponent = tokens[5].value
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed


def _read_ed8(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string in ["NZUINT,ED8", "UINT,ED8", "INT,ED8"]:
        ed = get_edit_descriptor_obj(tokens[1].value)
        ed.scale = tokens[0].value
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed


def _read_ed9(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string == "ED9":
        ed = get_edit_descriptor_obj(tokens[0].value)
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed


def _read_ed10(tokens):
    type_string = ",".join([t.type for t in tokens])
    if type_string == "ED10":
        ed = get_edit_descriptor_obj(tokens[0].value)
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed


def _remove_outer_parens(tokens):
    # Strip a single pair of enclosing parens, if present
    if (tokens[0].type == 'LEFT_PARENS') and (tokens[-1].type == 'RIGHT_PARENS'):
        tokens = tokens[1:-1]
    return tokens