Package chianti :: Package fortranformat :: Module _parser
[hide private]
[frames] | [no frames]

Source Code for Module chianti.fortranformat._parser

  1  import sys 
  2  IS_PYTHON3 = sys.version_info[0] >= 3 
  3  
 
  4  if IS_PYTHON3: 
  5      exec('from ._lexer import Token') 
  6      exec('from ._edit_descriptors import *') 
  7      exec('from ._exceptions import *') 
  8      exec('from . import config') 
  9  else: 
 10      exec('from _lexer import Token') 
 11      exec('from _edit_descriptors import *') 
 12      exec('from _exceptions import *') 
 13      exec('import config') 
 14  
 
def parser(tokens, version=None):
    """Parse a lexed FORTRAN format specification into edit descriptors.

    :param tokens: sequence of Token objects produced by the lexer.
    :param version: optional Fortran version selector, forwarded to
        _parse_tokens (currently unused downstream).
    :returns: tuple (eds, reversion_eds) where eds is the full list of edit
        descriptors and reversion_eds is the subset used for reversion of
        format control (F95 standard 12.2.2).
    """
    # Parse the full edit descriptors.
    # Bug fix: the original passed the literal version=None to both calls,
    # silently discarding the caller's version argument.
    eds = _parse_tokens(tokens, reversion=False, version=version)
    # Parse the edit descriptors used for the reversion of format control
    # (F95 format 12.2.2)
    reversion_eds = _parse_tokens(tokens, reversion=True, version=version)
    return eds, reversion_eds
22
def _parse_tokens(tokens, reversion=False, version=None):
    """Convert a token stream into a list of edit descriptor objects.

    :param tokens: sequence of Token objects from the lexer.
    :param reversion: if True, parse only the tokens used for reversion of
        format control (F95 standard 12.2.2).
    :param version: reserved for Fortran-version-specific behaviour;
        currently unused.
    :returns: list of edit descriptor instances.
    :raises InvalidFormat: if the token stream cannot be interpreted.
    """
    # Dispatch table: token type of the leading edit descriptor -> reader.
    # Replaces the original 11-branch elif chain.
    readers = {
        'QUOTED_STRING': _read_quoted_string,
        'ED1': _read_ed1,
        'ED2': _read_ed2,
        'ED3': _read_ed3,
        'ED4': _read_ed4,
        'ED5': _read_ed5,
        'ED6': _read_ed6,
        'ED7': _read_ed7,
        'ED8': _read_ed8,
        'ED9': _read_ed9,
        'ED10': _read_ed10,
    }
    # Remove outer parens if there are any
    tokens = _remove_outer_parens(tokens)
    # Get only the reversion tokens
    if reversion:
        tokens = _get_reversion_tokens(tokens)
    # First expand the parentheses
    tokens = _expand_parens(tokens)
    # Split on commas
    token_sets = _split_on_commas(tokens)
    # Split on ED9 (i.e. :)
    token_sets = _split_on_ed9(token_sets)
    # Split the ED10 (i.e. /)
    token_sets = _split_on_ed10(token_sets)
    # Split the ED8 (i.e. P edit descriptors)
    token_sets = _split_on_ed8(token_sets)
    # Process each set of edit descriptors
    eds = []
    for token_set in token_sets:
        # Assume the first recognised edit descriptor token is the one to
        # process
        ed_type = None
        ed_value = None
        for token in token_set:
            if token.type in readers:
                ed_type = token.type
                ed_value = token.value
                break
        # TODO: something more responsible here ...
        if ed_type is None:
            continue
        # If repeatable and the first token is a repeat count, cache it and
        # strip it off before handing the set to the reader
        repeat = None
        if ed_value in REPEATABLE_EDS and token_set[0].type in ('NZUINT', 'UINT'):
            repeat = token_set[0].value
            token_set = token_set[1:]
        # ed_type is always a readers key here; the guard is kept for safety.
        # Bug fix: the original fallback message used '$s', which is not a
        # valid %-placeholder, so the raise itself crashed with TypeError
        # instead of signalling InvalidFormat.
        reader = readers.get(ed_type)
        if reader is None:
            raise InvalidFormat(
                'Could not identify edit descriptor in sequence %s' % str(token_set))
        ed = reader(token_set)
        # Apply any cached repeat count
        if repeat is not None:
            ed.repeat = repeat
        eds.append(ed)
    return eds
89 90 91 # Functions that group the tokens into sets 92
93 -def _expand_parens(tokens):
94 new_tokens = [] 95 get_tokens = iter(tokens) 96 for t0 in get_tokens: 97 if t0.type != 'LEFT_PARENS': 98 new_tokens.append(t0) 99 else: 100 # Read in all tokens in subformat and recurse back to self 101 paren_tokens = [] 102 nesting = 1 103 while nesting > 0: 104 try: 105 if IS_PYTHON3: 106 t1 = next(get_tokens) 107 else: 108 t1 = get_tokens.next() 109 except StopIteration: 110 raise InvalidFormat('Open parens in format') 111 if t1.type == 'LEFT_PARENS': 112 nesting = nesting + 1 113 elif t1.type == 'RIGHT_PARENS': 114 nesting = nesting - 1 115 paren_tokens.append(t1) 116 # Remove last right paren 117 paren_tokens = paren_tokens[:-1] 118 # If repeated, then repeat the tokens accordingly 119 if (len(new_tokens) > 0) and (new_tokens[-1].type in ['NZUINT', 'UINT']): 120 repeat = new_tokens[-1].value 121 # Remove the repeat NZUINT, UINT 122 new_tokens = new_tokens[:-1] 123 new_tokens.extend(repeat * (_expand_parens(paren_tokens) + [Token('COMMA', None)])) 124 else: 125 new_tokens.extend(_expand_parens(paren_tokens)) 126 return new_tokens
127 128
129 -def _split_on_commas(tokens):
130 token_sets = [] 131 set_buff = [] 132 for t0 in tokens: 133 if t0.type == 'COMMA': 134 token_sets.append(set_buff) 135 set_buff = [] 136 else: 137 set_buff.append(t0) 138 token_sets.append(set_buff) 139 return token_sets
140 141
142 -def _split_on_ed9(token_sets):
143 '''Splits on :''' 144 new_token_sets = [] 145 for token_set in token_sets: 146 if 'ED9' not in [t.type for t in token_set]: 147 new_token_sets.append(token_set) 148 else: 149 buff = [] 150 for token in token_set: 151 if token.type == 'ED9': 152 if len(buff) > 0: 153 new_token_sets.append(buff) 154 buff = [] 155 new_token_sets.append([token]) 156 else: 157 buff.append(token) 158 if len(buff) > 0: 159 new_token_sets.append([token]) 160 return new_token_sets
161 162
163 -def _split_on_ed10(token_sets):
164 '''Splits on /''' 165 new_token_sets = [] 166 for token_set in token_sets: 167 # May have a repeat on the slash if preceded by a comma 168 if (len(token_set) > 2) and ((token_set[0].type in ['UINT', 'NZUINT']) and (token_set[1].type == 'ED10')): 169 new_token_sets.append(token_set[:2]) 170 token_set = token_set[2:] 171 buff = [] 172 for token in token_set: 173 if token.type == 'ED10': 174 if len(buff) > 0: 175 new_token_sets.append(buff) 176 buff = [] 177 new_token_sets.append([token]) 178 else: 179 buff.append(token) 180 if len(buff) > 0: 181 new_token_sets.append(buff) 182 return new_token_sets
183 184
185 -def _split_on_ed8(token_sets):
186 '''Splits on ED8 (i.e. P edit descriptors)''' 187 new_token_sets = [] 188 for token_set in token_sets: 189 # Append to new list if no ED8 190 if 'ED8' not in [t.type for t in token_set]: 191 new_token_sets.append(token_set) 192 # Otherwise split string on ED9 193 elif (token_set[0].type in ['INT', 'UINT', 'NZUINT']) and (token_set[1].type == 'ED8'): 194 new_token_sets.append(token_set[:2]) 195 new_token_sets.append(token_set[2:]) 196 else: 197 raise InvalidFormat('P edit descriptor in invalid position') 198 return new_token_sets
199 200 # Function to extract only the tokens for the reversion of control 201
def _get_reversion_tokens(tokens):
    """Return the tokens used for reversion of format control.

    Per the F95 standard (12.2.2), reversion re-uses the final
    parenthesised group of the format, together with any repeat count
    immediately preceding it.  The token list is scanned backwards to
    locate that group.

    :param tokens: flat list of Token objects (outer parens still present).
    :returns: the tokens of the last parenthesised group, in source order.
    :raises InvalidFormat: if a left paren is seen before any right paren.
    """
    reversion_tokens = []
    # Easier to work backwards
    # nesting stays None until the first right paren is seen, then tracks
    # the current paren depth; reaching 0 means the group is complete.
    nesting = None
    for token in tokens[::-1]:
        # End of loop condition: the whole group has been collected
        if (nesting is not None) and (nesting < 1):
            # Parens may have a repeat number just before the group
            if token.type in ['UINT', 'NZUINT']:
                reversion_tokens.append(token)
            break
        # Read till the first right parens
        if token.type == 'RIGHT_PARENS':
            if nesting is None:
                nesting = 1
            else:
                nesting = nesting + 1
        elif token.type == 'LEFT_PARENS':
            if nesting is None:
                # A left paren before any right paren while scanning
                # backwards means the parens cannot balance
                raise InvalidFormat('Unbalanced parens in format')
            else:
                nesting = nesting - 1
        reversion_tokens.append(token)
    # Tokens were collected back-to-front, so restore source order
    reversion_tokens.reverse()
    return reversion_tokens
228 229 # The functions that read particular edit descriptors sequences 230
def _read_quoted_string(tokens):
    """Build a QuotedString edit descriptor from a lone QUOTED_STRING token.

    :raises InvalidFormat: if the token has any neighbours.
    """
    if [t.type for t in tokens] != ["QUOTED_STRING"]:
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = QuotedString()
    ed.char_string = tokens[0].value
    return ed
239
def _read_ed1(tokens):
    """Read a bare 'X'-form descriptor (single token, no arguments).

    :raises InvalidFormat: if the token has any neighbours.
    """
    pattern = ",".join(t.type for t in tokens)
    if pattern != "ED1":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    return get_edit_descriptor_obj(tokens[0].value)
247
def _read_ed2(tokens):
    """Read an 'nX'-form descriptor (non-zero repeat count, then letter).

    :raises InvalidFormat: if the tokens are not exactly NZUINT, ED2.
    """
    pattern = ",".join(t.type for t in tokens)
    if pattern != "NZUINT,ED2":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    count_token, letter_token = tokens
    ed = get_edit_descriptor_obj(letter_token.value)
    ed.num_chars = count_token.value
    return ed
256
def _read_ed3(tokens):
    """Read an 'Xn'-form descriptor (letter, then non-zero count).

    :raises InvalidFormat: if the tokens are not exactly ED3, NZUINT.
    """
    pattern = ",".join(t.type for t in tokens)
    if pattern != "ED3,NZUINT":
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = get_edit_descriptor_obj(tokens[0].value)
    # The L edit descriptor stores a 'width'; others store 'num_chars'
    target_attr = 'width' if hasattr(ed, 'width') else 'num_chars'
    setattr(ed, target_attr, tokens[1].value)
    return ed
269
def _read_ed4(tokens):
    """Read an 'X' or 'Xn'-form descriptor (letter with optional width).

    A zero width (UINT) is accepted only when config.ALLOW_ZERO_WIDTH_EDS
    is set.

    :raises InvalidFormat: if the token pattern is not one of the above.
    """
    pattern = ",".join(t.type for t in tokens)
    allowed = ["ED4", "ED4,NZUINT"]
    if config.ALLOW_ZERO_WIDTH_EDS:
        allowed.append("ED4,UINT")
    if pattern not in allowed:
        raise InvalidFormat('Token %s has invalid neighbouring token' % tokens[0])
    ed = get_edit_descriptor_obj(tokens[0].value)
    if len(tokens) > 1:
        ed.width = tokens[1].value
    return ed
281
def _read_ed5(tokens):
    """Read an 'Xn.m'-form descriptor (letter, width, dot, decimal places).

    Zero widths (UINT) are accepted only when config.ALLOW_ZERO_WIDTH_EDS
    is set.

    :raises InvalidFormat: if the token pattern does not match.
    """
    pattern = ",".join(t.type for t in tokens)
    widths = ['NZUINT', 'UINT'] if config.ALLOW_ZERO_WIDTH_EDS else ['NZUINT']
    # Generate the accepted patterns instead of listing them literally
    valid = ['ED5,%s,DOT,%s' % (w, d)
             for w in widths for d in ('UINT', 'NZUINT')]
    if pattern not in valid:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    ed = get_edit_descriptor_obj(tokens[0].value)
    ed.width = tokens[1].value
    ed.decimal_places = tokens[3].value
    return ed
294
def _read_ed6(tokens):
    """Read an 'Xn' or 'Xn.m'-form descriptor (width with optional minimum
    digits).

    Zero widths (UINT) are accepted only when config.ALLOW_ZERO_WIDTH_EDS
    is set.

    :raises InvalidFormat: if the token pattern does not match.
    """
    pattern = ",".join(t.type for t in tokens)
    widths = ['NZUINT', 'UINT'] if config.ALLOW_ZERO_WIDTH_EDS else ['NZUINT']
    # Generate the accepted patterns instead of listing them literally
    short_forms = ['ED6,%s' % w for w in widths]
    long_forms = ['ED6,%s,DOT,%s' % (w, d)
                  for w in widths for d in ('UINT', 'NZUINT')]
    if pattern in short_forms:
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.min_digits = None
    elif pattern in long_forms:
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.min_digits = tokens[3].value
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed
312
def _read_ed7(tokens):
    """Read an 'Xn.m' or 'Xn.mEe'-form descriptor (width, decimal places,
    optional exponent digits).

    Zero widths (UINT) are accepted only when config.ALLOW_ZERO_WIDTH_EDS
    is set.

    :raises InvalidFormat: if the token pattern does not match.
    """
    pattern = ",".join(t.type for t in tokens)
    widths = ['NZUINT', 'UINT'] if config.ALLOW_ZERO_WIDTH_EDS else ['NZUINT']
    decimals = ('UINT', 'NZUINT')
    exponents = ('NZUINT', 'UINT', 'INT')
    # Generate the accepted patterns instead of listing all combinations
    # literally (the original enumerated twelve strings)
    short_forms = ['ED7,%s,DOT,%s' % (w, d)
                   for w in widths for d in decimals]
    long_forms = ['ED7,%s,DOT,%s,ED7,%s' % (w, d, e)
                  for w in widths for d in decimals for e in exponents]
    if pattern in short_forms:
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.decimal_places = tokens[3].value
        ed.exponent = None
    elif pattern in long_forms:
        ed = get_edit_descriptor_obj(tokens[0].value)
        ed.width = tokens[1].value
        ed.decimal_places = tokens[3].value
        ed.exponent = tokens[5].value
    else:
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return ed
343
def _read_ed8(tokens):
    """Read a 'kX'-form scale-factor descriptor, where k is a signed
    integer; the comma may be omitted when followed by a Type 5 or 7
    edit descriptor.

    :raises InvalidFormat: if the tokens are not an integer followed by ED8.
    """
    pattern = ",".join(t.type for t in tokens)
    if pattern not in ("NZUINT,ED8", "UINT,ED8", "INT,ED8"):
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    scale_token, letter_token = tokens
    ed = get_edit_descriptor_obj(letter_token.value)
    ed.scale = scale_token.value
    return ed
354
def _read_ed9(tokens):
    """Read a ':' edit descriptor; it must stand alone (commas either
    side may be omitted in the source format).

    :raises InvalidFormat: if the token has any neighbours.
    """
    pattern = ",".join(t.type for t in tokens)
    if pattern != "ED9":
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return get_edit_descriptor_obj(tokens[0].value)
363
def _read_ed10(tokens):
    """Read a '/' edit descriptor; it must stand alone here (the following
    comma, and the leading comma when there is no repeat, may be omitted
    in the source format).

    :raises InvalidFormat: if the token has any neighbours.
    """
    pattern = ",".join(t.type for t in tokens)
    if pattern != "ED10":
        raise InvalidFormat('%s has invalid neighbouring token' % tokens[0])
    return get_edit_descriptor_obj(tokens[0].value)
372 373 374 # Functions that pre-process the token list 375
376 -def _remove_outer_parens(tokens):
377 # Finally, remove outer parens is there are any 378 if (tokens[0].type == 'LEFT_PARENS') and (tokens[-1].type == 'RIGHT_PARENS'): 379 tokens = tokens[1:-1] 380 return tokens
381 382 383 # Run some tests if run as a script 384 385 #if __name__ == '__main__': 386 # import doctest 387 # import os 388 # from _lexer import lexer 389 # globs = {'lexer' : lexer, 'parser' : parser} 390 # # Need to normalize whitespace since pasting into VIM converts tabs to 391 # # spaces 392 # doctest.testfile(os.path.join('tests', 'parser_test.txt'), \ 393 # globs=globs, optionflags=doctest.NORMALIZE_WHITESPACE) 394