123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
- #!/usr/bin/python
- """Utility to generate files to benchmark"""
- # Copyright Abel Sinkovics (abel@sinkovics.hu) 2016.
- # Distributed under the Boost Software License, Version 1.0.
- # (See accompanying file LICENSE_1_0.txt or copy at
- # http://www.boost.org/LICENSE_1_0.txt)
- import argparse
- import os
- import string
- import random
- import re
- import json
- import Cheetah.Template
- import chars
def regex_to_error_msg(regex):
    """Turn a regex into the readable text used in error messages

    Grouping parentheses that follow a non-backslash character are removed
    first. After that the anchors and the whitespace/number patterns are
    replaced by plain text, and finally escaped brackets, parentheses and
    dots are unescaped.
    """
    # Drop every "(" or ")" that directly follows a non-backslash character
    message = re.sub('([^\\\\])[()]', '\\1', regex)

    # Plain (non-regex) substring replacements, applied strictly in this
    # order: the longer patterns have to be consumed before the shorter
    # ones, and unescaping has to come last.
    rewrites = (
        ('[ \t]*$', ''),
        ('^', ''),
        ('$', ''),
        ('[ \t]*', ' '),
        ('[ \t]+', ' '),
        ('[0-9]+', 'X'),
        ('\\[', '['),
        ('\\]', ']'),
        ('\\(', '('),
        ('\\)', ')'),
        ('\\.', '.'),
    )
    for old, new in rewrites:
        message = message.replace(old, new)
    return message
def mkdir_p(path):
    """mkdir -p path

    Creates path together with any missing parent directories. Nothing
    happens when the directory already exists. Any other failure (for
    example path being a regular file, or the directory not being
    creatable) is propagated as OSError instead of being silently ignored.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Only the "already there" case is harmless; re-raise everything
        # else so the caller does not carry on without the directory.
        if not os.path.isdir(path):
            raise
def in_comment(regex):
    """Wraps regex so that it matches a whole line holding it in a // comment

    Optional spaces/tabs are accepted before the //, between the // and
    the wrapped regex, and at the end of the line.
    """
    leading = '^[ \t]*//[ \t]*'
    trailing = '[ \t]*$'
    return ''.join((leading, regex, trailing))
def random_chars(number):
    """Generate random characters

    Returns a generator producing `number` C-formatted characters (see
    format_character). chars.CHARS is used as a character -> occurrence
    map (see nth_char), so a character with a larger value is picked
    proportionally more often. Characters whose C formatting would be a
    \\x escape are left out of the draw.
    """
    # items()/range() behave the same on Python 2 and 3 (iteritems and
    # xrange exist on Python 2 only); the insertion order is unchanged, so
    # a given seed keeps producing the same characters.
    char_map = {}
    for char, count in chars.CHARS.items():
        if not format_character(char).startswith('\\x'):
            char_map[char] = count
    char_num = sum(char_map.values())
    return (
        format_character(nth_char(char_map, random.randint(0, char_num - 1)))
        for _ in range(number)
    )
def random_string(length):
    """Builds a BOOST_METAPARSE_STRING("...") of `length` random characters

    The characters come from random_chars, so they are already formatted
    for use inside a C string literal.
    """
    payload = ''.join(random_chars(length))
    return 'BOOST_METAPARSE_STRING("{0}")'.format(payload)
class Mode(object):
    """Represents a generation mode

    A mode decides how strings are spelled in the generated files: with
    the BOOST_METAPARSE_STRING macro (identifier 'bmp') or as a manually
    written ::boost::metaparse::string<...> character list (identifier
    'man').
    """

    # Mode name (as accepted by the constructor) -> short identifier
    _IDENTIFIERS = {
        'BOOST_METAPARSE_STRING': 'bmp',
        'manual': 'man',
    }

    # Short identifier -> human-readable description
    _DESCRIPTIONS = {
        'bmp': 'Using BOOST_METAPARSE_STRING',
        'man': 'Generating strings manually',
    }

    def __init__(self, name):
        """Creates the mode called name

        Raises ValueError (an Exception) when name is not a known mode.
        """
        if name not in self._IDENTIFIERS:
            raise ValueError('Invalid mode: {0}'.format(name))
        self.name = name
        self.identifier = self._IDENTIFIERS[name]

    def description(self):
        """The description of the mode"""
        return self._DESCRIPTIONS.get(self.identifier)

    def convert_from(self, base):
        """Convert a BOOST_METAPARSE_STRING mode document into one with
        this mode

        In 'bmp' mode the document is returned unchanged. In 'man' mode
        every BOOST_METAPARSE_STRING("...") is rewritten into
        ::boost::metaparse::string<'.','.',...>, one character literal per
        character of the string. A backslash and the character after it
        form a single literal.
        """
        if self.identifier == 'bmp':
            return base

        prefix = 'BOOST_METAPARSE_STRING("'
        result = []
        cursor = 0  # start of the part of base not copied to result yet
        while True:
            bmp_at = base.find(prefix, cursor)
            if bmp_at == -1:
                result.append(base[cursor:])
                return ''.join(result)

            result.append(base[cursor:bmp_at])
            result.append('::boost::metaparse::string<')

            literals = []
            was_backslash = False
            pos = bmp_at + len(prefix)
            # When the closing quote is missing, the rest of the document
            # belongs to the string, so nothing is left to copy afterwards
            cursor = len(base)
            while pos < len(base):
                char = base[pos]
                pos += 1
                if was_backslash:
                    # Keep the escape sequence as one character literal
                    literals.append("'\\{0}'".format(char))
                    was_backslash = False
                elif char == '"':
                    # End of the string. The character following the
                    # closing quote (expected to be the closing
                    # parenthesis of the macro call) is dropped as well.
                    cursor = pos + 1
                    break
                elif char == '\\':
                    was_backslash = True
                elif char == "'":
                    # A bare ' is fine inside "..." but has to be escaped
                    # in a character literal
                    literals.append("'\\''")
                else:
                    literals.append("'{0}'".format(char))

            result.append(','.join(literals))
            result.append('>')
class Template(object):
    """A named benchmark template together with its raw text

    The settings of a template (range of N, generation modes and other
    properties) are read from // comment lines of its content.
    """

    def __init__(self, name, content):
        self.content = content
        self.name = name

    def instantiate(self, value_of_n):
        """Renders the template with n set to value_of_n"""
        rendered = Cheetah.Template.Template(
            self.content,
            searchList={'n': value_of_n}
        )
        # random_string is attached to the template object before str()
        # turns it into text
        rendered.random_string = random_string
        return str(rendered)

    def range(self):
        """Returns the values of N from the "n in [A..B), step S" line"""
        found = self._match(in_comment(
            'n[ \t]+in[ \t]*\\[([0-9]+)\\.\\.([0-9]+)\\),[ \t]+'
            'step[ \t]+([0-9]+)'
        ))
        first, last, step = [int(found.group(i)) for i in (1, 2, 3)]
        # This is the builtin range: names defined in the class body are
        # not in scope inside a method
        return range(first, last, step)

    def property(self, name):
        """Returns the value of the "// name: value" line of the template"""
        pattern = in_comment(name + ':[ \t]*(.*)')
        return self._get_line(pattern)

    def modes(self):
        """Returns the Mode objects listed (comma separated) as 'modes'"""
        mode_names = self.property('modes').split(',')
        return [Mode(mode_name.strip()) for mode_name in mode_names]

    def _match(self, regex):
        """Returns the match object of the first line matching regex

        Raises an Exception when no line of the content matches.
        """
        matcher = re.compile(regex).match
        attempts = (matcher(line) for line in self.content.splitlines())
        found = next((hit for hit in attempts if hit), None)
        if found is None:
            raise Exception('No "{0}" line in {1}.cpp'.format(
                regex_to_error_msg(regex),
                self.name
            ))
        return found

    def _get_line(self, regex):
        """Returns the first group of the first line matching regex"""
        found = self._match(regex)
        return found.group(1)
def load_file(path):
    """Reads the file at path in binary mode and returns all of it"""
    with open(path, 'rb') as source:
        content = source.read()
    return content
def templates_in(path):
    """Enumerates the .cpp files of path as Template objects

    The directory is listed right away, but each file is only loaded when
    the returned generator reaches it. The name of a template is its
    filename without the .cpp extension.
    """
    suffix = '.cpp'
    sources = [
        entry for entry in os.listdir(path) if entry.endswith(suffix)
    ]
    return (
        Template(entry[:-len(suffix)], load_file(os.path.join(path, entry)))
        for entry in sources
    )
def nth_char(char_map, index):
    """Picks the character at position index of a character->occurrence map

    The map is read as if every character was repeated as many times as
    its occurrence value, in the iteration order of the map. Returns None
    when index is past the end.
    """
    remaining = index
    for char, count in char_map.items():
        if remaining < count:
            return char
        # Skip all the occurrences of this character
        remaining -= count
    return None
def format_character(char):
    """Returns char the way it has to be written in C source

    Letters, digits and the listed punctuation are returned as they are.
    Quotes and the backslash get a backslash in front of them. Newline,
    carriage return and tab become \\n, \\r and \\t. Everything else becomes
    a \\x hexadecimal escape.
    """
    plain_punctuation = [
        '_', '.', ':', ';', ' ', '!', '?', '+', '-', '/', '=', '<',
        '>', '$', '(', ')', '@', '~', '`', '|', '#', '[', ']', '{',
        '}', '&', '*', '^', '%']
    is_plain = (
        char in string.ascii_letters
        or char in string.digits
        or char in plain_punctuation
    )
    if is_plain:
        return char

    if char in ['"', '\'', '\\']:
        return '\\{0}'.format(char)

    named_escapes = {'\n': '\\n', '\r': '\\r', '\t': '\\t'}
    if char in named_escapes:
        return named_escapes[char]

    return '\\x{:02x}'.format(ord(char))
def write_file(filename, content):
    """Create the file with the given content

    Prints a "Generating <filename>" line, then writes content to filename
    in binary mode (replacing an existing file). Text content is encoded
    as UTF-8 first; byte strings are written as they are.
    """
    # The call form of print works on Python 2 and 3 alike
    print('Generating {0}'.format(filename))
    # type(u'') is unicode on Python 2 and str on Python 3: the text type
    # a file opened in binary mode cannot take directly
    if isinstance(content, type(u'')):
        content = content.encode('utf-8')
    with open(filename, 'wb') as out_f:
        out_f.write(content)
def out_filename(template, n_val, mode):
    """Builds the name of the file generated from template for the given
    value of N and mode: <template name>_<N>_<mode identifier>.cpp"""
    parts = (template.name, n_val, mode.identifier)
    return '{0}_{1}_{2}.cpp'.format(*parts)
def main():
    """The main function of the script

    Reads --src, --out and --seed from the command line, seeds the random
    generator and creates the output directory. Then, for every template
    found in the source directory, it writes one .cpp file for each value
    of N and each mode, followed by a <template name>.json file describing
    the generated files.
    """
    parser = argparse.ArgumentParser(
        description='Generate files to benchmark'
    )
    # (flag, destination, default, help text) of each option
    options = (
        ('--src', 'src_dir', 'src',
         'The directory containing the templates'),
        ('--out', 'out_dir', 'generated',
         'The output directory'),
        ('--seed', 'seed', '13',
         'The random seed (to ensure consistent regeneration)'),
    )
    for flag, dest, default, help_text in options:
        parser.add_argument(flag, dest=dest, default=default, help=help_text)
    args = parser.parse_args()

    random.seed(int(args.seed))
    mkdir_p(args.out_dir)

    for template in templates_in(args.src_dir):
        modes = template.modes()
        n_range = template.range()

        # The template is rendered once for each N; the result is then
        # converted for every mode
        for n_value in n_range:
            base = template.instantiate(n_value)
            for mode in modes:
                target = os.path.join(
                    args.out_dir,
                    out_filename(template, n_value, mode)
                )
                write_file(target, mode.convert_from(base))

        summary_path = os.path.join(
            args.out_dir,
            '{0}.json'.format(template.name)
        )

        # N -> (mode identifier -> generated filename)
        files = {}
        for n_value in n_range:
            per_mode = {}
            for mode in modes:
                per_mode[mode.identifier] = \
                    out_filename(template, n_value, mode)
            files[n_value] = per_mode

        summary = {}
        summary['files'] = files
        summary['name'] = template.name
        summary['x_axis_label'] = template.property('x_axis_label')
        summary['desc'] = template.property('desc')
        mode_descriptions = {}
        for mode in modes:
            mode_descriptions[mode.identifier] = mode.description()
        summary['modes'] = mode_descriptions

        write_file(summary_path, json.dumps(summary))


if __name__ == '__main__':
    main()
|