diff --git a/MANIFEST.in b/MANIFEST.in index 8979061..05daea9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include codebasin/schema/analysis.schema include codebasin/schema/compilation-database.schema include codebasin/schema/config.schema -include codebasin/schema/coverage-0.1.0.schema +include codebasin/schema/coverage-0.3.0.schema include codebasin/schema/cbiconfig.schema diff --git a/etc/coverage.py b/bin/cbicov old mode 100644 new mode 100755 similarity index 58% rename from etc/coverage.py rename to bin/cbicov index ecc6019..9a9b093 --- a/etc/coverage.py +++ b/bin/cbicov @@ -21,14 +21,32 @@ # Read command-line arguments desc = "Code Base Investigator Coverage Tool" parser = argparse.ArgumentParser(description=desc) + parser.add_argument( + "-S", + "--source-dir", + metavar="", + dest="source_dir", + help="path to source directory", + default=os.getcwd(), + ) + parser.add_argument( + "-x", + "--exclude", + dest="excludes", + metavar="", + action="append", + default=[], + help="Exclude files matching this pattern from the code base. " + + "May be specified multiple times.", + ) parser.add_argument( "ifile", - metavar="INPUT", + metavar="", help="path to compilation database JSON file", ) parser.add_argument( "ofile", - metavar="OUTPUT", + metavar="", help="path to coverage JSON file", ) args = parser.parse_args() @@ -41,6 +59,8 @@ if not util.ensure_ext(path, [".json"]): raise ValueError(f"{path} is not a JSON file.") + source_dir = os.path.realpath(args.source_dir) + # Ensure regular CBI output goes to stderr stderr_log = logging.StreamHandler(sys.stderr) stderr_log.setFormatter(logging.Formatter("[%(levelname)-8s] %(message)s")) @@ -50,21 +70,33 @@ # Run CBI configured as-if: # - configuration contains a single (dummy) platform # - codebase contains all files in the specified compilation database - db = config.load_database(dbpath, os.getcwd()) + db = config.load_database(dbpath, source_dir) configuration = {"cli": db} - files = [e["file"] for e in db] - codebase = {"files": files, "platforms": ["cli"], "exclude_files": []} + codebase = { + "files": [], + "platforms": ["cli"], + "exclude_files": [], + "exclude_patterns": args.excludes, + "rootdir": source_dir, + } - state = finder.find(os.getcwd(), codebase, configuration) + state = finder.find(source_dir, codebase, configuration) - exporter = Exporter(codebase) + exporter = Exporter(codebase, hash_filenames=False, export_regions=False) exports = exporter.walk(state) for p in codebase["platforms"]: covarray = [] for filename in exports[p]: - covobject = {"file": filename, "regions": []} - for region in exports[p][filename]: - covobject["regions"].append(list(region)) + relative_path = os.path.relpath(filename, start=source_dir) + covobject = { + "file": relative_path, + "id": util.compute_file_hash(filename), + "lines": [], + } + # This initial implementation makes no attempt to compress + # consecutive lines, even though this is permitted. + for lines in exports[p][filename]: + covobject["lines"].extend(lines) covarray.append(covobject) util._validate_json(covarray, "coverage") json_string = json.dumps(covarray) diff --git a/codebasin/file_parser.py b/codebasin/file_parser.py index 2b14873..65a18a8 100644 --- a/codebasin/file_parser.py +++ b/codebasin/file_parser.py @@ -24,6 +24,7 @@ def __init__(self): self.line_count = 0 self.start_line = -1 self.end_line = -1 + self.lines = [] self.body = [] def empty(self): @@ -39,7 +40,7 @@ def empty(self): return False return True - def add_line(self, phys_int, sloc_count, source=None): + def add_line(self, phys_int, sloc_count, source=None, lines=None): """ Add a line to this line group. Update the extent appropriately, and if it's a countable line, add it to the line count. @@ -54,6 +55,8 @@ def add_line(self, phys_int, sloc_count, source=None): self.line_count += sloc_count if source is not None: self.body.append(source) + if lines is not None: + self.lines.extend(lines) def reset(self): """ @@ -62,6 +65,7 @@ def reset(self): self.line_count = 0 self.start_line = -1 self.end_line = -1 + self.lines = [] self.body = [] def merge(self, line_group): @@ -77,6 +81,7 @@ def merge(self, line_group): line_group.start_line = self.start_line self.start_line = min(self.start_line, line_group.start_line) + self.lines.extend(line_group.lines) self.body.extend(line_group.body) self.end_line = max(self.end_line, line_group.end_line) @@ -125,6 +130,7 @@ def insert_code_node(tree, line_group): line_group.end_line, line_group.line_count, line_group.body, + lines=line_group.lines, ) tree.insert(new_node) @@ -140,6 +146,7 @@ def insert_directive_node(tree, line_group, logical_line): new_node.start_line = line_group.start_line new_node.end_line = line_group.end_line new_node.num_lines = line_group.line_count + new_node.lines = line_group.lines # Issue a warning for unrecognized directives, but suppress warnings # for common directives that shouldn't impact correctness. @@ -156,7 +163,7 @@ def insert_directive_node(tree, line_group, logical_line): tree.insert(new_node) - def parse_file(self, *, summarize_only=True, language=None): + def parse_file(self, *, summarize_only=False, language=None): """ Parse the file that this parser points at, build a SourceTree representing this file, and return it. @@ -197,6 +204,7 @@ def parse_file(self, *, summarize_only=True, language=None): phys_int, logical_line.local_sloc, logical_line.flushed_line, + lines=logical_line.lines, ) FileParser.handle_directive( @@ -211,12 +219,14 @@ def parse_file(self, *, summarize_only=True, language=None): groups["code"].add_line( phys_int, logical_line.local_sloc, + lines=logical_line.lines, ) else: groups["code"].add_line( phys_int, logical_line.local_sloc, logical_line.flushed_line, + lines=logical_line.lines, ) except StopIteration as it: _, physical_loc = it.value diff --git a/codebasin/file_source.py b/codebasin/file_source.py index 296b75c..30ca94d 100644 --- a/codebasin/file_source.py +++ b/codebasin/file_source.py @@ -423,6 +423,7 @@ def __init__(self): self.current_logical_line = one_space_line() self.current_physical_start = 1 self.current_physical_end = None + self.lines = [] self.local_sloc = 0 self.category = None self.flushed_line = None @@ -433,12 +434,26 @@ def join(self, other_line): """ self.current_logical_line.join(other_line) + # This function isn't actually used any more, but can't be removed yet. def physical_nonblank(self, n): """ Mark nonblank link in this logical like. """ self.local_sloc += n + def add_physical_lines(self, lines: list[int]) -> None: + """ + Add the specified physical lines to this logical line. + """ + self.lines.extend(lines) + self.local_sloc += len(lines) + + def add_physical_line(self, line: int) -> None: + """ + Add the specified physical line to this logical line. + """ + self.add_physical_lines([line]) + def physical_update(self, physical_line_num): """ Mark end of new physical line. @@ -453,6 +468,7 @@ def physical_reset(self): """ self.current_physical_start = self.current_physical_end local_sloc_copy = self.local_sloc + self.lines = [] self.local_sloc = 0 self.flushed_line = None return local_sloc_copy @@ -507,7 +523,7 @@ def c_file_source(fp, relaxed=False, directives_only=False): cleaner.logical_newline() if not current_physical_line.category() == "BLANK": - curr_line.physical_nonblank(1) + curr_line.add_physical_line(physical_line_num) curr_line.join(current_physical_line) @@ -583,7 +599,7 @@ def fortran_file_source(fp, relaxed=False): ) if not current_physical_line.category() == "BLANK": - curr_line.physical_nonblank(src_c_line.local_sloc) + curr_line.add_physical_lines(src_c_line.lines) curr_line.join(current_physical_line) @@ -677,7 +693,7 @@ def asm_file_source(fp, relaxed=False): cleaner.process(it.islice(line, 0, end)) if not current_physical_line.category() == "BLANK": - curr_line.physical_nonblank(1) + curr_line.add_physical_line(physical_line_num) curr_line.join(current_physical_line) diff --git a/codebasin/preprocessor.py b/codebasin/preprocessor.py index 85ad9e5..3bf1676 100644 --- a/codebasin/preprocessor.py +++ b/codebasin/preprocessor.py @@ -646,11 +646,22 @@ class CodeNode(Node): the original source. """ - def __init__(self, start_line=-1, end_line=-1, num_lines=0, source=None): + def __init__( + self, + start_line=-1, + end_line=-1, + num_lines=0, + source=None, + lines=None, + ): super().__init__() self.start_line = start_line self.end_line = end_line self.num_lines = num_lines + if lines is None: + self.lines = [] + else: + self.lines = lines self.source = source def to_json(self, assoc): diff --git a/codebasin/schema/coverage-0.1.0.schema b/codebasin/schema/coverage-0.1.0.schema deleted file mode 100644 index 6eccb1e..0000000 --- a/codebasin/schema/coverage-0.1.0.schema +++ /dev/null @@ -1,37 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://raw.githubusercontent.com/intel/p3-analysis-library/main/p3/data/coverage-0.1.0.schema", - "title": "Coverage", - "description": "Lines of code used in each file of a code base.", - "type": "array", - "items": { - "type": "object", - "properties": { - "file": { - "type": "string" - }, - "regions": { - "type": "array", - "items": { - "type": "array", - "prefixItems": [ - { - "type": "integer" - }, - { - "type": "integer" - }, - { - "type": "integer" - } - ], - "items": false - } - } - }, - "required": [ - "file", - "regions" - ] - } -} diff --git a/codebasin/schema/coverage-0.3.0.schema b/codebasin/schema/coverage-0.3.0.schema new file mode 100644 index 0000000..b9e8f78 --- /dev/null +++ b/codebasin/schema/coverage-0.3.0.schema @@ -0,0 +1,41 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/intel/p3-analysis-library/main/p3/data/coverage-0.3.0.schema", + "title": "Coverage", + "description": "Lines of code used in each file of a code base.", + "type": "array", + "items": { + "type": "object", + "properties": { + "file": { + "type": "string" + }, + "id": { + "type": "string" + }, + "lines": { + "type": "array", + "items": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "array", + "contains": { + "type": "integer" + }, + "minContains": 2, + "maxContains": 2 + } + ] + } + } + }, + "required": [ + "file", + "id", + "lines" + ] + } +} diff --git a/codebasin/util.py b/codebasin/util.py index 8997406..ab9860e 100644 --- a/codebasin/util.py +++ b/codebasin/util.py @@ -143,7 +143,7 @@ def _validate_json(json_object: object, schema_name: str) -> bool: "analysis": "schema/analysis.schema", "compiledb": "schema/compilation-database.schema", "config": "schema/config.schema", - "coverage": "schema/coverage-0.1.0.schema", + "coverage": "schema/coverage-0.3.0.schema", "cbiconfig": "schema/cbiconfig.schema", } if schema_name not in schema_paths.keys(): diff --git a/codebasin/walkers/exporter.py b/codebasin/walkers/exporter.py index 23b86d7..2e6a47f 100644 --- a/codebasin/walkers/exporter.py +++ b/codebasin/walkers/exporter.py @@ -17,19 +17,22 @@ class Exporter(TreeWalker): Build a per-platform list of mappings. """ - def __init__(self, codebase): + def __init__(self, codebase, *, hash_filenames=True, export_regions=True): super().__init__(None, None) self.codebase = codebase self.exports = None + self.hash_filenames = hash_filenames + self.export_regions = export_regions def walk(self, state): self.exports = collections.defaultdict( lambda: collections.defaultdict(list), ) for fn in state.get_filenames(): - hashed_fn = util.compute_file_hash(fn) + if self.hash_filenames: + fn = util.compute_file_hash(fn) self._export_node( - hashed_fn, + fn, state.get_tree(fn).root, state.get_map(fn), ) @@ -47,15 +50,19 @@ def _export_node(self, _filename, _node, _map): if isinstance(_node, CodeNode): association = _map[_node] for p in frozenset(association): - start_line = _node.start_line - end_line = _node.end_line - num_lines = _node.num_lines - self.exports[p][_filename].append( - (start_line, end_line, num_lines), - ) + if self.export_regions: + start_line = _node.start_line + end_line = _node.end_line + num_lines = _node.num_lines + self.exports[p][_filename].append( + (start_line, end_line, num_lines), + ) + else: + lines = _node.lines + self.exports[p][_filename].append(lines) next_filename = _filename if isinstance(_node, FileNode): - next_filename = util.compute_file_hash(_node.filename) + next_filename = _node.filename for child in _node.children: self._export_node(next_filename, child, _map) diff --git a/setup.py b/setup.py index ee78dbb..7df531a 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ url="https://www.github.com/intel/code-base-investigator", packages=["codebasin", "codebasin.schema", "codebasin.walkers"], include_package_data=True, - scripts=["bin/codebasin"], + scripts=["bin/codebasin", "bin/cbicov"], classifiers=[ "Development Status :: 3 - Alpha", "Environment :: Console",