-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathparser_base.py
63 lines (48 loc) · 1.96 KB
/
parser_base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import logging
from abc import ABCMeta, abstractmethod
from functools import cached_property
from pathlib import Path
from timeout_decorator import timeout_decorator
from tree_sitter import Language, Parser, Tree
from megavul.parser.parser_util import ExtractedFunction
from megavul.util.utils import build_tree_sitter_language, save_marshmallow_dataclass_to_json_file
class ParserBase(metaclass=ABCMeta):
    """Abstract base class for tree-sitter based function extractors.

    Subclasses provide the grammar name (``language_name``), the extraction
    logic (``parse``) and a language check (``can_handle_this_language``).
    This class lazily builds the tree-sitter ``Language``/``Parser`` pair and
    drives the read -> parse -> save pipeline in ``parse_file``.
    """

    # Switch that is always read through ``ParserBase`` itself (never through
    # ``self``), so overriding it on a subclass has no effect. It gates
    # ``debug`` output and is forwarded to ``build_tree_sitter_language``.
    DEBUG_MODE = False

    def __init__(self, logger: logging.Logger) -> None:
        """Store the logger used by ``debug``."""
        self.logger = logger

    @property
    @abstractmethod
    def language_name(self) -> str:
        """Return the short grammar name passed to ``build_tree_sitter_language``.

        Examples of the naming scheme:
            tree-sitter-c    ---> c
            tree-sitter-cpp  ---> cpp
            tree-sitter-java ---> java
        """
        ...

    @cached_property
    def language(self) -> Language:
        """Tree-sitter ``Language`` for ``language_name``; built on first access."""
        return build_tree_sitter_language(self.language_name, ParserBase.DEBUG_MODE)

    @cached_property
    def parser(self) -> Parser:
        """Tree-sitter ``Parser`` bound to ``self.language``; built on first access."""
        parser = Parser()
        parser.set_language(self.language)
        return parser

    def parse_file(self, fp: Path, result_save_path: Path) -> None:
        """Parse the source file ``fp`` and save the extracted functions.

        The file is read as text, parsed with ``self.parser``, passed to
        ``self.parse`` and the resulting ``ExtractedFunction`` list is handed
        to ``save_marshmallow_dataclass_to_json_file`` together with
        ``result_save_path``.

        Args:
            fp: Path of the source file to read.
            result_save_path: Destination path given to the save helper.

        Raises:
            OSError: If ``fp`` cannot be opened.
            UnicodeDecodeError: If ``fp`` is not valid UTF-8.
        """
        # 'utf-8-sig' strips a leading BOM (u'\ufeff') if one is present.
        # The context manager closes the handle deterministically instead of
        # leaving it open until garbage collection.
        with fp.open(mode='r', encoding='utf-8-sig') as source_file:
            file_lines = source_file.readlines()

        # Lines keep their terminators, so joining them restores the decoded
        # text that tree-sitter parses as UTF-8 bytes.
        source_text = ''.join(file_lines)
        tree = self.parser.parse(source_text.encode('utf-8'))

        extracted_funcs = self.parse(tree, file_lines)
        save_marshmallow_dataclass_to_json_file(ExtractedFunction, result_save_path, extracted_funcs)

    # NOTE(review): the timeout wraps only this abstract stub. A subclass
    # override is a separate function and is not time-limited unless it applies
    # the decorator itself -- confirm that subclasses do so.
    @timeout_decorator.timeout(seconds=20)
    @abstractmethod
    def parse(self, tree: Tree, file_lines: list[str]) -> list[ExtractedFunction]:
        """Extract functions from a parsed file.

        Args:
            tree: Tree produced by ``self.parser`` for the file.
            file_lines: The file's lines as returned by ``readlines()``
                (line terminators included).

        Returns:
            The functions extracted from the file.
        """
        ...

    @abstractmethod
    def can_handle_this_language(self, language_name: str) -> bool:
        """Return whether this parser handles the language ``language_name``."""
        ...

    ########### for debug ############
    @property
    def parser_name(self) -> str:
        """Name of the concrete parser class, used as the debug-log prefix."""
        return self.__class__.__name__

    def debug(self, msg: str) -> None:
        """Log ``msg`` at DEBUG level, only when ``ParserBase.DEBUG_MODE`` is set."""
        if ParserBase.DEBUG_MODE:
            self.logger.debug(f'[{self.parser_name}] {msg}')