-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #12 from Yoctol/serializable_cherry_pick
serialization cherry pick
- Loading branch information
Showing
13 changed files
with
276 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +0,0 @@ | ||
from .char_indexer import CharIndexer | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .base import Indexer | ||
from .char_indexer import CharIndexer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
text_indexer/pipe_indexer.py → text_indexer/indexers/pipe_indexer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import os | ||
import errno | ||
import json | ||
|
||
|
||
def save_json(data, path):
    """Write *data* to *path* as human-readable JSON.

    The file is opened as UTF-8 text and non-ASCII characters are written
    verbatim rather than as ``\\uXXXX`` escapes. Nested levels are indented
    by two spaces.

    Args:
        data: JSON-serializable object.
        path: Destination file path; an existing file is overwritten.
    """
    with open(path, mode='w', encoding='utf-8') as json_file:
        json.dump(obj=data, fp=json_file, ensure_ascii=False, indent=2)
|
||
|
||
def load_json(path):
    """Read the UTF-8 encoded JSON file at *path* and return its content.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The deserialized Python object.
    """
    with open(path, mode='r', encoding='utf-8') as json_file:
        return json.load(json_file)
|
||
|
||
def mkdir_p(path):
    """Create *path* and any missing parent directories, like ``mkdir -p``.

    An already existing directory is not an error.

    Args:
        path: Directory to create.

    Raises:
        OSError: If the directory cannot be created, including when *path*
            already exists but is not a directory.
    """
    # ``exist_ok=True`` is the standard-library form of "ignore the error
    # when the target is already a directory".
    os.makedirs(path, exist_ok=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
from os.path import join, dirname, basename, isdir, isfile | ||
import tarfile | ||
import shutil | ||
import logging | ||
|
||
from .indexers import ( | ||
Indexer, | ||
CharIndexer, | ||
) | ||
|
||
|
||
# Module-level logger, named after this module so that it takes part in the
# package's logger hierarchy (the previous name was the literal string
# '__file__').
LOGGER = logging.getLogger(__name__)

# Registry of loadable indexers, keyed by class name.
# ``save_indexer`` stores ``indexer.__class__.__name__`` of an *instance*,
# i.e. the class's own name, so the key here must be ``indexer_cls.__name__``.
# ``indexer_cls.__class__.__name__`` would be the name of the metaclass
# (e.g. 'type') and no saved name could ever be found.
INDEXERS = {
    indexer_cls.__name__: indexer_cls for indexer_cls in [
        CharIndexer,
    ]
}
|
||
|
||
def save_indexer(
        indexer: Indexer,
        output_dir: str,
        logger: logging.Logger = LOGGER,
) -> str:
    """Export *indexer* to a gzip-compressed tar archive.

    The indexer's class name is written to a ``name`` file inside
    *output_dir*, the indexer stores its own state there via
    ``indexer.save``, the directory is packed into an archive, and the
    directory is then deleted.

    Args:
        indexer: Object to export; must provide ``save(output_dir)``.
        output_dir: Existing directory used as staging area. It is removed
            once the archive has been written.
        logger: Logger receiving the export message.

    Returns:
        Path of the archive that was written.

    Raises:
        ValueError: If *output_dir* is not an existing directory.
    """
    _validate_dir(output_dir)

    # Record which class produced the export so it can be resolved on load.
    indexer_cls_name = indexer.__class__.__name__
    name_path = _gen_name_path(output_dir)
    _save_name(indexer_cls_name, name_path)

    # Let the indexer persist its own state, then drop the local reference.
    indexer.save(output_dir)
    del indexer

    # Pack the staging directory and discard it afterwards.
    archive_path = _compress_to_tar(output_dir)
    shutil.rmtree(output_dir)
    logger.info(f'Export to {archive_path}')

    return archive_path
|
||
|
||
def load_indexer(
        path: str,
        logger: logging.Logger = LOGGER,
) -> Indexer:
    """Restore an indexer from an archive written by ``save_indexer``.

    The archive is extracted next to itself, the stored class name is read
    from the ``name`` file, and the matching class's ``load`` is called on
    the extracted directory.

    Args:
        path: Path of the archive file.
        logger: Logger receiving the extraction message.

    Returns:
        The object returned by the resolved class's ``load``.

    Raises:
        ValueError: If *path* is not an existing file.
    """
    _validate_file(path)

    # Unpack the archive.
    extracted_dir = _extract_from_tar(path)
    logger.info(f'Extract to {extracted_dir}')

    # Resolve the class from its stored name and let it rebuild itself.
    name_path = _gen_name_path(extracted_dir)
    stored_name = _load_name(name_path)
    indexer_cls = _get_indexer_module(stored_name)
    return indexer_cls.load(extracted_dir)
|
||
|
||
def _validate_file(path: str):
    """Raise ``ValueError`` unless *path* points to an existing regular file."""
    if isfile(path):
        return
    raise ValueError(f'[{path}] is not a file path.')
|
||
|
||
def _validate_dir(directory: str):
    """Raise ``ValueError`` unless *directory* is an existing directory."""
    if isdir(directory):
        return
    raise ValueError(f'[{directory}] is not a directory.')
|
||
|
||
def _save_name(name: str, path: str) -> None:
    """Write *name* verbatim to the UTF-8 text file at *path*.

    An existing file is overwritten; no newline is appended.
    """
    with open(path, mode='w', encoding='utf-8') as name_file:
        name_file.write(name)
|
||
|
||
def _load_name(path: str) -> str:
    """Return the full content of the UTF-8 text file at *path*."""
    with open(path, mode='r', encoding='utf-8') as name_file:
        return name_file.read()
|
||
|
||
def _compress_to_tar(output_dir: str) -> str:
    """Pack the content of *output_dir* into a gzip-compressed tar archive.

    The directory's entries are stored at the root of the archive, which is
    the layout ``load_indexer`` relies on: it extracts the archive into a
    directory and reads the ``name`` file directly inside it.

    Args:
        output_dir: Directory whose content is archived.

    Returns:
        Path of the archive that was written.
    """
    tar_path = _gen_compression_path(output_dir)
    with tarfile.open(tar_path, "w:gz") as tar:
        # ``basename(output_dir)`` is '' only when the path ends with a
        # separator; without one the entries would be nested under the
        # directory name and could not be found after extraction. Using ''
        # explicitly gives the same archive layout for both spellings.
        tar.add(output_dir, arcname='')
    return tar_path
|
||
|
||
def _extract_from_tar(path: str) -> str:
    """Extract the gzip-compressed tar archive at *path*.

    Args:
        path: Path of the archive file.

    Returns:
        The directory the archive was extracted into.
    """
    target_dir = _gen_extraction_dir(path)
    # NOTE(review): ``extractall`` writes whatever member paths the archive
    # contains; presumably only archives produced by this module are loaded.
    # Confirm before accepting archives from untrusted sources.
    with tarfile.open(path, mode="r:gz") as archive:
        archive.extractall(path=target_dir)
    return target_dir
|
||
|
||
def _gen_name_path(directory: str) -> str:
    """Return the path of the ``name`` file located inside *directory*."""
    name_filename = 'name'
    return join(directory, name_filename)
|
||
|
||
def _gen_compression_path(directory: str) -> str:
    """Return the archive path for *directory*.

    The archive is placed next to the directory and named
    ``<directory name>-all.tar.gz``. The directory may be given with or
    without a trailing path separator; both spellings give the same result.

    Args:
        directory: Directory that is going to be archived.

    Returns:
        Path of the archive file.
    """
    # ``basename`` is empty when the path ends with a separator; ``dirname``
    # then yields the same path without it. Previously a path without a
    # trailing separator produced an archive named after the *parent*
    # directory, located one level too high.
    normalized = directory if basename(directory) else dirname(directory)
    parent_dir = dirname(normalized)
    dir_name = basename(normalized)
    return join(parent_dir, f'{dir_name}-all.tar.gz')
|
||
|
||
def _gen_extraction_dir(path: str) -> str:
    """Return the directory an archive at *path* is extracted into.

    The directory sits next to the archive. For archives following the
    ``<name>-all.tar.gz`` naming scheme it is ``<name>/``, so directory
    names that contain ``-`` are preserved. Any other file name falls back
    to the text before its first ``-``.

    Args:
        path: Path of the archive file.

    Returns:
        Extraction directory path, ending with ``/``.
    """
    archive_suffix = '-all.tar.gz'
    parent_dir = dirname(path)
    filename = basename(path)
    if filename.endswith(archive_suffix):
        # Strip only the known suffix; splitting on the first '-' would
        # truncate names such as 'my-indexer-all.tar.gz' to 'my'.
        stem = filename[:-len(archive_suffix)]
    else:
        # Unknown naming scheme: keep the historical behaviour.
        stem = filename.split('-')[0]
    return join(parent_dir, f'{stem}/')
|
||
|
||
def _get_indexer_module(indexer_name: str) -> Indexer:
    """Look up the entry registered in ``INDEXERS`` under *indexer_name*.

    Raises:
        KeyError: If no entry is registered under *indexer_name*.
    """
    indexer_cls = INDEXERS[indexer_name]
    return indexer_cls
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from unittest import TestCase | ||
from unittest.mock import patch | ||
import shutil | ||
from os.path import join, abspath, exists, dirname | ||
import os | ||
|
||
from ..io import save_indexer, load_indexer | ||
from text_indexer.indexers.utils import save_json, load_json | ||
|
||
|
||
class MockIndexer(object):
    """Minimal stand-in for an indexer, used to exercise save/load.

    It persists two JSON files: ``fake_pipe.json`` holding the fixed
    ``a``/``b`` values and ``fake_indexer.json`` holding the constructor
    arguments ``aa``/``bb``.
    """

    def __init__(self, aa=1, bb=2):
        self.aa, self.bb = aa, bb
        self.a, self.b = 1, 2

    def save(self, output_dir):
        """Write both JSON files into *output_dir*."""
        pipe_state = {'a': self.a, 'b': self.b}
        indexer_state = {'aa': self.aa, 'bb': self.bb}
        save_json(pipe_state, join(output_dir, 'fake_pipe.json'))
        save_json(indexer_state, join(output_dir, 'fake_indexer.json'))

    @classmethod
    def load(cls, output_dir):
        """Rebuild an instance from the JSON files in *output_dir*.

        The content of ``fake_pipe.json`` is attached as ``pipe``.
        """
        pipe_state = load_json(join(output_dir, 'fake_pipe.json'))
        init_kwargs = load_json(join(output_dir, 'fake_indexer.json'))
        instance = cls(**init_kwargs)
        instance.pipe = pipe_state
        return instance
|
||
|
||
class IOTestCase(TestCase):
    """Tests for ``save_indexer`` and ``load_indexer`` using ``MockIndexer``."""

    def setUp(self):
        # Staging directory next to this test file; ``save_indexer`` requires
        # it to exist and deletes it after writing the archive.
        root_dir = dirname(abspath(__file__))
        self.output_dir = join(root_dir, 'example/')
        os.mkdir(self.output_dir)

    def tearDown(self):
        if exists(self.output_dir):
            shutil.rmtree(self.output_dir)

    @staticmethod
    def _remove_file(path):
        """Delete *path* if it is still present."""
        if exists(path):
            os.remove(path)

    def _export_mock_indexer(self):
        """Save a ``MockIndexer`` and schedule removal of the archive.

        The removal is registered as a cleanup so that the archive does not
        stay behind when an assertion in the test fails.
        """
        export_path = save_indexer(indexer=MockIndexer(), output_dir=self.output_dir)
        self.addCleanup(self._remove_file, export_path)
        return export_path

    def test_save_indexer(self):
        export_path = self._export_mock_indexer()
        self.assertTrue(exists(export_path))

    def test_load_indexer(self):
        export_path = self._export_mock_indexer()
        with patch('text_indexer.io._get_indexer_module', return_value=MockIndexer):
            indexer = load_indexer(export_path)
        self.assertTrue(exists(self.output_dir))
        # The loaded object must carry the state that was saved.
        self.assertIsInstance(indexer, MockIndexer)
        self.assertEqual((indexer.aa, indexer.bb), (1, 2))
        self.assertEqual(indexer.pipe, {'a': 1, 'b': 2})