-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from Esukhia/text-saving
Text saving
- Loading branch information
Showing
18 changed files
with
662 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -130,4 +130,5 @@ dmypy.json | |
|
||
|
||
.env | ||
.vscode | ||
.vscode | ||
.github |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
from openpecha.blupdate import * | ||
from pedurma.pecha import * | ||
from pedurma.texts import serialize_text_obj | ||
|
||
def get_old_vol(pecha_opf_path, pecha_id, text_obj):
    """Read the current base text of every volume listed in ``text_obj.vol_span``.

    Args:
        pecha_opf_path: Path-like root that contains the ``{pecha_id}.opf`` directory.
        pecha_id: Identifier used to build the ``.opf`` directory name.
        text_obj: Object whose ``vol_span`` attribute iterates over volume ids.

    Returns:
        dict mapping each volume id to the UTF-8 content of its base ``.txt`` file.
    """
    return {
        volume: (pecha_opf_path / f"{pecha_id}.opf/base/{volume}.txt").read_text(encoding='utf-8')
        for volume in text_obj.vol_span
    }
|
||
def get_old_text_base(pecha_idx, old_vol_base, text_id, text_vol_num):
    """Slice one text's portion out of a volume's base string.

    Args:
        pecha_idx: Index mapping; ``pecha_idx['annotations'][text_id]['span']``
            must be a sequence of dicts with ``vol``, ``start`` and ``end`` keys.
        old_vol_base: Full base text of the volume.
        text_id: Key of the text inside ``pecha_idx['annotations']``.
        text_vol_num: Volume number to look for among the text's spans.

    Returns:
        ``old_vol_base[start:end]`` for the first span whose ``vol`` equals
        ``text_vol_num``, or ``''`` when the text has no span in that volume.
    """
    spans = pecha_idx['annotations'][text_id]['span']
    matching = next((span for span in spans if span['vol'] == text_vol_num), None)
    if matching is None:
        return ''
    return old_vol_base[matching['start']:matching['end']]
|
||
def get_new_vol(old_vols, pecha_idx, text_obj):
    """Build the updated base text of every volume produced by serializing ``text_obj``.

    For each ``(vol_id, text)`` pair returned by ``serialize_text_obj(text_obj)``
    the text's indexed span in that volume is replaced by the new text.

    The replacement is spliced in at the span's ``start``/``end`` offsets instead
    of using ``str.replace``: ``replace`` would rewrite *every* occurrence of the
    old text in the volume, and with an empty old text it would insert the new
    text between every character of the volume.

    Args:
        old_vols: dict mapping volume id to the current base text of the volume.
        pecha_idx: Index mapping; ``pecha_idx['annotations'][text_obj.id]['span']``
            must be a sequence of dicts with ``vol``, ``start`` and ``end`` keys.
        text_obj: Text object accepted by ``serialize_text_obj``; its ``id`` is
            the key of the text in the index.

    Returns:
        dict mapping volume id to the new base text. A volume in which the text
        has no indexed span is returned unchanged.

    Raises:
        KeyError: if a serialized volume id is missing from ``old_vols`` or the
            text id is missing from the index.
    """
    new_vols = {}
    for vol_id, new_text_base in serialize_text_obj(text_obj).items():
        # The numeric volume is everything after the first character of the id
        # (presumably ids look like "v001" -- confirm against the serializer).
        vol_num = int(vol_id[1:])
        old_vol_base = old_vols[vol_id]
        text_spans = pecha_idx['annotations'][text_obj.id]['span']
        vol_span = next((span for span in text_spans if span['vol'] == vol_num), None)
        # startswith() is safe on an empty string, unlike indexing [0].
        if new_text_base.startswith("\n"):
            new_text_base = new_text_base[1:]
        if vol_span is None:
            # Nothing to anchor the replacement on: keep the volume intact
            # rather than corrupting it.
            new_vols[vol_id] = old_vol_base
            continue
        new_vols[vol_id] = (
            old_vol_base[:vol_span['start']]
            + new_text_base
            + old_vol_base[vol_span['end']:]
        )
    return new_vols
|
||
def update_base(pecha_opf_path, pecha_id, text_obj, pecha_idx=None):
    """Rewrite the base ``.txt`` file of every volume affected by ``text_obj``.

    Args:
        pecha_opf_path: Path-like root that contains the ``{pecha_id}.opf`` directory.
        pecha_id: Identifier used to build the ``.opf`` directory name.
        text_obj: Text object passed on to ``get_old_vol`` and ``get_new_vol``.
        pecha_idx: Already-loaded index; when falsy it is read from
            ``{pecha_id}.opf/index.yml``.

    Returns:
        None. New volume bases are written to disk and one INFO line is printed
        per volume.
    """
    if not pecha_idx:
        index_file = pecha_opf_path / f"{pecha_id}.opf/index.yml"
        pecha_idx = yaml.safe_load(index_file.read_text(encoding='utf-8'))
    current_bases = get_old_vol(pecha_opf_path, pecha_id, text_obj)
    updated_bases = get_new_vol(current_bases, pecha_idx, text_obj)
    for vol_id in updated_bases:
        base_file = pecha_opf_path / f"{pecha_id}.opf/base/{vol_id}.txt"
        base_file.write_text(updated_bases[vol_id], encoding='utf-8')
        print(f'INFO: {vol_id} base updated..')
|
||
def get_old_layers(pecha_opf_path, pecha_id, vol_id):
    """Load every layer file stored for one volume.

    Args:
        pecha_opf_path: Path-like root that contains the ``{pecha_id}.opf`` directory.
        pecha_id: Identifier used to build the ``.opf`` directory name.
        vol_id: Name of the volume sub-directory under ``layers``.

    Returns:
        dict mapping each file's stem (file name without suffix) to the content
        parsed by ``yaml.safe_load``. Every entry of the directory is read.
    """
    layers_dir = pecha_opf_path / f"{pecha_id}.opf/layers/{vol_id}"
    return {
        layer_file.stem: yaml.safe_load(layer_file.read_text(encoding='utf-8'))
        for layer_file in layers_dir.iterdir()
    }
|
||
def update_layer(pecha_opf_path, pecha_id, vol_id, old_layers, updater):
    """Apply ``updater`` to each layer of a volume and write the result back.

    Args:
        pecha_opf_path: Path-like root that contains the ``{pecha_id}.opf`` directory.
        pecha_id: Identifier used to build the ``.opf`` directory name.
        vol_id: Name of the volume sub-directory under ``layers``.
        old_layers: dict mapping layer name to its loaded content; each value is
            handed to ``update_ann_layer`` and then dumped, so it is expected to
            be modified in place by that call.
        updater: Object forwarded to ``update_ann_layer``.

    Returns:
        None. Each layer is dumped to ``{layer_name}.yml`` and one INFO line is
        printed per layer.
    """
    for layer_name in old_layers:
        layer = old_layers[layer_name]
        update_ann_layer(layer, updater)
        layer_file = pecha_opf_path / f"{pecha_id}.opf/layers/{vol_id}/{layer_name}.yml"
        # sort_keys=False keeps the key order of the loaded layer in the dump.
        layer_file.write_text(yaml.safe_dump(layer, sort_keys=False), encoding='utf-8')
        print(f'INFO: {vol_id} {layer_name} has been updated...')
|
||
def update_old_layers(pecha_opf_path, pecha_id, text_obj, pecha_idx=None):
    """Update the layers of every volume whose base is changed by ``text_obj``.

    For each volume a ``Blupdate`` is built from the old and new base text and
    applied to all layers of that volume through ``update_layer``.

    Old and new bases are paired by volume id. The two dicts are built from
    different iterables (``text_obj.vol_span`` and the serialized text), so
    pairing them positionally could diff one volume against another if their
    order or length ever differed.

    Args:
        pecha_opf_path: Path-like root that contains the ``{pecha_id}.opf`` directory.
        pecha_id: Identifier used to build the ``.opf`` directory name.
        text_obj: Text object passed on to ``get_old_vol`` and ``get_new_vol``.
        pecha_idx: Already-loaded index; when falsy it is read from
            ``{pecha_id}.opf/index.yml``.

    Returns:
        None. Layer files are rewritten on disk.
    """
    if not pecha_idx:
        pecha_idx = yaml.safe_load((pecha_opf_path / f"{pecha_id}.opf/index.yml").read_text(encoding='utf-8'))
    old_vols = get_old_vol(pecha_opf_path, pecha_id, text_obj)
    new_vols = get_new_vol(old_vols, pecha_idx, text_obj)
    for vol_id, new_vol_base in new_vols.items():
        # Look the old base up by id so both sides always belong to the same volume.
        updater = Blupdate(old_vols[vol_id], new_vol_base)
        old_layers = get_old_layers(pecha_opf_path, pecha_id, vol_id)
        update_layer(pecha_opf_path, pecha_id, vol_id, old_layers, updater)
|
||
def update_other_text_index(pecha_idx, text_id, cur_vol_offset, vol_num):
    """Shift the spans of the texts listed after ``text_id`` in one volume.

    Annotations are walked in the mapping's iteration order. Entries up to and
    including ``text_id`` are skipped; for every later entry, each span whose
    ``vol`` equals ``vol_num`` gets ``cur_vol_offset`` added to both ``start``
    and ``end``. The walk stops at the first span of a later entry whose ``vol``
    is greater than ``vol_num``.

    Args:
        pecha_idx: Index mapping with an ``annotations`` dict; modified in place.
        text_id: Key of the text that was edited.
        cur_vol_offset: Amount added to the affected ``start``/``end`` values.
        vol_num: Volume number whose spans are shifted.

    Returns:
        The same ``pecha_idx`` object.
    """
    reached_edited_text = False
    for uuid, annotation in pecha_idx['annotations'].items():
        if not reached_edited_text:
            # Nothing is shifted until the edited text itself has been passed.
            reached_edited_text = uuid == text_id
            continue
        for span in annotation['span']:
            if span['vol'] > vol_num:
                return pecha_idx
            if span['vol'] == vol_num:
                span['start'] += cur_vol_offset
                span['end'] += cur_vol_offset
    return pecha_idx
|
||
def update_index(pecha_opf_path, pecha_id, text_obj, pecha_idx=None):
    """Adjust index spans after the base text of ``text_obj`` has changed length.

    For every volume whose new base differs in length from the old one, the
    ``end`` of the edited text's span in that volume is moved by the length
    difference. Unless the edited text also has a span in a later volume, the
    spans of the texts that follow it are shifted through
    ``update_other_text_index``.

    Old and new bases are paired by volume id. The two dicts are built from
    different iterables (``text_obj.vol_span`` and the serialized text), so
    pairing them positionally could compute an offset from two different
    volumes if their order or length ever differed.

    Args:
        pecha_opf_path: Path-like root that contains the ``{pecha_id}.opf`` directory.
        pecha_id: Identifier used to build the ``.opf`` directory name.
        text_obj: Text object; its ``id`` is the key of the text in the index.
        pecha_idx: Already-loaded index; when falsy it is read from
            ``{pecha_id}.opf/index.yml``. A passed-in index is modified in place.

    Returns:
        The updated index mapping.
    """
    if not pecha_idx:
        pecha_idx = yaml.safe_load((pecha_opf_path / f"{pecha_id}.opf/index.yml").read_text(encoding='utf-8'))
    old_vols = get_old_vol(pecha_opf_path, pecha_id, text_obj)
    new_vols = get_new_vol(old_vols, pecha_idx, text_obj)
    for vol_id, new_vol_base in new_vols.items():
        # The numeric volume is everything after the first character of the id.
        vol_num = int(vol_id[1:])
        cur_vol_offset = len(new_vol_base) - len(old_vols[vol_id])
        if cur_vol_offset == 0:
            # Same length: no span in this volume has to move.
            continue
        check_next_text = True
        for vol_span in pecha_idx["annotations"][text_obj.id]['span']:
            if vol_span['vol'] == vol_num:
                vol_span['end'] += cur_vol_offset
            elif vol_span['vol'] > vol_num:
                # The edited text has a span in a later volume, so following
                # texts are not shifted for this volume.
                check_next_text = False
                break
        if check_next_text:
            pecha_idx = update_other_text_index(pecha_idx, text_obj.id, cur_vol_offset, vol_num)
    return pecha_idx
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
id: 559d95c999ba4b56b704539f48c88019 | ||
annotation_type: index | ||
revision: '00001' | ||
annotations: | ||
259260e8e3544fc1a9a27d7dffc72df6: | ||
parts: [] | ||
span: | ||
- vol: 1 | ||
start: 0 | ||
end: 179 | ||
- vol: 2 | ||
start: 0 | ||
end: 218 | ||
work_id: D1115 | ||
cf52cbae1a7640b688b24135fe566920: | ||
parts: [] | ||
span: | ||
- vol: 2 | ||
start: 219 | ||
end: 384 | ||
work_id: D1116 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
id: 9b28dc8b0d6549929106c66d4d02f784 | ||
annotation_type: Durchen | ||
revision: '00001' | ||
annotations: | ||
d724dd7a79704a4088a0a625717d7fa6: | ||
span: | ||
start: 169 | ||
end: 215 | ||
97ca36f7b601415a8187a46e23fa9db2: | ||
span: | ||
start: 339 | ||
end: 384 | ||
local_ids: | ||
d724dd7a79704a4088a0a625717d7fa6: 200000 | ||
97ca36f7b601415a8187a46e23fa9db2: 200001 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
id: b745a20831cc4ab5a38a46a8738294a6 | ||
annotation_type: Pagination | ||
revision: '00001' | ||
annotations: | ||
c11d8db649854c5d89ca3df22047d07b: | ||
page_index: 1a | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 0 | ||
end: 57 | ||
note_ref: 05d117045b0c4ea5aee3aeba558e94bd | ||
21671cb910d9486c8ba4793305c00d58: | ||
page_index: 1b | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 60 | ||
end: 116 | ||
note_ref: 05d117045b0c4ea5aee3aeba558e94bd | ||
671dc26715434318b3d641521d4e9292: | ||
page_index: 2a | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 119 | ||
end: 166 | ||
note_ref: 05d117045b0c4ea5aee3aeba558e94bd | ||
05d117045b0c4ea5aee3aeba558e94bd: | ||
page_index: 2b | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 169 | ||
end: 215 | ||
3373e79434004aaeb8b2e69649243d2a: | ||
page_index: 3a | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 218 | ||
end: 281 | ||
note_ref: 9efa117a2b9444ac8cb09c198d21cdd8 | ||
71dff610d4c841c58e9c815582bf8508: | ||
page_index: 3b | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 284 | ||
end: 336 | ||
note_ref: 9efa117a2b9444ac8cb09c198d21cdd8 | ||
9efa117a2b9444ac8cb09c198d21cdd8: | ||
page_index: 4a | ||
page_info: '' | ||
reference: null | ||
span: | ||
start: 339 | ||
end: 384 | ||
local_ids: | ||
c11d8db649854c5d89ca3df22047d07b: 200000 | ||
21671cb910d9486c8ba4793305c00d58: 200001 | ||
671dc26715434318b3d641521d4e9292: 200002 | ||
05d117045b0c4ea5aee3aeba558e94bd: 200003 | ||
3373e79434004aaeb8b2e69649243d2a: 200004 | ||
71dff610d4c841c58e9c815582bf8508: 200005 | ||
9efa117a2b9444ac8cb09c198d21cdd8: 200006 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
་་༄ལོ། །རྒྱ་གར་སྐད་དུ། | ||
དབྱིངས་སུ་བསྟོད་པ། | ||
འཚལ་ལོ། །གང་ཞིག་ | ||
|
||
མཐོང་ངོ་། །ཕྱོགས་ | ||
དེ་དང་དེ་ཡི་ཕྱོགས་ | ||
ཏིང་འཛིན་རྡོ་རྗེ་ཡིས | ||
|
||
རིམ་གྱིས་སྦྱངས་ | ||
མེད་ཉི་མ་ཟླ་བ་ཡང་། | ||
་རྡུལ་ལ་སོགས། | ||
|
||
འབྱོར་ཆེན་པོ་དེར་ | ||
སྡུག་བསྔལ་གྱིས་ | ||
དེ་ཡི་སྐུ་ལས་ | ||
|
||
ངོས་ལྗོན་ཤིང་ | ||
ལེན་པ་པོ་ཕུན་སུམ་ཚོགས་པའོ། | ||
འདི་དག་གིས་ནི་སྦྱིན་པར་ | ||
|
||
མངའ་དབང་མཛད་པ་ | ||
འདི་དག་གིས་ནི་དེའི་ | ||
གིས་ནི་སྐྱེ་kkབ་ལ་ | ||
|
||
དེ་ལ་ནམ་མཁའི་ | ||
བ་ཡང་དག་པར་ | ||
གིས་ནི་ཆོས་སྟོན་པའི་ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
ཉ༄ཚོ། །རྒྱ་གར་སྐད་དུ། | ||
སྟ་བ་ནཱ་མ། བོད་སྐད་དུ། | ||
པར་འོས་པ་བསྔགས་ | ||
|
||
གཏམ་འདི་ཙམ | ||
འདི་ཉིད་སྨྲ་བར་ | ||
དང་-། །ཁྱོད་མ | ||
|
||
འདོད་གང་དག་ | ||
སྐྱབས་འགྲོ་བ། | ||
སྟོང་གིས་ཀྱང་། | ||
|
||
རྒྱ་གར་གྱི་ | ||
༢༦༤ ༧པེ་〉〉་ | ||
བཞུགས་གོ། |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
་་༄ལོ། །རྒྱ་གར་སྐད་དུ། | ||
དབྱིངས་སུ་བསྟོད་པ། | ||
འཚལ་ལོ། །གང་ཞིག་ | ||
|
||
མཐོང་ངོ་། །ཕྱོགས་ | ||
དེ་དང་དེ་ཡི་ཕྱོགས་ | ||
ཏིང་འཛིན་རྡོ་རྗེ་ཡིས | ||
|
||
རིམ་གྱིས་སྦྱངས་ | ||
མེད་ཉི་མ་ཟླ་བ་ཡང་། | ||
་རྡུལ་ལ་སོགས། | ||
|
||
འབྱོར་ཆེན་པོ་དེར་ | ||
སྡུག་བསྔལ་གྱིས་ | ||
དེ་ཡི་སྐུ་ལས་ | ||
|
||
ངོས་ལྗོན་ཤིང་ | ||
ལེན་པ་པོ་ཕུན་སུམ་ཚོགས་པའོ། | ||
འདི་དག་གིས་ནི་སྦྱིན་པར་ | ||
|
||
མངའ་དབང་མཛད་པ་ | ||
འདི་དག་གིས་ནི་དེའི་ | ||
གིས་ནི་སྐྱེ་བ་ལ་ | ||
|
||
དེ་ལ་ནམ་མཁའི་ | ||
བ་ཡང་དག་པར་ | ||
གིས་ནི་ཆོས་སྟོན་པའི་ |
Oops, something went wrong.