-
Notifications
You must be signed in to change notification settings - Fork 0
/
outgrab_tools.py
executable file
·1751 lines (1536 loc) · 73.5 KB
/
outgrab_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/python3
"""outgrab: Programatically select important information from large
files and send it to other files for human or program consumption.
outgrab.py Can be invoked as a program interpreter or
outgrab_tools.py and outgrab_startup can be imported as a library of python
classes and methods.
"""
#-------------------------------------------------------------------------------
# Why would you use this program instead of grep, sed, awk, cut, paste, etc.?
# 1. Because you forget how to use all of the options of those programs.
# 2. Because outgrab maintains its state (the current line) between commands:
# this can make a sequence of commands more simple and more efficient.
# 3. Because the commands included are very close to what you would do by hand
# when searching through an output file and selecting the bits of interest.
#-------------------------------------------------------------------------------
# Usage:
#
# See near the end of this file for a summary of the outgrab command language.
#
# Input text files come from stdin and, optionally, from the -i flag.
# These will be treated separately and are named $file1, $file2, etc.
# Output goes to stdout or may be redirected into an output file
# If mytest.grab contains outgrab commands, use the outgrab interpreter:
# python outgrab.py -p mytest.grab < a.txt
# python outgrab.py -p mytest.grab < a.txt > output.txt
# cat *.txt | python outgrab.py -p mytest.grab > output.txt
# where mytest.grab contains outgrab commands, a.txt is an input text file,
# and output.txt is an output file. The first version sends output to the screen
# and the last version concatenates all .txt files in the current directory and uses them as input.
# e.g.
# Use as a python class/method library is less well tested. See outgrab.py as a starter.
# If mytest.py contains python statements using the classes and functions within this file:
# e.g. python mytest.py < a.txt
# python mytest.py < a.txt > output.txt
# python mytest.py -i b.txt < a.txt > output.txt
# cat *.txt | python mytest.py > output.txt (files concatenated and together named $file1)
#
#-------------------------------------------------------------------------------
import sys
import os
import re
import logging
from outgrab_startup import getparser, setlogging, setverbositylevels
# Global variables related to messaging.
# verbosity is assigned in startup() and runoutgrab().
verbosity = 0
maxlevel = 4
# Message-level values; placeholders here, reassigned in setuplogging().
ogdebug = 0
ogverbose = 0
oginfo = 0
ogmain = 0
# parserargs holds the result of getparser(); assigned in startup().
parserargs = ""
# msg is a placeholder here; setuplogging() rebinds it to the callable
# used throughout this module as msg(level, text).
msg = ""
# and a global dictionary to hold the internal files by their names
ifilesd = {}
#standard filename prefix; use with a postfix number in addfilename
# (e.g. "$file" + "1" gives "$file1")
filebase = "$file"
#
# module level functions
#
def startup():
    """Parse the command line and configure logging.

    Stores the parsed arguments in the module-level ``parserargs`` and the
    requested verbosity in ``verbosity``, then calls setuplogging().
    """
    global parserargs, verbosity, maxlevel
    parserargs = getparser()
    verbosity = parserargs.verbosity
    setuplogging()
def setuplogging():
    """Configure the module-level logging callable and message levels.

    Uses the module-level ``verbosity`` to pick a log level, then rebinds
    the globals ``msg``, ``maxlevel``, ``ogdebug``, ``ogverbose``,
    ``oginfo`` and ``ogmain`` from the values returned by setlogging().
    Finally emits one demonstration message per level.
    """
    # maxlevel must be declared global too; otherwise the value returned by
    # setlogging() only lives in a local and the module-level one is stale.
    global msg, maxlevel
    global ogdebug, ogverbose, oginfo, ogmain
    # Set up logging levels from most verbose to least.
    myloglevel = setverbositylevels(verbosity, verbosity_default=2)
    myloglevelnames = ("ogdebug", "ogverbose", "oginfo", "ogmain")
    (msg, maxlevel, (ogdebug, ogverbose, oginfo, ogmain)) = setlogging(myloglevel, myloglevelnames)
    # Demonstration/test of logging: each message is only printed when the
    # configured verbosity admits its level.
    msg(ogmain, "printing status messages at verbosity level {} (ogmain)".format(maxlevel-ogmain+1))
    msg(oginfo, "printing status messages at verbosity level {} (oginfo)".format(maxlevel-oginfo+1))
    msg(ogverbose,"printing status messages at verbosity level {} (ogverbose)".format(maxlevel-ogverbose+1))
    msg(ogdebug, "printing debug messages at verbosity level {} (ogdebug)".format(maxlevel-ogdebug+1))
def runoutgrab(programfile, verboseness, outputfile, *inputfiles):
    """Set up files and launch outgrab from your own program.

    programfile -- path of the file holding outgrab commands
    verboseness -- verbosity level, stored in the module-level ``verbosity``
    outputfile  -- path the resulting output file is written to
    inputfiles  -- any number of input file paths; they are registered as
                   $file1, $file2, ... in the order given

    Raises KeyError (from getfilefromname) if no input file is supplied,
    because "$file1" is then never registered.
    """
    global verbosity
    verbosity = verboseness
    setuplogging()
    # create program file, registered as $file0 and "program"
    with open(programfile, "r") as pgmfh:
        program = createInputFile(pgmfh, ProgramFile)
    addfilename(program, filebase, 0)
    addfilename(program, "program")
    msg(oginfo, "Creating program file with names = {}".format(program.names))
    # create output file; the output path is a name of the OUTPUT object
    # (the original attached it to the program file by mistake)
    output = OutputFile()
    addfilename(output, outputfile)
    output.filename = outputfile
    msg(oginfo, "Creating output file with names: {}".format(output.names))
    # create scratch file
    scratch = createScratchFile("ScratchFile")
    addfilename(scratch, "scratch")
    msg(oginfo, "Creating scratch file with names: {}".format(scratch.names))
    # create input files
    for filenum, myfile in enumerate(inputfiles, start=1):
        readinputfile(myfile, filenum)
    # Assign input and output files for the outgrab program so it processes
    # the former and writes to the latter.  Initial focus is on $file1.
    firstinput = getfilefromname("$file1")
    program = getfilefromname("program")
    program.setinputfile(firstinput)
    program.setoutputfile(output)
    # Process the outgrab program file
    program.processcommands()
    # Write the output file
    with open(output.filename, "w") as outf:
        output.writefile(outf)
def readinputfile(myfile, filenum):
    """Read the file at path myfile into an internal InputFile.

    The new file is registered under the standard name
    filebase + str(filenum), e.g. "$file2".
    """
    # "with" guarantees the handle is released even if construction fails.
    with open(myfile, "r") as fh:
        x = createInputFile(fh, InputFile)
    addfilename(x, filebase, filenum)  # create the standard filename ($fileN)
    msg(ogdebug, "Creating input file with names = {}".format(x.names))
def createInputFiles():
    """Create the internal files described by the command line.

    stdin becomes $file1, each entry of parserargs.inputfiles becomes
    $file2, $file3, ..., parserargs.program (if given) becomes $file0 and
    "program", and an extra file named "scratch" is always created.
    All of them are registered in the module-level ifilesd dictionary.
    """
    # Input file from stdin
    msg(oginfo,"Creating input files from stdin")
    stdinfile = createInputFile(sys.stdin, InputFile)
    addfilename(stdinfile, filebase, 1)
    msg(oginfo,"Names = {}".format(stdinfile.names))
    # Files from input argument "-i" or "--inputfiles"
    if parserargs.inputfiles:
        msg(oginfo,"Creating input files from -i or --inputfiles")
        for number, myfile in enumerate(parserargs.inputfiles, start=2):
            msg(ogdebug,"file = {}".format(myfile))
            extrafile = createInputFile(myfile, InputFile)
            addfilename(extrafile, filebase, number)
            msg(oginfo,"Names = {}".format(extrafile.names))
    # outgrab program file from input argument "-p" or "--program"
    if parserargs.program:
        msg(oginfo,"Creating program files from -p or --program")
        programfile = createInputFile(parserargs.program, ProgramFile)
        addfilename(programfile, filebase, 0)
        addfilename(programfile, "program")
        msg(oginfo,"Names = {}".format(programfile.names))
    # One file named "scratch" for a scratch space
    scratchfile = createScratchFile("Scratch")
    addfilename(scratchfile, "scratch")
    msg(oginfo,"Creating scratch file with names: \"{}\"".format(scratchfile.names))
    msg(ogdebug,"The ifilesd dictionary at end of createInputFiles:")
    for filename, fileobject in ifilesd.items():
        msg(ogdebug,"name= {} : object = {}".format(filename, fileobject))
def createScratchFile(content):
    """Create a ScratchFile object from content and return it.

    If content carries a string ``name`` attribute (as file handles do),
    the new file is also registered under that name; otherwise no name is
    added here.
    """
    newfile = ScratchFile(content)
    msg(ogdebug, "new file object: {}".format(newfile))
    # Explicit check instead of a bare "except: pass", which also hid
    # unrelated errors (and KeyboardInterrupt).
    contentname = getattr(content, "name", None)
    if isinstance(contentname, str):
        addfilename(newfile, contentname)
    return newfile
def createInputFile(content, Inp):
    """Create an Inp object (InputFile or ProgramFile) from content.

    content is passed straight to the Inp constructor.  A blank line is
    appended to the new file, and if content carries a string ``name``
    attribute the file is also registered under that name.
    Returns the new file object.
    """
    newfile = Inp(content)
    msg(ogdebug, "new file object: {}".format(newfile))
    # add a blank line at the end of the new file (to prevent matches on
    # the last line repeating); "bottom" was set before this line is added
    newfile.addblankline()
    # Explicit check instead of a bare "except: pass", which also hid
    # unrelated errors (and KeyboardInterrupt).
    contentname = getattr(content, "name", None)
    if isinstance(contentname, str):
        addfilename(newfile, contentname)
    return newfile
def getnextfilenum():
    """Return the next free number for a standard file name.

    Looks through ifilesd for names of the form filebase + <integer>
    (e.g. "$file3") and returns the highest integer + 1, or 0 if there is
    none.  Names that start with filebase but do not end in an integer
    (e.g. "$filecopy") are ignored rather than raising ValueError.
    """
    largest = -1
    start = len(filebase)
    for key in ifilesd:
        if not key.startswith(filebase):
            continue
        try:
            oldfilenum = int(key[start:])
        except ValueError:
            continue  # user-chosen name that merely shares the prefix
        largest = max(largest, oldfilenum)
    return largest + 1
def getfilefromname(name):
    """Return the internal file object registered under name.

    Raises KeyError if name is not a key of ifilesd.
    """
    fileobj = ifilesd[name]
    return fileobj
def samevaluekeys(mykey, mydict):
    """Return the list of keys in mydict whose value equals mydict[mykey]."""
    matching = []
    for candidate, value in mydict.items():
        # the reference value is looked up per item, as before
        if value == mydict[mykey]:
            matching.append(candidate)
    return matching
def listofsamevaluekeys(mydict):
    """Group the keys of mydict by equal value.

    Returns a list of lists; each inner list holds all keys that share one
    value, in dictionary order.
    """
    seen = []
    groups = []
    for key in mydict.keys():
        if key in seen:
            continue
        group = samevaluekeys(key, mydict)
        groups.append(group)
        seen.extend(group)
    return groups
def addfilename(fileobj, name, postfix=""):
    """Register a name for fileobj.

    The name is stored as a key in the ifilesd dictionary and appended to
    fileobj.names.  If postfix is given it is appended to name (an integer
    postfix is converted to a string first), which is how standard names
    such as "$file1" are built.
    """
    suffix = str(postfix) if isinstance(postfix, int) else postfix
    fullname = name + suffix
    ifilesd[fullname] = fileobj
    fileobj.names.append(fullname)
    msg(oginfo,"Adding name {} for file object {}".format(fullname, fileobj))
def setfilename(*names):
    """Give an internal file an additional name.

    names[0] is an existing name, names[1] the new one.  The old name
    stays valid unless it is overwritten later.
    """
    target = getfilefromname(names[0])
    addfilename(target, names[1])
def stringlisttostring(stringlist, delim=" "):
    """Concatenate the strings in stringlist with delim between them.

    Empty strings are not skipped: each one still contributes a delimiter.
    The original appended delim after every item and then called
    result.rstrip() without using its return value, so the trailing
    delimiter was never removed; join() places delim only between items.
    """
    result = delim.join(stringlist)
    msg(ogdebug, "--in stringlisttostring, result= \"{}\"".format(result))
    return result
def stringtostringlist(mystring, delim="whitespace"):
    """Split mystring into fields on a regular-expression delimiter.

    delim is a regular expression, or one of the keywords "whitespace"
    (one or more whitespace characters) and "comma" (a single comma, so
    empty fields are kept).  Returns the list of field strings.
    """
    msg(ogdebug, "getting fields from string based on delimiter = {}".format(delim))
    if delim == "whitespace":
        # raw string: "\s" in a plain literal is an invalid escape sequence
        delim = r"[\s]+"
    if delim == "comma":
        delim = "[,]"  # no + so that empty fields are maintained
    stringlist = re.split(delim, mystring)
    msg(ogdebug, "stringtostringlist found {} fields".format(len(stringlist)))
    msg(ogdebug, "--list of fields:")
    msg(ogdebug, stringlist)
    return stringlist
def getslicelist(mystring, startend):
    """Cut character ranges out of mystring.

    startend is a list of (start, end) tuples; each slice runs from
    index start to index end inclusive.  Returns the list of slices.
    An empty string yields an empty list.  The first section whose end
    lies beyond the string is cut at the end of the string and stops the
    processing of any further sections.
    """
    total = len(mystring)
    msg(ogdebug,"string has {} characters in getslicelist".format(total))
    pieces = []
    if total == 0:
        return pieces
    for number, (first, last) in enumerate(startend, start=1):
        toolong = last > len(mystring)
        if toolong:
            msg(oginfo,"section {} too long in getslicelist".format(number))
            last = len(mystring)
        pieces.append(mystring[first:last+1])
        if toolong:
            break  # short circuit: later sections are not examined
    msg(ogdebug,"--in getslicelist, captured slices = {}".format(pieces))
    return pieces
def getfielddic(mystring, fieldnameslist, delim="whitespace"):
    """Split mystring into fields and return them as a dictionary.

    The stripped string is split with stringtostringlist() using delim
    (previously delim was accepted but never passed on, so the split was
    always on whitespace).  The i-th field is stored under the i-th entry
    of fieldnameslist, e.g. $field1, $field2, ...; fields beyond the end
    of fieldnameslist are dropped.
    """
    fieldlist = stringtostringlist(mystring.strip(), delim)
    fieldnames = fieldnameslist[:len(fieldlist)]
    fields = dict(zip(fieldnames, fieldlist))
    msg(ogdebug, "--fields dictionary:")
    msg(ogdebug, fields)
    return fields
def getslicedic(mystring, slicenameslist, startend):
    """Cut slices out of mystring and return them as a dictionary.

    The slices come from getslicelist(mystring, startend); the i-th slice
    is stored under the i-th entry of slicenameslist ($slice1, $slice2, ...).
    """
    pieces = getslicelist(mystring, startend)
    labels = slicenameslist[:len(pieces)]
    slices = {}
    for label, piece in zip(labels, pieces):
        slices[label] = piece
    msg(ogdebug,"--slices dictionary:")
    msg(ogdebug,slices)
    return slices
def translatefields(stringlist, fields=None, slices=None):
    """Replace field and slice designators in stringlist by their contents.

    stringlist -- list of strings; entries containing "$field" are looked
                  up in fields, entries containing "$slice" in slices
    fields     -- dictionary {"$field1": text, ...}; None means no fields
    slices     -- dictionary {"$slice1": text, ...}; None means no slices

    A designator with no entry in its dictionary is replaced by "".
    stringlist is modified in place and also returned.  Passing None (the
    default) for a dictionary no longer raises TypeError.
    """
    fieldnamebase = "$field"
    slicenamebase = "$slice"
    fields = {} if fields is None else fields
    slices = {} if slices is None else slices
    for i, mystring in enumerate(stringlist):
        if fieldnamebase in mystring:
            stringlist[i] = fields.get(mystring, "")
        if slicenamebase in mystring:
            stringlist[i] = slices.get(mystring, "")
    msg(ogdebug, "--in translatefields, final list of strings = {}".format(stringlist))
    return stringlist
def stringlistfromfields(fieldtypes, texts, slicetexts, fieldtexts, holdtexts=None):
    """Interleave items from several lists in the order given by fieldtypes.

    fieldtypes -- sequence of "text", "slice", "field" or "hold"; each entry
                  consumes the next unused item of the matching list
    texts, slicetexts, fieldtexts, holdtexts -- the item lists; holdtexts
                  defaults to an empty list (None replaces the former
                  mutable default argument)

    Returns the list of selected items.  A request for a type whose list is
    empty is skipped silently; a request beyond the end of a non-empty list
    is skipped with a message.  Unknown types are ignored.
    """
    if holdtexts is None:
        holdtexts = []
    kinds = ("text", "slice", "field", "hold")
    pools = {"text": texts, "slice": slicetexts, "field": fieldtexts, "hold": holdtexts}
    used = dict.fromkeys(kinds, 0)
    outstringlist = []
    for fieldtype in fieldtypes:
        if fieldtype not in kinds:
            continue
        pool = pools[fieldtype]
        available = len(pool)
        if available == 0:
            continue
        if used[fieldtype] < available:
            outstringlist.append(pool[used[fieldtype]])
            used[fieldtype] += 1
        else:
            msg(ogmain, "Ignoring {}: only {} available.".format(fieldtype, available))
    return outstringlist
def combinequoted(inlist):
    """Merge the tokens enclosed by a pair of double quotes into one token.

    The opening quote is the first token that starts with a double quote,
    the closing quote the last token that ends with one; they may be quote
    characters standing alone or attached to a word.  If the closing token
    comes after the opening token, everything from one to the other is
    joined with single spaces, the two quote characters are removed, and a
    new list is returned (the two quote tokens of inlist are also stripped
    in place).  Otherwise inlist is returned unchanged.
    """
    msg(ogdebug,"In combinequoted, initial tokens = {} ".format(inlist))
    quotechar = '"'
    msg(ogdebug,"In combinequoted, quotechar = {} ".format(quotechar))
    # first token opening with a quote
    ibegin = next((pos for pos, token in enumerate(inlist) if token.startswith(quotechar)), -1)
    # last token closing with a quote
    iend = -1
    for pos, token in enumerate(inlist):
        if token.endswith(quotechar):
            iend = pos
    mybegin = ibegin >= 0
    myend = iend >= 0
    if mybegin:
        msg(ogdebug,"In combinequoted, found token with beginning quote: {} ".format(inlist[ibegin]))
    if myend:
        msg(ogdebug,"In combinequoted, found token with ending quote: {} ".format(inlist[iend]))
    if not (mybegin and myend and iend > ibegin):
        msg(ogdebug,"In combinequoted, no change to tokens because no pair of suitable quote characters")
        return inlist
    # drop the quote characters themselves
    inlist[ibegin] = inlist[ibegin][1:]
    inlist[iend] = inlist[iend][0:-1]
    merged = " ".join(inlist[ibegin:iend+1])
    outlist = inlist[:ibegin] + [merged] + inlist[iend+1:]
    msg(ogdebug,"In combinequoted, final tokens = {} ".format(outlist))
    return outlist
def substitute(pattern, repl, instring, count=1):
    """Replace up to count matches of regular expression pattern by repl.

    Returns the resulting string, or None if the string did not change.
    count is handed to re.sub() unchanged, so count=0 replaces every match.
    """
    # count is passed by keyword: passing it positionally to re.sub is
    # deprecated and easily confused with the flags argument.
    outstring = re.sub(pattern, repl, instring, count=count)
    if instring == outstring:
        outstring = None
        msg(oginfo, "no substitute performed in substitute.")
    else:
        msg(oginfo, "in substitute, replaced line")
        msg(ogdebug, "--old: \"{}\"".format(instring))
        msg(ogdebug, "--new: \"{}\"".format(outstring))
    return outstring
def matchnextcopy(infile, outfile, mystring, *, nfind=1, increment=0, nlines=1):
    """Copy lines around matches of mystring from infile to outfile.

    Delegates the search to infile.matchnextreturn(mystring, nfind,
    increment, nlines) and, if that returns any lines, appends them to
    outfile with addlines().
    """
    found = infile.matchnextreturn(mystring, nfind, increment, nlines)
    if not found:
        return
    msg(oginfo,"found match in matchnextcopy")
    msg(ogdebug,found)
    outfile.addlines(found)
def copyline(infile, outfile):
    """Append the current line of infile to outfile."""
    msg(oginfo,"copying line from input to output")
    outfile.addline(infile.getline())
def copylines(infile, outfile, nlines=1):
    """Append the lines returned by infile.getlines(nlines) to outfile."""
    msg(ogverbose,"copying {} lines from input to output".format(nlines))
    outfile.addlines(infile.getlines(nlines))
def copyuntilmatch(infile, outfile, mystring, *, start=False, end=False):
    """Copy lines from the current line up to the line matching mystring.

    start and end are passed on to infile.getuntilmatch() and control
    whether the first and last line are included (both excluded by
    default).  Returns the end position reported by getuntilmatch().
    """
    msg(oginfo,"copying all lines until {} matched from input to output".format(mystring))
    result = infile.getuntilmatch(mystring, start=start, end=end)
    copied, endpos = result
    outfile.addlines(copied)
    return endpos
def copysection(infile, outfile, start, end):
    """Copy the lines from start to end (inclusive) of infile to outfile.

    Returns the end position reported by infile.getsection().
    """
    msg(oginfo,"copying section ( {} to {} ) from input to output".format(start,end))
    section = infile.getsection(start, end)
    copied, endposition = section
    outfile.addlines(copied)
    return endposition
def initializenameslist(namebase, maxnum):
    """Return the names namebase1, namebase2, ..., namebase<maxnum>.

    e.g. namebase "$field" and maxnum 2 give ["$field1", "$field2"].
    maxnum should be larger than the number of names expected to be needed.
    """
    return [namebase + str(number) for number in range(1, maxnum + 1)]
def parameterstartswithkey(param, default, mydict):
    """Look up param by abbreviated key.

    Returns the value of the first key in mydict that param starts with
    (e.g. key "dir" for param "direction"), or default if no key matches.
    """
    for prefix, mapped in mydict.items():
        if param.startswith(prefix):
            return mapped
    return default
def removesubstring(mystring, substring, occurrence=1):
    """Remove the occurrence-th appearance of substring from mystring.

    Occurrences are counted from the left without overlap.  If there are
    fewer than occurrence of them, a message is issued and mystring is
    returned unchanged.
    """
    seen = 0
    cursor = 0
    hit = mystring.find(substring, cursor)
    while hit >= 0:
        seen += 1
        cursor = hit + len(substring)
        if seen == occurrence:
            return mystring[:hit] + mystring[cursor:]
        hit = mystring.find(substring, cursor)
    msg(ogmain,"In removesubstring, {} occurrences of {} not found".format(occurrence,substring))
    return mystring
def replacesubstring(mystring, substring, replacement, occurrence=1):
    """Replace the occurrence-th appearance of substring by replacement.

    Occurrences are counted from the left without overlap.  If there are
    fewer than occurrence of them, a message is issued and mystring is
    returned unchanged.
    """
    seen = 0
    cursor = 0
    hit = mystring.find(substring, cursor)
    while hit >= 0:
        seen += 1
        cursor = hit + len(substring)
        if seen == occurrence:
            return mystring[:hit] + replacement + mystring[cursor:]
        hit = mystring.find(substring, cursor)
    msg(ogmain,"In replacesubstring, {} occurrences of {} not found".format(occurrence,substring))
    return mystring
class InternalFile:
    """Base class for the in-memory representation of a file.

    Attributes set by __init__:
      lines  -- list of the file's lines
      length -- number of lines
      names  -- list of names the file is known by
      type   -- string naming the kind of file
    """

    def __init__(self):
        self.lines = []
        self.length = 0
        self.names = []
        self.type = "InternalFile"
        msg(ogdebug, "initializing empty InternalFile")

    def checkstartposition(self, start):
        """Return start, raised to 0 (with a message) if it is negative."""
        if start < 0:
            start = 0
            msg(ogdebug, "position past beginning of file, reset to first line")
        return start

    def checkendposition(self, end):
        """Return end, lowered to length - 1 (with a message) if it is past the end."""
        if end >= self.length:
            end = self.length - 1
            msg(ogdebug, "position past end of file, reset to end")
        return end

    def addblankline(self):
        """Append an empty line and increase length by one."""
        self.lines.append("")
        self.length += 1

    def writefile(self, fileh=None):
        """Write all lines to fileh, one per line.

        fileh defaults to the current sys.stdout.  It is resolved at call
        time; a ``fileh=sys.stdout`` default would be bound once when the
        class is defined and ignore any later redirection of sys.stdout.
        """
        if fileh is None:
            fileh = sys.stdout
        msg(oginfo, "writing {} file {}".format(self.type, self.names))
        msg(oginfo, "-----------------------------------------------")
        for line in self.lines:
            print(line, file=fileh)
        msg(ogmain, "finished writing")
class OutputFile(InternalFile):
    """List of lines that is (usually) eventually sent to stdout.

    On creation the file registers itself under the name "output".
    """

    def __init__(self):
        InternalFile.__init__(self)
        self.type = "OutputFile"
        addfilename(self, "output")
        msg(ogdebug, "Initializing output file.")
        msg(ogdebug, "Names = {}".format(self.names))

    def addline(self, mystring, printblank=False):
        """Append mystring as a new last line.

        An empty string or None is only added when printblank is true;
        None is then stored as "" so that it is not later written out as
        the text "None".
        """
        if mystring is None or mystring == "":
            if not printblank:
                msg(ogdebug, "-- not adding blank line in addline")
                return
            mystring = ""
        msg(ogdebug, "--adding line \"{}\" to {} file {}".format(mystring, self.type, self.names))
        self.lines.append(mystring)
        self.length += 1

    def addlines(self, mylines, printblank=False):
        """Append the list mylines; an empty list is only reported when
        printblank is false."""
        if mylines == [] and not printblank:
            msg(ogdebug, "-- not adding empty lines in addlines")
            return
        msg(ogdebug, "--adding lines \"{}\" to {} file {}".format(mylines, self.type, self.names))
        self.lines.extend(mylines)
        self.length += len(mylines)

    def joinlastlines(self, joiner=""):
        """Join the last two lines (with joiner between them) into one.

        Does nothing but report if the file has fewer than two lines
        (previously this raised IndexError).
        """
        msg(ogdebug, "In joinlastlines, file length = {}:".format(self.length))
        if len(self.lines) < 2:
            msg(ogmain, "In joinlastlines, fewer than two lines: nothing joined")
            return
        msg(ogdebug, "Last two lines are:")
        msg(ogdebug, "{}".format(self.lines[-2]))
        msg(ogdebug, "{}".format(self.lines[-1]))
        self.lines[-2] = self.lines[-2] + joiner + self.lines[-1]
        msg(ogdebug, "New line is: {}".format(self.lines[-2]))
        del self.lines[-1]
        self.length = len(self.lines)
        msg(ogdebug, "In joinlastlines, file length = {}:".format(self.length))
        self.current = self.length - 1

    def switchlastlines(self):
        """Swap the last two lines.

        Does nothing but report if the file has fewer than two lines
        (previously this raised IndexError).
        """
        msg(ogdebug, "In switchlastlines, file length = {}:".format(self.length))
        if len(self.lines) < 2:
            msg(ogmain, "In switchlastlines, fewer than two lines: nothing switched")
            return
        msg(ogdebug, "Last two lines are:")
        msg(ogdebug, "{}".format(self.lines[-2]))
        msg(ogdebug, "{}".format(self.lines[-1]))
        self.lines[-2], self.lines[-1] = self.lines[-1], self.lines[-2]
        msg(ogdebug, "Last two lines are:")
        msg(ogdebug, "{}".format(self.lines[-2]))
        msg(ogdebug, "{}".format(self.lines[-1]))
        msg(ogdebug, "In switchlastlines, file length = {}:".format(self.length))

    def replacelastline(self, newtext):
        """Replace the last line with newtext.

        Does nothing but report if the file is empty (previously this
        raised IndexError).
        """
        if not self.lines:
            msg(ogmain, "In replacelastline, file is empty: nothing replaced")
            return
        msg(ogdebug, "In replacelastline before replacement, last line is:")
        msg(ogdebug, "{}".format(self.lines[-1]))
        self.lines[-1] = newtext
        msg(ogdebug, "In replacelastline after replacement, last line is:")
        msg(ogdebug, "{}".format(self.lines[-1]))
class InputFile(InternalFile):
# InputFile is object holding an input file
def __init__(self, content, start=None, end=None):
    """Build an input file from content.

    content -- a list of strings (one per line), a single string (stored
               as one line), or otherwise an object handed to
               getinputfile() (a file handle)
    start, end -- optional first and last line index (inclusive) to keep
    """
    InternalFile.__init__(self)
    self.type = "InputFile"
    if isinstance(content, list):
        self.loadinputfilefromstringlist(content, start, end)
    elif isinstance(content, str):
        self.loadinputfilefromstringlist([content], start, end)
    else:
        self.getinputfile(content, start, end)
        # getinputfile() accepts None/0 and handles without a name
        # attribute, so do not assume content.name exists here.
        msg(ogmain, "reading {}".format(getattr(content, "name", content)))
    self.initializepositions()
    self.fieldnameslist = initializenameslist("$field", 100)
    self.slicenameslist = initializenameslist("$slice", 100)
    self.holdnameslist = initializenameslist("$hold", 100)
def loadinputfilefromstringlist(self, mystringlist, start=None, end=None):
    """Load all or part of mystringlist as the content of this file.

    Keeps the entries whose index i satisfies start <= i <= end; a bound
    of None means no limit on that side.  Trailing whitespace is removed
    from every kept line, and length is updated.
    """
    self.lines = [
        text.rstrip()
        for index, text in enumerate(mystringlist)
        if (start is None or index >= start) and (end is None or index <= end)
    ]
    self.length = len(self.lines)
def getinputfile(self, fh, start=None, end=None):
    """Read lines from file handle fh into self.lines.

    Does nothing if fh is None, 0 or otherwise false.  Keeps the lines
    whose index i satisfies start <= i <= end; a bound of None means no
    limit on that side.  Trailing whitespace is removed from every kept
    line.  fh is used as a context manager and is therefore closed
    afterwards.
    """
    if not fh or fh == 0:
        return
    with fh as source:
        self.lines = [
            text.rstrip()
            for index, text in enumerate(source)
            if (start is None or index >= start) and (end is None or index <= end)
        ]
    self.length = len(self.lines)
def initializepositions(self):
    """Set the current line to 0 and create the positions dictionary.

    positions starts with "current" (0), "top" (0) and "bottom"
    (length - 1); "top" and "bottom" are recorded as reserved names.
    """
    self.current = 0
    self.positions = {
        "current": self.current,
        "top": 0,
        "bottom": self.length - 1,
    }
    self.reserved_positions = ["top", "bottom"]
def updatecurrent(self, newline):
    """Make newline the current line and return the resulting line number.

    newline is resolved by interpretposition(), which also pulls it back
    inside the file if it lies before the first or after the last line.
    """
    lineno = self.interpretposition(newline)
    self.current = lineno
    self.positions["current"] = lineno
    return lineno
def goto(self, myposition):
    """Synonym for updatecurrent(): set the current line to myposition."""
    newcurrent = self.updatecurrent(myposition)
    return newcurrent
def interpretposition(self, myposition):
    """Convert a position into a valid line number.

    myposition may be an integer, a string holding an integer, or a label
    stored in self.positions.  The result is limited to the range checked
    by checkstartposition() and checkendposition().

    Raises KeyError for an unknown label; exits the program if myposition
    is neither a string nor an integer.
    """
    if isinstance(myposition, str):
        try:  # case where myposition is a string version of a number
            myposition = int(myposition)
        except ValueError:  # not a number: treat it as a label
            myposition = self.positions[myposition]
    elif not isinstance(myposition, int):
        msg(ogmain, "stopping: position must be string (label) or integer (line no.)")
        sys.exit("stopping: position must be string (label) or integer (line no.)")
    myposition = self.checkstartposition(myposition)
    myposition = self.checkendposition(myposition)
    return myposition
def interpretpositionpair(self, start, end):
    """Resolve a (start, end) pair of positions to line numbers.

    A start of None means line 0, an end of None means length - 1.
    """
    first = 0 if start is None else start
    last = self.length - 1 if end is None else end
    return self.interpretposition(first), self.interpretposition(last)
def step(self, increment=1):
    """Move the current line by increment lines (negative moves backwards).

    Returns the new current line number.
    """
    return self.updatecurrent(self.current + increment)
def back(self, increment=1):
    """Move the current line back by increment lines.

    A negative increment is reported and its sign is flipped.  Returns the
    new current line number.
    """
    distance = increment
    if distance < 0:
        distance = -distance  # only allow positive increment in back
        msg(oginfo,"Only positive increments allowed in back: set to positive")
    self.current -= distance
    return self.updatecurrent(self.current)
def remember(self, myposition):
    """Store the current line number under the label myposition.

    Reserved labels (see reserved_positions) are not overwritten.
    """
    msg(oginfo,"--remembering current line as {}".format(myposition))
    if myposition not in self.reserved_positions:
        self.positions[myposition] = self.current
        return
    msg(ogmain,"Unable to overwrite reserved position {}.".format(myposition))
def forget(self, myposition):
    """Delete the label myposition from the positions dictionary.

    Reserved labels (see reserved_positions) are kept.  Raises KeyError if
    the label is not present.
    """
    msg(ogdebug,"--forgetting line position {}".format(myposition))
    if myposition not in self.reserved_positions:
        del self.positions[myposition]
        return
    msg(ogmain,"Unable to forget reserved position {}.".format(myposition))
def printcurrent(self):
    """Print the current line with print() (it is not added to any OutputFile)."""
    msg(oginfo,"printing current line to stout (not to OutputFile)")
    currentline = self.lines[self.current]
    print(currentline)
def getline(self):
    """Return the text of the current line."""
    currentline = self.lines[self.current]
    return currentline
def getlines(self,nlines):
    # Return a slice of self.lines that starts at the current line and stops
    # just before the position self.interpretposition gives for
    # current + nlines (i.e. nlines lines when that position is unchanged).
    # NOTE(review): the slice end is exclusive, so if interpretposition caps
    # its result at the last line index the final line cannot be returned
    # from here -- confirm against checkendposition.
    stop = self.interpretposition(self.current + nlines)
    begin = self.current
    return self.lines[begin:stop]
def getsection(self,start,end):
    # Return the lines from start to end, inclusive, together with the
    # resolved end position, as a (lines, end) tuple. Both arguments are
    # resolved through self.interpretposition first.
    first = self.interpretposition(start)
    last = self.interpretposition(end)
    selected = self.lines[first:last + 1]
    return selected, last
def getuntilmatch(self,mystring,*,start=False,end=False):
    # Collect the lines between the current line and the line the search
    # for mystring stops on (self.match with nfind=1, searching downwards).
    # The starting line is included only when start is True, and the line
    # the search stopped on only when end is True.
    # Returns whatever self.getsection returns for that range.
    startpos = self.current if start else self.current + 1
    # match moves self.current, so the end position must be read afterwards
    self.match(mystring,nfind=1,dir=1)
    endpos = self.current if end else self.current - 1
    return self.getsection(startpos,endpos)
def getfields(self,delim="whitespace"):
    # Split the current line into fields on delim and return the result of
    # getfielddic, which is given the line, self.fieldnameslist (the field
    # key names, e.g. $field1, $field2) and the delimiter.
    current_line = self.lines[self.current]
    return getfielddic(current_line,self.fieldnameslist,delim)
def getslices(self,startend):
    # Cut column slices out of the current line and return the result of
    # getslicedic, which is given the line, self.slicenameslist and startend.
    # e.g. to get columns 2-5 as $slice1 and 8-13 as $slice2,
    # startend = [(2,5),(8,13)]
    current_line = self.lines[self.current]
    return getslicedic(current_line,self.slicenameslist,startend)
def match(self,mystring,*,nfind=1,dir=1):
    # Starting with the current line, search in direction dir
    # (dir >= 0: down towards the end of the file, dir < 0: up towards the top)
    # for nfind lines matching the regular expression mystring (re.search),
    # moving the current line (self.goto) to every matching line on the way.
    # Return values:
    #   nfind : the nfind-th match was found; self.matchflag is set True
    #   -1    : the last (or first) line of the file was reached without
    #           completing the search; current line is moved there
    #   0     : the loop ran out without hitting either case above
    # self.matchflag is reset to False at the start of every call.
    # NOTE(review): indentation was reconstructed; the two elif branches are
    # assumed to pair with "if searchObj" (i.e. run for non-matching lines).
    dir = int(dir)
    mystart = self.current
    # normalise dir to a range() step of +1/-1 and pick the exclusive end bound
    if dir >= 0:
        dir = 1
        myend = self.length
    else:
        dir = -1
        myend = -1
    msg(ogdebug,"--in match, dir= {}".format(dir))
    msg(ogdebug,"--in match, searching for \"{}\" from line {} to {}".format(mystring,mystart,myend-1))
    nfound = 0
    self.matchflag = False
    msg(ogdebug,"--in match, setting matchflag to {}".format(self.matchflag))
    for lineno in range(mystart,myend,dir):
        searchObj = re.search(mystring,self.lines[lineno])
        if searchObj:
            nfound += 1
            # the current line follows every hit, not only the final one
            self.goto(lineno)
            msg(oginfo,"found {} match of \"{}\" out of {} on line {}:".format(nfound,mystring,nfind,lineno))
            msg(oginfo,self.lines[lineno])
            msg(ogverbose,"found returned search object \"{}\"".format(searchObj))
            if nfind == nfound:
                self.matchflag = True
                msg(ogdebug,"--in match, setting matchflag to {}".format(self.matchflag))
                return nfind
        # non-matching last line, and the search did not start on it
        elif mystart != self.length - 1 and lineno == self.length -1:
            self.matchflag = False
            msg(ogdebug,"--in match, setting matchflag to {}".format(self.matchflag))
            msg(oginfo,"reached end of file during match.")
            msg(oginfo,"found only {} out of {} matches of \"{}\"".format(nfound,nfind,mystring))
            self.goto(self.length - 1)
            return -1
        # non-matching first line, and the search did not start on it
        elif mystart != 0 and lineno == 0:
            self.matchflag = False
            msg(ogdebug,"--in match, setting matchflag to {}".format(self.matchflag))
            msg(oginfo,"reached beginning of file during match.")
            msg(oginfo,"found only {} out of {} matches of \"{}\"".format(nfound,nfind,mystring))
            self.goto(0)
            return -1
    return 0
def matchnextreturn(self,mystring,nfind=1,increment=0,nlines=1):
    # For up to nfind instances: search downwards for mystring (self.match),
    # step increment lines from the match (sign gives the direction), collect
    # nlines lines starting there, then step nlines lines past them.
    # Searching and capturing always run in the positive direction.
    # Default: search for mystring once and return the matching line.
    # Special value nfind="all": repeat up to self.length times.
    # Returns a flat list of the collected lines; the loop stops early as
    # soon as self.match reports anything other than a successful find.
    if nfind == "all":
        nfind = self.length
    mylines = []
    for i in range(nfind):
        result = self.match(mystring,nfind=1,dir=1)
        if result == 0:
            msg(oginfo,"match {} of \"{}\" not found in matchnextreturn.".format(i+1,mystring))
            break
        elif result == -1:
            msg(oginfo,"reached end or begin of file in matchnextreturn")
            break
        elif result < -1:
            msg(oginfo,"something weird in match called from matchnextreturn")
            break
        if self.current + nlines > self.length:
            msg(oginfo,"Not enough lines in file for return section in matchnextreturn")
            # extend (not append) so the result stays a flat list of lines,
            # the same shape the normal path below produces
            mylines.extend(self.getlines(nlines))
            self.step(nlines)
            break
        if increment == 0 and nlines == 1:
            mylines.append(self.getline())
        else:
            self.step(increment)
            mylines.extend(self.getlines(nlines))
        # move past the captured lines so the next search starts after them
        self.step(nlines)
    return mylines
def empty(self):
    # Empty the file held in memory by deleting everything between the
    # "top" and "bottom" labels.
    first_label, last_label = "top", "bottom"
    self.deleteinputsection(first_label, last_label)
def deleteinputsection(self,position1,position2):
    # Delete the lines from position1 to position2 (inclusive) from the
    # in-memory copy of the input file, e.g. to save memory or to speed up
    # later searches. Both positions go through self.interpretposition.
    # If nothing is left, a single empty line is kept, and the remembered
    # labels are refreshed through self.updatelabels.
    msg(ogdebug,"--deleteinputsection positions 1 & 2: {} {} ".format(position1,position2))
    start = self.interpretposition(position1)
    end = self.interpretposition(position2)
    msg(oginfo,"deleting input from line {} to line {} in {} ".format(start,end,self.names))
    del self.lines[start:end+1]
    if not self.lines:
        # never leave the line list completely empty
        self.lines.append("")
    self.updatelabels(start,end)
def updatelabels(self,start,end):
    # Update the remembered labels after lines start..end (inclusive) have
    # been deleted from the in-memory file:
    #   - labels inside the deleted section are forgotten (self.forget)
    #   - labels after the section are shifted up by the number of lines removed
    #   - the current line is reset to 0 and "top"/"bottom" are recomputed
    # An inclusive range start..end holds end - start + 1 lines; shifting by
    # only (end - start) would leave every later label one line too low.
    # The max() keeps the shift at zero for an inverted range, where the
    # slice delete removes nothing.
    removed = max(end - start + 1, 0)
    # iterate over a snapshot of the keys because forget() deletes entries
    for mykey in list(self.positions):
        z = self.positions[mykey]
        if start <= z <= end:
            msg(oginfo,"deleting remembered position {} with value {}".format(mykey,z))
            self.forget(mykey)
        elif z > end:
            z = z - removed
            msg(oginfo,"resetting remembered position {} to {}".format(mykey,z))
            self.positions[mykey]=z
    msg(oginfo,"resetting current position to top of file")
    self.updatecurrent(0)
    msg(oginfo,"resetting \"top\" and \"bottom\" remembered labels")
    self.length = len(self.lines)
    self.positions["top"] = 0
    finalline = self.lines[self.length - 1]
    # NOTE(review): for a file that is a single empty line this gives
    # bottom = -1 -- confirm that is what callers expect.
    if finalline == "":
        self.positions["bottom"] = self.length - 2 # assume xtra blank line is there
    else:
        self.positions["bottom"] = self.length - 1 # assume xtra blank line is not there
    return
class ScratchFile(InputFile,OutputFile):
    # A ScratchFile has the methods and attributes of both InputFile and OutputFile
    # It can be added to, removed from, and has named line labels
    # Initialize it as an InputFile. Just use a blank string if nothing else.
    def __init__(self,content,start=None,end=None):
        # Only InputFile.__init__ is run here (content, start and end are
        # passed straight through); OutputFile.__init__ is not called.
        InputFile.__init__(self,content,start,end)
        self.type = "ScratchFile"
        # register this file under the name "scratch"
        addfilename(self,"scratch")
        msg(ogdebug,"Initializing scratch file.")