-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormalize.go
576 lines (529 loc) · 20.5 KB
/
normalize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
// The MIT License (MIT)
//
// Copyright (c) 2016, 2017 Fabian Wenzelmann
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
package goel
import (
"fmt"
"log"
"reflect"
"sync"
)
// PhaseOneResult is a helper type that stores certain information about
// normalization in phase 1.
//
// The idea is that each formula that gets normalized uses its own result
// and stores all information inside.
//
// It is not save for concurrent use.
//
// The following information is stored:
//
// Distributor is used to create new names, so it should be the same in
// all objects of this type when normalizing a set of formulae.
// Waiting is a "queue" that stores all elements that need further processing,
// that means it may be that additional rules can be applied in phase 1.
// So when replacing a formula with other formulae just add the new formulae
// to Waiting.
// IntermediateRes is the set of all formulae that must not be processed in
// phase 1 but must be used in phase 2.
// The other intermediate results are used for formulae already in normal form:
// We don't have to check them again in phase 2 so we can already add them here.
type PhaseOneResult struct {
Distributor *IntDistributor
Waiting []*GCIConstraint
IntermediateRes []*GCIConstraint
// for CIs already in normal form we can just add them already for phase 2
IntermediateCIs []*NormalizedCI
IntermediateCILeft []*NormalizedCILeftEx
IntermediateCIRight []*NormalizedCIRightEx
}
func NewPhaseOneResult(distributor *IntDistributor) *PhaseOneResult {
return &PhaseOneResult{Distributor: distributor}
}
// Union merges two results.
// That is it builds the union of all intermediate results.
// This is useful when combining results from different normalizations.
// The combined results are stored in res.
func (res *PhaseOneResult) Union(other *PhaseOneResult) {
var wg sync.WaitGroup
wg.Add(4)
go func() {
defer wg.Done()
res.IntermediateRes = append(res.IntermediateRes, other.IntermediateRes...)
}()
go func() {
defer wg.Done()
res.IntermediateCIs = append(res.IntermediateCIs, other.IntermediateCIs...)
}()
go func() {
defer wg.Done()
res.IntermediateCILeft = append(res.IntermediateCILeft, other.IntermediateCILeft...)
}()
go func() {
defer wg.Done()
res.IntermediateCIRight = append(res.IntermediateCIRight, other.IntermediateCIRight...)
}()
wg.Wait()
}
// PhaseTwoResult stores information about normalization in phase 2.
// The idea is the same as for PhaseOneResult, so see documentation there.
type PhaseTwoResult struct {
Distributor *IntDistributor
Waiting []*GCIConstraint
IntermediateCIs []*NormalizedCI
IntermediateCILeft []*NormalizedCILeftEx
IntermediateCIRight []*NormalizedCIRightEx
}
func NewPhaseTwoResult(distributor *IntDistributor) *PhaseTwoResult {
return &PhaseTwoResult{Distributor: distributor}
}
func (res *PhaseTwoResult) Union(other *PhaseTwoResult) {
var wg sync.WaitGroup
wg.Add(3)
go func() {
defer wg.Done()
res.IntermediateCIs = append(res.IntermediateCIs, other.IntermediateCIs...)
}()
go func() {
defer wg.Done()
res.IntermediateCILeft = append(res.IntermediateCILeft, other.IntermediateCILeft...)
}()
go func() {
defer wg.Done()
res.IntermediateCIRight = append(res.IntermediateCIRight, other.IntermediateCIRight...)
}()
wg.Wait()
}
// NormalizeStepPhaseOne performs one step of the normalization in phase 1.
// It tests what form the gci has and adds new formulae to the right slice
// of the intermediate result. That is it appends formulae to waiting
// or one of the intermediate results.
//
// Phase one means to apply the rules NF2 - NF4 (NF1 is applied somewhere else).
func NormalizeStepPhaseOne(gci *GCIConstraint, intermediate *PhaseOneResult) {
// do a case distinction on lhs. Check if gci is already in normal form, if
// a rule can be applied in phase 1 or if no rule can be applied in phase 1
switch lhs := gci.C.(type) {
default:
log.Printf("Unexepcted type in TBox normalization phase 1: %v\n", reflect.TypeOf(lhs))
case NamedConcept, NominalConcept, ConcreteDomainExtension, TopConcept:
// the lhs is in BCD, check if rhs is in BCD or false
switch rhs := gci.D.(type) {
case NamedConcept, NominalConcept, ConcreteDomainExtension, TopConcept, BottomConcept:
// lhs is in BCD and rhs is in BCD or false, so it is already in
// normal form
normalized := NewNormalizedCI(lhs, nil, rhs)
intermediate.IntermediateCIs = append(intermediate.IntermediateCIs, normalized)
case ExistentialConcept:
// check if the rhs is of the form ∃r.C2 where C2 is in BCD
if rhs.C.IsInBCD() {
// in normal form, add it
normalized := NewNormalizedCIRightEx(lhs, rhs.R, rhs.C)
intermediate.IntermediateCIRight = append(intermediate.IntermediateCIRight, normalized)
} else {
// not in normal form
intermediate.IntermediateRes = append(intermediate.IntermediateRes, gci)
}
default:
// no rule applicable in phase one and not in normal form
intermediate.IntermediateRes = append(intermediate.IntermediateRes, gci)
}
case BottomConcept:
// rule NF4: Nothing to be done
case Conjunction:
// there are four cases here:
// gci is of the form C1 ⊓ C2 ⊑ D (already in normal form)
// or rule NF2 can be applied that is one of the concepts on the lhs
// is in BCD and the ohter is not
// or neither of these cases, in this case no rule can be applied
firstBCD := lhs.C.IsInBCD()
secondBCD := lhs.D.IsInBCD()
switch {
case firstBCD && secondBCD && gci.D.IsInBCD():
// in normal form, so add it
normalized := NewNormalizedCI(lhs.C, lhs.D, gci.D)
intermediate.IntermediateCIs = append(intermediate.IntermediateCIs, normalized)
case !firstBCD:
// apply rule NF2
newName := NewNamedConcept(intermediate.Distributor.Next())
newConjunction := NewConjunction(lhs.D, newName)
first := NewGCIConstraint(lhs.C, newName)
second := NewGCIConstraint(newConjunction, gci.D)
intermediate.Waiting = append(intermediate.Waiting, first, second)
case !secondBCD:
// apply rule NF2
newName := NewNamedConcept(intermediate.Distributor.Next())
newConjunction := NewConjunction(lhs.C, newName)
first := NewGCIConstraint(lhs.D, newName)
second := NewGCIConstraint(newConjunction, gci.D)
intermediate.Waiting = append(intermediate.Waiting, first, second)
default:
// no rule can be applied in phase one
intermediate.IntermediateRes = append(intermediate.IntermediateRes, gci)
}
case ExistentialConcept:
// try to apply rule NF3, this is only possible the concept of the
// existential concept is not in BCD
if lhs.C.IsInBCD() {
if BCDOrFalse(gci.D) {
// in normal form, append to result
normalized := NewNormalizedCILeftEx(lhs.R, lhs.C, gci.D)
intermediate.IntermediateCILeft = append(intermediate.IntermediateCILeft, normalized)
} else {
// can't apply rule, add for further processing
intermediate.IntermediateRes = append(intermediate.IntermediateRes, gci)
}
} else {
// apply rule and add new elements
newName := NewNamedConcept(intermediate.Distributor.Next())
first := NewGCIConstraint(lhs.C, newName)
newExistential := NewExistentialConcept(lhs.R, newName)
second := NewGCIConstraint(newExistential, gci.D)
intermediate.Waiting = append(intermediate.Waiting, first, second)
// TODO(Fabian): Can we append second to the IntermediateRes already?
// it should never be affected again in phase one?
}
}
}
// PhaseOneSingle runs phase 1 of the normalization for a given gci.
// That is it exhaustively applies the rules NF2 - NF4 and returns the
// result.
func PhaseOneSingle(gci *GCIConstraint, distributor *IntDistributor) *PhaseOneResult {
res := NewPhaseOneResult(distributor)
res.Waiting = []*GCIConstraint{gci}
for len(res.Waiting) > 0 {
n := len(res.Waiting)
next := res.Waiting[n-1]
res.Waiting[n-1] = nil
res.Waiting = res.Waiting[:n-1]
NormalizeStepPhaseOne(next, res)
}
return res
}
// NormalizeStepPhaseTwo performs one step of the normalization in phase 2.
// It tests what form the gci has and adds new formulae to the right slice
// of the intermediate result. That is it appends formulae to waiting
// or one of the intermediate results.
//
// Phase one means to apply the rules NF5 - NF7.
func NormalizeStepPhaseTwo(gci *GCIConstraint, intermediate *PhaseTwoResult) {
// again a analyze what form we have
// check which of the rules of phase two can be applied, if none can be
// applied gci must already be in normal form
if !gci.C.IsInBCD() && !gci.D.IsInBCD() {
// apply NF5
newName := NewNamedConcept(intermediate.Distributor.Next())
first := NewGCIConstraint(gci.C, newName)
second := NewGCIConstraint(newName, gci.D)
intermediate.Waiting = append(intermediate.Waiting, first, second)
return
}
switch rhs := gci.D.(type) {
case ExistentialConcept:
if rhs.C.IsInBCD() {
// add result, now lhs must be in bcd, otherwise there was some mistake...
// TODO remove this test once everything has been tested 1000 times
if !gci.C.IsInBCD() {
log.Printf("Found existential restriction on RHS, but the LHS is not in BCD (type %v) in normalization phase 2. MISTAKE\n",
reflect.TypeOf(gci.C))
return
} else {
// everything ok, add the result
normalized := NewNormalizedCIRightEx(gci.C, rhs.R, rhs.C)
intermediate.IntermediateCIRight = append(intermediate.IntermediateCIRight, normalized)
}
} else {
// apply NF6
newName := NewNamedConcept(intermediate.Distributor.Next())
newExistential := NewExistentialConcept(rhs.R, newName)
first := NewGCIConstraint(gci.C, newExistential)
second := NewGCIConstraint(newName, rhs.C)
intermediate.Waiting = append(intermediate.Waiting, first, second)
}
case Conjunction:
// aply NF7
first := NewGCIConstraint(gci.C, rhs.C)
second := NewGCIConstraint(gci.C, rhs.D)
intermediate.Waiting = append(intermediate.Waiting, first, second)
default:
// none of the rules can be applied in phase two, so it must be in normal
// form and we can simply add the result
// check for the type of the normal form
// first case: lhs is in BCD and rhs is in BCD or false
if gci.C.IsInBCD() && BCDOrFalse(gci.D) {
normalized := NewNormalizedCI(gci.C, nil, gci.D)
intermediate.IntermediateCIs = append(intermediate.IntermediateCIs, normalized)
return
}
// second case: lhs is a concjunction, both C1 and C2 (lhs) must be in BCD
// and D (rhs) must be in BCD or false
if lhs, ok := gci.C.(Conjunction); ok && BCDOrFalse(gci.D) {
// now both parts of the conjunction must be in BCD, check for this
if lhs.C.IsInBCD() && lhs.D.IsInBCD() {
normalized := NewNormalizedCI(lhs.C, lhs.D, rhs)
intermediate.IntermediateCIs = append(intermediate.IntermediateCIs, normalized)
return
} else {
fmt.Printf("Normalization phase two: Found conjunction on LHS, but its parts are not both in the BCD, types: %v and %v\n",
reflect.TypeOf(lhs.C), reflect.TypeOf(lhs.D))
return
}
}
// third case: existential on rhs, this must not be checked because we
// checked it before
// foruth case: existential on lhs, the concept must be in BCD and the
// rhs must be in BCD or false
if lhs, ok := gci.C.(ExistentialConcept); ok && BCDOrFalse(gci.D) {
// now check if condition is satisfied again
if lhs.C.IsInBCD() {
// TODO bad, don't remeber why...
normalized := NewNormalizedCILeftEx(lhs.R, lhs.C, gci.D)
intermediate.IntermediateCILeft = append(intermediate.IntermediateCILeft, normalized)
return
} else {
log.Printf("Normalization phase two: Found existential on LHS, but its concept is not in BCD (type %v): MISTAKE\n",
reflect.TypeOf(lhs.C))
return
}
}
// if we reach this point something went wrong: not in normal form
log.Printf("Normalization phase two: Found invalid GCI %v. Types: %v and %v\n",
gci, reflect.TypeOf(gci.C), reflect.TypeOf(gci.D))
}
}
// PhaseTwoSingle runs phase 2 of the normalization for a given gci.
// That is it exhaustively applies the rules NF5 - NF7 and returns the
// result.
func PhaseTwoSingle(gci *GCIConstraint, distributor *IntDistributor) *PhaseTwoResult {
res := NewPhaseTwoResult(distributor)
res.Waiting = []*GCIConstraint{gci}
for len(res.Waiting) > 0 {
n := len(res.Waiting)
next := res.Waiting[n-1]
res.Waiting[n-1] = nil
res.Waiting = res.Waiting[:n-1]
NormalizeStepPhaseTwo(next, res)
}
return res
}
// NormalizeSingleRI normalizes a rule inclusion, that is it exhaustively
// applies rule NF1.
// firstNewIndex must be the next index we can use to create new roles.
// If k is the length of the left hand side it requires k - 2 new role names
// if k > 2 and 0 new names otherwise.
func NormalizeSingleRI(ri *RoleInclusion, firstNewIndex uint) []*NormalizedRI {
lhs := ri.LHS
n := uint(len(lhs))
switch n {
case 0:
// TODO is it possible that the lhs is empty?
return []*NormalizedRI{NewNormalizedRI(NoRole, NoRole, ri.RHS)}
case 1:
return []*NormalizedRI{NewNormalizedRI(lhs[0], NoRole, ri.RHS)}
case 2:
return []*NormalizedRI{NewNormalizedRI(lhs[0], lhs[1], ri.RHS)}
default:
// TODO thoroughly test this case
rList := make([]*NormalizedRI, 0, n-1)
// now we need a loop to add all RIs as given in the algorithm
// add first one by hand
// the first one always has the following form:
// u1 o rk ⊑ s where u1 is a new role
rList = append(rList, NewNormalizedRI(NewRole(firstNewIndex), lhs[n-1], ri.RHS))
var i uint
for i = 0; i < n-3; i++ {
rList = append(rList, NewNormalizedRI(
NewRole(firstNewIndex+i+1),
lhs[n-2-i],
NewRole(firstNewIndex+i)))
}
// also add the following RI which is left after the loop:
// r1 o r2 ⊑ un
// where un is the last new role
rList = append(rList, NewNormalizedRI(lhs[0],
lhs[1],
NewRole(firstNewIndex+n-2-1)))
return rList
}
}
// TBoxNormalformBuilder is anything that transforms a tbox to a normalized
// tbox. Multiple calls to Normalize should be possible (i.e. data must be
// reset when running this method).
type TBoxNormalformBuilder interface {
Normalize(tbox *TBox) *NormalizedTBox
}
// DefaultNormalFormBuilder is an implementation of TBoxNormalformBuilder
// that normalizes up to k formulae concurrently.
type DefaultNormalFormBuilder struct {
k int
rolesAfterPhaseOne uint
distributor *IntDistributor
phaseOne *PhaseOneResult
ris []*NormalizedRI
}
// NewDefaultNormalFormBUilder returns a new DefaultNormalFormBuilder.
// k must be a value > 0 and is the number of formulae that will be
// normalized concurrently. That is at most k normalizations will be performed
// at the same time. So k = 1 means that no concurrency happens, instead
// each formulae is normalized on it's own.
func NewDefaultNormalFormBUilder(k int) *DefaultNormalFormBuilder {
return &DefaultNormalFormBuilder{k: k, phaseOne: NewPhaseOneResult(nil)}
}
// reset clears all data from the previous run.
func (builder *DefaultNormalFormBuilder) reset() {
builder.rolesAfterPhaseOne = 0
builder.distributor = nil
builder.phaseOne = NewPhaseOneResult(nil)
builder.ris = nil
}
// runPhaseOne normalizes all GCIs and RIs of the tbox (only rules NF1 - NF4).
// As described above at most k normalizations will happen concurrently.
func (builder *DefaultNormalFormBuilder) runPhaseOne(tbox *TBox) {
distributor := NewIntDistributor(tbox.Components.Names)
builder.distributor = distributor
// create a channel that is used to coordinate the workers
// this means: create a channel with buffer size k, writes to this channel
// will block once k things are running, when a normlization is done
// it will read a value from the channel
workers := make(chan struct{}, builder.k)
// a channel that is used to collect all normalization results for a single
// formula
resChan := make(chan *PhaseOneResult)
// a channel for normalized RIs, the idea is the same as with resChan
riChan := make(chan []*NormalizedRI)
// a wait group to wait for everything to finish
var wg sync.WaitGroup
wg.Add(2)
// start a listener that waits for all phase one results and merges
// the current result with the new result
go func() {
defer wg.Done()
for i := 0; i < len(tbox.GCIs); i++ {
next := <-resChan
builder.phaseOne.Union(next)
}
}()
// listener for the ris
go func() {
defer wg.Done()
for i := 0; i < len(tbox.RIs); i++ {
next := <-riChan
builder.ris = append(builder.ris, next...)
}
}()
// start the GCI normalization
for _, gci := range tbox.GCIs {
// wait for a free worker
workers <- struct{}{}
// run normalization concurrently, free worker when done
go func(gci *GCIConstraint) {
resChan <- PhaseOneSingle(gci, distributor)
<-workers
}(gci)
}
// iterate over all ris and normalize them
// this variable contains the next free index the normalization
// can use to create new roles
// the first one that is safe to use is the number of roles used in the
// original model
nextNew := tbox.Components.Roles
for _, ri := range tbox.RIs {
// wait for a free worker
workers <- struct{}{}
// run normalization concurrently
go func(ri *RoleInclusion, nextIndex uint) {
// TODO
riChan <- NormalizeSingleRI(ri, nextIndex)
<-workers
}(ri, nextNew)
// an RI of the form r1 o ... rk [= s requires k - 2 new role names if
// k > 2
// otherwise it requires 0
k := uint(len(ri.LHS))
if k > 2 {
nextNew += (k - 2)
}
}
builder.rolesAfterPhaseOne = nextNew
wg.Wait()
}
// runPhaseTwo normalizes all GCIs of the tbox (rules NF5 - NF7).
// As described above at most k normalizations will happen concurrently.
func (builder *DefaultNormalFormBuilder) runPhaseTwo() *PhaseTwoResult {
res := NewPhaseTwoResult(builder.distributor)
res.IntermediateCILeft = builder.phaseOne.IntermediateCILeft
res.IntermediateCIRight = builder.phaseOne.IntermediateCIRight
res.IntermediateCIs = builder.phaseOne.IntermediateCIs
// create a channel that is used to coordinate the workers
// this means: create a channel with buffer size k, writes to this channel
// will block once k things are running, when a normlization is done
// it will read a value from the channel
workers := make(chan struct{}, builder.k)
// a channel that is used to collect all normalization results for a single
// formula
resChan := make(chan *PhaseTwoResult)
// a channel to wait for everything to finish
done := make(chan struct{})
// start a listener for that waits for all phase two results and merges
// the current result with the new result
go func() {
for i := 0; i < len(builder.phaseOne.IntermediateRes); i++ {
next := <-resChan
res.Union(next)
}
done <- struct{}{}
}()
// start the GCI normalization
for _, gci := range builder.phaseOne.IntermediateRes {
// wait for a free worker
workers <- struct{}{}
// run normalization concurrently, free worker when done
go func(gci *GCIConstraint) {
resChan <- PhaseTwoSingle(gci, builder.distributor)
<-workers
}(gci)
}
<-done
return res
}
func (builder *DefaultNormalFormBuilder) Normalize(tbox *TBox) *NormalizedTBox {
// run phase one
builder.runPhaseOne(tbox)
// now rune phase two
phaseTwoRes := builder.runPhaseTwo()
// create the updated components
newComponents := NewELBaseComponents(tbox.Components.Nominals,
tbox.Components.CDExtensions,
// TODO should be correct?
builder.distributor.Next(),
builder.rolesAfterPhaseOne)
res := NormalizedTBox{
Components: newComponents,
CIs: phaseTwoRes.IntermediateCIs,
CIRight: phaseTwoRes.IntermediateCIRight,
CILeft: phaseTwoRes.IntermediateCILeft,
RIs: builder.ris,
}
// empty all fields, just to be sure
// this may help garbage collection if we already delete all stuff
builder.reset()
return &res
}