From 4d6f1aa7ffbe6e995252fa3ec5ae68088bdb0b99 Mon Sep 17 00:00:00 2001
From: "James.Hester" <jxh@ansto.gov.au>
Date: Thu, 7 Dec 2023 18:42:08 +1100
Subject: [PATCH] Fixes to dictionary output and misc bugfixes.

DDL2 dictionary output bugs fixed
Additional DDL2-derived attributes added to DDLm dictionary output
Improved cleaning of dREL function cache
---
 src/DataContainer/Relations.jl |  4 ++--
 src/cif_output.jl              | 27 +++++++++++++++++++++------
 src/ddl2_dictionary_ng.jl      | 16 ++++++++++++----
 src/ddlm_dictionary_ng.jl      |  9 +++++++--
 4 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/src/DataContainer/Relations.jl b/src/DataContainer/Relations.jl
index 6592a8c..e6e9776 100644
--- a/src/DataContainer/Relations.jl
+++ b/src/DataContainer/Relations.jl
@@ -782,9 +782,9 @@ end ==#
     for (tab,cols) in att_info
         if !(:master_id in propertynames(cols))
             println("Adding a master_id to $tab")
-            att_info[tab].master_id = dicname
+            att_info[tab][!,:master_id] .= dicname
         end
     end
-    DDL2_Dictionary(att_info,nspace)
+    DDL2_Dictionary(att_info, nspace)
 end
 
diff --git a/src/cif_output.jl b/src/cif_output.jl
index 6466776..f0a733f 100644
--- a/src/cif_output.jl
+++ b/src/cif_output.jl
@@ -117,7 +117,8 @@ If `loop`, multi-line values should be indented for presentation in a loop,
 if `pretty` is true.
 
 As a simple heuristic, the string is assumed to be pre-formatted if at least
-one line contains a sequence of 5 '#' characters.
+one line contains a sequence of 5 '#' characters, or five spaces not at
+the end of a line.
 """
 format_for_cif(val::AbstractString;delim=nothing,pretty=false,cif1=false,loop=false,kwargs...) = begin
     tgtval = val
@@ -138,6 +139,7 @@ format_for_cif(val::AbstractString;delim=nothing,pretty=false,cif1=false,loop=fa
     if delim == "\n;"
         
         is_preformat = match(r"#####",tgtval) != nothing
+        is_preformat |= occursin(r" {5}\S", tgtval)  # 5 spaces followed by text, so trailing blanks don't count
         if pretty && !is_preformat  #
             if loop == :short indent = loop_align - 1
             elseif loop == :long indent = text_indent + loop_indent + 1
@@ -285,7 +287,7 @@ so that no lines are greater than `line_length`, and each line starts with
 If `justify` is true, each line will be filled as 
 close as possible to the maximum length and all spaces replaced by 
 a single space, which could potentially spoil formatting like centering, tabulation
-or ASCII equations.
+or ASCII equations.
 """
 format_cif_text_string(value::AbstractString,indent;width=line_length,justify=false,prefix="",kwargs...) = begin
     # catch pathological all whitespace values
@@ -684,10 +686,20 @@ const ddlm_attribute_order = (:definition => (:id,:scope,:class),
                               :description_example => (:case,:detail),
                               :enumeration_default => (:index, :value),
                          :import => (:get,),
-                         :method => (:purpose,:expression)
+                              :method => (:purpose, :expression),
+                              :nx_mapping => (:details, ),
+                              :ddl2_sub_category => (:id, :name),
+                              :ddl2_sub_category_list => (:description, :id),
+                              :ddl2_category_group => (:id, ),
+                              :ddl2_regex_type => (:code, :primitive_code, :construct,
+                                                   :detail),
+                              :ddl2_category_group_list => (:id, :parent_id, :description),
+                              :ddl2_units_list => (:code, :detail),
+                              :ddl2_enumeration_range => (:minimum, :maximum),
+                              :ddl2_item_related => (:related_name, :function_code)
                          )
 
-const ddlm_no_justify = (:method,:description,:description_example) #do not reformat items in this category
+const ddlm_no_justify = (:method,:description,:description_example, :nx_mapping) #do not reformat items in this category
 const ddl2_no_justify = (:category,:category_examples,:item,:item_examples)
 # Always use semicolon delimiters
 const ddlm_semicolons = Dict(:description=>(:text,),:method=>(:expression,))
@@ -721,6 +733,7 @@ show_one_def(io,def_name,info_dic;implicits=[],ordering=ddlm_attribute_order) =
     for (k,_) in ordering
         haskey(final_chance,k) ? final_chance[k] = final_chance[k]+1 : 0
     end
+    @debug "Before output" ordering final_chance leftover
     for chunk in ordering
         cat,objs = chunk
         if !haskey(info_dic,cat) continue end
@@ -845,7 +858,9 @@ show_loop(io,cat,df;implicits=[],indents=[text_indent,value_col],order=(),
               rej_names = filter(x->split(x,".")[1]==cat,implicits)
               rej_names = map(x->split(x,".")[2],rej_names)
               append!(rej_names,["master_id","__blockname","__object_id"])
-              imp_reg = Regex("$(join(rej_names,"|^"))")
+              @debug "Ignoring" rej_names
+              imp_reg = Regex("^$(join(rej_names,"\$|^"))\$")
+              @debug "As a regex" imp_reg
               write(io,format_for_cif(df[!,Not(imp_reg)];catname=cat,indent=indents,order=order,
                                       pretty=reflow,justify=justify))
 end       
@@ -1115,7 +1130,7 @@ show(io::IOContext,::MIME"text/cif",ddl2_dic::DDL2_Dictionary) = begin
     top_level = ddl2_dic[:dictionary]
     show_set(io,"dictionary",top_level,implicits=implicit_info)
     # Now for the rest
-    all_cats = sort(get_categories(ddl2_dic))
+    all_cats = sort(get_categories(ddl2_dic, referred = true))
     for one_cat in all_cats
         cat_info = ddl2_dic[one_cat]
         show_one_def(io,one_cat,cat_info,implicits=implicit_info,ordering=())
diff --git a/src/ddl2_dictionary_ng.jl b/src/ddl2_dictionary_ng.jl
index 635c6a8..43f178e 100644
--- a/src/ddl2_dictionary_ng.jl
+++ b/src/ddl2_dictionary_ng.jl
@@ -161,11 +161,19 @@ find_object(d::DDL2_Dictionary,dataname) = begin
 end
 
 """
-    get_categories(d::DDL2_Dictionary)
+    get_categories(d::DDL2_Dictionary; referred = false)
 
-List all categories defined in `d`
+List all categories defined in `d`, as lowercase names. If `referred` is `true`,
+categories that data names belong to but that have no definition are also included.
 """
-get_categories(d::DDL2_Dictionary) = d.block[:category][!,:id]
+get_categories(d::DDL2_Dictionary; referred = false) = begin
+    # Ids from both sources are lowercased before being combined
+    defined = lowercase.(d.block[:category][!,:id])
+    referred || return defined
+    item_parents = lowercase.(d[:item].category_id)  # categories named by data names
+    return union(defined, item_parents)  # `union` already drops duplicates
+end
+
 get_set_categories(d::DDL2_Dictionary) = []
 get_loop_categories(d::DDL2_Dictionary) = get_categories(d)
 
@@ -455,7 +463,7 @@ const ddl2_type_mapping = Dict( "text" => String,
 
 get_julia_type_name(cdic::DDL2_Dictionary,cat::AbstractString,obj::AbstractString) = begin
     definition = cdic[find_name(cdic,cat,obj)]
-    type_index = definition[:item_type][!,:code][]
+    type_index = haskey(definition, :item_type) ? definition[:item_type][!,:code][] : "text"
     all_types = cdic[:item_type_list]
     type_base = all_types[all_types[!,:code] .== type_index,:primitive_code][]
     if type_index in keys(ddl2_type_mapping)
diff --git a/src/ddlm_dictionary_ng.jl b/src/ddlm_dictionary_ng.jl
index 440fc9c..0074110 100644
--- a/src/ddlm_dictionary_ng.jl
+++ b/src/ddlm_dictionary_ng.jl
@@ -793,6 +793,10 @@ further automatic derivation taking place.
 """
 remove_methods!(dict::DDLm_Dictionary) = begin
     dict.block[:method] = groupby(DataFrame([[]],[:master_id]),:master_id)
+    for k in collect(keys(dict.func_defs))  # snapshot the keys: do not delete while iterating
+        delete!(dict.func_defs, k)
+        delete!(dict.func_text, k)  # keep func_text in step with func_defs
+    end
 end
 
 """
@@ -1554,8 +1558,9 @@ resolve_full_imports!(d::Dict{Symbol,DataFrame},original_dir) = begin
             new_head = d[:name][d[:name].master_id .== block_id,:].object_id[]
             # find duplicates
             all_defs = importee[:definition][!,:master_id]
-            #println("All visible defs: $all_defs")
-            dups = filter(x-> count(isequal(x),all_defs)>1,all_defs)
+            @debug "All imported defs:" all_defs
+            prior_defs = d[:definition][!,:master_id]
+            dups = filter(x-> count(isequal(x),all_defs)>0, prior_defs)
             if length(dups) > 0
                 @debug "Duplicated frames" dups
                 if if_dupl == "Replace"