From e89d24968955ae48c666a161fae5629edad324b9 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash@gmail.com>
Date: Mon, 25 Sep 2023 15:17:40 +0000
Subject: [PATCH] some more general cleanup

---
 base/Base.jl               | 26 ++++++--------
 base/array.jl              | 70 +++++++++++---------------------------
 base/compiler/tfuncs.jl    | 20 ++++++-----
 base/compiler/utilities.jl |  2 +-
 base/genericmemory.jl      | 30 ++++++++++------
 base/summarysize.jl        |  2 +-
 doc/src/base/c.md          |  2 +-
 doc/src/devdocs/types.md   |  2 ++
 src/typemap.c              |  6 ++--
 test/core.jl               |  8 ++---
 10 files changed, 73 insertions(+), 95 deletions(-)

diff --git a/base/Base.jl b/base/Base.jl
index 092bc9f7e6349b..8e491f256e8771 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -203,29 +203,14 @@ include("views.jl")
 include("baseext.jl")
 
 include("c.jl")
-# Initialize DL_LOAD_PATH as early as possible.  We are defining things here in
-# a slightly more verbose fashion than usual, because we're running so early.
-const DL_LOAD_PATH = String[]
-let os = ccall(:jl_get_UNAME, Any, ())
-    if os === :Darwin || os === :Apple
-        if Base.DARWIN_FRAMEWORK
-            push!(DL_LOAD_PATH, "@loader_path/Frameworks")
-        end
-        push!(DL_LOAD_PATH, "@loader_path")
-    end
-end
-
 include("ntuple.jl")
-
 include("abstractdict.jl")
 include("iddict.jl")
 include("idset.jl")
-
 include("iterators.jl")
 using .Iterators: zip, enumerate, only
 using .Iterators: Flatten, Filter, product  # for generators
 using .Iterators: Stateful    # compat (was formerly used in reinterpretarray.jl)
-
 include("namedtuple.jl")
 
 # For OS specific stuff
@@ -241,6 +226,17 @@ function strcat(x::String, y::String)
 end
 include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
 include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
+# Initialize DL_LOAD_PATH as early as possible.  We are defining things here in
+# a slightly more verbose fashion than usual, because we're running so early.
+const DL_LOAD_PATH = String[]
+let os = ccall(:jl_get_UNAME, Any, ())
+    if os === :Darwin || os === :Apple
+        if Base.DARWIN_FRAMEWORK
+            push!(DL_LOAD_PATH, "@loader_path/Frameworks")
+        end
+        push!(DL_LOAD_PATH, "@loader_path")
+    end
+end
 
 # numeric operations
 include("hashing.jl")
diff --git a/base/array.jl b/base/array.jl
index 480da74619c079..b345d27b9369ed 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -211,49 +211,17 @@ julia> Base.isbitsunion(Union{Float64, String})
 false
 ```
 """
-isbitsunion(u::Union) = allocatedinline(u)
-isbitsunion(x) = false
+isbitsunion(u::Type) = u isa Union && allocatedinline(u)
 
-function _unsetindex!(A::Array{T}, i::Int) where {T}
+function _unsetindex!(A::Array, i::Int)
     @inline
     @boundscheck checkbounds(A, i)
-    t = @_gc_preserve_begin A
-    p = Ptr{Ptr{Cvoid}}(pointer(A, i))
-    if !allocatedinline(T)
-        Intrinsics.atomic_pointerset(p, C_NULL, :monotonic)
-    elseif T isa DataType
-        if !datatype_pointerfree(T)
-            for j = 1:Core.sizeof(Ptr{Cvoid}):Core.sizeof(T)
-                Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic)
-            end
-        end
-    end
-    @_gc_preserve_end t
+    @inbounds _unsetindex!(MemoryRef(A.ref, i))
     return A
 end
 
 
 # TODO: deprecate this (aligned_sizeof and/or elsize and/or sizeof(Some{T}) are more correct)
-"""
-    Base.bitsunionsize(U::Union) -> Int
-
-For a `Union` of [`isbitstype`](@ref) types, return the size of the largest type; assumes `Base.isbitsunion(U) == true`.
-
-# Examples
-```jldoctest
-julia> Base.bitsunionsize(Union{Float64, UInt8})
-8
-
-julia> Base.bitsunionsize(Union{Float64, UInt8, Int128})
-16
-```
-"""
-function bitsunionsize(u::Union)
-    isinline, sz, _ = uniontype_layout(u)
-    @assert isinline
-    return sz
-end
-
 elsize(::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T)
 function elsize(::Type{Ptr{T}}) where T
     # this only must return something valid for values which satisfy is_valid_intrinsic_elptr(T),
@@ -1144,12 +1112,12 @@ function _growat!(a::Vector, i::Integer, delta::Integer)
         unsafe_copyto!(newref, ref, i)
         a.ref = newref
         for j in i:i+delta-1
-            _unsetindex!(a, j)
+            @inbounds _unsetindex!(a, j)
         end
     elseif !prefer_start && memlen >= newmemlen
         unsafe_copyto!(mem, offset+delta+i, mem, offset+i, len-i+1)
         for j in i:i+delta-1
-            _unsetindex!(a, j)
+            @inbounds _unsetindex!(a, j)
         end
     else
         # since we will allocate the array in the middle of the memory we need at least 2*delta extra space
@@ -1170,7 +1138,7 @@ function _deletebeg!(a::Vector, delta::Integer)
     len = length(a)
     0 <= delta <= len || throw(ArgumentError("_deleteat! requires delta in 0:length(a)"))
     for i in 1:delta
-        _unsetindex!(a, i)
+        @inbounds _unsetindex!(a, i)
     end
     newlen = len - delta
     if newlen != 0 # if newlen==0 we could accidentally index past the memory
@@ -1185,7 +1153,7 @@ function _deleteend!(a::Vector, delta::Integer)
     0 <= delta <= len || throw(ArgumentError("_deleteat! requires delta in 0:length(a)"))
     newlen = len - delta
     for i in newlen+1:len
-        _unsetindex!(a, i)
+        @inbounds _unsetindex!(a, i)
     end
     a.size = (newlen,)
     return
@@ -1782,17 +1750,19 @@ struct Nowhere; end
 push!(::Nowhere, _) = nothing
 _growend!(::Nowhere, _) = nothing
 
-@inline function _push_deleted!(dltd, a::Vector, ind)
-    if @inbounds isassigned(a, ind)
-        push!(dltd, @inbounds a[ind])
+function _push_deleted!(dltd, a::Vector, ind)
+    @_propagate_inbounds_meta
+    if isassigned(a, ind)
+        push!(dltd, a[ind])
     else
         _growend!(dltd, 1)
     end
 end
 
-@inline function _copy_item!(a::Vector, p, q)
-    if @inbounds isassigned(a, q)
-        @inbounds a[p] = a[q]
+function _copy_item!(a::Vector, p, q)
+    @_propagate_inbounds_meta
+    if isassigned(a, q)
+        a[p] = a[q]
     else
         _unsetindex!(a, p)
     end
@@ -1804,7 +1774,7 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
     y === nothing && return a
     (p, s) = y
     checkbounds(a, p)
-    _push_deleted!(dltd, a, p)
+    @inbounds _push_deleted!(dltd, a, p)
     q = p+1
     while true
         y = iterate(inds, s)
@@ -1818,14 +1788,14 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
             end
         end
         while q < i
-            _copy_item!(a, p, q)
+            @inbounds _copy_item!(a, p, q)
             p += 1; q += 1
         end
-        _push_deleted!(dltd, a, i)
+        @inbounds _push_deleted!(dltd, a, i)
         q = i+1
     end
     while q <= n
-        _copy_item!(a, p, q)
+        @inbounds _copy_item!(a, p, q)
         p += 1; q += 1
     end
     _deleteend!(a, n-p+1)
@@ -1838,7 +1808,7 @@ function deleteat!(a::Vector, inds::AbstractVector{Bool})
     length(inds) == n || throw(BoundsError(a, inds))
     p = 1
     for (q, i) in enumerate(inds)
-        _copy_item!(a, p, q)
+        @inbounds _copy_item!(a, p, q)
         p += !i
     end
     _deleteend!(a, n-p+1)
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 9bbe3f7241083c..aef8140acee9df 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -1283,17 +1283,19 @@ end
     fcnt = fieldcount_noerror(typ)
     fcnt === nothing && return false
     0 < fidx ≤ fcnt || return true # no undefined behavior if thrown
+    fidx ≤ datatype_min_ninitialized(typ) && return true # always defined
     ftyp = fieldtype(typ, fidx)
-    is_undefref_fieldtype(ftyp) && return true
-    return fidx ≤ datatype_min_ninitialized(typ)
+    is_undefref_fieldtype(ftyp) && return true # always initialized
+    return false
 end
-# checks if a field of this type will not be initialized with undefined value
-# and the access to that uninitialized field will cause and `UndefRefError`, e.g.,
+# checks if a field of this type is guaranteed to be defined to a value
+# and that access to an uninitialized field will cause an `UndefRefError` or return zero
 # - is_undefref_fieldtype(String) === true
 # - is_undefref_fieldtype(Integer) === true
 # - is_undefref_fieldtype(Any) === true
 # - is_undefref_fieldtype(Int) === false
 # - is_undefref_fieldtype(Union{Int32,Int64}) === false
+# - is_undefref_fieldtype(T) === false
 function is_undefref_fieldtype(@nospecialize ftyp)
     return !has_free_typevars(ftyp) && !allocatedinline(ftyp)
 end
@@ -2063,15 +2065,15 @@ end
 
 # whether getindex for the elements can potentially throw UndefRef
 function array_type_undefable(@nospecialize(arytype))
+    arytype = unwrap_unionall(arytype)
     if isa(arytype, Union)
         return array_type_undefable(arytype.a) || array_type_undefable(arytype.b)
-    elseif isa(arytype, UnionAll)
-        return true
-    else
-        elmtype = memoryref_elemtype(arytype::DataType)
+    elseif arytype isa DataType
+        elmtype = memoryref_elemtype(arytype)
         # TODO: use arraytype layout instead to derive this
-        return !(isbitstype(elmtype) || isbitsunion(elmtype))
+        return !((elmtype isa DataType && isbitstype(elmtype)) || (elmtype isa Union && isbitsunion(elmtype)))
     end
+    return true
 end
 
 @nospecs function memoryset_typecheck(memtype, elemtype)
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index a6cf08326e789f..9e866ed1a79f1f 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -91,7 +91,7 @@ function count_const_size(@nospecialize(x), count_self::Bool = true)
     sz = count_self ? sizeof(dt) : 0
     sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
     dtfd = DataTypeFieldDesc(dt)
-    for i = 1:datatype_nfields(dt)
+    for i = 1:Int(datatype_nfields(dt))
         isdefined(x, i) || continue
         f = getfield(x, i)
         if !dtfd[i].isptr && datatype_pointerfree(typeof(f))
diff --git a/base/genericmemory.jl b/base/genericmemory.jl
index c0302e33bde85a..92f87561f25e4c 100644
--- a/base/genericmemory.jl
+++ b/base/genericmemory.jl
@@ -26,19 +26,27 @@ size(a::GenericMemory, d::Int) =
 size(a::GenericMemory, d::Integer) =  size(a, convert(d, Int))
 size(a::GenericMemory) = (length(a),)
 
-pointer(mem::GenericMemory, i::Int) = unsafe_convert(Ptr{Cvoid}, MemoryRef(mem, i))
-function _unsetindex!(A::Memory{T}, i::Int) where {T}
+pointer(mem::GenericMemory, i::Int) = (@_propagate_inbounds_meta; unsafe_convert(Ptr{Cvoid}, MemoryRef(mem, i))) # boundschecked, even for i==1
+pointer(mem::MemoryRef) = unsafe_convert(Ptr{Cvoid}, mem) # no bounds check, even for empty array
+
+_unsetindex!(A::Memory, i::Int) =  (@_propagate_inbounds_meta; _unsetindex!(MemoryRef(A, i)); A)
+function _unsetindex!(A::MemoryRef{:not_atomic,T}) where T
     @_terminates_locally_meta
+    @_propagate_inbounds_meta
     @inline
-    @boundscheck checkbounds(A, i)
-    t = @_gc_preserve_begin A
-    p = Ptr{Ptr{Cvoid}}(pointer(A, i))
-    # TODO(jwn): access datatype_layout from A instead
-    if !allocatedinline(T)
+    @boundscheck MemoryRef(A, 1)
+    mem = A.mem
+    MemT = typeof(mem)
+    arrayelem = datatype_arrayelem(MemT)
+    elsz = datatype_layoutsize(MemT)
+    isboxed = 1; isunion = 2
+    t = @_gc_preserve_begin mem
+    p = Ptr{Ptr{Cvoid}}(@inbounds pointer(A))
+    if arrayelem == isboxed
         Intrinsics.atomic_pointerset(p, C_NULL, :monotonic)
-    elseif T isa DataType
-        if !datatype_pointerfree(T)
-            for j = 1:Core.sizeof(Ptr{Cvoid}):Core.sizeof(T)
+    elseif arrayelem != isunion
+        if !datatype_pointerfree(T::DataType)
+            for j = 1:Core.sizeof(Ptr{Cvoid}):elsz
                 Intrinsics.atomic_pointerset(p + j - 1, C_NULL, :monotonic)
             end
         end
@@ -65,7 +73,7 @@ function unsafe_copyto!(dest::MemoryRef{:not_atomic, T}, src::MemoryRef{:not_ato
     @_terminates_locally_meta
     n == 0 && return dest
     @boundscheck MemoryRef(dest, n), MemoryRef(src, n)
-    ccall(:jl_genericmemory_copyto, Cvoid, (Any, Int, Any, Int, Int), dest.mem, dest.ptr, src.mem, src.ptr, Int(n))
+    ccall(:jl_genericmemory_copyto, Cvoid, (Any, Ptr{Cvoid}, Any, Ptr{Cvoid}, Int), dest.mem, dest.ptr, src.mem, src.ptr, Int(n))
     return dest
 end
 
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 6fb051a4d8632a..25058247680995 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -26,7 +26,7 @@ julia> Base.summarysize(1.0)
 8
 
 julia> Base.summarysize(Ref(rand(100)))
-848
+864
 
 julia> sizeof(Ref(rand(100)))
 8
diff --git a/doc/src/base/c.md b/doc/src/base/c.md
index e221a6432542f3..bf7e2577029fef 100644
--- a/doc/src/base/c.md
+++ b/doc/src/base/c.md
@@ -14,7 +14,7 @@ Base.unsafe_modify!
 Base.unsafe_replace!
 Base.unsafe_swap!
 Base.unsafe_copyto!{T}(::Ptr{T}, ::Ptr{T}, ::Any)
-Base.unsafe_copyto!{T}(::Array{T}, ::Any, ::Array{T}, ::Any, ::Any)
+Base.unsafe_copyto!(::Array, ::Any, ::Array, ::Any, ::Any)
 Base.copyto!
 Base.pointer
 Base.unsafe_wrap{T,N}(::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}}, ::Ptr{T}, ::NTuple{N,Int})
diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md
index c3afc26600c65e..2e5d1a9ec8b208 100644
--- a/doc/src/devdocs/types.md
+++ b/doc/src/devdocs/types.md
@@ -100,6 +100,8 @@ UnionAll
       lb: Union{}
       ub: Any
     body: Array{T, N} <: DenseArray{T, N}
+      ref::MemoryRef{:not_atomic, T}
+      size::Tuple{Vararg{Int64, N}}
 ```
 
 This indicates that `Array` actually names a `UnionAll` type. There is one `UnionAll` type for
diff --git a/src/typemap.c b/src/typemap.c
index 06273b83c25ad6..6d14949c5d601e 100644
--- a/src/typemap.c
+++ b/src/typemap.c
@@ -1042,7 +1042,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                         jl_genericmemory_t *tname = jl_atomic_load_relaxed(&cache->tname); // reload after type-intersect
                         // couldn't figure out unique `a0` initial point, so scan all for matches
                         size_t i, l = tname->length;
-                        _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) tname->data;
+                        _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname);
                         JL_GC_PUSH1(&tname);
                         for (i = 1; i < l; i += 2) {
                             jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -1081,7 +1081,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 else {
                     // doing subtype, but couldn't figure out unique `ty`, so scan all for supertypes
                     size_t i, l = name1->length;
-                    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) name1->data;
+                    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(name1);
                     JL_GC_PUSH1(&name1);
                     for (i = 1; i < l; i += 2) {
                         jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -1237,7 +1237,7 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
             else {
                 // couldn't figure out unique `name` initial point, so must scan all for matches
                 size_t i, l = tname->length;
-                _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) tname->data;
+                _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*) jl_genericmemory_ptr_data(tname);
                 JL_GC_PUSH1(&tname);
                 for (i = 1; i < l; i += 2) {
                     jl_typemap_t *ml_or_cache = jl_atomic_load_relaxed(&data[i]);
diff --git a/test/core.jl b/test/core.jl
index bfbd0d6c158986..0307479d5c376d 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -6015,10 +6015,10 @@ const unboxedunions = [Union{Int8, Nothing},
 @test Base.isbitsunion(unboxedunions[2])
 @test Base.isbitsunion(unboxedunions[3])
 
-@test Base.bitsunionsize(unboxedunions[1]) == 1
-@test Base.bitsunionsize(unboxedunions[2]) == 2
-@test Base.bitsunionsize(unboxedunions[3]) == 16
-@test Base.bitsunionsize(unboxedunions[4]) == 8
+@test Base.aligned_sizeof(unboxedunions[1]) == 1
+@test Base.aligned_sizeof(unboxedunions[2]) == 2
+@test Base.aligned_sizeof(unboxedunions[3]) == 16
+@test Base.aligned_sizeof(unboxedunions[4]) == 8
 
 @test sizeof(unboxedunions[1]) == 1
 @test sizeof(unboxedunions[2]) == 2