diff --git a/Makefile b/Makefile index a1d98e737e..74c9804d54 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,9 @@ default: $(DEFAULT_TARGET) VERBOSE ?= 0 +#define ARKOUDA_QUICK_COMPILE +#CHPL_FLAGS += --no-checks --no-loop-invariant-code-motion --no-fast-followers --ccflags="-O0" +#endef CHPL := chpl CHPL_DEBUG_FLAGS += --print-passes ifdef ARKOUDA_DEVELOPER diff --git a/arkouda/graph.py b/arkouda/graph.py new file mode 100755 index 0000000000..3cfd9c7da0 --- /dev/null +++ b/arkouda/graph.py @@ -0,0 +1,681 @@ +from __future__ import annotations +from typing import cast, Tuple, Union +from typeguard import typechecked +from arkouda.client import generic_msg +from arkouda.pdarrayclass import pdarray, create_pdarray +#, parse_single_value,_parse_single_int_array_value +from arkouda.logger import getArkoudaLogger +import numpy as np # type: ignore +from arkouda.dtypes import str as akstr +from arkouda.dtypes import int64 as akint +from arkouda.dtypes import NUMBER_FORMAT_STRINGS, resolve_scalar_dtype, \ + translate_np_dtype +import json + +__all__ = ['Graph','GraphD','GraphDW','GraphUD','GraphUDW'] + +class Vertex: + """ + Represents a vertex of a graph + + Attributes + ---------- + vertex_id : int + The unique identification of the vertex in a graph + weight : int + The weitht information of the current vertex + neighbors : pdarray + all the vertices connected to the current vertex. For directed graph, out edge vertices are given. + logger : ArkoudaLogger + Used for all logging operations + + Notes + ----- + Vertex is composed of one pdarray: the ID value array which + contains the all the ids of the adjacency vertices. 
+ """ + # based on args + def __init__(self, *args) -> None: + + try: + self.vertex_id=args[0] + if len(args) > 2: + if isinstance(args[2],pdarray): + self.neighbours=args[2] + else: + try: + self.neighbours = create_pdarray(args[2]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 1: + self.weight=args[1] + except Exception as e: + raise RuntimeError(e) + + self.dtype = akint + self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore + + def __iter__(self): + raise NotImplementedError('Graph does not support iteration') + + def __size__(self) -> int: + return self.neighbours.size + + + def __str__(self) -> str: + return "vertex id={},weight={},#neighbours={}".format(self.vertex_id,\ + self.weight, self.neighbours.size) + + def __repr__(self) -> str: + return "{}".format(self.__str__()) + + +class Edge: + """ + Represents an Edge of a graph + + Attributes + ---------- + vertex_pair : tuple + The unique identification of the edge in a graph + weight : int + The weitht information of the current edge + adjacency : pdarray + all the vertices connected to the current vertex. For directed graph, out edge vertices are given. + logger : ArkoudaLogger + Used for all logging operations + + Notes + ----- + Vertex is composed of one pdarray: the ID value array which + contains the all the ids of the adjacency vertices. 
+ """ + # based on args + def __init__(self, *args) -> None: + try: + self.vertex_pair=args[0] + if len(args) > 2: + if isinstance(args[2],pdarray): + self.adjacency=args[2] + else: + try: + self.adjacency = create_pdarray(args[2]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 1: + self.weight=args[1] + except Exception as e: + raise RuntimeError(e) + self.dtype = akint + self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore + + def __iter__(self): + raise NotImplementedError('Graph does not support iteration') + + def __str__(self) -> str: + return "vertex pair={},weight={},#adjacency={}".format(self.vertex_pair,\ + self.weight,self.adjacency.size) + + def __repr__(self) -> str: + return "{}".format(self.__str__()) + + +class GraphD: + """ + This is an array based graph representation. The graph data resides on the + arkouda server. The user should not call this class directly; + rather its instances are created by other arkouda functions. + + Attributes + ---------- + n_vertices : int + The starting indices for each string + n_edges : int + The starting indices for each string + directed : int + The graph is directed (True) or undirected (False) + weighted : int + The graph is weighted (True) or not + src : pdarray + The source of every edge in the graph + dst : pdarray + The destination of every vertex in the graph + start_i : pdarray + The starting index of all the vertices in src + neighbour : pdarray + The number of current vertex id v's (v None: + """ + Initializes the Graph instance by setting all instance + attributes, some of which are derived from the array parameters. 
+ + Parameters + ---------- + n_vertices : must provide args[0] + n_edges : must provide args[1] + directed : must provide args[2] + weighted : must provide args[3] + src,dst : optional if no weighted args[4] args[5] + start_i, neighbour : optional if no src and dst args[6] args[7] + v_weight : optional if no neighbour args[8] + e_weight : optional if no v_weight args[9] + + + Returns + ------- + None + + Raises + ------ + RuntimeError + Raised if there's an error converting a Numpy array or standard + Python array to either the offset_attrib or bytes_attrib + ValueError + Raised if there's an error in generating instance attributes + from either the offset_attrib or bytes_attrib parameter + """ + try: + if len(args) < 4: + raise ValueError + self.n_vertices=cast(int,args[0]) + self.n_edges=cast(int,args[1]) + self.directed=cast(int,args[2]) + self.weighted=cast(int,args[3]) + if len(args) == 7: + raise ValueError + if len(args) > 7: + if (isinstance(args[7],pdarray) and isinstance(args[6],pdarray)) : + self.start_i=args[6] + self.neighbour=args[7] + else: + try: + self.start_i = create_pdarray(args[6]) + self.neighbour = create_pdarray(args[7]) + except Exception as e: + raise RuntimeError(e) + if len(args) == 5: + raise ValueError + if len(args) > 5: + if (isinstance(args[5],pdarray) and isinstance(args[4],pdarray)) : + self.src=args[4] + self.dst=args[5] + else: + try: + self.src = create_pdarray(args[4]) + self.dst = create_pdarray(args[5]) + except Exception as e: + raise RuntimeError(e) + except Exception as e: + raise RuntimeError(e) + self.dtype = akint + self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore + + def __iter__(self): + raise NotImplementedError('Graph does not support iteration') + + def __size__(self) -> int: + return self.n_vertices + + ''' + def add_vertice(self, x: Vertice)->None : + print() + + def remove_vertice(self, x: int) ->None: + print() + + def neighbours(self, x: int)->pdarray : + return self.neighbour[i] + + 
def adjacent(self, x: int, y:int )->pdarray : + neighbours(self,x) + neighbours(self,y) + + def get_vertice_value(self, x: int) -> Vertice: + print() + + def set_vertice_value(self, x: int, v: Vertice) : + print() + + def add_edge(self, x: int, y: int) : + print() + + def remove_edge(self, x: int, y: int) : + print() + ''' + +class GraphDW(GraphD): + """ + This is an array based graph representation. The graph data resides on the + arkouda server. The user should not call this class directly; + rather its instances are created by other arkouda functions. + + Attributes + ---------- + n_vertices : int + The starting indices for each string + n_edges : int + The starting indices for each string + directed : int + The graph is directed (True) or undirected (False) + weighted : int + The graph is weighted (True) or not + src : pdarray + The source of every edge in the graph + dst : pdarray + The destination of every vertex in the graph + start_i : pdarray + The starting index of all the vertices in src + neighbour : pdarray + The number of current vertex id v's (v None: + """ + Initializes the Graph instance by setting all instance + attributes, some of which are derived from the array parameters. 
+ + Parameters + ---------- + n_vertices : must provide args[0] + n_edges : must provide args[1] + directed : must provide args[2] + weighted : must provide args[3] + src,dst : optional if no weighted args[4] args[5] + start_i, neighbour : optional if no src and dst args[6] args[7] + v_weight : optional if no neighbour args[8] + e_weight : optional if no v_weight args[9] + + + Returns + ------- + None + + Raises + ------ + RuntimeError + Raised if there's an error converting a Numpy array or standard + Python array to either the offset_attrib or bytes_attrib + ValueError + Raised if there's an error in generating instance attributes + from either the offset_attrib or bytes_attrib parameter + """ + super().__init__(*args) + try: + if len(args) > 9: + if isinstance(args[9],pdarray): + self.e_weight=args[9] + else: + try: + self.e_weight = create_pdarray(args[9]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 8: + if isinstance(args[8],pdarray): + self.v_weight=args[8] + else: + try: + self.v_weight = create_pdarray(args[8]) + except Exception as e: + raise RuntimeError(e) + except Exception as e: + raise RuntimeError(e) + + + +class GraphUD(GraphD): + """ + This is an array based graph representation. The graph data resides on the + arkouda server. The user should not call this class directly; + rather its instances are created by other arkouda functions. 
+ + Attributes + ---------- + n_vertices : int + The starting indices for each string + n_edges : int + The starting indices for each string + directed : int + The graph is directed (True) or undirected (False) + weighted : int + The graph is weighted (True) or not + src : pdarray + The source of every edge in the graph + dst : pdarray + The destination of every vertex in the graph + start_i : pdarray + The starting index of all the vertices in src + neighbour : pdarray + The number of current vertex id v's (v None: + """ + Initializes the Graph instance by setting all instance + attributes, some of which are derived from the array parameters. + + Parameters + ---------- + n_vertices : must provide args[0] + n_edges : must provide args[1] + directed : must provide args[2] + weighted : must provide args[3] + src,dst : optional if no weighted args[4] args[5] + start_i, neighbour : optional if no src and dst args[6] args[7] + srcR,dstR : optional if no neighbour args[8] args[9] + start_iR, neighbourR : optional if no dstR args[10] args[11] + v_weight : optional if no neighbouirR args[12] + e_weight : optional if no v_weight args[13] + + Returns + ------- + None + + Raises + ------ + RuntimeError + ValueError + """ + super().__init__(*args) + try: + if len(args) > 11: + if (isinstance(args[11],pdarray) and isinstance(args[10],pdarray)) : + self.start_iR=args[10] + self.neighbourR=args[11] + else: + try: + self.start_iR = create_pdarray(args[10]) + self.neighbourR = create_pdarray(args[11]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 9: + if (isinstance(args[9],pdarray) and isinstance(args[8],pdarray)) : + self.srcR=args[8] + self.dstR=args[9] + else: + try: + self.srcR = create_pdarray(args[8]) + self.dstR = create_pdarray(args[9]) + except Exception as e: + raise RuntimeError(e) + except Exception as e: + raise RuntimeError(e) + + +class GraphUDW(GraphUD): + """ + This is an array based graph representation. 
The graph data resides on the + arkouda server. The user should not call this class directly; + rather its instances are created by other arkouda functions. + + Attributes + ---------- + n_vertices : int + The starting indices for each string + n_edges : int + The starting indices for each string + directed : int + The graph is directed (True) or undirected (False) + src : pdarray + The source of every edge in the graph + dst : pdarray + The destination of every vertex in the graph + start_i : pdarray + The starting index of all the vertices in src + neighbour : pdarray + The number of current vertex id v's (v None: + """ + Initializes the Graph instance by setting all instance + attributes, some of which are derived from the array parameters. + + Parameters + ---------- + n_vertices : must provide args[0] + n_edges : must provide args[1] + directed : must provide args[2] + weighted : must provide args[3] + src,dst : optional if no weighted args[4] args[5] + start_i, neighbour : optional if no src and dst args[6] args[7] + srcR,dstR : optional if no neighbour args[8] args[9] + start_iR, neighbourR : optional if no dstR args[10] args[11] + v_weight : optional if no neighbouirR args[12] + e_weight : optional if no v_weight args[13] + + + Returns + ------- + None + + Raises + ------ + RuntimeError + ValueError + """ + super().__init__(*args) + try: + if len(args) > 13: + if isinstance(args[13],pdarray): + self.e_weight=args[13] + else: + try: + self.e_weight = create_pdarray(args[13]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 12: + if isinstance(args[12],pdarray): + self.v_weight=args[12] + else: + try: + self.v_weight = create_pdarray(args[12]) + except Exception as e: + raise RuntimeError(e) + except Exception as e: + raise RuntimeError(e) + + +class Graph: + """ + This is an array based graph representation. The graph data resides on the + arkouda server. 
The user should not call this class directly; + rather its instances are created by other arkouda functions. + + Attributes + ---------- + n_vertices : int + The starting indices for each string + n_edges : int + The starting indices for each string + directed : int + The graph is directed (True) or undirected (False) + src : pdarray + The source of every edge in the graph + dst : pdarray + The destination of every vertex in the graph + start_i : pdarray + The starting index of all the vertices in src + neighbour : pdarray + The number of current vertex id v's (v None: + """ + Initializes the Graph instance by setting all instance + attributes, some of which are derived from the array parameters. + + Parameters + ---------- + n_vertices : must provide + n_edges : must provide + directed : optional + src,dst : optional if no directed + start_i, neighbour : optional if no src and dst + v_weight : optional if no neighbour + e_weight : optional if no v_weight + + + Returns + ------- + None + + Raises + ------ + RuntimeError + Raised if there's an error converting a Numpy array or standard + Python array to either the offset_attrib or bytes_attrib + ValueError + Raised if there's an error in generating instance attributes + from either the offset_attrib or bytes_attrib parameter + """ + try: + if len(args) < 2: + raise ValueError + self.n_vertices=cast(int,args[0]) + self.n_edges=cast(int,args[1]) + if len(args) > 8: + if isinstance(args[8],pdarray): + self.e_weight=args[8] + else: + try: + self.e_weight = create_pdarray(args[8]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 7: + if isinstance(args[7],pdarray): + self.v_weight=args[7] + else: + try: + self.v_weight = create_pdarray(args[7]) + except Exception as e: + raise RuntimeError(e) + if len(args) == 6: + raise ValueError + if len(args) > 6: + if (isinstance(args[6],pdarray) and isinstance(args[5],pdarray)) : + self.start_i=args[5] + self.neighbour=args[6] + else: + try: + self.start_i = 
create_pdarray(args[5]) + self.neighbour = create_pdarray(args[6]) + except Exception as e: + raise RuntimeError(e) + if len(args) == 4: + raise ValueError + if len(args) > 4: + if (isinstance(args[4],pdarray) and isinstance(args[3],pdarray)) : + self.src=args[3] + self.dst=args[4] + else: + try: + self.src = create_pdarray(args[3]) + self.dst = create_pdarray(args[4]) + except Exception as e: + raise RuntimeError(e) + if len(args) > 2: + self.directed=cast(int,args[2]) + except Exception as e: + raise RuntimeError(e) + self.dtype = akint + self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore + + def __iter__(self): + raise NotImplementedError('Graph does not support iteration') + + def __size__(self) -> int: + return self.n_vertices + + ''' + def add_vertice(self, x: Vertice)->None : + print() + + def remove_vertice(self, x: int) ->None: + print() + + def neighbours(self, x: int)->pdarray : + return self.neighbour[i] + + def adjacent(self, x: int, y:int )->pdarray : + neighbours(self,x) + neighbours(self,y) + + def get_vertice_value(self, x: int) -> Vertice: + print() + + def set_vertice_value(self, x: int, v: Vertice) : + print() + + def add_edge(self, x: int, y: int) : + print() + + def remove_edge(self, x: int, y: int) : + print() + ''' + + + diff --git a/arkouda/pdarrayclass.py b/arkouda/pdarrayclass.py index 4c6adc15ca..d479609336 100755 --- a/arkouda/pdarrayclass.py +++ b/arkouda/pdarrayclass.py @@ -65,6 +65,46 @@ def unescape(s): raise ValueError(("unsupported value from server {} {}".\ format(mydtype.name, value))) + + + +@typechecked +def _parse_single_int_array_value(msg : str) -> object: + """ + Attempt to convert a scalar return value from the arkouda server to a + numpy string in Python. The user should not call this function directly. 
+ + Parameters + ---------- + msg : str + scalar value in string form to be converted to a numpy string + + Returns + ------- + object numpy scalar + """ + fields = msg.split(" ",1) + dtname, value = msg.split(maxsplit=1) + mydtype = dtype(dtname) + try: + if mydtype == akint64: + nfields = value.split("\"") +# return nfields[1] +# original we return a string include the last ending 0 + + _,sastr=nfields[1].split(maxsplit=1) + tmpstr=sastr.split() + intary = [int(numeric_string) for numeric_string in tmpstr] + return intary +# now we return a suffix array and not include the last ending 0 + else: + raise ValueError(("not correct int data type from server {} {}".\ + format(mydtype.name, value))) + except: + raise ValueError(("unsupported value from server {} {}".\ + format(mydtype.name, value))) + + # class for the pdarray class pdarray: """ diff --git a/arkouda/pdarraycreation.py b/arkouda/pdarraycreation.py index ab08bb564c..0f298057ea 100755 --- a/arkouda/pdarraycreation.py +++ b/arkouda/pdarraycreation.py @@ -8,13 +8,14 @@ float64, int64, DTypes from arkouda.dtypes import dtype as akdtype from arkouda.pdarrayclass import pdarray, create_pdarray -from arkouda.strings import Strings +from arkouda.strings import Strings, SArrays +from arkouda.graph import GraphD, GraphDW,GraphUD,GraphUDW __all__ = ["array", "zeros", "ones", "zeros_like", "ones_like", "arange", "linspace", "randint", "uniform", "standard_normal", "random_strings_uniform", "random_strings_lognormal", - "from_series" - ] + "from_series", "suffix_array","lcp_array","suffix_array_file", + "rmat_gen","graph_bfs","graph_file_read"] numericDTypes = frozenset(["bool", "int64", "float64"]) @@ -598,7 +599,7 @@ def randint(low : Union[int,float], high : Union[int,float], size : int, sizestr = NUMBER_FORMAT_STRINGS['int64'].format(size) repMsg = generic_msg("randint {} {} {} {} {}".\ format(sizestr, dtype.name, lowstr, highstr, seed)) - return create_pdarray(repMsg) + return create_pdarray(cast(str,repMsg)) 
@typechecked def uniform(size : int, low : float=0.0, high : float=1.0, @@ -810,3 +811,348 @@ def random_strings_lognormal(logmean : Union[float, int], logstd : Union[float, seed) repMsg = generic_msg(msg) return Strings(*(cast(str,repMsg).split('+'))) + + + +@typechecked +def suffix_array(strings : Strings) -> SArrays: + """ + Return the suffix arrays of given strings. The size/shape of each suffix + arrays is the same as the corresponding strings. + A simple example of suffix array is as follow. Given a string "banana$", + all the suffixes are as follows. + s[0]="banana$" + s[1]="anana$" + s[2]="nana$" + s[3]="ana$" + s[4]="na$" + s[5]="a$" + s[6]="$" + The suffix array of string "banana$" is the array of indices of sorted suffixes. + s[6]="$" + s[5]="a$" + s[3]="ana$" + s[1]="anana$" + s[0]="banana$" + s[4]="na$" + s[2]="nana$" + so sa=[6,5,3,1,0,4,2] + + Returns + ------- + pdarray + The suffix arrays of the given strings + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + Raised if there is a server-side error in executing group request or + creating the pdarray encapsulating the return message + """ + msg = "segmentedSuffixAry {} {} {}".format( strings.objtype, + strings.offsets.name, + strings.bytes.name) + repMsg = generic_msg(msg) + return SArrays(*(cast(str,repMsg).split('+'))) + + +@typechecked +def lcp_array(suffixarrays : SArrays, strings : Strings) -> SArrays: + """ + Return the longest common prefix of given suffix arrays. The size/shape of each lcp + arrays is the same as the corresponding suffix array. 
+ ------- + SArrays + The LCP arrays of the given suffix arrays + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + Raised if there is a server-side error in executing group request or + creating the pdarray encapsulating the return message + """ + msg = "segmentedLCP {} {} {} {} {}".format( suffixarrays.objtype, + suffixarrays.offsets.name, + suffixarrays.bytes.name, + strings.offsets.name, + strings.bytes.name) + repMsg = generic_msg(msg) + return SArrays(*(cast(str,repMsg).split('+'))) + +@typechecked +def suffix_array_file(filename: str) -> tuple: +#def suffix_array_file(filename: str) -> tuple[SArrays,Strings]: + """ + This function is major used for testing correctness and performance + Return the suffix array of given file name's content as a string. + A simple example of suffix array is as follow. Given string "banana$", + all the suffixes are as follows. + s[0]="banana$" + s[1]="anana$" + s[2]="nana$" + s[3]="ana$" + s[4]="na$" + s[5]="a$" + s[6]="$" + The suffix array of string "banana$" is the array of indices of sorted suffixes. 
+ s[6]="$" + s[5]="a$" + s[3]="ana$" + s[1]="anana$" + s[0]="banana$" + s[4]="na$" + s[2]="nana$" + so sa=[6,5,3,1,0,4,2] + + Returns + ------- + pdarray + The suffix arrays of the given strings + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + Raised if there is a server-side error in executing group request or + creating the pdarray encapsulating the return message + """ + msg = "segmentedSAFile {}".format( filename ) + repMsg = generic_msg(msg) + tmpmsg=cast(str,repMsg).split('+') + sastr=tmpmsg[0:2] + strstr=tmpmsg[2:4] + suffixarray=SArrays(*(cast(str,sastr))) + originalstr=Strings(*(cast(str,strstr))) + return suffixarray,originalstr +# return SArrays(*(cast(str,repMsg).split('+'))) + + +@typechecked +def graph_file_read(Ne:int, Nv:int,Ncol:int,directed:int, filename: str) -> Union[GraphD,GraphUD,GraphDW,GraphUDW]: + """ + This function is used for creating a graph from a file. + The file should like this + 1 5 + 13 9 + 4 8 + 7 6 + This file means the edges are <1,5>,<13,9>,<4,8>,<7,6>. If additional column is added, it is the weight + of each edge. + Ne : the total number of edges of the graph + Nv : the total number of vertices of the graph + Ncol: how many column of the file. Ncol=2 means just edges (so no weight and weighted=0) + and Ncol=3 means there is weight for each edge (so weighted=1). 
+ directed: 0 means undirected graph and 1 means directed graph + Returns + ------- + Graph + The Graph class to represent the data + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + """ + msg = "segmentedGraphFile {} {} {} {} {}".format(Ne, Nv, Ncol,directed, filename); + repMsg = generic_msg(msg) + if (int(Ncol) >2) : + weighted=1 + else: + weighted=0 + + if (directed!=0) : + if (weighted!=0) : + return GraphDW(*(cast(str,repMsg).split('+'))) + else: + return GraphD(*(cast(str,repMsg).split('+'))) + else: + if (weighted!=0) : + return GraphUDW(*(cast(str,repMsg).split('+'))) + else: + return GraphUD(*(cast(str,repMsg).split('+'))) + +@typechecked +def rmat_gen (lgNv:int, Ne_per_v:int, p:float, directed: int,weighted:int) ->\ + Union[GraphD,GraphUD,GraphDW,GraphUDW]: + """ + This function is for creating a graph using rmat graph generator + Returns + ------- + Graph + The Graph class to represent the data + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + """ + msg = "segmentedRMAT {} {} {} {} {}".format(lgNv, Ne_per_v, p, directed, weighted) + repMsg = generic_msg(msg) + if (directed!=0) : + if (weighted!=0) : + return GraphDW(*(cast(str,repMsg).split('+'))) + else: + return GraphD(*(cast(str,repMsg).split('+'))) + else: + if (weighted!=0) : + return GraphUDW(*(cast(str,repMsg).split('+'))) + else: + return GraphUD(*(cast(str,repMsg).split('+'))) + +@typechecked +def graph_bfs (graph: Union[GraphD,GraphDW,GraphUD,GraphUDW], root: int ) -> pdarray: + """ + This function is generating the breadth-first search vertices sequences in given graph + starting from the given root vertex + Returns + ------- + pdarray + The bfs vertices results + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + """ + #if (cast(int,graph.directed)!=0) : + if (int(graph.directed)>0) : + if (int(graph.weighted)==0): + # directed unweighted GraphD + msg = "segmentedGraphBFS {} {} {} {} {} {} {} {} {}".format( + 
graph.n_vertices,graph.n_edges,\ + graph.directed,graph.weighted,\ + graph.src.name,graph.dst.name,\ + graph.start_i.name,graph.neighbour.name,\ + root) + else: + # directed weighted GraphDW + msg = "segmentedGraphBFS {} {} {} {} {} {} {} {} {} {} {}".format( + graph.n_vertices,graph.n_edges,\ + graph.directed,graph.weighted,\ + graph.src.name,graph.dst.name,\ + graph.start_i.name,graph.neighbour.name,\ + graph.v_weight.name,graph.e_weight.name,\ + root) + else: + if (int(graph.weighted)==0): + # undirected unweighted GraphUD + msg = "segmentedGraphBFS {} {} {} {} {} {} {} {} {} {} {} {} {}".format( + graph.n_vertices,graph.n_edges,\ + graph.directed,graph.weighted,\ + graph.src.name,graph.dst.name,\ + graph.start_i.name,graph.neighbour.name,\ + graph.srcR.name,graph.dstR.name,\ + graph.start_iR.name,graph.neighbourR.name,\ + root) + else: + # undirected weighted GraphUDW 15 + msg = "segmentedGraphBFS {} {} {} {} {} {} {} {} {} {} {} {} {} {} {}".format( + graph.n_vertices,graph.n_edges,\ + graph.directed,graph.weighted,\ + graph.src.name,graph.dst.name,\ + graph.start_i.name,graph.neighbour.name,\ + graph.srcR.name,graph.dstR.name,\ + graph.start_iR.name,graph.neighbourR.name,\ + graph.v_weight.name,graph.e_weight.name,\ + root) + + repMsg = generic_msg(msg) + ''' + tmpmsg=cast(str,repMsg).split('+') + levelstr=tmpmsg[0:1] + vertexstr=tmpmsg[1:2] + levelary=create_pdarray(*(cast(str,levelstr)) ) + + vertexary=create_pdarray(*(cast(str,vertexstr)) ) + ''' + return create_pdarray(repMsg) + #return (levelary,vertexary) + + +@typechecked +def graph_dfs (graph: Union[GraphD,GraphUD,GraphDW,GraphUDW], root: int ) -> tuple: + """ + This function is generating the depth-first search vertices sequences in given graph + starting from the given root vertex + Returns + ------- + pdarray + The dfs vertices results + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + """ + msg = "segmentedGraphDFS {} {} {} {} {} {} {} 
{}".format(graph.n_vertices,graph.n_edges,\ + graph.directed,graph.src.name,graph.dst.name,\ + graph.start_i.name,graph.neighbour.name,root) + repMsg = generic_msg(msg) + tmpmsg=cast(str,repMsg).split('+') + levelstr=tmpmsg[0:1] + vertexstr=tmpmsg[1:2] + levelary=create_pdarray(*(cast(str,levelstr)) ) + + vertexary=create_pdarray(*(cast(str,vertexstr)) ) + return (levelary,vertexary) + + +@typechecked +def components (graph: Union[GraphD,GraphUD,GraphDW,GraphUDW] ) -> int : + """ + This function returns the number of components of the given graph + Returns + ------- + int + The total number of components + + See Also + -------- + + Notes + ----- + + Raises + ------ + RuntimeError + """ + msg = "segmentedGraphComponents {} {}".format(graph.edges.name,graph.vertices.name) + repMsg = generic_msg(msg) + return cast(int,repMsg) diff --git a/arkouda/pdarraysetops.py b/arkouda/pdarraysetops.py index 1818634a10..548a88f9c4 100644 --- a/arkouda/pdarraysetops.py +++ b/arkouda/pdarraysetops.py @@ -5,7 +5,7 @@ from arkouda.pdarrayclass import pdarray, create_pdarray from arkouda.pdarraycreation import zeros_like, array from arkouda.sorting import argsort -from arkouda.strings import Strings +from arkouda.strings import Strings,SArrays from arkouda.logger import getArkoudaLogger Categorical = ForwardRef('Categorical') @@ -161,6 +161,83 @@ def in1d(pda1 : Union[pdarray,Strings,'Categorical'], pda2 : Union[pdarray,Strin else: raise TypeError('Both pda1 and pda2 must be pdarray, Strings, or Categorical') + + +def in1d_int(pda1 : Union[pdarray,SArrays,'Categorical'], pda2 : Union[pdarray,SArrays,'Categorical'], #type: ignore + invert : bool=False) -> pdarray: #type: ignore + """ + Test whether each element of a 1-D array is also present in a second array. + + Returns a boolean array the same length as `pda1` that is True + where an element of `pda1` is in `pda2` and False otherwise. + + Parameters + ---------- + pda1 : pdarray or SArrays or Categorical + Input array. 
+ pda2 : pdarray or SArrays or Categorical + The values against which to test each value of `pda1`. Must be the + same type as `pda1`. + invert : bool, optional + If True, the values in the returned array are inverted (that is, + False where an element of `pda1` is in `pda2` and True otherwise). + Default is False. ``ak.in1d(a, b, invert=True)`` is equivalent + to (but is faster than) ``~ak.in1d(a, b)``. + + Returns + ------- + pdarray, bool + The values `pda1[in1d]` are in `pda2`. + + Raises + ------ + TypeError + Raised if either pda1 or pda2 is not a pdarray, Strings, or + Categorical object or if invert is not a bool + RuntimeError + Raised if the dtype of either array is not supported + + See Also + -------- + unique, intersect1d, union1d + + Notes + ----- + `in1d` can be considered as an element-wise function version of the + python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is logically + equivalent to ``ak.array([item in b for item in a])``, but is much + faster and scales to arbitrarily large ``a``. 
+ + ak.in1d is not supported for bool or float64 pdarrays + + Examples + -------- + >>> ak.in1d(ak.array([-1, 0, 1]), ak.array([-2, 0, 2])) + array([False, True, False]) + + >>> ak.in1d(ak.array(['one','two']),ak.array(['two', 'three','four','five'])) + array([False, True]) + """ + from arkouda.categorical import Categorical as Categorical_ + if hasattr(pda1, 'in1d'): + return cast(Categorical_,pda1).in1d(pda2) + elif isinstance(pda1, pdarray) and isinstance(pda2, pdarray): + repMsg = generic_msg("in1d {} {} {}".\ + format(pda1.name, pda2.name, invert)) + return create_pdarray(cast(str,repMsg)) + elif isinstance(pda1, SArrays) and isinstance(pda2, SArrays): + repMsg = generic_msg("segmentedIn1dInt {} {} {} {} {} {} {}".\ + format(pda1.objtype, + pda1.offsets.name, + pda1.bytes.name, + pda2.objtype, + pda2.offsets.name, + pda2.bytes.name, + invert)) + return create_pdarray(cast(str,repMsg)) + else: + raise TypeError('Both pda1 and pda2 must be pdarray, SArrays, or Categorical') + @typechecked def concatenate(arrays : Sequence[Union[pdarray,Strings]]) -> Union[pdarray,Strings]: """ diff --git a/arkouda/strings.py b/arkouda/strings.py index 74bee91613..9e1d4605c9 100755 --- a/arkouda/strings.py +++ b/arkouda/strings.py @@ -2,15 +2,16 @@ from typing import cast, Tuple, Union from typeguard import typechecked from arkouda.client import generic_msg -from arkouda.pdarrayclass import pdarray, create_pdarray, parse_single_value +from arkouda.pdarrayclass import pdarray, create_pdarray, parse_single_value,_parse_single_int_array_value from arkouda.logger import getArkoudaLogger import numpy as np # type: ignore from arkouda.dtypes import str as akstr +from arkouda.dtypes import int64 as akint from arkouda.dtypes import NUMBER_FORMAT_STRINGS, resolve_scalar_dtype, \ translate_np_dtype import json -__all__ = ['Strings'] +__all__ = ['Strings','SArrays'] class Strings: """ @@ -784,3 +785,328 @@ def unregister(self) -> None: def attach(user_defined_name : str) -> Strings: return 
Strings(pdarray.attach(user_defined_name+'_offsets'), pdarray.attach(user_defined_name+'_bytes')) + +class SArrays: + """ + Represents an array of (suffix) arrays whose data resides on the arkouda server. + The user should not call this class directly; rather its instances are created + by other arkouda functions. It is very similar to Strings and the difference is + that its content is int arrays instead of strings. + + Attributes + ---------- + offsets : pdarray + The starting indices for each suffix array + bytes : pdarray + The raw integer indices of all suffix arrays + size : int + The number of suffix arrays in the array + nbytes : int + The total number of indices in all suffix arrays + We have the same number indices as the number of characters/suffixes in strings + ndim : int + The rank of the array (currently only rank 1 arrays supported) + shape : tuple + The sizes of each dimension of the array + dtype : dtype + The dtype is np.int + logger : ArkoudaLogger + Used for all logging operations + + Notes + ----- + SArrays is composed of two pdarrays: (1) offsets, which contains the + starting indices for each string's suffix array and (2) bytes, which contains the + indices of all suffix arrays, no any spliter between two index arrays. + """ + + BinOps = frozenset(["==", "!="]) + objtype = "int" + + def __init__(self, offset_attrib : Union[pdarray,np.ndarray], + bytes_attrib : Union[pdarray,np.ndarray]) -> None: + """ + Initializes the SArrays instance by setting all instance + attributes, some of which are derived from the array parameters. 
+ + Parameters + ---------- + offset_attrib : Union[pdarray, np.ndarray,array] + the array containing the offsets + bytes_attrib : Union[pdarray, np.ndarray,array] + the array containing the suffix array indices + + Returns + ------- + None + + Raises + ------ + RuntimeError + Raised if there's an error converting a Numpy array or standard + Python array to either the offset_attrib or bytes_attrib + ValueError + Raised if there's an error in generating instance attributes + from either the offset_attrib or bytes_attrib parameter + """ + if isinstance(offset_attrib, pdarray): + self.offsets = offset_attrib + else: + try: + self.offsets = create_pdarray(offset_attrib) + except Exception as e: + raise RuntimeError(e) + if isinstance(bytes_attrib, pdarray): + self.bytes = bytes_attrib + else: + try: + self.bytes = create_pdarray(bytes_attrib) + except Exception as e: + raise RuntimeError(e) + try: + self.size = self.offsets.size + self.nbytes = self.bytes.size + self.ndim = self.offsets.ndim + self.shape = self.offsets.shape + except Exception as e: + raise ValueError(e) + self.dtype = akint + self.logger = getArkoudaLogger(name=__class__.__name__) # type: ignore + + def __iter__(self): + raise NotImplementedError('SArrays does not support iteration now') + + def __len__(self) -> int: + return self.shape[0] + + def __str__(self) -> str: + from arkouda.client import pdarrayIterThresh + if self.size <= pdarrayIterThresh: + vals = ["'{}'".format(self[i]) for i in range(self.size)] + else: + vals = ["'{}'".format(self[i]) for i in range(3)] + vals.append('... ') + vals.extend([self[i] for i in range(self.size-3, self.size)]) + return "[{}]".format(', '.join(vals)) + + def __repr__(self) -> str: + return "array({})".format(self.__str__()) + + @typechecked + def _binop(self, other : Union[SArrays,np.int_], op : str) -> pdarray: + """ + Executes the requested binop on this SArrays instance and the + parameter SArrays object and returns the results within + a pdarray object. 
+ + Parameters + ---------- + other : SArrays + the other object is a SArrays object + op : str + name of the binary operation to be performed + + Returns + ------- + pdarray + encapsulating the results of the requested binop + + Raises + - ----- + ValueError + Raised if (1) the op is not in the self.BinOps set, or (2) if the + sizes of this and the other instance don't match, or (3) the other + object is not a SArrays object + RuntimeError + Raised if a server-side error is thrown while executing the + binary operation + """ + if op not in self.BinOps: + raise ValueError("SArrays: unsupported operator: {}".format(op)) + if isinstance(other, Strings): + if self.size != other.size: + raise ValueError("SArrays: size mismatch {} {}".\ + format(self.size, other.size)) + msg = "segmentedBinopvvInt {} {} {} {} {} {} {}".format(op, + self.objtype, + self.offsets.name, + self.bytes.name, + other.objtype, + other.offsets.name, + other.bytes.name) + elif resolve_scalar_dtype(other) == 'int': + msg = "segmentedBinopvsInt {} {} {} {} {} {}".format(op, + self.objtype, + self.offsets.name, + self.bytes.name, + self.objtype, + json.dumps([other])) + else: + raise ValueError("SArrays: {} not supported between SArrays and {}"\ + .format(op, other.__class__.__name__)) + repMsg = generic_msg(msg) + return create_pdarray(cast(str,repMsg)) + + def __eq__(self, other) -> bool: + return self._binop(other, "==") + + def __ne__(self, other) -> bool: + return self._binop(cast(SArrays, other), "!=") + + def __getitem__(self, key): + if np.isscalar(key) and resolve_scalar_dtype(key) == 'int64': + orig_key = key + if key < 0: + # Interpret negative key as offset from end of array + key += self.size + if (key >= 0 and key < self.size): + msg = "segmentedIndex {} {} {} {} {}".format('intIndex', + self.objtype, + self.offsets.name, + self.bytes.name, + key) + repMsg = generic_msg(msg) + _, value = repMsg.split(maxsplit=1) + return _parse_single_int_array_value(value) + else: + raise 
IndexError("[int] {} is out of bounds with size {}".\ + format(orig_key,self.size)) + elif isinstance(key, slice): + (start,stop,stride) = key.indices(self.size) + self.logger.debug('start: {}; stop: {}; stride: {}'.format(start,stop,stride)) + msg = "segmentedIndex {} {} {} {} {} {} {}".format('sliceIndex', + self.objtype, + self.offsets.name, + self.bytes.name, + start, + stop, + stride) + repMsg = generic_msg(msg) + offsets, values = repMsg.split('+') + return SArrays(offsets, values); + elif isinstance(key, pdarray): + kind, _ = translate_np_dtype(key.dtype) + if kind not in ("bool", "int"): + raise TypeError("unsupported pdarray index type {}".format(key.dtype)) + if kind == "int" and self.size != key.size: + raise ValueError("size mismatch {} {}".format(self.size,key.size)) + msg = "segmentedIndex {} {} {} {} {}".format('pdarrayIndex', + self.objtype, + self.offsets.name, + self.bytes.name, + key.name) + repMsg = generic_msg(msg) + offsets, values = repMsg.split('+') + return SArrays(offsets, values) + else: + raise TypeError("unsupported pdarray index type {}".format(key.__class__.__name__)) + + def get_lengths(self) -> pdarray: + """ + Return the length of each suffix array in the array. + + Returns + ------- + pdarray, int + The length of each string + + Raises + ------ + RuntimeError + Raised if there is a server-side error thrown + """ + msg = "segmentLengths {} {} {}".\ + format(self.objtype, self.offsets.name, self.bytes.name) + repMsg = generic_msg(msg) + return create_pdarray(cast(str,repMsg)) + + ''' + def __add__(self, other : SArrays) -> SArrays: + return self.stick(other) + + + def hash(self) -> Tuple[pdarray,pdarray]: + """ + Compute a 128-bit hash of each suffix array. + + Returns + ------- + Tuple[pdarray,pdarray] + A tuple of two int64 pdarrays. The ith hash value is the concatenation + of the ith values from each array. 
+ + Notes + ----- + The implementation uses SipHash128, a fast and balanced hash function (used + by Python for dictionaries and sets). For realistic numbers of suffix array (up + to about 10**15), the probability of a collision between two 128-bit hash + values is negligible. + """ + msg = "segmentedHash {} {} {}".format(self.objtype, self.offsets.name, + self.bytes.name) + repMsg = generic_msg(msg) + h1, h2 = cast(str,repMsg).split('+') + return create_pdarray(cast(str,h1)), create_pdarray(cast(str,h2)) + + ''' + + def save(self, prefix_path : str, dataset : str='int_array', + mode : str='truncate') -> None: + """ + Save the SArrays object to HDF5. The result is a collection of HDF5 files, + one file per locale of the arkouda server, where each filename starts + with prefix_path. Each locale saves its chunk of the array to its + corresponding file. + + Parameters + ---------- + prefix_path : str + Directory and filename prefix that all output files share + dataset : str + The name of the SArrays dataset to be written, defaults to int_array + mode : str {'truncate' | 'append'} + By default, truncate (overwrite) output files, if they exist. + If 'append', create a new SArrays dataset within existing files. 
+ + Returns + ------- + None + + Raises + ------ + ValueError + Raised if the lengths of columns and values differ, or the mode is + neither 'truncate' nor 'append' + + See Also + -------- + pdarrayIO.save + + Notes + ----- + Important implementation notes: (1) SArrays state is saved as two datasets + within an hdf5 group, (2) the hdf5 group is named via the dataset parameter, + (3) the hdf5 group encompasses the two pdarrays composing a SArrays object: + segments and values and (4) save logic is delegated to pdarray.save + """ + self.bytes.save(prefix_path=prefix_path, + dataset='{}/values'.format(dataset), mode=mode) + + @classmethod + def register_helper(cls, offsets, bytes): + return cls(offsets, bytes) + + def register(self, user_defined_name : str) -> 'SArrays': + return self.register_helper(self.offsets.register(user_defined_name+'_offsets'), + self.bytes.register(user_defined_name+'_bytes')) + + def unregister(self) -> None: + self.offsets.unregister() + self.bytes.unregister() + + @staticmethod + def attach(user_defined_name : str) -> 'SArrays': + return SArrays(pdarray.attach(user_defined_name+'_offsets'), + pdarray.attach(user_defined_name+'_bytes')) + + diff --git a/benchmarks/bfs.py b/benchmarks/bfs.py new file mode 100755 index 0000000000..c01abace7f --- /dev/null +++ b/benchmarks/bfs.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 + +import time, argparse +import numpy as np +import arkouda as ak +import random +import string + +TYPES = ('int64', 'float64', 'bool', 'str') + +def time_ak_bfs_graph(trials:int): + print("Graph BFS") + lgNv=5 + Ne_per_v=3 + p=0.03 + directed=1 + weighted=0 + Graph=ak.rmat_gen(lgNv, Ne_per_v, p, directed, weighted) + ''' + print("number of vertices ={}".format(Graph.n_vertices)) + print("number of edges ={}".format(Graph.n_edges)) + print("directed graph ={}".format(Graph.directed)) + print("weighted graph ={}".format(Graph.weighted)) + print("source of edges ={}".format(Graph.src)) + print("R dest of edges 
={}".format(Graph.dstR)) + print("dest of edges ={}".format(Graph.dst)) + print("R source of edges ={}".format(Graph.srcR)) + print("start ={}".format(Graph.start_i)) + print("R start ={}".format(Graph.start_iR)) + print(" neighbour ={}".format(Graph.neighbour)) + print("R neighbour ={}".format(Graph.neighbourR)) + print("vertices weight ={}".format(Graph.v_weight)) + print("edges weight ={}".format(Graph.e_weight)) + ''' + deparray = ak.graph_bfs(Graph,4) + print(deparray) + ''' + ll,ver = ak.graph_bfs(Graph,4) + old=-2; + visit=[] + for i in range(int(Graph.n_vertices)): + cur=ll[i] + if (int(cur)!=int(old)): + if len(visit) >0: + print(visit) + print("current level=",cur,"the vertices at this level are") + old=cur + visit=[] + visit.append(ver[i]) + print(visit) + ''' + print("total edges are as follows") + for i in range(int(Graph.n_edges)): + print("<",Graph.src[i]," -- ", Graph.dst[i],">") + + ''' + print("total reverse edges are as follows") + for i in range(int(Graph.n_edges)): + print("<",Graph.srcR[i]," -- ", Graph.dstR[i],">") + + timings = [] + for root in range(trials): + start = time.time() + _ = ak.graph_bfs(Graph,root) + end = time.time() + timings.append(end - start) + tavg = sum(timings) / trials + print("Average time = {:.4f} sec".format(tavg)) + print("number of vertices ={}".format(Graph.n_vertices)) + print("number of edges ={}".format(Graph.n_edges)) + print("Average Edges = {:.4f} K/s".format(int(Graph.n_edges)/tavg/1024)) + print("Average Vertices = {:.4f} K/s".format(int(Graph.n_vertices)/tavg/1024)) + #print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30)) + ''' + + +def create_parser(): + parser = argparse.ArgumentParser(description="Measure the performance of suffix array building: C= suffix_array(V)") + parser.add_argument('hostname', help='Hostname of arkouda server') + parser.add_argument('port', type=int, help='Port of arkouda server') + parser.add_argument('-v', '--logvertices', type=int, default=5, help='Problem size: 
log number of vertices') + parser.add_argument('-e', '--vedges', type=int, default=2,help='Number of edges per vertex') + parser.add_argument('-p', '--possibility', type=float, default=0.01,help='Possibility ') + parser.add_argument('-t', '--trials', type=int, default=6, help='Number of times to run the benchmark') + parser.add_argument('-m', '--perm', type=int, default=0 , help='if permutation ') + parser.add_argument('--numpy', default=False, action='store_true', help='Run the same operation in NumPy to compare performance.') + parser.add_argument('--correctness-only', default=False, action='store_true', help='Only check correctness, not performance.') + return parser + + + +if __name__ == "__main__": + import sys + parser = create_parser() + args = parser.parse_args() + ak.verbose = False + ak.connect(args.hostname, args.port) + + ''' + if args.correctness_only: + check_correctness(args.number, args.size, args.trials, args.dtype) + print("CORRECT") + sys.exit(0) + ''' + + time_ak_bfs_graph(args.trials) + sys.exit(0) diff --git a/benchmarks/graphfilebfs.py b/benchmarks/graphfilebfs.py new file mode 100755 index 0000000000..e664d77634 --- /dev/null +++ b/benchmarks/graphfilebfs.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 + +import time, argparse +import numpy as np +import arkouda as ak +import random +import string + +TYPES = ('int64', 'float64', 'bool', 'str') + +def time_ak_bfs_graph(trials:int): + lines=91 + vertices=17 + col=3 + directed=0 + weighted=1 + Graph=ak.graph_file_read(lines,vertices,col,directed,"kang.gr") + print("number of vertices ={}".format(Graph.n_vertices)) + print("number of edges ={}".format(Graph.n_edges)) + print("directed graph ={}".format(Graph.directed)) + print("weighted graph ={}".format(Graph.weighted)) + print("source of edges ={}".format(Graph.src)) + print("R dest of edges ={}".format(Graph.dstR)) + print("dest of edges ={}".format(Graph.dst)) + print("R source of edges ={}".format(Graph.srcR)) + print("start 
={}".format(Graph.start_i)) + print("R start ={}".format(Graph.start_iR)) + print(" neighbour ={}".format(Graph.neighbour)) + print("R neighbour ={}".format(Graph.neighbourR)) + print("vertices weight ={}".format(Graph.v_weight)) + print("edges weight ={}".format(Graph.e_weight)) + ll,ver = ak.graph_bfs(Graph,4) + old=-2; + visit=[] + for i in range(int(Graph.n_vertices)): + cur=ll[i] + if (int(cur)!=int(old)): + if len(visit) >0: + print(visit) + print("current level=",cur,"the vertices at this level are") + old=cur + visit=[] + visit.append(ver[i]) + print(visit) + + print("total edges are as follows") + for i in range(int(Graph.n_edges)): + print("<",Graph.src[i]," -- ", Graph.dst[i],">") + ''' + print("total reverse edges are as follows") + for i in range(int(Graph.n_edges)): + print("<",Graph.srcR[i]," -- ", Graph.dstR[i],">") + ''' + timings = [] + for root in range(trials): + start = time.time() + level,nodes = ak.graph_bfs(Graph,root) + end = time.time() + timings.append(end - start) + tavg = sum(timings) / trials + print("Average time = {:.4f} sec".format(tavg)) + print("Average Edges = {:.4f} K/s".format(int(Graph.n_edges)/tavg/1024)) + print("Average Vertices = {:.4f} K/s".format(int(Graph.n_vertices)/tavg/1024)) + ''' + #print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30)) + ''' + + +def create_parser(): + parser = argparse.ArgumentParser(description="Measure the performance of suffix array building: C= suffix_array(V)") + parser.add_argument('hostname', help='Hostname of arkouda server') + parser.add_argument('port', type=int, help='Port of arkouda server') + parser.add_argument('-v', '--logvertices', type=int, default=5, help='Problem size: log number of vertices') + parser.add_argument('-e', '--vedges', type=int, default=2,help='Number of edges per vertex') + parser.add_argument('-p', '--possibility', type=float, default=0.01,help='Possibility ') + parser.add_argument('-t', '--trials', type=int, default=6, help='Number of times to run 
the benchmark') + parser.add_argument('-m', '--perm', type=int, default=0 , help='if permutation ') + parser.add_argument('--numpy', default=False, action='store_true', help='Run the same operation in NumPy to compare performance.') + parser.add_argument('--correctness-only', default=False, action='store_true', help='Only check correctness, not performance.') + return parser + + + +if __name__ == "__main__": + import sys + parser = create_parser() + args = parser.parse_args() + ak.verbose = False + ak.connect(args.hostname, args.port) + + ''' + if args.correctness_only: + check_correctness(args.number, args.size, args.trials, args.dtype) + print("CORRECT") + sys.exit(0) + ''' + + time_ak_bfs_graph(args.trials) + sys.exit(0) diff --git a/benchmarks/rmatgen.py b/benchmarks/rmatgen.py new file mode 100755 index 0000000000..3169b02c0d --- /dev/null +++ b/benchmarks/rmatgen.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 + +import time, argparse +import numpy as np +import arkouda as ak +import random +import string + +TYPES = ('int64', 'float64', 'bool', 'str') + +def time_ak_rmat_graph(lgNv, Ne_per_v, p, directed, weighted): + print(">>> arkouda rmat graph") + cfg = ak.get_config() + Nv = cfg["numLocales"] + print("numLocales = {}".format(cfg["numLocales"])) + Graph = ak.rmat_gen(lgNv, Ne_per_v, p, directed, weighted) + print("number of vertices ={}".format(Graph.n_vertices)) + print("number of edges ={}".format(Graph.n_edges)) + print("directed graph ={}".format(Graph.directed)) + print("source of edges ={}".format(Graph.src)) + print("dest of edges ={}".format(Graph.dst)) + print("start ={}".format(Graph.start_i)) + print("neighbour ={}".format(Graph.neighbour)) + #print("vertices weight ={}".format(Graph.v_weight)) + #print("edges weight ={}".format(Graph.e_weight)) + timings = [] + for _ in range(trials): + start = time.time() + Graph = ak.rmat_gen(lgNv, Ne_per_v, p, perm) + end = time.time() + timings.append(end - start) + tavg = sum(timings) / trials + + 
print("Average time = {:.4f} sec".format(tavg)) + + + +def create_parser(): + parser = argparse.ArgumentParser(description="Measure the performance of suffix array building: C= suffix_array(V)") + parser.add_argument('hostname', help='Hostname of arkouda server') + parser.add_argument('port', type=int, help='Port of arkouda server') + parser.add_argument('-v', '--logvertices', type=int, default=5, help='Problem size: log number of vertices') + parser.add_argument('-e', '--vedges', type=int, default=2,help='Number of edges per vertex') + parser.add_argument('-p', '--possibility', type=float, default=0.01,help='Possibility ') + parser.add_argument('-t', '--trials', type=int, default=6, help='Number of times to run the benchmark') + parser.add_argument('-d', '--directed', type=int, default=0 , help='if directed ') + parser.add_argument('-w', '--weighted', type=int, default=0 , help='if weighted ') + parser.add_argument('--numpy', default=False, action='store_true', help='Run the same operation in NumPy to compare performance.') + parser.add_argument('--correctness-only', default=False, action='store_true', help='Only check correctness, not performance.') + return parser + + + +if __name__ == "__main__": + import sys + parser = create_parser() + args = parser.parse_args() + ak.verbose = False + ak.connect(args.hostname, args.port) + + ''' + if args.correctness_only: + check_correctness(args.number, args.size, args.trials, args.dtype) + print("CORRECT") + sys.exit(0) + ''' + + time_ak_rmat_graph(args.logvertices, args.vedges, args.possibility, args.directed,args.weighted) + sys.exit(0) diff --git a/benchmarks/run_benchmarks.py b/benchmarks/run_benchmarks.py index 42ec3a28d2..a116c7f182 100755 --- a/benchmarks/run_benchmarks.py +++ b/benchmarks/run_benchmarks.py @@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO) -BENCHMARKS = ['stream', 'argsort', 'coargsort', 'gather', 'scatter', 'reduce', 'scan', 'noop', 'setops'] +BENCHMARKS = ['stream', 'argsort', 'coargsort', 
'gather', 'scatter', 'reduce', 'scan', 'noop', 'setops','sa'] def get_chpl_util_dir(): """ Get the Chapel directory that contains graph generation utilities. """ diff --git a/benchmarks/sa.py b/benchmarks/sa.py new file mode 100755 index 0000000000..ae59178180 --- /dev/null +++ b/benchmarks/sa.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 + +import time, argparse +import numpy as np +import arkouda as ak +import random +import string + +TYPES = ('int64', 'float64', 'bool', 'str') + +def time_ak_sa( vsize,strlen, trials, dtype): + print(">>> arkouda suffix array") + cfg = ak.get_config() + Nv = vsize * cfg["numLocales"] + print("numLocales = {}, num of strings = {:,}".format(cfg["numLocales"], Nv)) + + if dtype == 'str': + v = ak.random_strings_uniform(1, strlen, Nv) + else: + print("Wrong data type") + c=ak.suffix_array(v) +# print("size of suffix array={}".format(c.bytes.size)) +# print("offset/number of suffix array={}".format(c.offsets.size)) +# print("itemsize of suffix array={}".format(c.offsets.itemsize)) + print("All the random strings are as follows") + for k in range(vsize): + print("the {} th random tring ={}".format(k,v[k])) + print("the {} th suffix array ={}".format(k,c[k])) + print("") + timings = [] + for _ in range(trials): + start = time.time() + c=ak.suffix_array(v) + end = time.time() + timings.append(end - start) + tavg = sum(timings) / trials + + print("Average time = {:.4f} sec".format(tavg)) + if dtype == 'str': + offsets_transferred = 0 * c.offsets.size * c.offsets.itemsize + bytes_transferred = (c.bytes.size * c.offsets.itemsize) + (0 * c.bytes.size) + bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg + else: + print("Wrong data type") + print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30)) + + +def suffixArray(s): + suffixes = [(s[i:], i) for i in range(len(s))] + suffixes.sort(key=lambda x: x[0]) + sa= [s[1] for s in suffixes] + #sa.insert(0,len(sa)) + return sa + +def time_np_sa(vsize, strlen, trials, dtype): + 
s=''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(strlen)) + timings = [] + for _ in range(trials): + start = time.time() + sa=suffixArray(s) + end = time.time() + timings.append(end - start) + tavg = sum(timings) / trials + print("Average time = {:.4f} sec".format(tavg)) + if dtype == 'str': + offsets_transferred = 0 + bytes_transferred = len(s) + bytes_per_sec = (offsets_transferred + bytes_transferred) / tavg + else: + print("Wrong data type") + print("Average rate = {:.2f} GiB/sec".format(bytes_per_sec/2**30)) + +def check_correctness( vsize,strlen, trials, dtype): + Ni = strlen + Nv = vsize + + v = ak.random_strings_uniform(1, Ni, Nv) + c=ak.suffix_array(v) + for k in range(Nv): + s=v[k] + sa=suffixArray(s) + aksa=c[k] +# _,tmp=c[k].split(maxsplit=1) +# aksa=tmp.split() +# intaksa = [int(numeric_string) for numeric_string in aksa] +# intaksa = aksa[1:-1] +# print(sa) +# print(intaksa) + assert (sa==aksa) + + +def create_parser(): + parser = argparse.ArgumentParser(description="Measure the performance of suffix array building: C= suffix_array(V)") + parser.add_argument('hostname', help='Hostname of arkouda server') + parser.add_argument('port', type=int, help='Port of arkouda server') + parser.add_argument('-n', '--size', type=int, default=10**4, help='Problem size: length of strings') + parser.add_argument('-v', '--number', type=int, default=10,help='Number of strings') + parser.add_argument('-t', '--trials', type=int, default=6, help='Number of times to run the benchmark') + parser.add_argument('-d', '--dtype', default='str', help='Dtype of value array ({})'.format(', '.join(TYPES))) + parser.add_argument('--numpy', default=False, action='store_true', help='Run the same operation in NumPy to compare performance.') + parser.add_argument('-r', '--randomize', default=False, action='store_true', help='Use random values instead of ones') + parser.add_argument('--correctness-only', default=False, action='store_true', help='Only check 
correctness, not performance.') + return parser + + + +if __name__ == "__main__": + import sys + parser = create_parser() + args = parser.parse_args() + if args.dtype not in TYPES: + raise ValueError("Dtype must be {}, not {}".format('/'.join(TYPES), args.dtype)) + ak.verbose = False + ak.connect(args.hostname, args.port) + + if args.correctness_only: + check_correctness(args.number, args.size, args.trials, args.dtype) + print("CORRECT") + sys.exit(0) + + + print("length of strings = {:,}".format(args.size)) + print("number of strings = {:,}".format(args.number)) + print("number of trials = ", args.trials) + time_ak_sa(args.number, args.size, args.trials, args.dtype) + if args.numpy: + time_np_sa(args.number, args.size, args.trials, args.dtype) + sys.exit(0) diff --git a/benchmarks/scatter.py b/benchmarks/scatter.py index d7f218600b..0de21a32e0 100755 --- a/benchmarks/scatter.py +++ b/benchmarks/scatter.py @@ -29,9 +29,13 @@ def time_ak_scatter(isize, vsize, trials, dtype, random, seed): timings = [] for _ in range(trials): + print("i={},c[i]={}".format(i, c[i])) + print("v={}".format(v)) start = time.time() c[i] = v end = time.time() + print("i={},c[i]={}".format(i, c[i])) + print("v={}".format(v)) timings.append(end - start) tavg = sum(timings) / trials diff --git a/src/GenSymIO.chpl b/src/GenSymIO.chpl index 8237ce81f8..ab93eb00ac 100644 --- a/src/GenSymIO.chpl +++ b/src/GenSymIO.chpl @@ -168,7 +168,7 @@ module GenSymIO { } /* - * Converts the JSON array to a pdarray + * Converts the JSON array to a string pdarray */ proc jsonToPdArray(json: string, size: int) throws { var f = opentmp(); @@ -183,6 +183,21 @@ module GenSymIO { return array; } + /* + * Converts the JSON array to a integer pdarray + */ + proc jsonToPdArrayInt(json: string, size: int) throws { + var f = opentmp(); + var w = f.writer(); + w.write(json); + w.close(); + var r = f.reader(start=0); + var array: [0..#size] int; + r.readf("%jt", array); + r.close(); + f.close(); + return array; + } /* * 
Spawns a separate Chapel process that executes and returns the * result of the h5ls command diff --git a/src/MultiTypeSymEntry.chpl b/src/MultiTypeSymEntry.chpl index f391a060b4..7f601fbe85 100644 --- a/src/MultiTypeSymEntry.chpl +++ b/src/MultiTypeSymEntry.chpl @@ -78,6 +78,7 @@ module MultiTypeSymEntry :arg etype: type to be instantiated :type etype: type */ + proc init(len: int, type etype) { super.init(etype, len); this.etype = etype; diff --git a/src/MultiTypeSymbolTable.chpl b/src/MultiTypeSymbolTable.chpl index 731cbb7e96..33e52f0008 100644 --- a/src/MultiTypeSymbolTable.chpl +++ b/src/MultiTypeSymbolTable.chpl @@ -9,7 +9,12 @@ module MultiTypeSymbolTable use MultiTypeSymEntry; use Map; + use RadixSortLSD only radixSortLSD_ranks; + use RandArray only fillInt; + + + var FilteringPattern=0:int; var mtLogger = new Logger(); if v { mtLogger.level = LogLevel.DEBUG; @@ -351,18 +356,74 @@ module MultiTypeSymbolTable { var e = toSymEntry(u,int); if e.size == 0 {s = "[]";} - else if e.size < thresh || e.size <= 6 { + else if e.size < thresh+4 || e.size <= 6 { s = "["; for i in 0..(e.size-2) {s += try! "%t ".format(e.a[i]);} s += try! "%t]".format(e.a[e.size-1]); } else { - s = try! "[%t %t %t ... %t %t %t]".format(e.a[0],e.a[1],e.a[2], - e.a[e.size-3], - e.a[e.size-2], - e.a[e.size-1]); - } - } + select FilteringPattern + { + when 0 //HeadAndTail + { + var half=thresh/2:int; + s = "["; + for i in 0..(half-2) {s += try! "%t ".format(e.a[i]);} + s += try! "%t ... ".format(e.a[half-1]); + for i in e.size-2-half..(e.size-2) {s += try! "%t ".format(e.a[i]);} + s += try! "%t]".format(e.a[e.size-1]); + + //s = try! "[%t %t %t ... %t %t %t]".format(e.a[0],e.a[1],e.a[2], + // e.a[e.size-3], + // e.a[e.size-2], + // e.a[e.size-1]); + } + when 1 //Head + { + s = "["; + for i in 0..thresh-2 {s += try! "%t ".format(e.a[i]);} + s += try! "%t ...] ".format(e.a[thresh-1]); + } + when 2 //Tail + { + s = "[... "; + for i in e.size-1-thresh..e.size-2 {s += try! 
"%t ".format(e.a[i]);} + s += try! "%t]".format(e.a[e.size-1]); + } + when 3 //Middle + { + var startM=e.size-1-thresh/2:int; + s = "[... "; + for i in startM..startM+thresh-2 {s += try! "%t ".format(e.a[i]);} + s += try! "%t ...]".format(e.a[startM+thresh-1]); + + } + when 4 //Uniform + { + var stride =(e.size-1)/thresh:int; + s = "[... "; + for i in 0..thresh-2 {s += try! "%t ".format(e.a[i*stride]);} + s += try! "%t ...]".format(e.a[ stride*(thresh-1)]); + } + when 5 //Random + { + var samplearray:[0..thresh-1]int; + var indexarray:[0..thresh-1]int; + fillInt(samplearray,0,e.size-1); + var iv = radixSortLSD_ranks(samplearray); + indexarray=samplearray[iv]:int; + s = "[... "; + for i in 0..thresh-2 { + if (e.a[indexarray[i]]!=e.a[indexarray[i+1]]) { + s += try! "%t ".format(e.a[indexarray[i]]); + } + s += try! "%t ...]".format(e.a[indexarray[thresh-1]]); + } + } + + }//end select + }//end else + }//end DType.Int64 when DType.Float64 { var e = toSymEntry(u,real); diff --git a/src/SACA.chpl b/src/SACA.chpl new file mode 100644 index 0000000000..3aa2ba5bd2 --- /dev/null +++ b/src/SACA.chpl @@ -0,0 +1,230 @@ +module SACA{ +// In this module, different algorithms to construct suffix array are provided +//Nov.15, 2020 +//Algorithm 1 +// The first algorithm divsufsort is the fastest C codes on suffix array +require "../thirdparty/SA/libdivsufsort/include/config.h"; +require "../thirdparty/SA/libdivsufsort/include/divsufsort.h"; +require "../thirdparty/SA/libdivsufsort/include/divsufsort_private.h"; +require "../thirdparty/SA/libdivsufsort/include/lfs.h"; + +require "../thirdparty/SA/libdivsufsort/lib/divsufsort.c"; +require "../thirdparty/SA/libdivsufsort/lib/sssort.c"; +require "../thirdparty/SA/libdivsufsort/lib/trsort.c"; +require "../thirdparty/SA/libdivsufsort/lib/utils.c"; +extern proc divsufsort(inputstr:[] uint(8),suffixarray:[] int(32),totallen:int(32)); + +//Another possible SACA algorithm to utilize. 
+//require "../thirdparty/SA/SACA-K/saca-k.c"; + +//extern proc SACA_K(inputstr:[] uint(8), suffixarray:[] uint, n:uint, K:uint,m:uint, level:int); +//void SACA_K(unsigned char *s, unsigned int *SA, +// unsigned int n, unsigned int K, +// unsigned int m, int level) ; + +//Algorithm 2 + +// The Chapel version of suffix array construction algorithm using skew algorithm +// Rewrite the algorithm and codes in paper +// "Simple Linear Work Suffix Array Construction" by Juha Karkkainen and Peter Sanders (2003) +// Dec.7, 2020 + +inline proc leq(a1 :int, a2:int, b1:int, b2:int) // lexicographic order +{ return(a1 < b1 || a1 == b1 && a2 <= b2); +} // for pairs + +inline proc leq(a1 :int, a2:int,a3:int, b1:int, b2:int, b3:int) // lexicographic order +{ return(a1 < b1 || a1 == b1 && leq(a2,a3, b2,b3)); +} // for pairs + +//stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r +proc radixPass(a:[] int, b:[] int, r:[] uint(8), n:int, K:int ) +{ // count occurrences + var c:[0..K] uint(8); // counter array + var x:uint(8); + var i=0:int; + var sum=0:int; + forall x in c do x=0; + for i in 0..n-1 do c[r[a[i]]]=c[r[a[i]]]+1; + var t:uint(8); + for i in 0..K do { + t=c[i]; + c[i]=sum; + sum+=t; + } + for i in 0..n-1 do { + b[c[r[a[i]]]] = a[i]; + c[r[a[i]]]=c[r[a[i]]]+1; + } +} + + +//stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r +//element a[i] is mapping to r[a[i]] and r is the alphabets with K+1 characters. 
+// a and b are bounded by n in calculation
+// Stably sort a[0..n-1] into b[0..n-1] by the keys r[a[i]] (keys in 0..K).
+// This int-key overload is used by the recursive levels of the skew algorithm.
+proc radixPass(a:[] int, b:[] int, r:[] int, n:int, K:int )
+{// count occurrences
+  var c:[0..K] int; // counter array
+  var x:int;
+  var i:int;
+  var t:int;
+  var sum=0:int;
+  forall x in c do x=0;
+  // calculate the number of different characters in a
+  for i in 0..n-1 do c[r[a[i]]]=c[r[a[i]]]+1;
+  // calculate the presum of c, so c[i] will be the starting position of different characters
+  for i in 0..K do {
+    t=c[i];
+    c[i]=sum;
+    sum+=t;
+  }
+  // let b[j] store the position of each a[i] based on their order.
+  // The same character but following the previous suffix will be put at the next position.
+  for i in 0..n-1 do {
+    b[c[r[a[i]]]] = a[i];
+    c[r[a[i]]]=c[r[a[i]]]+1;
+  }
+
+}
+//stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
+
+// find the suffix array SA of s[0..n-1] in {1..K}^n
+// require s[n]=s[n+1]=s[n+2]=0, n>=2. So the size of s should be n+3
+// (recursive difference-cover "skew"/DC3 algorithm, Karkkainen & Sanders 2003)
+proc SuffixArraySkew(s:[] int, SA: [] int, n:int, K: int) {
+  var n0=(n+2)/3:int;
+  var n1=(n+1)/3:int;
+  var n2=n/3:int;
+  var n02=n0+n2:int;
+  var n12=n1+n2:int;
+//number of elements meet i %3 =0,1, and 2.
+//s[i] is the ith suffix, i in 0..n-1
+  var s12: [0..n02+2] int;
+  s12[n02]= 0;
+  s12[n02+1]= 0;
+  s12[n02+2]=0;
+// Here n02 instead of n12=n1+n2 is used for the later s0 building based on n1 elements
+  var SA12:[0..n02 + 2] int;
+  SA12[n02]=0;
+  SA12[n02+1]=0;
+  SA12[n02+2]=0;
+
+  var s0:[0.. n0+2] int;
+  var SA0:[0..n0+2] int;
+  var i=0:int;
+  var j=0:int;
+  var k=0:int;
+
+// generate positions of mod 1 and mod 2 suffixes
+// n0-n1 is used for building s0, s1 has the same number of elements as s0
+  for i in 0..n+(n0-n1)-1 do {
+    if (i%3 != 0) {
+      s12[j] = i;
+      j=j+1;
+    }
+  }
+// lsb radix sort the mod 1 and mod 2 triples
+// (three stable passes: by 3rd, then 2nd, then 1st character of each triple)
+  var tmps:[0..n+2] int;
+  forall i in 0..n-2 do tmps[i]=s[i+2];
+  radixPass(s12 , SA12, tmps, n02, K);
+  forall i in 0..n-1 do tmps[i]=s[i+1];
+  radixPass(SA12, s12 , tmps, n02, K);
+  radixPass(s12 , SA12, s  , n02, K);
+
+// find lexicographic names of triples
+
+  var name = 0:int, c0 = -1:int, c1 = -1:int, c2 = -1:int;
+
+  for i in 0..n02-1 do {
+    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2)
+    { name=name+1;
+      c0 = s[SA12[i]];
+      c1 = s[SA12[i]+1];
+      c2 = s[SA12[i]+2];
+    }
+    if (SA12[i] % 3 == 1) {
+      s12[SA12[i]/3] = name;
+      // mapping the suffix to small alphabets
+    } // left half
+    else {
+      s12[SA12[i]/3 + n0] = name;
+    } // right half
+  }
+
+// recurse if names are not unique
+  if (name < n02) {
+    SuffixArraySkew(s12, SA12, n02, name);
+// store unique names in s12 using the suffix array
+    for i in 0..n02-1 do s12[SA12[i]] = i + 1;
+    //restore the value of s12 since we will change its values during the procedure
+  } else // generate the suffix array of s12 directly
+  { for i in 0..n02-1 do SA12[s12[i] - 1] = i;
+    // here SA12 is in fact the ISA array.
+  }
+// stably sort the mod 0 suffixes from SA12 by their first character
+  j=0;
+  for i in 0..n02-1 do {
+// here in fact we take advantage of the sorted SA12 to just sort s0 once to get its sorted array
+// at first we think the position i%3=1 is the position
+    if (SA12[i] < n0) {
+      s0[j] = 3*SA12[i];
+      j=j+1;
+    }
+  }
+  radixPass(s0, SA0, s, n0, K);
+
+// merge sorted SA0 suffixes and sorted SA12 suffixes
+  var p=0:int;// first s0 position
+  var t=n0-n1:int;//first s1 position
+  k=0;
+  var i1:int , j1:int;
+  var tmpk:int;
+  for tmpk in 0..n-1 do {
+//#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
+    // position of the t-th mod-1/2 suffix, translated back to an index in s
+    proc GetI():int {
+      if (SA12[t] < n0 ) { return SA12[t] * 3 + 1 ;
+      }
+      else {
+        return (SA12[t] - n0) * 3 + 2;
+      }
+    }
+    i = GetI(); // pos of current offset 12 suffix
+    j = SA0[p]; // pos of current offset 0 suffix
+    var flag:bool;
+    if (SA12[t] < n0) {
+      // different compares for mod 1 and mod 2 suffixes
+      // i % 3 =1
+      flag=leq(s[i], s12[SA12[t] + n0], s[j], s12[j/3]);
+    } else {
+      // i % 3 =2
+      flag=leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]);
+      // flag=leq(s[i],s[i+1],s12[SA12[t]-n0], s[j],s[j+1],s12[j/3+n0]);
+    }
+    if (flag)
+    {// suffix from SA12 is smaller
+      SA[k] = i;
+      k=k+1;
+      t=t+1;
+      if (t == n02) {// done --- only SA0 suffixes left
+        forall (i1,j1) in zip (k..n-1,p..p+n-k-1) do SA[i1] = SA0[j1];
+        break;
+      }
+    } else {// suffix from SA0 is smaller
+      SA[k] = j;
+      k=k+1;
+      p=p+1;
+      var tmpt=t:int;
+      if (p == n0) { // done --- only SA12 suffixes left
+        for i1 in tmpt..n02-1 do {
+          SA[k] = GetI();
+          t=t+1;
+          k=k+1;
+        }
+        break;
+      }
+    }
+  }
+}
+
+
+
+}
diff --git a/src/SegmentedArray.chpl b/src/SegmentedArray.chpl
index 7fd24abfdb..a710835b7e 100644
--- a/src/SegmentedArray.chpl
+++ b/src/SegmentedArray.chpl
@@ -145,14 +145,15 @@ module SegmentedArray {
       return s;
     }
 
+
     /* Take a slice of strings from the array. The slice must be a
-       Chapel range, i.e. low..high by stride, not a Python slice.
-       Returns arrays for the segment offsets and bytes of the slice.*/
+     * Chapel range, i.e. low..high by stride, not a Python slice.
+     * Returns arrays for the segment offsets and bytes of the slice.
+ */ + proc this(const slice: range(stridable=true)) throws { if (slice.low < offsets.aD.low) || (slice.high > offsets.aD.high) { - saLogger.error(getModuleName(),getRoutineName(),getLineNumber(), - "Array is out of bounds"); - throw new owned OutOfBoundsError(); + throw new owned OutOfBoundsError(); } // Early return for zero-length result if (size == 0) || (slice.size == 0) { @@ -188,7 +189,8 @@ module SegmentedArray { } /* Gather strings by index. Returns arrays for the segment offsets - and bytes of the gathered strings.*/ + * and bytes of the gathered strings. + */ proc this(iv: [?D] int) throws { // Early return for zero-length result if (D.size == 0) { @@ -198,9 +200,7 @@ module SegmentedArray { var ivMin = min reduce iv; var ivMax = max reduce iv; if (ivMin < 0) || (ivMax >= offsets.size) { - saLogger.error(getModuleName(),getRoutineName(),getLineNumber(), - "Array out of bounds"); - throw new owned OutOfBoundsError(); + throw new owned OutOfBoundsError(); } saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Computing lengths and offsets"); @@ -228,7 +228,7 @@ module SegmentedArray { gatheredOffsets -= gatheredLengths; saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), - "aggregation in %i seconds".format(getCurrentTime() - t1)); + "%i seconds".format(getCurrentTime() - t1)); saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Copying values"); if v { t1 = getCurrentTime(); @@ -239,11 +239,11 @@ module SegmentedArray { if CHPL_COMM != 'none' { // Compute the src index for each byte in gatheredVals /* For performance, we will do this with a scan, so first we need an array - with the difference in index between the current and previous byte. For - the interior of a segment, this is just one, but at the segment boundary, - it is the difference between the src offset of the current segment ("left") - and the src index of the last byte in the previous segment (right - 1). 
-       */
+       * with the difference in index between the current and previous byte. For
+       * the interior of a segment, this is just one, but at the segment boundary,
+       * it is the difference between the src offset of the current segment ("left")
+       * and the src index of the last byte in the previous segment (right - 1).
+       */
         var srcIdx = makeDistArray(retBytes, int);
         srcIdx = 1;
         var diffs: [D] int;
@@ -272,9 +272,8 @@
           }
         }
       }
-      saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
-                           "Gathered offsets and vals in %i seconds".format(
-                                          getCurrentTime() -t1));
+      saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
+                           "%i seconds".format(getCurrentTime() -t1));
       return (gatheredOffsets, gatheredVals);
     }
@@ -282,12 +281,10 @@
     proc this(iv: [?D] bool) throws {
       // Index vector must be same domain as array
       if (D != offsets.aD) {
-        saLogger.info(getModuleName(),getRoutineName(),getLineNumber(),
-                      "Array out of bounds");
-        throw new owned OutOfBoundsError();
+        throw new owned OutOfBoundsError();
      }
      saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
-                     "Computing lengths and offsets");
+                     "Computing lengths and offsets");
      var t1 = getCurrentTime();
      ref oa = offsets.a;
      const low = offsets.aD.low, high = offsets.aD.high;
@@ -343,6 +340,7 @@
     /* saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),
                      "%i seconds".format(getCurrentTime() - t1));*/
     /* return (gatheredOffsets, gatheredVals); */
+    }
 
   /* Apply a hash function to all strings.
This is useful for grouping @@ -722,158 +720,1307 @@ module SegmentedArray { } // class SegString - inline proc memcmp(const ref x: [] uint(8), const xinds, const ref y: [] uint(8), const yinds): int { - const l = min(xinds.size, yinds.size); - var ret: int = 0; - for (i, j) in zip(xinds.low..#l, yinds.low..#l) { - ret = x[i]:int - y[j]:int; - if (ret != 0) { - break; - } - } - if (ret == 0) { - ret = xinds.size - yinds.size; - } - return ret; - } + /** + * Represents an array of arrays, implemented as a segmented array of integers. + * Instances are ephemeral, not stored in the symbol table. Instead, attributes + * of this class refer to symbol table entries that persist. This class is a + * convenience for bundling those persistent objects and defining suffix array-relevant + * operations. + Here we just copy SegString, we need change more in the future to fit suffix array + */ + class SegSArray { + + /** + * The name of the SymEntry corresponding to the pdarray containing + * the offsets, which are start indices for each string bytearray + */ + var offsetName: string; + /** + * The pdarray containing the offsets, which are the start indices of + * the bytearrays, each of whichs corresponds to an individual string. + */ + var offsets: borrowed SymEntry(int); - enum SearchMode { contains, startsWith, endsWith } - class UnknownSearchMode: Error {} - - /* Test for equality between two same-length arrays of strings. Returns - a boolean vector of the same length. */ - proc ==(lss:SegString, rss:SegString) throws { - return compare(lss, rss, true); - } + /** + * The name of the SymEntry corresponding to the pdarray containing + * the string values where each value is byte array. + */ + var valueName: string; - /* Test for inequality between two same-length arrays of strings. Returns - a boolean vector of the same length. 
*/ - proc !=(lss:SegString, rss:SegString) throws { - return compare(lss, rss, false); - } + /** + * The pdaray containing the complete int array composed of integer index + * corresponding to each string, + */ + // var values: borrowed SymEntry(uint(8)); + var values: borrowed SymEntry(int); + + /** + * The number of strings in the segmented array + */ + var size: int; + + /** + * The total number of bytes in the entire segmented array including + * the bytes corresonding to the strings as well as the nulls + * separating the string bytes. + */ + var nBytes: int; - /* Element-wise comparison of two same-length arrays of strings. The - polarity parameter determines whether the comparison checks for - equality (polarity=true, result is true where elements are equal) - or inequality (polarity=false, result is true where elements differ). */ - private proc compare(lss:SegString, rss:SegString, param polarity: bool) throws { - // String arrays must be same size - if (lss.size != rss.size) { - throw getErrorWithContext( - msg="The String arrays must be the same size", - lineNumber = getLineNumber(), - routineName = getRoutineName(), - moduleName = getModuleName(), - errorClass="ArgumentError"); + /* + * This version of the init method is the most common and is only used + * when the names of the segments (offsets) and values SymEntries are known. + */ + proc init(segName: string, valName: string, st: borrowed SymTab) { + offsetName = segName; + // The try! is needed here because init cannot throw + var gs = try! st.lookup(segName); + // I want this to be borrowed, but that throws a lifetime error + var segs = toSymEntry(gs, int): unmanaged SymEntry(int); + offsets = segs; + valueName = valName; + + var vs = try! 
st.lookup(valName); + // var vals = toSymEntry(vs, uint(8)): unmanaged SymEntry(uint(8)); + var vals = toSymEntry(vs, int): unmanaged SymEntry(int); + values = vals; + size = segs.size; + nBytes = vals.size; } - ref oD = lss.offsets.aD; - // Start by assuming all elements differ, then correct for those that are equal - // This translates to an initial value of false for == and true for != - var truth: [oD] bool = !polarity; - // Early exit for zero-length result - if (lss.size == 0) { - return truth; + + /* + * This version of init method takes segments and values arrays as + * inputs, generates the SymEntry objects for each and passes the + * offset and value SymTab lookup names to the alternate init method + */ + + // proc init(segments: [] int, values: [] uint(8), st: borrowed SymTab) { + proc init(segments: [] int, values: [] int, st: borrowed SymTab) { + var oName = st.nextName(); + var segEntry = new shared SymEntry(segments); + try! st.addEntry(oName, segEntry); + var vName = st.nextName(); + var valEntry = new shared SymEntry(values); + try! 
st.addEntry(vName, valEntry); + this.init(oName, vName, st); } - ref lvalues = lss.values.a; - ref loffsets = lss.offsets.a; - ref rvalues = rss.values.a; - ref roffsets = rss.offsets.a; - // Compare segments in parallel - // Segments are guaranteed to be on same locale, but bytes are not - forall (t, lo, ro, idx) in zip(truth, loffsets, roffsets, oD) - with (var agg = newDstAggregator(bool)) { - var llen: int; - var rlen: int; - if (idx == oD.high) { - llen = lvalues.size - lo - 1; - rlen = rvalues.size - ro - 1; - } else { - llen = loffsets[idx+1] - lo - 1; - rlen = roffsets[idx+1] - ro - 1; - } - // Only compare bytes if lengths are equal - if (llen == rlen) { - var allEqual = true; - // TO DO: consider an on clause here to ensure at least one access is local - for pos in 0..#llen { - if (lvalues[lo+pos] != rvalues[ro+pos]) { - allEqual = false; - break; - } + + proc show(n: int = 3) throws { + if (size >= 2*n) { + for i in 0..#n { + writeln(this[i]); } - // Only if lengths and all bytes are equal, override the default value - if allEqual { - // For ==, the output should be true; for !=, false - agg.copy(t, polarity); + writeln("..."); + for i in size-n..#n { + writeln(this[i]); + } + } else { + for i in 0..#size { + writeln(this[i]); } } } - return truth; - } - - /* Test an array of strings for equality against a constant string. Return a boolean - vector the same size as the array. */ - proc ==(ss:SegString, testStr: string) { - return compare(ss, testStr, true); - } - - /* Test an array of strings for inequality against a constant string. Return a boolean - vector the same size as the array. */ - proc !=(ss:SegString, testStr: string) { - return compare(ss, testStr, false); - } - /* Element-wise comparison of an arrays of string against a target string. 
- The polarity parameter determines whether the comparison checks for - equality (polarity=true, result is true where elements equal target) - or inequality (polarity=false, result is true where elements differ from - target). */ - proc compare(ss:SegString, testStr: string, param polarity: bool) { - ref oD = ss.offsets.aD; - // Initially assume all elements equal the target string, then correct errors - // For ==, this means everything starts true; for !=, everything starts false - var truth: [oD] bool = polarity; - // Early exit for zero-length result - if (ss.size == 0) { - return truth; - } - ref values = ss.values.a; - ref vD = ss.values.aD; - ref offsets = ss.offsets.a; - // Use a whole-array strategy, where the ith byte from every segment is checked simultaneously - // This will do len(testStr) parallel loops, but loops will have low overhead - for (b, i) in zip(testStr.chpl_bytes(), 0..) { - forall (t, o, idx) in zip(truth, offsets, oD) with (var agg = newDstAggregator(bool)) { - if ((o+i > vD.high) || (b != values[o+i])) { - // Strings are not equal, so change the output - // For ==, output is now false; for !=, output is now true - agg.copy(t, !polarity); - } + /* Retrieve one string from the array */ + proc this(idx: int): string throws { + if (idx < offsets.aD.low) || (idx > offsets.aD.high) { + throw new owned OutOfBoundsError(); } - } - // Check the length by checking that the next byte is null - forall (t, o, idx) in zip(truth, offsets, oD) with (var agg = newDstAggregator(bool)) { - if ((o+testStr.size > vD.high) || (0 != values[o+testStr.size])) { - // Strings are not equal, so change the output - // For ==, output is now false; for !=, output is now true - agg.copy(t, !polarity); + // Start index of the string + var start = offsets.a[idx]; + // Index of last (null) byte in string + var end: int; + if (idx == size - 1) { + end = nBytes - 1; + } else { + end = offsets.a[idx+1] - 1; } + // Take the slice of the bytearray and "cast" it to a chpl 
string + //var s = interpretAsString(values.a[start..end]); + var tmp=values.a[start..end]; + var s: string; + var i:int; + s=""; + for i in tmp do { + s=s+" "+ i:string; + } + return s; } - return truth; - } - /* Test array of strings for membership in another array (set) of strings. Returns - a boolean vector the same size as the first array. */ - proc in1d(mainStr: SegString, testStr: SegString, invert=false) throws where useHash { - var truth: [mainStr.offsets.aD] bool; - // Early exit for zero-length result - if (mainStr.size == 0) { - return truth; - } - // Hash all strings for fast comparison - var t = new Timer(); - saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),"Hashing strings"); + /* Take a slice of indices from the array. The slice must be a + Chapel range, i.e. low..high by stride, not a Python slice. + Returns arrays for the segment offsets and bytes of the slice.*/ + proc this(const slice: range(stridable=true)) throws { + if (slice.low < offsets.aD.low) || (slice.high > offsets.aD.high) { + saLogger.error(getModuleName(),getRoutineName(),getLineNumber(), + "Array is out of bounds"); + throw new owned OutOfBoundsError(); + } + // Early return for zero-length result + if (size == 0) || (slice.size == 0) { + //return (makeDistArray(0, int), makeDistArray(0, uint(8))); + return (makeDistArray(0, int), makeDistArray(0, int)); + } + // Start of bytearray slice + var start = offsets.a[slice.low]; + // End of bytearray slice + var end: int; + if (slice.high == offsets.aD.high) { + // if slice includes the last string, go to the end of values + end = values.aD.high; + } else { + end = offsets.a[slice.high+1] - 1; + } + // Segment offsets of the new slice + var newSegs = makeDistArray(slice.size, int); + ref oa = offsets.a; + // newSegs = offsets.a[slice] - start; + forall (i, ns) in zip(newSegs.domain, newSegs) with (var agg = newSrcAggregator(int)) { + agg.copy(ns, oa[slice.low + i]); + } + // Offsets need to be re-zeroed + newSegs -= 
start; + // Bytearray of the new slice + //var newVals = makeDistArray(end - start + 1, uint(8)); + var newVals = makeDistArray(end - start + 1, int); + ref va = values.a; + // newVals = values.a[start..end]; + //forall (i, nv) in zip(newVals.domain, newVals) with (var agg = newSrcAggregator(uint(8))) { + forall (i, nv) in zip(newVals.domain, newVals) with (var agg = newSrcAggregator(int)) { + agg.copy(nv, va[start + i]); + } + return (newSegs, newVals); + } + + /* Gather strings by index. Returns arrays for the segment offsets + and bytes of the gathered strings.*/ + proc this(iv: [?D] int) throws { + // Early return for zero-length result + if (D.size == 0) { + //return (makeDistArray(0, int), makeDistArray(0, uint(8))); + return (makeDistArray(0, int), makeDistArray(0, int)); + } + // Check all indices within bounds + var ivMin = min reduce iv; + var ivMax = max reduce iv; + if (ivMin < 0) || (ivMax >= offsets.size) { + saLogger.error(getModuleName(),getRoutineName(),getLineNumber(), + "Array out of bounds"); + throw new owned OutOfBoundsError(); + } + if v {writeln("Computing lengths and offsets"); stdout.flush();} + var t1 = getCurrentTime(); + ref oa = offsets.a; + const low = offsets.aD.low, high = offsets.aD.high; + // Gather the right and left boundaries of the indexed strings + // NOTE: cannot compute lengths inside forall because agg.copy will + // experience race condition with loop-private variable + var right: [D] int, left: [D] int; + forall (r, l, idx) in zip(right, left, iv) with (var agg = newSrcAggregator(int)) { + if (idx == high) { + agg.copy(r, values.size); + } else { + agg.copy(r, oa[idx+1]); + } + agg.copy(l, oa[idx]); + } + // Lengths of segments including null bytes + var gatheredLengths: [D] int = right - left; + // The returned offsets are the 0-up cumulative lengths + var gatheredOffsets = (+ scan gatheredLengths); + // The total number of bytes in the gathered strings + var retBytes = gatheredOffsets[D.high]; + gatheredOffsets -= 
gatheredLengths; + + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "aggregation in %i seconds".format(getCurrentTime() - t1)); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Copying values"); + + if v { + writeln(getCurrentTime() - t1, " seconds"); + writeln("Copying values"); stdout.flush(); + t1 = getCurrentTime(); + } + //var gatheredVals = makeDistArray(retBytes, uint(8)); + var gatheredVals = makeDistArray(retBytes, int); + // Multi-locale requires some extra localization work that is not needed + // in CHPL_COMM=none + if CHPL_COMM != 'none' { + + // Compute the src index for each byte in gatheredVals + + /* For performance, we will do this with a scan, so first we need an array + with the difference in index between the current and previous byte. For + the interior of a segment, this is just one, but at the segment boundary, + it is the difference between the src offset of the current segment ("left") + and the src index of the last byte in the previous segment (right - 1). 
+ */ + var srcIdx = makeDistArray(retBytes, int); + srcIdx = 1; + var diffs: [D] int; + diffs[D.low] = left[D.low]; // first offset is not affected by scan + if (D.size > 1) { + // This expression breaks when D.size == 1, resulting in strange behavior + // However, this logic is only necessary when D.size > 1 anyway + diffs[D.interior(D.size-1)] = left[D.interior(D.size-1)] - (right[D.interior(-(D.size-1))] - 1); + } + // Set srcIdx to diffs at segment boundaries + forall (go, d) in zip(gatheredOffsets, diffs) with (var agg = newDstAggregator(int)) { + agg.copy(srcIdx[go], d); + } + srcIdx = + scan srcIdx; + // Now srcIdx has a dst-local copy of the source index and vals can be efficiently gathered + ref va = values.a; + //forall (v, si) in zip(gatheredVals, srcIdx) with (var agg = newSrcAggregator(uint(8))) { + forall (v, si) in zip(gatheredVals, srcIdx) with (var agg = newSrcAggregator(int)) { + agg.copy(v, va[si]); + } + } else { + ref va = values.a; + // Copy string data to gathered result + forall (go, gl, idx) in zip(gatheredOffsets, gatheredLengths, iv) { + for pos in 0..#gl { + gatheredVals[go+pos] = va[oa[idx]+pos]; + } + } + } + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Gathered offsets and vals in %i seconds".format( + getCurrentTime() -t1)); + return (gatheredOffsets, gatheredVals); + } + + /* Logical indexing (compress) of strings. 
*/ + proc this(iv: [?D] bool) throws { + // Index vector must be same domain as array + if (D != offsets.aD) { + saLogger.info(getModuleName(),getRoutineName(),getLineNumber(), + "Array out of bounds"); + throw new owned OutOfBoundsError(); + } + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Computing lengths and offsets"); + var t1 = getCurrentTime(); + ref oa = offsets.a; + const low = offsets.aD.low, high = offsets.aD.high; + // Calculate the destination indices + var steps = + scan iv; + var newSize = steps[high]; + steps -= iv; + // Early return for zero-length result + if (newSize == 0) { + //return (makeDistArray(0, int), makeDistArray(0, uint(8))); + return (makeDistArray(0, int), makeDistArray(0, int)); + } + var segInds = makeDistArray(newSize, int); + forall (t, dst, idx) in zip(iv, steps, D) with (var agg = newDstAggregator(int)) { + if t { + agg.copy(segInds[dst], idx); + } + } + return this[segInds]; + + /* // Lengths of dest segments including null bytes */ + /* var gatheredLengths = makeDistArray(newSize, int); */ + /* forall (idx, present, i) in zip(D, iv, steps) { */ + /* if present { */ + /* segInds[i-1] = idx; */ + /* if (idx == high) { */ + /* gatheredLengths[i-1] = values.size - oa[high]; */ + /* } else { */ + /* gatheredLengths[i-1] = oa[idx+1] - oa[idx]; */ + /* } */ + /* } */ + /* } */ + /* // Make dest offsets from lengths */ + /* var gatheredOffsets = (+ scan gatheredLengths); */ + /* var retBytes = gatheredOffsets[newSize-1]; */ + /* gatheredOffsets -= gatheredLengths; */ + /* if v { */ + /* writeln(getCurrentTime() - t1, " seconds"); */ + /* writeln("Copying values"); stdout.flush(); */ + /* t1 = getCurrentTime(); */ + /* } */ + /* var gatheredVals = makeDistArray(retBytes, uint(8)); */ + /* ref va = values.a; */ + /* if DEBUG { */ + /* printAry("gatheredOffsets: ", gatheredOffsets); */ + /* printAry("gatheredLengths: ", gatheredLengths); */ + /* printAry("segInds: ", segInds); */ + /* } */ + /* // Copy string 
bytes from src to dest */ + /* forall (go, gl, idx) in zip(gatheredOffsets, gatheredLengths, segInds) { */ + /* gatheredVals[{go..#gl}] = va[{oa[idx]..#gl}]; */ + /* } */ + /* if v {writeln(getCurrentTime() - t1, " seconds"); stdout.flush();} */ + /* return (gatheredOffsets, gatheredVals); */ + } + + /* Apply a hash function to all strings. This is useful for grouping + and set membership. The hash used is SipHash128.*/ + proc hash() throws { + // 128-bit hash values represented as 2-tuples of uint(64) + var hashes: [offsets.aD] 2*uint(64); + // Early exit for zero-length result + if (size == 0) { + return hashes; + } + ref oa = offsets.a; + ref va = values.a; + // Compute lengths of strings + var lengths = getLengths(); + // Hash each string + // TO DO: test on clause with aggregator + forall (o, l, h) in zip(oa, lengths, hashes) { + const myRange = o..#l; + h = sipHash128(va, myRange); + /* // localize the string bytes */ + /* const myBytes = va[{o..#l}]; */ + /* h = sipHash128(myBytes, hashKey); */ + /* // Perf Note: localizing string bytes is ~3x faster on IB multilocale than this: */ + /* // h = sipHash128(va[{o..#l}]); */ + } + return hashes; + } + + /* Return a permutation that groups the strings. Because hashing is used, + this permutation will not sort the strings, but all equivalent strings + will fall in one contiguous block. 
*/ + proc argGroup() throws { + var t = new Timer(); + if useHash { + // Hash all strings + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), "Hashing strings"); + if v { t.start(); } + var hashes = this.hash(); + + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "hashing took %t seconds\nSorting hashes".format(t.elapsed())); + t.clear(); t.start(); + } + + // Return the permutation that sorts the hashes + var iv = radixSortLSD_ranks(hashes); + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "sorting took %t seconds".format(t.elapsed())); + } + if v{ + var sortedHashes = [i in iv] hashes[i]; + var diffs = sortedHashes[(iv.domain.low+1)..#(iv.size-1)] - + sortedHashes[(iv.domain.low)..#(iv.size-1)]; + printAry("diffs = ", diffs); + var nonDecreasing = [(d0,d1) in diffs] ((d0 > 0) || ((d0 == 0) && (d1 >= 0))); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Are hashes sorted? %i".format(&& reduce nonDecreasing)); + } + return iv; + } else { + var iv = argsort(); + return iv; + } + } + + /* Return lengths of all strings, including null terminator. 
*/ + proc getLengths() { + var lengths: [offsets.aD] int; + if (size == 0) { + return lengths; + } + ref oa = offsets.a; + const low = offsets.aD.low; + const high = offsets.aD.high; + forall (i, o, l) in zip(offsets.aD, oa, lengths) { + if (i == high) { + l = values.size - o; + } else { + l = oa[i+1] - o; + } + } + /* lengths[low..high-1] = (oa[low+1..high] - oa[low..high-1]); */ + /* lengths[high] = values.size - oa[high]; */ + return lengths; + } + + + /* + + proc findSubstringInBytes(const substr: string) { + // Find the start position of every occurence of substr in the flat bytes array + // Start by making a right-truncated subdomain representing all valid starting positions + // for substr of given length + + var D: subdomain(values.aD) = values.aD[values.aD.low..#(values.size - substr.numBytes)]; + // Every start position is valid until proven otherwise + var truth: [D] bool = true; + // Shift the flat values one byte at a time and check against corresponding byte of substr + for (i, b) in zip(0.., substr.chpl_bytes()) { + truth &= (values.a[D.translate(i)] == b); + } + return truth; + } + + proc substringSearch(const substr: string, mode: SearchMode) throws { + var hits: [offsets.aD] bool; // the answer + if (size == 0) || (substr.size == 0) { + return hits; + } + var t = new Timer(); + + if v { + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Checking bytes of substr"); + t.start(); + } + const truth = findSubstringInBytes(substr); + const D = truth.domain; + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "took %t seconds\nTranslating to segments...".format(t.elapsed())); + t.clear(); + t.start(); + } + // Need to ignore segment(s) at the end of the array that are too short to contain substr + const tail = + reduce (offsets.a > D.high); + // oD is the right-truncated domain representing segments that are candidates for containing substr + var oD: subdomain(offsets.aD) = 
offsets.aD[offsets.aD.low..#(offsets.size - tail)]; + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "took %t seconds\ndetermining answer...".format(t.elapsed())); + t.clear(); + t.start(); + } + ref oa = offsets.a; + if mode == SearchMode.contains { + // Determine whether each segment contains a hit + // Do this by taking the difference in the cumulative number of hits at the end vs the beginning of the segment + // Cumulative number of hits up to (and excluding) this point + var numHits = (+ scan truth) - truth; + hits[oD.interior(-(oD.size-1))] = (numHits[oa[oD.interior(oD.size-1)]] - numHits[oa[oD.interior(-(oD.size-1))]]) > 0; + hits[oD.high] = (numHits[D.high] + truth[D.high] - numHits[oa[oD.high]]) > 0; + } else if mode == SearchMode.startsWith { + // First position of segment must be a hit + hits[oD] = truth[oa[oD]]; + } else if mode == SearchMode.endsWith { + // Position where substr aligns with end of segment must be a hit + // -1 for null byte + hits[oD.interior(-(oD.size-1))] = truth[oa[oD.interior(oD.size-1)] - substr.numBytes - 1]; + hits[oD.high] = truth[D.high]; + } + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "took %t seconds".format(t.elapsed())); + } + return hits; + } + + proc peel(const delimiter: string, const times: int, param includeDelimiter: bool, param keepPartial: bool, param left: bool) { + param stride = if left then 1 else -1; + const dBytes = delimiter.numBytes; + const lengths = getLengths() - 1; + var leftEnd: [offsets.aD] int; + var rightStart: [offsets.aD] int; + const truth = findSubstringInBytes(delimiter); + const D = truth.domain; + ref oa = offsets.a; + var numHits = (+ scan truth) - truth; + const high = offsets.aD.high; + forall i in offsets.aD { + // First, check whether string contains enough instances of delimiter to peel + var hasEnough: bool; + if oa[i] > D.high { + // When the last string(s) is/are shorter than the substr + 
hasEnough = false; + } else if i == high { + hasEnough = ((+ reduce truth) - numHits[oa[i]]) >= times; + } else { + hasEnough = (numHits[oa[i+1]] - numHits[oa[i]]) >= times; + } + if !hasEnough { + // If not, then the entire string stays together, and the param args + // determine whether it ends up on the left or right + if left { + if keepPartial { + // Goes on the left + leftEnd[i] = oa[i] + lengths[i] - 1; + rightStart[i] = oa[i] + lengths[i]; + } else { + // Goes on the right + leftEnd[i] = oa[i] - 1; + rightStart[i] = oa[i]; + } + } else { + if keepPartial { + // Goes on the right + leftEnd[i] = oa[i] - 1; + rightStart[i] = oa[i]; + } else { + // Goes on the left + leftEnd[i] = oa[i] + lengths[i] - 1; + rightStart[i] = oa[i] + lengths[i]; + } + } + } else { + // The string can be peeled; figure out where to split + var nDelim = 0; + var j: int; + if left { + j = oa[i]; + } else { + // If coming from the right, need to handle edge case of last string + if i == high { + j = values.aD.high - 1; + } else { + j = oa[i+1] - 2; + } + } + // Step until the delimiter is encountered the exact number of times + while true { + if (j <= D.high) && truth[j] { + nDelim += 1; + } + if nDelim == times { + break; + } + j += stride; + } + // j is now the start of the correct delimiter + // tweak leftEnd and rightStart based on includeDelimiter + if left { + if includeDelimiter { + leftEnd[i] = j + dBytes - 1; + rightStart[i] = j + dBytes; + } else { + leftEnd[i] = j - 1; + rightStart[i] = j + dBytes; + } + } else { + if includeDelimiter { + leftEnd[i] = j - 1; + rightStart[i] = j; + } else { + leftEnd[i] = j - 1; + rightStart[i] = j + dBytes; + } + } + } + } + // Compute lengths and offsets for left and right return arrays + const leftLengths = leftEnd - oa + 2; + const rightLengths = lengths - (rightStart - oa) + 1; + const leftOffsets = (+ scan leftLengths) - leftLengths; + const rightOffsets = (+ scan rightLengths) - rightLengths; + // Allocate values and fill + // var 
leftVals = makeDistArray((+ reduce leftLengths), uint(8)); + // var rightVals = makeDistArray((+ reduce rightLengths), uint(8)); + var leftVals = makeDistArray((+ reduce leftLengths), int); + var rightVals = makeDistArray((+ reduce rightLengths), int); + ref va = values.a; + // Fill left values + forall (srcStart, dstStart, len) in zip(oa, leftOffsets, leftLengths) { + for i in 0..#len { + unorderedCopy(leftVals[dstStart+i], va[srcStart+i]); + } + } + // Fill right values + forall (srcStart, dstStart, len) in zip(rightStart, rightOffsets, rightLengths) { + for i in 0..#len { + unorderedCopy(rightVals[dstStart+i], va[srcStart+i]); + } + } + return (leftOffsets, leftVals, rightOffsets, rightVals); + } + + proc stick(other: SegString, delim: string, param right: bool) throws { + if (offsets.aD != other.offsets.aD) { + throw getErrorWithContext( + msg="The SegString offsets to not match", + lineNumber = getLineNumber(), + routineName = getRoutineName(), + moduleName = getModuleName(), + errorClass="ArgumentError"); + } + // Combine lengths and compute new offsets + var leftLen = getLengths() - 1; + var rightLen = other.getLengths() - 1; + const newLengths = leftLen + rightLen + delim.numBytes + 1; + var newOffsets = (+ scan newLengths); + const newBytes = newOffsets[offsets.aD.high]; + newOffsets -= newLengths; + // Allocate new values array + var newVals = makeDistArray(newBytes, uint(8)); + // Copy in the left and right-hand values, separated by the delimiter + ref va1 = values.a; + ref va2 = other.values.a; + forall (o1, o2, no, l1, l2) in zip(offsets.a, other.offsets.a, newOffsets, leftLen, rightLen) { + var pos = no; + // Left side + if right { + for i in 0..#l1 { + unorderedCopy(newVals[pos+i], va1[o1+i]); + } + pos += l1; + } else { + for i in 0..#l2 { + unorderedCopy(newVals[pos+i], va2[o2+i]); + } + pos += l2; + } + // Delimiter + for (i, b) in zip(0..#delim.numBytes, delim.chpl_bytes()) { + unorderedCopy(newVals[pos+i], b); + } + pos += delim.numBytes; + // 
Right side + if right { + for i in 0..#l2 { + unorderedCopy(newVals[pos+i], va2[o2+i]); + } + } else { + for i in 0..#l1 { + unorderedCopy(newVals[pos+i], va1[o1+i]); + } + } + } + return (newOffsets, newVals); + } + */ + + /* The comments above is treated as though they were ediff's comment string, which will cause sphinx errors + * It takes me several hours without any idea and thanks Brad help out. He added the following + * line to solve the problem + * dummy chpldoc description for ediff() + */ + proc ediff():[offsets.aD] int { + var diff: [offsets.aD] int; + if (size < 2) { + return diff; + } + ref oa = offsets.a; + ref va = values.a; + const high = offsets.aD.high; + forall (i, a) in zip(offsets.aD, diff) { + if (i < high) { + var asc: bool; + const left = oa[i]..oa[i+1]-1; + if (i < high - 1) { + const right = oa[i+1]..oa[i+2]-1; + a = -memcmp(va, left, va, right); + } else { // i == high - 1 + const right = oa[i+1]..values.aD.high; + a = -memcmp(va, left, va, right); + } + } else { // i == high + a = 0; + } + } + return diff; + } + + proc isSorted():bool { + if (size < 2) { + return true; + } + return (&& reduce (ediff() >= 0)); + } + + proc argsort(checkSorted:bool=true): [offsets.aD] int throws { + const ref D = offsets.aD; + const ref va = values.a; + if checkSorted && isSorted() { + saLogger.warn(getModuleName(),getRoutineName(),getLineNumber(), + "argsort called on already sorted array"); + var ranks: [D] int = [i in D] i; + return ranks; + } + var ranks = twoPhaseStringSort(this); + return ranks; + } + + } // class SegSArray + + + /** + * We use several arrays and intgers to represent a graph + * Instances are ephemeral, not stored in the symbol table. Instead, attributes + * of this class refer to symbol table entries that persist. This class is a + * convenience for bundling those persistent objects and defining graph-relevant + * operations. 
+ * Now we copy from SegSArray, we need change more in the future to fit a graph + */ + class SegGraph { + + /* The starting indices for each string*/ + var n_vertices : int; + + /* The starting indices for each string*/ + var n_edges : int; + + /* The graph is directed (True) or undirected (False)*/ + var directed : bool; + + /* The source of every edge in the graph, name */ + var srcName : string; + + /* The source of every edge in the graph,array value */ + var src: borrowed SymEntry(int); + + /* The destination of every vertex in the graph,name */ + var dstName : string; + + /* The destination of every vertex in the graph,array value */ + var dst: borrowed SymEntry(int); + + + /* The starting index of every vertex in src and dst the ,name */ + var startName : string; + + /* The starting index of every vertex in src and dst the ,name */ + var start_i: borrowed SymEntry(int); + + /* The number of current vertex id v's (v vD.high) || (b != values[o+i])) { + // Strings are not equal, so change the output + // For ==, output is now false; for !=, output is now true + agg.copy(t, !polarity); + } + } + } + // Check the length by checking that the next byte is null + forall (t, o, idx) in zip(truth, offsets, oD) with (var agg = newDstAggregator(bool)) { + if ((o+testStr.size > vD.high) || (0 != values[o+testStr.size])) { + // Strings are not equal, so change the output + // For ==, output is now false; for !=, output is now true + agg.copy(t, !polarity); + } + } + return truth; + } + + + /* Test array of strings for membership in another array (set) of strings. Returns + a boolean vector the same size as the first array. 
*/ + proc in1d(mainStr: SegString, testStr: SegString, invert=false) throws where useHash { + var truth: [mainStr.offsets.aD] bool; + // Early exit for zero-length result + if (mainStr.size == 0) { + return truth; + } + // Hash all strings for fast comparison + var t = new Timer(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),"Hashing strings"); if v { t.start(); } const hashes = mainStr.hash(); if v { @@ -919,6 +2066,62 @@ module SegmentedArray { return truth; } + /* Test array of strings for membership in another array (set) of strings. Returns + a boolean vector the same size as the first array. */ + proc in1d_Int(mainSar: SegSArray, testSar: SegSArray, invert=false) throws where useHash { + var truth: [mainSar.offsets.aD] bool; + // Early exit for zero-length result + if (mainSar.size == 0) { + return truth; + } + // Hash all suffix array for fast comparison + var t = new Timer(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),"Hashing strings"); + if v { t.start(); } + const hashes = mainSar.hash(); + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "%t seconds".format(t.elapsed())); + t.clear(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Making associative domains for test set on each locale"); + t.start(); + } + // On each locale, make an associative domain with the hashes of the second array + // parSafe=false because we are adding in serial and it's faster + var localTestHashes: [PrivateSpace] domain(2*uint(64), parSafe=false); + coforall loc in Locales { + on loc { + // Local hashes of second array + ref mySet = localTestHashes[here.id]; + mySet.requestCapacity(testSar.size); + const testHashes = testSar.hash(); + for h in testHashes { + mySet += h; + } + /* // Check membership of hashes in this locale's chunk of the array */ + /* [i in truth.localSubdomain()] truth[i] = mySet.contains(hashes[i]); */ + } + } + if v { + t.stop(); + 
saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "%t seconds".format(t.elapsed())); + t.clear(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Testing membership"); + t.start(); + } + [i in truth.domain] truth[i] = localTestHashes[here.id].contains(hashes[i]); + if v { + t.stop(); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "%t seconds".format(t.elapsed())); + } + return truth; + } + proc concat(s1: [] int, v1: [] uint(8), s2: [] int, v2: [] uint(8)) throws { // TO DO: extend to axis == 1 var segs = makeDistArray(s1.size + s2.size, int); @@ -1012,6 +2215,86 @@ module SegmentedArray { } } + + proc in1d_Int(mainSar: SegSArray, testSar: SegSArray, invert=false) throws where !useHash { + var truth: [mainSar.offsets.aD] bool; + // Early exit for zero-length result + if (mainSar.size == 0) { + return truth; + } + if (testSar.size <= in1dSortThreshold) { + for i in 0..#testSar.size { + truth |= (mainSar == testSar[i]); + } + return truth; + } else { + // This is inspired by numpy in1d + const (uoMain, uvMain, cMain, revIdx) = uniqueGroup(mainSar, returnInverse=true); + const (uoTest, uvTest, cTest, revTest) = uniqueGroup(testSar); + const (segs, vals) = concat(uoMain, uvMain, uoTest, uvTest); + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Unique strings in first array: %t\nUnique strings in second array: %t\nConcat length: %t".format( + uoMain.size, uoTest.size, segs.size)); + var st = new owned SymTab(); + const ar = new owned SegSArray(segs, vals, st); + const order = ar.argsort(); + const (sortedSegs, sortedVals) = ar[order]; + const sar = new owned SegSArray(sortedSegs, sortedVals, st); + if v { + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Sorted concatenated unique strings:"); + sar.show(10); + stdout.flush(); + } + const D = sortedSegs.domain; + // First compare lengths and only check pairs whose lengths are equal (because gathering them is expensive) + 
var flag: [D] bool; + const lengths = sar.getLengths(); + const ref saro = sar.offsets.a; + const ref sarv = sar.values.a; + const high = D.high; + forall (i, f, o, l) in zip(D, flag, saro, lengths) { + if (i < high) && (l == lengths[i+1]) { + const left = o..saro[i+1]-1; + var eq: bool; + if (i < high - 1) { + const right = saro[i+1]..saro[i+2]-1; + eq = (memcmp(sarv, left, sarv, right) == 0); + } else { + const ref right = saro[i+1]..sar.values.aD.high; + eq = (memcmp(sarv, left, sarv, right) == 0); + } + if eq { + f = true; + flag[i+1] = true; + } + } + } + + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Flag pop: %t".format(+ reduce flag)); + + // Now flag contains true for both elements of duplicate pairs + if invert {flag = !flag;} + // Permute back to unique order + var ret: [D] bool; + forall (o, f) in zip(order, flag) with (var agg = newDstAggregator(bool)) { + agg.copy(ret[o], f); + } + if v { + saLogger.debug(getModuleName(),getRoutineName(),getLineNumber(), + "Ret pop: %t".format(+ reduce ret)); + } + // Broadcast back to original (pre-unique) order + var truth: [mainSar.offsets.aD] bool; + forall (t, i) in zip(truth, revIdx) with (var agg = newSrcAggregator(bool)) { + agg.copy(t, ret[i]); + } + return truth; + } + } + + /* Convert an array of raw bytes into a Chapel string. 
*/ inline proc interpretAsString(bytearray: [?D] uint(8)): string { // Byte buffer must be local in order to make a C pointer diff --git a/src/SegmentedMsg.chpl b/src/SegmentedMsg.chpl index 18804099b6..895f005e1d 100644 --- a/src/SegmentedMsg.chpl +++ b/src/SegmentedMsg.chpl @@ -9,8 +9,16 @@ module SegmentedMsg { use MultiTypeSymEntry; use RandArray; use IO; - use GenSymIO only jsonToPdArray; + use GenSymIO only jsonToPdArray,jsonToPdArrayInt; + use SymArrayDmap; + use SACA; + use Random; + use RadixSortLSD; + use Set; + public use ArgSortMsg; + + private config const DEBUG = false; const smLogger = new Logger(); if v { @@ -64,6 +72,8 @@ module SegmentedMsg { return repMsg; } + + proc segmentLengthsMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { var pn = Reflection.getRoutineName(); @@ -85,6 +95,12 @@ module SegmentedMsg { // Do not include the null terminator in the length lengths.a = strings.getLengths() - 1; } + when "int" { + var sarrays = new owned SegSArray(segName, valName, st); + var lengths = st.addEntry(rname, sarrays.size, int); + // Do not include the null terminator in the length + lengths.a = sarrays.getLengths() - 1; + } otherwise { var errorMsg = notImplementedError(pn, "%s".format(objtype)); smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); @@ -283,6 +299,20 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); return repMsg; } +/* + when "int" { + var sarrays = new owned SegSArray(segName, valName, st); + var hashes = sarrays.hash(); + var name1 = st.nextName(); + var hash1 = st.addEntry(name1, hashes.size, int); + var name2 = st.nextName(); + var hash2 = st.addEntry(name2, hashes.size, int); + forall (h, h1, h2) in zip(hashes, hash1.a, hash2.a) { + (h1,h2) = h:(int,int); + } + return "created " + st.attrib(name1) + "+created " + st.attrib(name2); + } +*/ otherwise { var errorMsg = 
notImplementedError(pn, objtype); smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); @@ -365,6 +395,16 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); return repMsg; } + when "int" { + // Make a temporary int array + var arrays = new owned SegSArray(args[1], args[2], st); + // Parse the index + var idx = args[3]:int; + // TO DO: in the future, we will force the client to handle this + idx = convertPythonIndexToChapel(idx, arrays.size); + var s = arrays[idx]; + return "item %s %jt".format("int", s); + } otherwise { var errorMsg = notImplementedError(pn, objtype); smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); @@ -411,6 +451,33 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string var newValName = st.nextName(); // Compute the slice var (newSegs, newVals) = strings[slice]; + + // Store the resulting offsets and bytes arrays + var newSegsEntry = new shared SymEntry(newSegs); + var newValsEntry = new shared SymEntry(newVals); + st.addEntry(newSegName, newSegsEntry); + st.addEntry(newValName, newValsEntry); + return "created " + st.attrib(newSegName) + " +created " + st.attrib(newValName); + } + when "int" { + // Make a temporary integer array + var sarrays = new owned SegSArray(args[1], args[2], st); + // Parse the slice parameters + var start = args[3]:int; + var stop = args[4]:int; + var stride = args[5]:int; + // Only stride-1 slices are allowed for now + if (stride != 1) { + var errorMsg = notImplementedError(pn, "stride != 1"); + smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); + return errorMsg; + } + // TO DO: in the future, we will force the client to handle this + var slice: range(stridable=true) = convertPythonSliceToChapel(start, stop, stride); + var newSegName = st.nextName(); + var newValName = st.nextName(); + // Compute the slice + var 
(newSegs, newVals) = sarrays[slice]; // Store the resulting offsets and bytes arrays var newSegsEntry = new shared SymEntry(newSegs); var newValsEntry = new shared SymEntry(newVals); @@ -490,6 +557,41 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string return "Error: %t".format(e.message()); } } + when "int" { + var sarrays = new owned SegSArray(args[1], args[2], st); + var iname = args[3]; + var gIV: borrowed GenSymEntry = st.lookup(iname); + try { + select gIV.dtype { + when DType.Int64 { + var iv = toSymEntry(gIV, int); + var (newSegs, newVals) = sarrays[iv.a]; + var newSegsEntry = new shared SymEntry(newSegs); + var newValsEntry = new shared SymEntry(newVals); + st.addEntry(newSegName, newSegsEntry); + st.addEntry(newValName, newValsEntry); + } + when DType.Bool { + var iv = toSymEntry(gIV, bool); + var (newSegs, newVals) = sarrays[iv.a]; + var newSegsEntry = new shared SymEntry(newSegs); + var newValsEntry = new shared SymEntry(newVals); + st.addEntry(newSegName, newSegsEntry); + st.addEntry(newValName, newValsEntry); + } + otherwise { + var errorMsg = "("+objtype+","+dtype2str(gIV.dtype)+")"; + smLogger.error(getModuleName(),getRoutineName(), + getLineNumber(),errorMsg); + return notImplementedError(pn,errorMsg); + } + } + } catch e: Error { + smLogger.error(getModuleName(),getRoutineName(),getLineNumber(), + e.message()); + return "Error: %t".format(e.message()); + } + } otherwise { var errorMsg = "unsupported objtype: %t".format(objtype); smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); @@ -561,6 +663,47 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string return repMsg; } + proc segBinopvvIntMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + var (op, + // Type and attrib names of left segmented array + ltype, lsegName, lvalName, + // Type and attrib names of right segmented array + rtype, 
rsegName, rvalName, leftStr, jsonStr) + = payload.decode().splitMsgToTuple(9); + + // check to make sure symbols defined + st.check(lsegName); + st.check(lvalName); + st.check(rsegName); + st.check(rvalName); + + select (ltype, rtype) { + when ("int", "int") { + var lsa = new owned SegSArray(lsegName, lvalName, st); + var rsa = new owned SegString(rsegName, rvalName, st); + select op { + when "==" { + var rname = st.nextName(); + var e = st.addEntry(rname, lsa.size, bool); + e.a = (lsa == rsa); + repMsg = "created " + st.attrib(rname); + } + when "!=" { + var rname = st.nextName(); + var e = st.addEntry(rname, lsa.size, bool); + e.a = (lsa != rsa); + repMsg = "created " + st.attrib(rname); + } + otherwise {return notImplementedError(pn, ltype, op, rtype);} + } + } + otherwise {return unrecognizedTypeError(pn, "("+ltype+", "+rtype+")");} + } + return repMsg; + } + proc segBinopvsMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { var pn = Reflection.getRoutineName(); var repMsg: string; @@ -570,7 +713,6 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string // check to make sure symbols defined st.check(segName); st.check(valName); - var json = jsonToPdArray(encodedVal, 1); var value = json[json.domain.low]; var rname = st.nextName(); @@ -594,6 +736,42 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string return "created " + st.attrib(rname); } + proc segBinopvsIntMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + var (op, objtype, segName, valName, valtype, encodedVal) + = payload.decode().splitMsgToTuple(6); + + // check to make sure symbols defined + st.check(segName); + st.check(valName); + var json = jsonToPdArrayInt(encodedVal, 1); + var value = json[json.domain.low]; + var rname = st.nextName(); + select (objtype, valtype) { + when ("int", "int") { + var sarrays = new owned SegSArray(segName, valName, 
st); + select op { + when "==" { + var e = st.addEntry(rname, sarrays.size, bool); + var tmp=sarrays[sarrays.offsets.aD.low]:int; + e.a = (tmp == value); +// e.a = (sarrays == value); + } + when "!=" { + var e = st.addEntry(rname, sarrays.size, bool); + var tmp=sarrays[sarrays.offsets.aD.low]:int; + e.a = (tmp != value); +// e.a = (sarrays != value); + } + otherwise {return notImplementedError(pn, objtype, op, valtype);} + } + } + otherwise {return unrecognizedTypeError(pn, "("+objtype+", "+valtype+")");} + } + return "created " + st.attrib(rname); + } + proc segIn1dMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { var pn = Reflection.getRoutineName(); var repMsg: string; @@ -632,6 +810,43 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string return "created " + st.attrib(rname); } + proc segIn1dIntMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + var (mainObjtype, mainSegName, mainValName, testObjtype, testSegName, + testValName, invertStr) = payload.decode().splitMsgToTuple(7); + + // check to make sure symbols defined + st.check(mainSegName); + st.check(mainValName); + st.check(testSegName); + st.check(testValName); + + var invert: bool; + if invertStr == "True" {invert = true;} + else if invertStr == "False" {invert = false;} + else {return "Error: Invalid argument in %s: %s (expected True or False)".format(pn, invertStr);} + + var rname = st.nextName(); + select (mainObjtype, testObjtype) { + when ("int", "int") { + var mainSA = new owned SegSArray(mainSegName, mainValName, st); + var testSA = new owned SegSArray(testSegName, testValName, st); + var e = st.addEntry(rname, mainSA.size, bool); + if invert { + e.a = !in1d_Int(mainSA, testSA); + } else { + e.a = in1d_Int(mainSA, testSA); + } + } + otherwise { + var errorMsg = unrecognizedTypeError(pn, "("+mainObjtype+", "+testObjtype+")"); + 
smLogger.error(getModuleName(),getRoutineName(),getLineNumber(),errorMsg); + return errorMsg; + } + } + return "created " + st.attrib(rname); + } proc segGroupMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { var pn = Reflection.getRoutineName(); var (objtype, segName, valName) = payload.decode().splitMsgToTuple(3); @@ -655,4 +870,1463 @@ proc segmentedPeelMsg(cmd: string, payload: bytes, st: borrowed SymTab): string } return "created " + st.attrib(rname); } + + + + proc segSuffixArrayMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var (objtype, segName, valName) = payload.decode().splitMsgToTuple(3); + var repMsg: string; + + // check to make sure symbols defined + st.check(segName); + st.check(valName); + + var strings = new owned SegString(segName, valName, st); + var size=strings.size; + var nBytes = strings.nBytes; + var length=strings.getLengths(); + var offsegs = (+ scan length) - length; + var algorithmNum=2:int; //2:"divsufsort";1:SuffixArraySkew + select (objtype) { + when "str" { + // To be checked, I am not sure if this formula can estimate the total memory requirement + // Lengths + 2*segs + 2*vals (copied to SymTab) + overMemLimit(8*size + 16*size + nBytes); + + //allocate an offset array + var sasoff = offsegs; + //allocate an values array + var sasval:[0..(nBytes-1)] int; + // var lcpval:[0..(nBytes-1)] int; now we will not build the LCP array at the same time + + var i:int; + var j:int; + forall i in 0..(size-1) do { + // the start position of ith string in value array + + var startposition:int; + var endposition:int; + startposition = offsegs[i]; + endposition = startposition+length[i]-1; + // what we do in the select structure is filling the sasval array with correct index + select (algorithmNum) { + when 1 { + var sasize=length[i]:int; + ref strArray=strings.values.a[startposition..endposition]; + var tmparray:[0..sasize+2] int; + var intstrArray:[0..sasize+2] 
int; + var x:int; + var y:int; + forall (x,y) in zip ( intstrArray[0..sasize-1], + strings.values.a[startposition..endposition]) do x=y; + intstrArray[sasize]=0; + intstrArray[sasize+1]=0; + intstrArray[sasize+2]=0; + SuffixArraySkew(intstrArray,tmparray,sasize,256); + for (x, y) in zip(sasval[startposition..endposition], tmparray[0..sasize-1]) do + x = y; + } + when 2 { + var sasize=length[i]:int(32); + var localstrArray:[0..endposition-startposition] uint(8); + var a:int(8); + var b:int(8); + ref strArray=strings.values.a[startposition..endposition]; + localstrArray=strArray; + //for all (a,b) in zip (localstrArray[0..sasize-1],strArray) do a=b; + var tmparray:[1..sasize] int(32); + divsufsort(localstrArray,tmparray,sasize); + //divsufsort(strArray,tmparray,sasize); + var x:int; + var y:int(32); + for (x, y) in zip(sasval[startposition..endposition], tmparray[1..sasize]) do + x = y; + } + } + +/* +// Here we calculate the lcp(Longest Common Prefix) array value + forall j in startposition+1..endposition do{ + var tmpcount=0:int; + var tmpbefore=sasval[j-1]:int; + var tmpcur=sasval[j]:int; + var tmplen=min(sasize-tmpcur, sasize-tmpbefore); + var tmpi:int; + for tmpi in 0..tmplen-1 do { + if (intstrArray[tmpbefore]!=intstrArray[tmpcur]) { + break; + } + tmpcount+=1; + } + lcpval[j]=tmpcount; + } +*/ + } + var segName2 = st.nextName(); + var valName2 = st.nextName(); + // var lcpvalName = st.nextName(); + + var segEntry = new shared SymEntry(sasoff); + var valEntry = new shared SymEntry(sasval); + // var lcpvalEntry = new shared SymEntry(lcpval); + /* + valEntry.enhancedInfo=lcpvalName; + lcpvalEntry.enhancedInfo=valName2; + we have removed enchancedInfo. 
+ */ + st.addEntry(segName2, segEntry); + st.addEntry(valName2, valEntry); +// st.addEntry(lcpvalName, lcpvalEntry); + repMsg = 'created ' + st.attrib(segName2) + '+created ' + st.attrib(valName2); + return repMsg; + + + } + otherwise { + var errorMsg = notImplementedError(pn, "("+objtype+")"); + writeln(generateErrorContext( + msg=errorMsg, + lineNumber=getLineNumber(), + moduleName=getModuleName(), + routineName=getRoutineName(), + errorClass="NotImplementedError")); + return errorMsg; + } + } + + } + + proc segLCPMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var (objtype, segName1, valName1,segName2,valName2) = payload.decode().splitMsgToTuple(5); + var repMsg: string; + + // check to make sure symbols defined + st.check(segName1); + st.check(valName1); + st.check(segName2); + st.check(valName2); + + var suffixarrays = new owned SegSArray(segName1, valName1, st); + var size=suffixarrays.size; + var nBytes = suffixarrays.nBytes; + var length=suffixarrays.getLengths(); + var offsegs = (+ scan length) - length; + + + var strings = new owned SegString(segName2, valName2, st); + + select (objtype) { + when "int" { + // To be checked, I am not sure if this formula can estimate the total memory requirement + // Lengths + 2*segs + 2*vals (copied to SymTab) + overMemLimit(8*size + 16*size + nBytes); + + //allocate an offset array + var sasoff = offsegs; + //allocate an values array + var lcpval:[0..(nBytes-1)] int; + + var i:int; + var j:int; + forall i in 0..(size-1) do { + // the start position of ith surrix array in value array + var startposition:int; + var endposition:int; + startposition = offsegs[i]; + endposition = startposition+length[i]-1; + + + + + var sasize=length[i]:int; + ref sufArray=suffixarrays.values.a[startposition..endposition]; + ref strArray=strings.values.a[startposition..endposition]; +// Here we calculate the lcp(Longest Common Prefix) array value + forall j in 
startposition+1..endposition do{ + var tmpcount=0:int; + var tmpbefore=sufArray[j-1]:int; + var tmpcur=sufArray[j]:int; + var tmplen=min(sasize-tmpcur, sasize-tmpbefore); + var tmpi:int; + for tmpi in 0..tmplen-1 do { + if (strArray[tmpbefore]!=strArray[tmpcur]) { + break; + } + tmpbefore+=1; + tmpcur+=1; + tmpcount+=1; + } + lcpval[j]=tmpcount; + } + } + var lcpsegName = st.nextName(); + var lcpvalName = st.nextName(); + + var lcpsegEntry = new shared SymEntry(sasoff); + var lcpvalEntry = new shared SymEntry(lcpval); + /* + valEntry.enhancedInfo=lcpvalName; + lcpvalEntry.enhancedInfo=valName2; + we have removed enchancedInfo. + */ + st.addEntry(lcpsegName, lcpsegEntry); + st.addEntry(lcpvalName, lcpvalEntry); + repMsg = 'created ' + st.attrib(lcpsegName) + '+created ' + st.attrib(lcpvalName); + return repMsg; + + + } + otherwise { + var errorMsg = notImplementedError(pn, "("+objtype+")"); + writeln(generateErrorContext( + msg=errorMsg, + lineNumber=getLineNumber(), + moduleName=getModuleName(), + routineName=getRoutineName(), + errorClass="NotImplementedError")); + return errorMsg; + } + } + + } + +// directly read a string from given file and generate its suffix array + proc segSAFileMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); +// var (FileName) = payload.decode().splitMsgToTuple(1); + var FileName = payload.decode(); + var repMsg: string; + +// var filesize:int(32); + var filesize:int; + var f = open(FileName, iomode.r); + var size=1:int; + var nBytes = f.size; + var length:[0..0] int =nBytes; + var offsegs:[0..0] int =0 ; + + var sasize=nBytes:int; + var startposition:int; + var endposition:int; + startposition = 0; + endposition = nBytes-1; + var strArray:[startposition..endposition]uint(8); + var r = f.reader(kind=ionative); + r.read(strArray); + r.close(); + + var segName = st.nextName(); + var valName = st.nextName(); + + var segEntry = new shared SymEntry(offsegs); + var valEntry = new 
shared SymEntry(strArray); + st.addEntry(segName, segEntry); + st.addEntry(valName, valEntry); + + var algorithmNum=2:int; //2:"divsufsort";1:SuffixArraySkew + + select ("str") { + when "str" { + // To be checked, I am not sure if this formula can estimate the total memory requirement + // Lengths + 2*segs + 2*vals (copied to SymTab) + overMemLimit(8*size + 16*size + nBytes); + + //allocate an offset array + var sasoff = offsegs; + //allocate a suffix array values array and lcp array + var sasval:[0..(nBytes-1)] int; +// var lcpval:[0..(nBytes-1)] int; + + var i:int; + forall i in 0..(size-1) do { + // the start position of ith string in value array + select (algorithmNum) { + when 1 { + var sasize=length[i]:int; + var tmparray:[0..sasize+2] int; + var intstrArray:[0..sasize+2] int; + var x:int; + var y:int; + forall (x,y) in zip ( intstrArray[0..sasize-1],strArray[startposition..endposition]) do x=y; + intstrArray[sasize]=0; + intstrArray[sasize+1]=0; + intstrArray[sasize+2]=0; + SuffixArraySkew(intstrArray,tmparray,sasize,256); + for (x, y) in zip(sasval[startposition..endposition], tmparray[0..sasize-1]) do + x = y; + } + when 2 { + var sasize=length[i]:int(32); + //ref strArray=strings.values.a[startposition..endposition]; + var tmparray:[1..sasize] int(32); + divsufsort(strArray,tmparray,sasize); + var x:int; + var y:int(32); + for (x, y) in zip(sasval[startposition..endposition], tmparray[1..sasize]) do + x = y; + } + }// end of select + } // end of forall + var segName2 = st.nextName(); + var valName2 = st.nextName(); +// var lcpvalName = st.nextName(); + + var segEntry = new shared SymEntry(sasoff); + var valEntry = new shared SymEntry(sasval); +// var lcpvalEntry = new shared SymEntry(lcpval); + /* + valEntry.enhancedInfo=lcpvalName; + lcpvalEntry.enhancedInfo=valName2; + We have removed enhancedInfo. 
+ */ + st.addEntry(segName2, segEntry); + st.addEntry(valName2, valEntry); +// st.addEntry(lcpvalName, lcpvalEntry); + repMsg = 'created ' + st.attrib(segName2) + '+created ' + st.attrib(valName2) + + '+created ' + st.attrib(segName) + '+created ' + st.attrib(valName); + return repMsg; + + } + otherwise { + var errorMsg = notImplementedError(pn, "("+FileName+")"); + writeln(generateErrorContext( + msg=errorMsg, + lineNumber=getLineNumber(), + moduleName=getModuleName(), + routineName=getRoutineName(), + errorClass="NotImplementedError")); + return errorMsg; + } + } + + } + + + + +// directly read a graph from given file and build the SegGraph class in memory + proc segGraphFileMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var (NeS,NvS,ColS,DirectedS, FileName) = payload.decode().splitMsgToTuple(5); + //writeln("======================Graph Reading====================="); + //writeln(NeS,NvS,ColS,DirectedS, FileName); + var Ne=NeS:int; + var Nv=NvS:int; + var NumCol=ColS:int; + var directed=DirectedS:int; + var weighted=0:int; + if NumCol>2 { + weighted=1; + } + var src,srcR,src1,srcR1: [0..Ne-1] int; + var dst,dstR,dst1,dstR1: [0..Ne-1] int; + var e_weight: [0..Ne-1] int; + var v_weight: [0..Nv-1] int; + var neighbour: [0..Nv-1] int; + var neighbourR: [0..Nv-1] int; + var start_i: [0..Nv-1] int; + var start_iR: [0..Nv-1] int; + + var linenum=0:int; + + var repMsg: string; + + var filesize:int; + var f = open(FileName, iomode.r); + var r = f.reader(kind=ionative); + var line:string; + var a,b,c:string; + var curline=0:int; + while r.readline(line) { + if NumCol==2 { + (a,b)= line.splitMsgToTuple(2); + } else { + (a,b,c)= line.splitMsgToTuple(3); + e_weight[curline]=c:int; + } + src[curline]=a:int; + dst[curline]=b:int; + curline+=1; + } + + r.close(); + src=src+(src==dst); + src=src%Nv; + dst=dst%Nv; + + var iv = radixSortLSD_ranks(src); + // permute into sorted order + src1 = src[iv]; //# permute 
first vertex into sorted order + dst1 = dst[iv]; //# permute second vertex into sorted order + var startpos=0, endpos:int; + var sort=0:int; + while (startpos < Ne-2) { + endpos=startpos+1; + sort=0; + while (endpos <=Ne-1) { + if (src1[startpos]==src1[endpos]) { + sort=1; + endpos+=1; + continue; + } else { + break; + } + }//end of while endpos + if (sort==1) { + var tmpary:[0..endpos-startpos-1] int; + tmpary=dst1[startpos..endpos-1]; + var ivx=radixSortLSD_ranks(tmpary); + dst1[startpos..endpos-1]=tmpary[ivx]; + sort=0; + } + startpos+=1; + }//end of while startpos + + for i in 0..Ne-1 do { + neighbour[src1[i]]+=1; + if (start_i[src1[i]] ==-1){ + start_i[src1[i]]=i; + } + + } + + if (directed==0) { //undirected graph + + srcR = dst1; + dstR = src1; + + var ivR = radixSortLSD_ranks(srcR); + srcR1 = srcR[ivR]; //# permute first vertex into sorted order + dstR1 = dstR[ivR]; //# permute second vertex into sorted order + startpos=0; + sort=0; + while (startpos < Ne-2) { + endpos=startpos+1; + sort=0; + while (endpos <=Ne-1) { + if (srcR1[startpos]==srcR1[endpos]) { + sort=1; + endpos+=1; + continue; + } else { + break; + } + }//end of while endpos + if (sort==1) { + var tmparyR:[0..endpos-startpos-1] int; + tmparyR=dstR1[startpos..endpos-1]; + var ivxR=radixSortLSD_ranks(tmparyR); + dstR1[startpos..endpos-1]=tmparyR[ivxR]; + sort=0; + } + startpos+=1; + }//end of while startpos + for i in 0..Ne-1 do { + neighbourR[srcR1[i]]+=1; + if (start_iR[srcR1[i]] ==-1){ + start_iR[srcR1[i]]=i; + } + } + + }//end of undirected + + + var ewName ,vwName:string; + if (weighted!=0) { + fillInt(v_weight,1,1000); + //fillRandom(v_weight,0,100); + ewName = st.nextName(); + vwName = st.nextName(); + var vwEntry = new shared SymEntry(v_weight); + var ewEntry = new shared SymEntry(e_weight); + st.addEntry(vwName, vwEntry); + st.addEntry(ewName, ewEntry); + } + var srcName = st.nextName(); + var dstName = st.nextName(); + var startName = st.nextName(); + var neiName = st.nextName(); + var 
srcEntry = new shared SymEntry(src1); + var dstEntry = new shared SymEntry(dst1); + var startEntry = new shared SymEntry(start_i); + var neiEntry = new shared SymEntry(neighbour); + st.addEntry(srcName, srcEntry); + st.addEntry(dstName, dstEntry); + st.addEntry(startName, startEntry); + st.addEntry(neiName, neiEntry); + var sNv=Nv:string; + var sNe=Ne:string; + var sDirected=directed:string; + var sWeighted=weighted:string; + + var srcNameR, dstNameR, startNameR, neiNameR:string; + if (directed!=0) {//for directed graph + if (weighted!=0) { + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + '+ ' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) + + '+created ' + st.attrib(vwName) + '+created ' + st.attrib(ewName); + } else { + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + '+ ' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) ; + + } + } else {//for undirected graph + + srcNameR = st.nextName(); + dstNameR = st.nextName(); + startNameR = st.nextName(); + neiNameR = st.nextName(); + var srcEntryR = new shared SymEntry(srcR1); + var dstEntryR = new shared SymEntry(dstR1); + var startEntryR = new shared SymEntry(start_iR); + var neiEntryR = new shared SymEntry(neighbourR); + st.addEntry(srcNameR, srcEntryR); + st.addEntry(dstNameR, dstEntryR); + st.addEntry(startNameR, startEntryR); + st.addEntry(neiNameR, neiEntryR); + if (weighted!=0) { + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + ' +' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) + + '+created ' + st.attrib(srcNameR) + '+created ' + st.attrib(dstNameR) + + '+created ' + st.attrib(startNameR) + '+created ' + st.attrib(neiNameR) + + '+created ' + st.attrib(vwName) + '+created ' + 
st.attrib(ewName); + } else { + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + ' +' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) + + '+created ' + st.attrib(srcNameR) + '+created ' + st.attrib(dstNameR) + + '+created ' + st.attrib(startNameR) + '+created ' + st.attrib(neiNameR) ; + } + + } + smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return repMsg; + } + + + + proc segrmatgenMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + var (slgNv, sNe_per_v, sp, sdire,swei,rest ) + = payload.decode().splitMsgToTuple(6); + //writeln(slgNv, sNe_per_v, sp, sdire,swei,rest); + var lgNv = slgNv: int; + var Ne_per_v = sNe_per_v: int; + var p = sp: real; + var directed=sdire : int; + var weighted=swei : int; + + var Nv = 2**lgNv:int; + // number of edges + var Ne = Ne_per_v * Nv:int; + // probabilities + var src: [0..Ne-1] int; + var dst: [0..Ne-1] int; + var iv: [0..Ne-1] int; + + var length: [0..Nv-1] int; + var start_i: [0..Nv-1] int; + var neighbour:[0..Nv-1] int; + length=0; + start_i=-1; + neighbour=0; + var n_vertices=Nv; + var n_edges=Ne; + src=1; + dst=1; + var srcName:string ; + var dstName:string ; + var startName:string ; + var neiName:string ; + var sNv:string; + var sNe:string; + var sDirected:string; + var sWeighted:string; + + + proc rmat_gen() { + var a = p; + var b = (1.0 - a)/ 3.0:real; + var c = b; + var d = b; + var ab=a+b; + var c_norm = c / (c + d):real; + var a_norm = a / (a + b):real; + // generate edges + var src_bit: [0..Ne-1]int; + var dst_bit: [0..Ne-1]int; + for ib in 1..lgNv { + var tmpvar: [0..Ne-1] real; + fillRandom(tmpvar); + src_bit=tmpvar>ab; + fillRandom(tmpvar); + dst_bit=tmpvar>(c_norm * src_bit + a_norm * (~ src_bit)); + src = src + ((2**(ib-1)) * src_bit); + dst = dst + ((2**(ib-1)) * dst_bit); + } + src=src%Nv; + 
dst=dst%Nv; + //remove self loop + src=src+(src==dst); + src=src%Nv; + } + proc combine_sort(){ + + /* we cannot use the coargsort version because it will break the memory limit */ + // coargsort + param bitsPerDigit = RSLSD_bitsPerDigit; + var bitWidths: [0..1] int; + var negs: [0..1] bool; + var totalDigits: int; + var size=Nv: int; + + for (bitWidth, ary, neg) in zip(bitWidths, [src,dst], negs) { + (bitWidth, neg) = getBitWidth(ary); + totalDigits += (bitWidth + (bitsPerDigit-1)) / bitsPerDigit; + } + proc mergedArgsort(param numDigits) throws { + + //overMemLimit(((4 + 3) * size * (numDigits * bitsPerDigit / 8)) + // + (2 * here.maxTaskPar * numLocales * 2**16 * 8)); + var merged = makeDistArray(size, numDigits*uint(bitsPerDigit)); + var curDigit = RSLSD_tupleLow + numDigits - totalDigits; + for (ary , nBits, neg) in zip([src,dst], bitWidths, negs) { + proc mergeArray(type t) { + ref A = ary; + const r = 0..#nBits by bitsPerDigit; + for rshift in r { + const myDigit = (r.high - rshift) / bitsPerDigit; + const last = myDigit == 0; + forall (m, a) in zip(merged, A) { + m[curDigit+myDigit] = getDigit(a, rshift, last, neg):uint(bitsPerDigit); + } + } + curDigit += r.size; + } + mergeArray(int); + } + var iv = argsortDefault(merged); + return iv; + } + + if totalDigits <= 4 { + iv = mergedArgsort( 4); + } + + if totalDigits <= 8 { + iv = mergedArgsort( 8); + } + if totalDigits <= 16 { + iv = mergedArgsort(16); + } + + } + + proc twostep_sort(){ + iv = radixSortLSD_ranks(src); + // permute into sorted order + var tmpedge=src; + tmpedge=src[iv]; + src=tmpedge; + tmpedge=dst[iv]; + dst=tmpedge; + //# to premute/rename vertices + var startpos=0, endpos:int; + var sort=0:int; + while (startpos < Ne-2) { + endpos=startpos+1; + sort=0; + //writeln("startpos=",startpos,"endpos=",endpos); + while (endpos <=Ne-1) { + if (src[startpos]==src[endpos]) { + sort=1; + endpos+=1; + continue; + } else { + break; + } + }//end of while endpos + if (sort==1) { + var 
tmpary:[0..endpos-startpos-1] int; + tmpary=dst[startpos..endpos-1]; + var ivx=radixSortLSD_ranks(tmpary); + dst[startpos..endpos-1]=tmpary[ivx]; + //writeln("src1=",src1,"dst1=",dst1,"ivx=",ivx); + sort=0; + } + startpos+=1; + }//end of while startpos + } + proc set_neighbour(){ + for i in 0..Ne-1 do { + length[src[i]]+=1; + if (start_i[src[i]] ==-1){ + start_i[src[i]]=i; + //writeln("assign index ",i, " to vertex ",src[i]); + } + + } + neighbour = length; + } + //proc set_common_symtable(): string throws { + proc set_common_symtable() { + srcName = st.nextName(); + dstName = st.nextName(); + startName = st.nextName(); + neiName = st.nextName(); + var srcEntry = new shared SymEntry(src); + var dstEntry = new shared SymEntry(dst); + var startEntry = new shared SymEntry(start_i); + var neiEntry = new shared SymEntry(neighbour); + try! st.addEntry(srcName, srcEntry); + try! st.addEntry(dstName, dstEntry); + try! st.addEntry(startName, startEntry); + try! st.addEntry(neiName, neiEntry); + sNv=Nv:string; + sNe=Ne:string; + sDirected=directed:string; + sWeighted=weighted:string; + } + if (directed!=0) {// for directed graph + if (weighted!=0) { // for weighted graph + var e_weight: [0..Ne-1] int; + var v_weight: [0..Nv-1] int; + rmat_gen(); + twostep_sort(); + set_neighbour(); + + var ewName ,vwName:string; + fillInt(e_weight,1,1000); + //fillRandom(e_weight,0,100); + fillInt(v_weight,1,1000); + //fillRandom(v_weight,0,100); + ewName = st.nextName(); + vwName = st.nextName(); + var vwEntry = new shared SymEntry(v_weight); + var ewEntry = new shared SymEntry(e_weight); + try! st.addEntry(vwName, vwEntry); + try! 
st.addEntry(ewName, ewEntry); + + set_common_symtable(); + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + '+ ' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) + + '+created ' + st.attrib(vwName) + '+created ' + st.attrib(ewName); + + } else { + rmat_gen(); + twostep_sort(); + set_neighbour(); + set_common_symtable(); + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + '+ ' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) ; + } + } + else { + // only for undirected graph, we only declare R variables here + var srcR: [0..Ne-1] int; + var dstR: [0..Ne-1] int; + ref ivR=iv; + var start_iR: [0..Nv-1] int; + var lengthR: [0..Nv-1] int; + var neighbourR: [0..Nv-1] int; + start_iR=-1; + lengthR=0; + neighbourR=0; + var srcNameR, dstNameR, startNameR, neiNameR:string; + + proc combine_sortR(){ + + /* we cannot use the coargsort version because it will break the memory limit */ + param bitsPerDigit = RSLSD_bitsPerDigit; + var bitWidths: [0..1] int; + var negs: [0..1] bool; + var totalDigits: int; + var size=Nv: int; + for (bitWidth, ary, neg) in zip(bitWidths, [srcR,dstR], negs) { + (bitWidth, neg) = getBitWidth(ary); + totalDigits += (bitWidth + (bitsPerDigit-1)) / bitsPerDigit; + + } + proc mergedArgsort(param numDigits) throws { + + //overMemLimit(((4 + 3) * size * (numDigits * bitsPerDigit / 8)) + // + (2 * here.maxTaskPar * numLocales * 2**16 * 8)); + var merged = makeDistArray(size, numDigits*uint(bitsPerDigit)); + var curDigit = RSLSD_tupleLow + numDigits - totalDigits; + for (ary , nBits, neg) in zip([src,dst], bitWidths, negs) { + proc mergeArray(type t) { + ref A = ary; + const r = 0..#nBits by bitsPerDigit; + for rshift in r { + const myDigit = (r.high - rshift) / bitsPerDigit; + const last = myDigit == 0; + forall (m, a) in zip(merged, A) { + 
m[curDigit+myDigit] = getDigit(a, rshift, last, neg):uint(bitsPerDigit); + } + } + curDigit += r.size; + } + mergeArray(int); + } + var iv = argsortDefault(merged); + return iv; + } + + if totalDigits <= 4 { + ivR = mergedArgsort( 4); + } + + if totalDigits <= 8 { + ivR = mergedArgsort( 8); + } + if totalDigits <= 16 { + ivR = mergedArgsort(16); + } + + + } + + proc twostep_sortR() { + ivR = radixSortLSD_ranks(srcR); + var tmpedges = srcR[ivR]; //# permute first vertex into sorted order + srcR=tmpedges; + tmpedges = dstR[ivR]; //# permute second vertex into sorted order + dstR=tmpedges; + var startpos=0:int; + var endpos:int; + var sort=0; + while (startpos < Ne-2) { + endpos=startpos+1; + sort=0; + while (endpos <=Ne-1) { + if (srcR[startpos]==srcR[endpos]) { + sort=1; + endpos+=1; + continue; + } else { + break; + } + }//end of while endpos + if (sort==1) { + var tmparyR:[0..endpos-startpos-1] int; + tmparyR=dstR[startpos..endpos-1]; + var ivxR=radixSortLSD_ranks(tmparyR); + dstR[startpos..endpos-1]=tmparyR[ivxR]; + sort=0; + } + startpos+=1; + } //end of while startpos + } + proc set_neighbourR(){ + for i in 0..Ne-1 do { + lengthR[srcR[i]]+=1; + if (start_iR[srcR[i]] ==-1){ + start_iR[srcR[i]]=i; + } + } + neighbourR = lengthR; + + } + //proc set_common_symtableR():string throws { + proc set_common_symtableR() { + srcNameR = st.nextName(); + dstNameR = st.nextName(); + startNameR = st.nextName(); + neiNameR = st.nextName(); + var srcEntryR = new shared SymEntry(srcR); + var dstEntryR = new shared SymEntry(dstR); + var startEntryR = new shared SymEntry(start_iR); + var neiEntryR = new shared SymEntry(neighbourR); + try! st.addEntry(srcNameR, srcEntryR); + try! st.addEntry(dstNameR, dstEntryR); + try! st.addEntry(startNameR, startEntryR); + try! 
st.addEntry(neiNameR, neiEntryR); + } + + + if (weighted!=0) { + rmat_gen(); + twostep_sort(); + set_neighbour(); + srcR = dst; + dstR = src; + twostep_sortR(); + set_neighbourR(); + + //only for weighted graph + var ewName ,vwName:string; + var e_weight: [0..Ne-1] int; + var v_weight: [0..Nv-1] int; + + fillInt(e_weight,1,1000); + //fillRandom(e_weight,0,100); + fillInt(v_weight,1,1000); + //fillRandom(v_weight,0,100); + ewName = st.nextName(); + vwName = st.nextName(); + var vwEntry = new shared SymEntry(v_weight); + var ewEntry = new shared SymEntry(e_weight); + st.addEntry(vwName, vwEntry); + st.addEntry(ewName, ewEntry); + // end of weighted!=0 + + set_common_symtable(); + set_common_symtableR(); + + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + ' +' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) + + '+created ' + st.attrib(srcNameR) + '+created ' + st.attrib(dstNameR) + + '+created ' + st.attrib(startNameR) + '+created ' + st.attrib(neiNameR) + + '+created ' + st.attrib(vwName) + '+created ' + st.attrib(ewName); + + + } else { + + rmat_gen(); + twostep_sort(); + set_neighbour(); + srcR = dst; + dstR = src; + twostep_sortR(); + set_neighbourR(); + + repMsg = sNv + '+ ' + sNe + '+ ' + sDirected + ' +' + sWeighted + + '+created ' + st.attrib(srcName) + '+created ' + st.attrib(dstName) + + '+created ' + st.attrib(startName) + '+created ' + st.attrib(neiName) + + '+created ' + st.attrib(srcNameR) + '+created ' + st.attrib(dstNameR) + + '+created ' + st.attrib(startNameR) + '+created ' + st.attrib(neiNameR) ; + + + } + } + smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return repMsg; + } + + + proc segBFSMsg(cmd: string, payload: bytes, st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + //var (n_verticesN,n_edgesN,directedN,weightedN,srcN, dstN, startN, 
neighbourN,vweightN,eweightN, rootN ) + // = payload.decode().splitMsgToTuple(10); + var (n_verticesN,n_edgesN,directedN,weightedN,restpart ) + = payload.decode().splitMsgToTuple(5); + var Nv=n_verticesN:int; + var Ne=n_edgesN:int; + var Directed=directedN:int; + var Weighted=weightedN:int; + var depthName:string; + var depth=-1: [0..Nv-1] int; + var root:int; + depth[root]=0; + var srcN, dstN, startN, neighbourN,vweightN,eweightN, rootN :string; + var srcRN, dstRN, startRN, neighbourRN:string; + + //proc bfs_kernel(nei:[?D1] int, start_i:[?D2] int,dst:[?D3] int):string throws{ + proc bfs_kernel(nei:[?D1] int, start_i:[?D2] int,dst:[?D3] int){ + root=try! rootN:int; + var cur_level=0; + var SetCurF= try! new set(int,parSafe = true); + var SetNextF=try! new set(int,parSafe = true); + try! SetCurF.add(root); + var numCurF=1:int; + + while (numCurF>0) { + SetNextF.clear(); + forall i in SetCurF with (ref SetNextF) { + var numNF=-1 :int; + ref nf=nei; + ref sf=start_i; + ref df=dst; + numNF=nf[i]; + ref NF=df[sf[i]..sf[i]+numNF-1]; + if (numNF>0) { + //forall j in NF { + for j in NF { + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + } + } + } + + }//end forall i + cur_level+=1; + //writeln("SetCurF= ", SetCurF, "SetNextF=", SetNextF, " level ", cur_level+1); + numCurF=SetNextF.size; + SetCurF=SetNextF; + } + } + + //proc return_depth(): string throws{ + proc return_depth(){ + var depthName = st.nextName(); + var depthEntry = new shared SymEntry(depth); + try! st.addEntry(depthName, depthEntry); + repMsg = 'created ' + (try! st.attrib(depthName)); + } + + //proc return_pair():string throws{ + proc return_pair(){ + var vertexValue = radixSortLSD_ranks(depth); + var levelValue=depth[vertexValue]; + var levelName = st.nextName(); + var vertexName = st.nextName(); + var levelEntry = new shared SymEntry(levelValue); + var vertexEntry = new shared SymEntry(vertexValue); + try! st.addEntry(levelName, levelEntry); + try! 
st.addEntry(vertexName, vertexEntry); + repMsg = 'created ' + st.attrib(levelName) + '+created ' + st.attrib(vertexName) ; + + } + if (Directed!=0) { + if (Weighted!=0) { + //repMsg=BFS_DW(Nv, Ne,Directed,Weighted,restpart,st); + //var pn = Reflection.getRoutineName(); + (srcN, dstN, startN, neighbourN,vweightN,eweightN, rootN)= + restpart.splitMsgToTuple(7); + + var ag = new owned SegGraphDW(Nv,Ne,Directed,Weighted,srcN,dstN, + startN,neighbourN,vweightN,eweightN, st); + bfs_kernel(ag.neighbour.a, ag.start_i.a,ag.dst.a); + return_depth(); + + } else { + //repMsg=BFS_D(Nv, Ne,Directed,Weighted,restpart,st); + + (srcN, dstN, startN, neighbourN,rootN )=restpart.splitMsgToTuple(5); + var ag = new owned SegGraphD(Nv,Ne,Directed,Weighted,srcN,dstN, + startN,neighbourN,st); + + + bfs_kernel(ag.neighbour.a, ag.start_i.a,ag.dst.a); + return_depth(); + + } + } + else { + if (Weighted!=0) { + //repMsg=BFS_UDW(Nv, Ne,Directed,Weighted,restpart,st); + + (srcN, dstN, startN, neighbourN,srcRN, dstRN, startRN, neighbourRN,vweightN,eweightN, rootN )= + restpart.splitMsgToTuple(11); + var ag = new owned SegGraphUDW(Nv,Ne,Directed,Weighted, + srcN,dstN, startN,neighbourN, + srcRN,dstRN, startRN,neighbourRN, + vweightN,eweightN, st); + bfs_kernel(ag.neighbour.a, ag.start_i.a,ag.dst.a); + return_depth(); + + } else { + //repMsg=BFS_UD(Nv, Ne,Directed,Weighted,restpart,st); + + (srcN, dstN, startN, neighbourN,srcRN, dstRN, startRN, neighbourRN, rootN )= + restpart.splitMsgToTuple(9); + var ag = new owned SegGraphUD(Nv,Ne,Directed,Weighted, + srcN,dstN, startN,neighbourN, + srcRN,dstRN, startRN,neighbourRN, + st); + + bfs_kernel(ag.neighbour.a, ag.start_i.a,ag.dst.a); + return_depth(); + } + } + return repMsg; + + } + + + + + proc BFS_D(Nv:int , Ne:int ,Directed:int ,Weighted:int,restpart:string ,st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + + var srcN, dstN, startN, neighbourN, rootN :string; + + (srcN, dstN, startN, 
neighbourN,rootN )=restpart.splitMsgToTuple(5); + var ag = new owned SegGraphD(Nv,Ne,Directed,Weighted,srcN,dstN, + startN,neighbourN,st); + + + var root=rootN:int; + var depth=-1: [0..Nv-1] int; + depth[root]=0; + var cur_level=0; + var SetCurF= new set(int,parSafe = true); + var SetNextF= new set(int,parSafe = true); + SetCurF.add(root); + var numCurF=1:int; + + while (numCurF>0) { + SetNextF.clear(); + forall i in SetCurF with (ref SetNextF) { + var numNF=-1 :int; + ref nf=ag.neighbour.a; + ref sf=ag.start_i.a; + ref df=ag.dst.a; + numNF=nf[i]; + ref NF=df[sf[i]..sf[i]+numNF-1]; + if (numNF>0) { + //forall j in NF { + for j in NF { + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + } + } + } + + }//end forall i + cur_level+=1; + //writeln("SetCurF= ", SetCurF, "SetNextF=", SetNextF, " level ", cur_level+1); + numCurF=SetNextF.size; + SetCurF=SetNextF; + } + + /* + var vertexValue = radixSortLSD_ranks(depth); + var levelValue=depth[vertexValue]; + //var depthName =st.nextName(); + var levelName = st.nextName(); + var vertexName = st.nextName(); + var levelEntry = new shared SymEntry(levelValue); + var vertexEntry = new shared SymEntry(vertexValue); + //var depthEntry = new shared SymEntry(depth); + st.addEntry(levelName, levelEntry); + st.addEntry(vertexName, vertexEntry); + //st.addEntry(depthName, depthEntry); + repMsg = 'created ' + st.attrib(levelName) + '+created ' + st.attrib(vertexName) ; + //repMsg = 'created ' + st.attrib(depthName); + */ + + var depthName = st.nextName(); + var depthEntry = new shared SymEntry(depth); + st.addEntry(depthName, depthEntry); + repMsg = 'created ' + st.attrib(depthName); + smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return repMsg; + } + + + + + proc BFS_DW(Nv:int, Ne:int,Directed:int,Weighted:int,restpart:string,st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + + var srcN, dstN, startN, neighbourN,vweightN,eweightN, rootN 
:string; + (srcN, dstN, startN, neighbourN,vweightN,eweightN, rootN)= + restpart.splitMsgToTuple(7); + + var ag = new owned SegGraphDW(Nv,Ne,Directed,Weighted,srcN,dstN, + startN,neighbourN,vweightN,eweightN, st); + var root=rootN:int; + var depth=-1: [0..Nv-1] int; + depth[root]=0; + var cur_level=0; + var SetCurF= new set(int,parSafe = true); + var SetNextF= new set(int,parSafe = true); + SetCurF.add(root); + var numCurF=1:int; + + while (numCurF>0) { + SetNextF.clear(); + forall i in SetCurF with (ref SetNextF) { + var numNF=-1 :int; + ref nf=ag.neighbour.a; + ref sf=ag.start_i.a; + ref df=ag.dst.a; + numNF=nf[i]; + ref NF=df[sf[i]..sf[i]+numNF-1]; + //writeln("current node ",i, " has ", numNF, " neighbours and they are ",NF); + if (numNF>0) { + //forall j in NF { + for j in NF { + //writeln("current node ",i, " check neibour ",j, " its depth=",depth[j]); + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + //writeln("current node ",i, " add ", j, " into level ", cur_level+1, " SetNextF=", SetNextF); + } + } + } + + }//end forall i + cur_level+=1; + //writeln("SetCurF= ", SetCurF, "SetNextF=", SetNextF, " level ", cur_level+1); + numCurF=SetNextF.size; + SetCurF=SetNextF; + } + /* + var vertexValue = radixSortLSD_ranks(depth); + var levelValue=depth[vertexValue]; + + var levelName = st.nextName(); + var vertexName = st.nextName(); + var levelEntry = new shared SymEntry(levelValue); + var vertexEntry = new shared SymEntry(vertexValue); + st.addEntry(levelName, levelEntry); + st.addEntry(vertexName, vertexEntry); + repMsg = 'created ' + st.attrib(levelName) + '+created ' + st.attrib(vertexName) ; + */ + var depthName = st.nextName(); + var depthEntry = new shared SymEntry(depth); + st.addEntry(depthName, depthEntry); + repMsg = 'created ' + st.attrib(depthName); + smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return repMsg; + } + + + + + + + + + proc BFS_UD(Nv:int , Ne:int ,Directed:int ,Weighted:int,restpart:string ,st: 
borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + + + var srcN, dstN, startN, neighbourN, rootN :string; + var srcRN, dstRN, startRN, neighbourRN:string; + + (srcN, dstN, startN, neighbourN,srcRN, dstRN, startRN, neighbourRN, rootN )= + restpart.splitMsgToTuple(9); + var ag = new owned SegGraphUD(Nv,Ne,Directed,Weighted, + srcN,dstN, startN,neighbourN, + srcRN,dstRN, startRN,neighbourRN, + st); + + var root=rootN:int; + var depth=-1: [0..Nv-1] int; + depth[root]=0; + var cur_level=0; + var SetCurF= new set(int,parSafe = true); + var SetNextF= new set(int,parSafe = true); + SetCurF.add(root); + var numCurF=1:int; + + //writeln("========================BSF_UD=================================="); + while (numCurF>0) { + SetNextF.clear(); + forall i in SetCurF with (ref SetNextF) { + var numNF=-1 :int; + ref nf=ag.neighbour.a; + ref sf=ag.start_i.a; + ref df=ag.dst.a; + numNF=nf[i]; + ref NF=df[sf[i]..sf[i]+numNF-1]; + if (numNF>0) { + //forall j in NF { + //writeln("current node ",i, " has neibours ",NF); + for j in NF { + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + //writeln("current node ",i, " add ", j, + // " into level ", cur_level+1, " SetNextF=", SetNextF); + } + } + } + // reverse direction + if (Directed!=1) { + + var numNFR=-1 :int; + ref nfR=ag.neighbourR.a; + ref sfR=ag.start_iR.a; + ref dfR=ag.dstR.a; + numNFR=nfR[i]; + ref NFR=dfR[sfR[i]..sfR[i]+numNFR-1]; + if (numNFR>0) { + //writeln("current node ",i, " has reverse neibours ",NFR); + //forall j in NFR { + for j in NFR { + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + //writeln("current node ",i, " add reverse ", j, + // " into level ", cur_level+1, " SetNextF=", SetNextF); + } + } + } + } + + + }//end forall i + cur_level+=1; + //writeln("SetCurF= ", SetCurF, "SetNextF=", SetNextF, " level ", cur_level+1); + numCurF=SetNextF.size; + SetCurF=SetNextF; + } + /* + var vertexValue = radixSortLSD_ranks(depth); + var 
levelValue=depth[vertexValue]; + + var levelName = st.nextName(); + var vertexName = st.nextName(); + var levelEntry = new shared SymEntry(levelValue); + var vertexEntry = new shared SymEntry(vertexValue); + st.addEntry(levelName, levelEntry); + st.addEntry(vertexName, vertexEntry); + + */ + var depthName = st.nextName(); + var depthEntry = new shared SymEntry(depth); + st.addEntry(depthName, depthEntry); + repMsg = 'created ' + st.attrib(depthName); + + smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return repMsg; + } + + + + + + + proc BFS_UDW(Nv:int , Ne:int ,Directed:int ,Weighted:int,restpart:string ,st: borrowed SymTab): string throws { + var pn = Reflection.getRoutineName(); + var repMsg: string; + + + var srcN, dstN, startN, neighbourN,vweightN,eweightN, rootN :string; + var srcRN, dstRN, startRN, neighbourRN:string; + + (srcN, dstN, startN, neighbourN,srcRN, dstRN, startRN, neighbourRN,vweightN,eweightN, rootN )= + restpart.splitMsgToTuple(11); + var ag = new owned SegGraphUDW(Nv,Ne,Directed,Weighted, + srcN,dstN, startN,neighbourN, + srcRN,dstRN, startRN,neighbourRN, + vweightN,eweightN, st); + + //writeln("========================BSF_UDW=================================="); + var root=rootN:int; + var depth=-1: [0..Nv-1] int; + depth[root]=0; + var cur_level=0; + var SetCurF= new set(int,parSafe = true); + var SetNextF= new set(int,parSafe = true); + SetCurF.add(root); + var numCurF=1:int; + + /* + writeln("Fisrt Check if the values are correct"); + writeln("src="); + writeln(ag.src.a); + writeln("dst="); + writeln(ag.dst.a); + writeln("neighbours="); + writeln(ag.neighbour.a); + writeln("start="); + writeln(ag.start_i.a); + + writeln("srcR="); + writeln(ag.srcR.a); + writeln("dstR="); + writeln(ag.dstR.a); + writeln("neighbours="); + writeln(ag.neighbourR.a); + writeln("startR="); + writeln(ag.start_iR.a); + + for i in 0..ag.n_vertices-1 do { + writeln("node ",i, " has ", ag.neighbour.a[i], " neighbours", + " 
start=",ag.start_i.a[i], " they are ", + ag.dst.a[ag.start_i.a[i]..ag.start_i.a[i]-1+ag.neighbour.a[i]]); + } + for i in 0..ag.n_vertices-1 do { + writeln("reverse node ",i, " has ", ag.neighbourR.a[i], " neighbours", + " start=",ag.start_iR.a[i], " they are ", + ag.dstR.a[ag.start_iR.a[i]..ag.start_iR.a[i]-1+ag.neighbourR.a[i]]); + } + */ + + while (numCurF>0) { + //writeln("start loop SetCurF=", SetCurF); + SetNextF.clear(); + forall i in SetCurF with (ref SetNextF) { + var numNF=-1 :int; + ref nf=ag.neighbour.a; + ref sf=ag.start_i.a; + ref df=ag.dst.a; + numNF=nf[i]; + ref NF=df[sf[i]..sf[i]+numNF-1]; + //writeln("current node ",i, " has ", numNF, " neighbours and they are ",NF); + if (numNF>0) { + //forall j in NF { + for j in NF { + //writeln("current node ",i, " check neibour ",j, " its depth=",depth[j]); + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + //writeln("current node ",i, " add ", j, " into level ", cur_level+1, " SetNextF=", SetNextF); + } + } + } + // reverse direction + if (Directed!=1) { + var numNFR=-1 :int; + ref nfR=ag.neighbourR.a; + ref sfR=ag.start_iR.a; + ref dfR=ag.dstR.a; + numNFR=nfR[i]; + ref NFR=dfR[sfR[i]..sfR[i]+numNFR-1]; + //writeln("current node ",i, " has ", numNFR ," reverse neighbours and they are ",NFR); + if ( numNFR>0) { + //forall j in NFR { + for j in NFR { + //writeln("current node ",i, " check neibour ",j, " its depth=",depth[j]); + if (depth[j]==-1) { + depth[j]=cur_level+1; + SetNextF.add(j); + //writeln("current node ",i, " add reverse ", j, + // " into level ", cur_level+1, " SetNextF=", SetNextF); + } + } + } + } + + + }//end forall i + cur_level+=1; + //writeln("SetCurF= ", SetCurF, "SetNextF=", SetNextF, " level ", cur_level+1); + numCurF=SetNextF.size; + SetCurF=SetNextF; + } + /* + var vertexValue = radixSortLSD_ranks(depth); + var levelValue=depth[vertexValue]; + + var levelName = st.nextName(); + var vertexName = st.nextName(); + var levelEntry = new shared SymEntry(levelValue); + var 
vertexEntry = new shared SymEntry(vertexValue); + st.addEntry(levelName, levelEntry); + st.addEntry(vertexName, vertexEntry); + repMsg = 'created ' + st.attrib(levelName) + '+created ' + st.attrib(vertexName) ; + */ + var depthName = st.nextName(); + var depthEntry = new shared SymEntry(depth); + st.addEntry(depthName, depthEntry); + repMsg = 'created ' + st.attrib(depthName); + smLogger.debug(getModuleName(),getRoutineName(),getLineNumber(),repMsg); + return repMsg; + } + + + + + } diff --git a/src/SipHash.chpl b/src/SipHash.chpl index a64622bd40..6ffc8819ac 100644 --- a/src/SipHash.chpl +++ b/src/SipHash.chpl @@ -58,6 +58,27 @@ module SipHash { (p[7]: uint(64) << 56)); } + private inline proc U8TO64_LE(p: [] int, D): uint(64) { + return ((p[D.low]: uint(64)) | + (p[D.low+1]: uint(64) << 8) | + (p[D.low+2]: uint(64) << 16) | + (p[D.low+3]: uint(64) << 24) | + (p[D.low+4]: uint(64) << 32) | + (p[D.low+5]: uint(64) << 40) | + (p[D.low+6]: uint(64) << 48) | + (p[D.low+7]: uint(64) << 56)); + } + + private inline proc U8TO64_LE(p: c_ptr(int)): uint(64) { + return ((p[0]: uint(64)) | + (p[1]: uint(64) << 8) | + (p[2]: uint(64) << 16) | + (p[3]: uint(64) << 24) | + (p[4]: uint(64) << 32) | + (p[5]: uint(64) << 40) | + (p[6]: uint(64) << 48) | + (p[7]: uint(64) << 56)); + } private inline proc byte_reverse(b: uint(64)): uint(64) { var c: uint(64); @@ -80,6 +101,9 @@ module SipHash { proc sipHash128(msg: [] uint(8), D): 2*uint(64) { return computeSipHashLocalized(msg, D, 16); } + proc sipHash128(msg: [] int, D): 2*uint(64) { + return computeSipHashLocalized(msg, D, 16); + } private proc computeSipHashLocalized(msg: [] uint(8), D, param outlen: int) { if contiguousIndices(msg) { @@ -107,6 +131,31 @@ module SipHash { return computeSipHash(msg, D, outlen); } + private proc computeSipHashLocalized(msg: [] int, D, param outlen: int) { + if contiguousIndices(msg) { + ref start = msg[D.low]; + if D.high < D.low { + return computeSipHash(c_ptrTo(start), 0..#0, outlen); + } + ref 
end = msg[D.high]; + const startLocale = start.locale.id; + const endLocale = end.locale.id; + const hereLocale = here.id; + const l = D.size; + if startLocale == endLocale { + if startLocale == hereLocale { + return computeSipHash(c_ptrTo(start), 0..#l, outlen); + } else { + var a = c_malloc(msg.eltType, l); + GET(a, startLocale, getAddr(start), l); + var h = computeSipHash(a, 0..#l, outlen); + c_free(a); + return h; + } + } + } + return computeSipHash(msg, D, outlen); + } private proc computeSipHash(msg, D, param outlen: int) { if !((outlen == 8) || (outlen == 16)) { compilerError("outlen must be 8 or 16"); diff --git a/src/arkouda_server.chpl b/src/arkouda_server.chpl index e78bb95e8e..83efcd57ba 100644 --- a/src/arkouda_server.chpl +++ b/src/arkouda_server.chpl @@ -241,8 +241,17 @@ proc main() { when "segmentedIndex" {repMsg = segmentedIndexMsg(cmd, payload, st);} when "segmentedBinopvv" {repMsg = segBinopvvMsg(cmd, payload, st);} when "segmentedBinopvs" {repMsg = segBinopvsMsg(cmd, payload, st);} + when "segmentedBinopvvInt" {repMsg = segBinopvvIntMsg(cmd, payload, st);} + when "segmentedBinopvsInt" {repMsg = segBinopvsIntMsg(cmd, payload, st);} when "segmentedGroup" {repMsg = segGroupMsg(cmd, payload, st);} + when "segmentedSuffixAry"{repMsg = segSuffixArrayMsg(cmd, payload, st);} + when "segmentedLCP" {repMsg = segLCPMsg(cmd, payload, st);} + when "segmentedSAFile" {repMsg = segSAFileMsg(cmd, payload, st);} + when "segmentedGraphFile" {repMsg = segGraphFileMsg(cmd, payload, st);} + when "segmentedRMAT" {repMsg = segrmatgenMsg(cmd, payload, st);} + when "segmentedGraphBFS" {repMsg = segBFSMsg(cmd, payload, st);} when "segmentedIn1d" {repMsg = segIn1dMsg(cmd, payload, st);} + when "segmentedIn1dInt" {repMsg = segIn1dIntMsg(cmd, payload, st);} when "lshdf" {repMsg = lshdfMsg(cmd, payload, st);} when "readhdf" {repMsg = readhdfMsg(cmd, payload, st);} when "readAllHdf" {repMsg = readAllHdfMsg(cmd, payload, st);} diff --git 
a/thirdparty/SA/libdivsufsort/include/config.h b/thirdparty/SA/libdivsufsort/include/config.h new file mode 100644 index 0000000000..fb4e71261c --- /dev/null +++ b/thirdparty/SA/libdivsufsort/include/config.h @@ -0,0 +1,81 @@ +/* + * config.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** Define to the version of this package. **/ +#define PROJECT_VERSION_FULL "2.0.1-14-g5f60d6f" + +/** Define to 1 if you have the header files. 
**/ +#define HAVE_INTTYPES_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDINT_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_STRING_H 1 +#define HAVE_STRINGS_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_SYS_TYPES_H 1 + +/** for WinIO **/ +/* #undef HAVE_IO_H */ +/* #undef HAVE_FCNTL_H */ +/* #undef HAVE__SETMODE */ +/* #undef HAVE_SETMODE */ +/* #undef HAVE__FILENO */ +/* #undef HAVE_FOPEN_S */ +/* #undef HAVE__O_BINARY */ +#ifndef HAVE__SETMODE +# if HAVE_SETMODE +# define _setmode setmode +# define HAVE__SETMODE 1 +# endif +# if HAVE__SETMODE && !HAVE__O_BINARY +# define _O_BINARY 0 +# define HAVE__O_BINARY 1 +# endif +#endif + +/** for inline **/ +#ifndef INLINE +# define INLINE inline +#endif + +/** for VC++ warning **/ +#ifdef _MSC_VER +#pragma warning(disable: 4127) +#endif + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _CONFIG_H */ diff --git a/thirdparty/SA/libdivsufsort/include/divsufsort.h b/thirdparty/SA/libdivsufsort/include/divsufsort.h new file mode 100644 index 0000000000..6d3e648701 --- /dev/null +++ b/thirdparty/SA/libdivsufsort/include/divsufsort.h @@ -0,0 +1,180 @@ +/* + * divsufsort.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#include + +#ifndef DIVSUFSORT_API +# ifdef DIVSUFSORT_BUILD_DLL +# define DIVSUFSORT_API +# else +# define DIVSUFSORT_API +# endif +#endif + +/*- Datatypes -*/ +#ifndef SAUCHAR_T +#define SAUCHAR_T +typedef uint8_t sauchar_t; +#endif /* SAUCHAR_T */ +#ifndef SAINT_T +#define SAINT_T +typedef int32_t saint_t; +#endif /* SAINT_T */ +#ifndef SAIDX_T +#define SAIDX_T +typedef int32_t saidx_t; +#endif /* SAIDX_T */ +#ifndef PRIdSAINT_T +#define PRIdSAINT_T PRId32 +#endif /* PRIdSAINT_T */ +#ifndef PRIdSAIDX_T +#define PRIdSAIDX_T PRId32 +#endif /* PRIdSAIDX_T */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @return The primary index if no error occurred, -1 or -2 otherwise. 
+ */ +DIVSUFSORT_API +saidx_t +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n); + +/** + * Returns the version of the divsufsort library. + * @return The version number string. + */ +DIVSUFSORT_API +const char * +divsufsort_version(void); + + +/** + * Constructs the burrows-wheeler transformed string of a given string and suffix array. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param SA[0..n-1] The suffix array. (can be NULL) + * @param n The length of the given string. + * @param idx The output primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +bw_transform(const sauchar_t *T, sauchar_t *U, + saidx_t *SA /* can NULL */, + saidx_t n, saidx_t *idx); + +/** + * Inverse BW-transforms a given BWTed string. + * @param T[0..n-1] The input string. + * @param U[0..n-1] The output string. (can be T) + * @param A[0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param idx The primary index. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +DIVSUFSORT_API +saint_t +inverse_bw_transform(const sauchar_t *T, sauchar_t *U, + saidx_t *A /* can NULL */, + saidx_t n, saidx_t idx); + +/** + * Checks the correctness of a given suffix array. + * @param T[0..n-1] The input string. + * @param SA[0..n-1] The input suffix array. + * @param n The length of the given string. + * @param verbose The verbose mode. + * @return 0 if no error occurred. + */ +DIVSUFSORT_API +saint_t +sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose); + +/** + * Search for the pattern P in the string T. + * @param T[0..Tsize-1] The input string. + * @param Tsize The length of the given string. + * @param P[0..Psize-1] The input pattern string. + * @param Psize The length of the given pattern string. + * @param SA[0..SAsize-1] The input suffix array. + * @param SAsize The length of the given suffix array. 
+ * @param idx The output index. + * @return The count of matches if no error occurred, -1 otherwise. + */ +DIVSUFSORT_API +saidx_t +sa_search(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + const saidx_t *SA, saidx_t SAsize, + saidx_t *left); + +/** + * Search for the character c in the string T. + * @param T[0..Tsize-1] The input string. + * @param Tsize The length of the given string. + * @param SA[0..SAsize-1] The input suffix array. + * @param SAsize The length of the given suffix array. + * @param c The input character. + * @param idx The output index. + * @return The count of matches if no error occurred, -1 otherwise. + */ +DIVSUFSORT_API +saidx_t +sa_simplesearch(const sauchar_t *T, saidx_t Tsize, + const saidx_t *SA, saidx_t SAsize, + saint_t c, saidx_t *left); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/thirdparty/SA/libdivsufsort/include/divsufsort_private.h b/thirdparty/SA/libdivsufsort/include/divsufsort_private.h new file mode 100644 index 0000000000..a6d630d78e --- /dev/null +++ b/thirdparty/SA/libdivsufsort/include/divsufsort_private.h @@ -0,0 +1,210 @@ +/* + * divsufsort_private.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define HAVE_CONFIG_H 1 +#undef _DIVSUFSORT_PRIVATE_H + +#ifndef _DIVSUFSORT_PRIVATE_H + #define _DIVSUFSORT_PRIVATE_H 1 + + #ifdef __cplusplus + extern "C" { + #endif /* __cplusplus */ + + #if HAVE_CONFIG_H + # include "config.h" + #endif + #include + #include + #if HAVE_STRING_H + # include + #endif + #if HAVE_STDLIB_H + # include + #endif + #if HAVE_MEMORY_H + # include + #endif + #if HAVE_STDDEF_H + # include + #endif + #if HAVE_STRINGS_H + # include + #endif + #if HAVE_INTTYPES_H + # include + #else + # if HAVE_STDINT_H + # include + # endif +#endif +#if defined(BUILD_DIVSUFSORT64) + # include "divsufsort64.h" + # ifndef SAIDX_T + # define SAIDX_T + # define saidx_t saidx64_t + # endif /* SAIDX_T */ + # ifndef PRIdSAIDX_T + # define PRIdSAIDX_T PRIdSAIDX64_T + # endif /* PRIdSAIDX_T */ + # define divsufsort divsufsort64 + # define divbwt divbwt64 + # define divsufsort_version divsufsort64_version + # define bw_transform bw_transform64 + # define inverse_bw_transform inverse_bw_transform64 + # define sufcheck sufcheck64 + # define sa_search sa_search64 + # define sa_simplesearch sa_simplesearch64 + # define sssort sssort64 + # define trsort trsort64 +#else + # include "divsufsort.h" +#endif + + +/*- Constants -*/ +#if !defined(UINT8_MAX) + # define UINT8_MAX (255) +#endif /* UINT8_MAX */ +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) + # undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) + # define ALPHABET_SIZE (UINT8_MAX + 1) +#endif +/* for divsufsort.c 
*/ +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +/* for sssort.c */ +#if defined(SS_INSERTIONSORT_THRESHOLD) + # if SS_INSERTIONSORT_THRESHOLD < 1 + # undef SS_INSERTIONSORT_THRESHOLD + # define SS_INSERTIONSORT_THRESHOLD (1) + # endif + #else + # define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) + # if SS_BLOCKSIZE < 0 + # undef SS_BLOCKSIZE + # define SS_BLOCKSIZE (0) + # elif 32768 <= SS_BLOCKSIZE + # undef SS_BLOCKSIZE + # define SS_BLOCKSIZE (32767) + # endif +#else + # define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 + # if defined(BUILD_DIVSUFSORT64) + # define SS_MISORT_STACKSIZE (96) + # else + # define SS_MISORT_STACKSIZE (64) + # endif + #elif SS_BLOCKSIZE <= 4096 + # define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#if defined(BUILD_DIVSUFSORT64) +# define SS_SMERGE_STACKSIZE (64) +#else +# define SS_SMERGE_STACKSIZE (32) +#endif +/* for trsort.c */ +#define TR_INSERTIONSORT_THRESHOLD (8) +#if defined(BUILD_DIVSUFSORT64) +# define TR_STACKSIZE (96) +#else +# define TR_STACKSIZE (64) +#endif + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +/* for divsufsort.c */ +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Prototypes -*/ +/* sssort.c */ +void +sssort(const sauchar_t *Td, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t *buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix); +/* trsort.c */ +void +trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_PRIVATE_H */ diff --git a/thirdparty/SA/libdivsufsort/include/lfs.h b/thirdparty/SA/libdivsufsort/include/lfs.h new file mode 100644 index 0000000000..7ef88f0b4d --- /dev/null +++ b/thirdparty/SA/libdivsufsort/include/lfs.h @@ -0,0 +1,56 @@ +/* + * lfs.h for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _LFS_H +#define _LFS_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#ifndef __STRICT_ANSI__ +# define LFS_OFF_T off_t +# define LFS_FOPEN fopen +# define LFS_FTELL ftello +# define LFS_FSEEK fseeko +# define LFS_PRId PRIdMAX +#else +# define LFS_OFF_T long +# define LFS_FOPEN fopen +# define LFS_FTELL ftell +# define LFS_FSEEK fseek +# define LFS_PRId "ld" +#endif +#ifndef PRIdOFF_T +# define PRIdOFF_T LFS_PRId +#endif + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _LFS_H */ diff --git a/thirdparty/SA/libdivsufsort/lib/divsufsort.c b/thirdparty/SA/libdivsufsort/lib/divsufsort.c new file mode 100644 index 0000000000..4835351f61 --- /dev/null +++ b/thirdparty/SA/libdivsufsort/lib/divsufsort.c @@ -0,0 +1,400 @@ +/* + * divsufsort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +//#include "divsufsort_private.h" +#include "../include/divsufsort_private.h" +#ifdef _OPENMP +# include +#endif + + +/*- Private Functions -*/ + +/* Sorts suffixes of type B*. */ +static +saidx_t +sort_typeBstar(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n) { + saidx_t *PAb, *ISAb, *buf; +#ifdef _OPENMP + saidx_t *curbuf; + saidx_t l; +#endif + saidx_t i, j, k, t, m, bufsize; + saint_t c0, c1; +#ifdef _OPENMP + saint_t d0, d1; + int tmp; +#endif + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. 
*/ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
*/ +#ifdef _OPENMP + tmp = omp_get_max_threads(); + buf = SA + m, bufsize = (n - (2 * m)) / tmp; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) + { + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of tyoe B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. 
*/ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n, saidx_t m) { + saidx_t *i, *j, *k; + saidx_t s; + saint_t c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +saidx_t +construct_BWT(const sauchar_t *T, saidx_t *SA, + saidx_t *bucket_A, saidx_t *bucket_B, + saidx_t n, saidx_t m) { + saidx_t *i, *j, *k, *orig; + saidx_t s; + saint_t c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((saidx_t)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. 
*/ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +saint_t +divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { + saidx_t *bucket_A, *bucket_B; + saidx_t m; + saint_t err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +saidx_t +divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) { + saidx_t *B; + saidx_t *bucket_A, *bucket_B; + saidx_t m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); } + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + + /* Copy to output string. 
*/ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} + +const char * +divsufsort_version(void) { + return "2.0.1-14-g5f60d6f"; +// return PROJECT_VERSION_FULL; +} diff --git a/thirdparty/SA/libdivsufsort/lib/sssort.c b/thirdparty/SA/libdivsufsort/lib/sssort.c new file mode 100644 index 0000000000..9542b6dbb8 --- /dev/null +++ b/thirdparty/SA/libdivsufsort/lib/sssort.c @@ -0,0 +1,826 @@ +/* + * sssort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +//#include "divsufsort_private.h" +#include "../include/divsufsort_private.h" + + +/*- Private Functions -*/ + +static const saint_t lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +//static INLINE +static inline +saint_t +ss_ilg(saidx_t n) { +#if SS_BLOCKSIZE == 0 +# if defined(BUILD_DIVSUFSORT64) + return (n >> 32) ? + ((n >> 48) ? + ((n >> 56) ? + 56 + lg_table[(n >> 56) & 0xff] : + 48 + lg_table[(n >> 48) & 0xff]) : + ((n >> 40) ? + 40 + lg_table[(n >> 40) & 0xff] : + 32 + lg_table[(n >> 32) & 0xff])) : + ((n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff])); +# else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +# endif +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? 
+ 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const saint_t sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +//static INLINE +static inline +saidx_t +ss_isqrt(saidx_t x) { + saidx_t y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? 
+ 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +//static INLINE +static inline +saint_t +ss_compare(const sauchar_t *T, + const saidx_t *p1, const saidx_t *p2, + saidx_t depth) { + const sauchar_t *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? -1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, saidx_t depth) { + saidx_t *i, *j; + saidx_t t; + saint_t r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +//static INLINE +static inline +void +ss_fixdown(const sauchar_t *Td, const saidx_t *PA, + saidx_t *SA, saidx_t i, saidx_t size) { + saidx_t j, k; + saidx_t v; + saint_t c, d, e; + + 
for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+    d = Td[PA[SA[k = j++]]]; /* key of the left child */
+    if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } /* pick the larger of the two children */
+    if(d <= c) { break; } /* parent >= both children: heap property restored */
+  }
+  SA[i] = v;
+}
+
+/* Simple top-down heapsort of SA[0..size-1], keyed by Td[PA[.]]. */
+static
+void
+ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
+  saidx_t i, m;
+  saidx_t t;
+
+  m = size;
+  if((size % 2) == 0) { /* even size: sift the last element in separately so the implicit tree stays complete */
+    m--;
+    if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } /* build the max-heap bottom-up */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
+  for(i = m - 1; 0 < i; --i) { /* repeatedly move the current maximum to the end */
+    t = SA[0], SA[0] = SA[i];
+    ss_fixdown(Td, PA, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements (compared by key Td[PA[.]]). */
+//static INLINE
+static inline
+saidx_t *
+ss_median3(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+  saidx_t *t; /* scratch for SWAP */
+  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
+  if(Td[PA[*v2]] > Td[PA[*v3]]) {
+    if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
+    else { return v3; }
+  }
+  return v2;
+}
+
+/* Returns the median of five elements (compared by key Td[PA[.]]). */
+//static INLINE
+static inline
+saidx_t *
+ss_median5(const sauchar_t *Td, const saidx_t *PA,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+  saidx_t *t; /* scratch for SWAP */
+  if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+  if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+  if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+  if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+  return v3;
+}
+
+/* Returns the pivot element. 
 */
+//static INLINE
+static inline
+saidx_t *
+ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
+  saidx_t *middle;
+  saidx_t t;
+
+  t = last - first; /* number of elements in [first, last) */
+  middle = first + t / 2;
+
+  if(t <= 512) {
+    if(t <= 32) {
+      return ss_median3(Td, PA, first, middle, last - 1); /* small range: median of three */
+    } else {
+      t >>= 2;
+      return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); /* medium range: median of five */
+    }
+  }
+  t >>= 3; /* large range: pseudo-median of nine (median of three medians of three) */
+  first = ss_median3(Td, PA, first, first + t, first + (t << 1));
+  middle = ss_median3(Td, PA, middle - t, middle, middle + t);
+  last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
+  return ss_median3(Td, PA, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Binary partition for substrings: entries p with PA[*p] + depth >= PA[*p + 1] + 1 (substring exhausted at this depth) go to the front, flagged by bitwise complement. */
+//static INLINE
+static inline
+saidx_t *
+ss_partition(const saidx_t *PA,
+             saidx_t *first, saidx_t *last, saidx_t depth) {
+  saidx_t *a, *b;
+  saidx_t t;
+  for(a = first - 1, b = last;;) {
+    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } /* advance a over exhausted entries, complement-flagging them */
+    for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } /* retreat b over non-exhausted entries */
+    if(b <= a) { break; }
+    t = ~*b; /* swap the out-of-place pair, transferring the complement flag to the front side */
+    *b = *a;
+    *a = t;
+  }
+  if(first < a) { *first = ~*first; } /* toggle the complement on the first left-group element (group marker) */
+  return a; /* boundary between the two groups */
+}
+
+/* Multikey introsort for medium size groups. 
*/ +static +void +ss_mintrosort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE]; + const sauchar_t *Td; + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t s, t; + saint_t ssize; + saint_t limit; + saint_t v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = 
Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +//static INLINE +static inline +void +ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) { + saidx_t t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +//static INLINE +static inline +void +ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) { + saidx_t *a, *b, t; + saidx_t l, r; + l = 
middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t depth) { + const saidx_t *p; + saidx_t *a, *b; + saidx_t len, half; + saint_t q, r; + saint_t x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t depth) { + saidx_t *a, *b, *c, *bufend; + saidx_t t; + saint_t r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t depth) { + const saidx_t *p1, *p2; + saidx_t *a, *b, *c, *bufend; + saidx_t t; + saint_t r; + saint_t x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + 
*a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *middle, saidx_t *last, + saidx_t *buf, saidx_t bufsize, saidx_t depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE]; + saidx_t *l, *r, *lm, *rm; + saidx_t m, len, half; + saint_t ssize; + saint_t check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += 
half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +/* Substring sort */ +void +sssort(const sauchar_t *T, const saidx_t *PA, + saidx_t *first, saidx_t *last, + saidx_t *buf, saidx_t bufsize, + saidx_t depth, saidx_t n, saint_t lastsuffix) { + saidx_t *a; +#if SS_BLOCKSIZE != 0 + saidx_t *b, *middle, *curbuf; + saidx_t j, k, curbufsize, limit; +#endif + saidx_t i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last 
- (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} diff --git a/thirdparty/SA/libdivsufsort/lib/trsort.c b/thirdparty/SA/libdivsufsort/lib/trsort.c new file mode 100644 index 0000000000..88123f5627 --- /dev/null +++ b/thirdparty/SA/libdivsufsort/lib/trsort.c @@ -0,0 +1,595 @@ +/* + * trsort.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +//#include "divsufsort_private.h" +#include "../include/divsufsort_private.h" + + +/*- Private Functions -*/ + +static const saint_t lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +//static INLINE +static inline +saint_t +tr_ilg(saidx_t n) { +#if defined(BUILD_DIVSUFSORT64) + return (n >> 32) ? + ((n >> 48) ? + ((n >> 56) ? 
+ 56 + lg_table[(n >> 56) & 0xff] : + 48 + lg_table[(n >> 48) & 0xff]) : + ((n >> 40) ? + 40 + lg_table[(n >> 40) & 0xff] : + 32 + lg_table[(n >> 32) & 0xff])) : + ((n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff])); +#else + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#endif +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) { + saidx_t *a, *b; + saidx_t t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +//static INLINE +static inline +void +tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) { + saidx_t j, k; + saidx_t v; + saidx_t c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. 
 */
+static
+void
+tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
+  saidx_t i, m;
+  saidx_t t;
+
+  m = size;
+  if((size % 2) == 0) { /* even size: sift the last element in separately so the implicit tree stays complete */
+    m--;
+    if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
+  }
+
+  for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } /* build the max-heap bottom-up, keyed by ISAd[.] */
+  if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
+  for(i = m - 1; 0 < i; --i) { /* repeatedly move the current maximum to the end */
+    t = SA[0], SA[0] = SA[i];
+    tr_fixdown(ISAd, SA, 0, i);
+    SA[i] = t;
+  }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements (compared by key ISAd[.]). */
+//static INLINE
+static inline
+saidx_t *
+tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+  saidx_t *t; /* scratch for SWAP */
+  if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
+  if(ISAd[*v2] > ISAd[*v3]) {
+    if(ISAd[*v1] > ISAd[*v3]) { return v1; }
+    else { return v3; }
+  }
+  return v2;
+}
+
+/* Returns the median of five elements (compared by key ISAd[.]). */
+//static INLINE
+static inline
+saidx_t *
+tr_median5(const saidx_t *ISAd,
+           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+  saidx_t *t; /* scratch for SWAP */
+  if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+  if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+  if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+  if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+  if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+  if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+  return v3;
+}
+
+/* Returns the pivot element. 
 */
+//static INLINE
+static inline
+saidx_t *
+tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+  saidx_t *middle;
+  saidx_t t;
+
+  t = last - first; /* number of elements in [first, last) */
+  middle = first + t / 2;
+
+  if(t <= 512) {
+    if(t <= 32) {
+      return tr_median3(ISAd, first, middle, last - 1); /* small range: median of three */
+    } else {
+      t >>= 2;
+      return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); /* medium range: median of five */
+    }
+  }
+  t >>= 3; /* large range: pseudo-median of nine (median of three medians of three) */
+  first = tr_median3(ISAd, first, first + t, first + (t << 1));
+  middle = tr_median3(ISAd, middle - t, middle, middle + t);
+  last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
+  return tr_median3(ISAd, first, middle, last);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+typedef struct _trbudget_t trbudget_t; /* work budget that bounds the extra effort tr_introsort may spend; see trbudget_check */
+struct _trbudget_t {
+  saidx_t chance; /* refills left before requests start being rejected */
+  saidx_t remain; /* budget remaining in the current refill */
+  saidx_t incval; /* amount restored by each refill */
+  saidx_t count; /* accumulated size of rejected requests */
+};
+
+//static INLINE
+static inline
+void
+trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
+  budget->chance = chance;
+  budget->remain = budget->incval = incval;
+}
+
+//static INLINE
+static inline
+saint_t
+trbudget_check(trbudget_t *budget, saidx_t size) {
+  if(size <= budget->remain) { budget->remain -= size; return 1; } /* charge against the current budget */
+  if(budget->chance == 0) { budget->count += size; return 0; } /* no chances left: record the rejected size */
+  budget->remain += budget->incval - size; /* spend one chance to refill */
+  budget->chance -= 1;
+  return 1;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+//static INLINE
+static inline
+void
+tr_partition(const saidx_t *ISAd, /* three-way partition of [first, last) by key ISAd[.] around v; on return [*pa, *pb) holds the == v entries */
+             saidx_t *first, saidx_t *middle, saidx_t *last,
+             saidx_t **pa, saidx_t **pb, saidx_t v) {
+  saidx_t *a, *b, *c, *d, *e, *f;
+  saidx_t t, s;
+  saidx_t x = 0;
+
+  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } /* skip the leading == v run */
+  if(((a = b) < last) && (x < v)) {
+    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+      if(x == v) { SWAP(*b, *a); ++a; } /* collect == v entries at the front */
+    }
+  }
+  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } /* skip the trailing == v run */
+  if((b < (d = c)) && (x > v)) {
+    for(; (b < --c) && ((x =
ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(saidx_t *ISA, const saidx_t *SA, + saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, + saidx_t depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. */ + saidx_t *c, *d, *e; + saidx_t s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(saidx_t *ISA, const saidx_t *SA, + saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last, + saidx_t depth) { + saidx_t *c, *d, *e; + saidx_t s, v; + saidx_t rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + 
if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(saidx_t *ISA, const saidx_t *ISAd, + saidx_t *SA, saidx_t *first, saidx_t *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE]; + saidx_t *a, *b, *c; + saidx_t t; + saidx_t v, x = 0; + saidx_t incr = ISAd - ISA; + saint_t limit, next; + saint_t ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } 
while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? 
tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } 
else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +/* Tandem repeat sort */ +void +trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) { + saidx_t *ISAd; + saidx_t *first, *last; + trbudget_t budget; + saidx_t t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} diff --git a/thirdparty/SA/libdivsufsort/lib/utils.c b/thirdparty/SA/libdivsufsort/lib/utils.c new file mode 100644 index 0000000000..75f1f71fab --- /dev/null +++ b/thirdparty/SA/libdivsufsort/lib/utils.c @@ -0,0 +1,381 @@ +/* + * utils.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +//#include "divsufsort_private.h" +#include "../include/divsufsort_private.h" + +/*- Private Function -*/ + +/* Binary search for inverse bwt. */ +static +saidx_t +binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { + saidx_t half, i; + for(i = 0, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + if(A[i + half] < value) { + i += half + 1; + half -= (size & 1) ^ 1; + } + } + return i; +} + + +/*- Functions -*/ + +/* Burrows-Wheeler transform. */ +saint_t +bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, + saidx_t n, saidx_t *idx) { + saidx_t *A, i, j, p, t; + saint_t c; + + /* Check arguments. 
*/ + if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } + if(n <= 1) { + if(n == 1) { U[0] = T[0]; } + *idx = n; + return 0; + } + + if((A = SA) == NULL) { + i = divbwt(T, U, NULL, n); + if(0 <= i) { *idx = i; i = 0; } + return (saint_t)i; + } + + /* BW transform. */ + if(T == U) { + t = n; + for(i = 0, j = 0; i < n; ++i) { + p = t - 1; + t = A[i]; + if(0 <= p) { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + j++; + } else { + *idx = i; + } + } + p = t - 1; + if(0 <= p) { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + } else { + *idx = i; + } + } else { + U[0] = T[n - 1]; + for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } + *idx = i + 1; + for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } + } + + if(SA == NULL) { + /* Deallocate memory. */ + free(A); + } + + return 0; +} + +/* Inverse Burrows-Wheeler transform. */ +saint_t +inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, + saidx_t n, saidx_t idx) { + saidx_t C[ALPHABET_SIZE]; + sauchar_t D[ALPHABET_SIZE]; + saidx_t *B; + saidx_t i, p; + saint_t c, d; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || + (n < idx) || ((0 < n) && (idx == 0))) { + return -1; + } + if(n <= 1) { return 0; } + + if((B = A) == NULL) { + /* Allocate n*sizeof(saidx_t) bytes of memory. */ + if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + } + + /* Inverse BW transform. */ + for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } + for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { + p = C[c]; + if(0 < p) { + C[c] = i; + D[d++] = (sauchar_t)c; + i += p; + } + } + for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } + for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } + for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } + for(i = 0, p = idx; i < n; ++i) { + U[i] = D[binarysearch_lower(C, d, p)]; + p = B[p - 1]; + } + + if(A == NULL) { + /* Deallocate memory. 
*/ + free(B); + } + + return 0; +} + +/* Checks the suffix array SA of the string T. */ +saint_t +sufcheck(const sauchar_t *T, const saidx_t *SA, + saidx_t n, saint_t verbose) { + saidx_t C[ALPHABET_SIZE]; + saidx_t i, p, q, t; + saint_t c; + + if(verbose) { fprintf(stderr, "sufcheck: "); } + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { + if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } + return -1; + } + if(n == 0) { + if(verbose) { fprintf(stderr, "Done.\n"); } + return 0; + } + + /* check range: [0..n-1] */ + for(i = 0; i < n; ++i) { + if((SA[i] < 0) || (n <= SA[i])) { + if(verbose) { + fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + n - 1, i, SA[i]); + } + return -2; + } + } + + /* check first characters. */ + for(i = 1; i < n; ++i) { + if(T[SA[i - 1]] > T[SA[i]]) { + if(verbose) { + fprintf(stderr, "Suffixes in wrong order.\n" + " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" + " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", + i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); + } + return -3; + } + } + + /* check suffixes. */ + for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } + for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { + t = C[i]; + C[i] = p; + p += t; + } + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for(i = 0; i < n; ++i) { + p = SA[i]; + if(0 < p) { + c = T[--p]; + t = C[c]; + } else { + c = T[p = n - 1]; + t = q; + } + if((t < 0) || (p != SA[t])) { + if(verbose) { + fprintf(stderr, "Suffix in wrong position.\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + t, (0 <= t) ? 
SA[t] : -1, i, SA[i]); + } + return -4; + } + if(t != q) { + ++C[c]; + if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } + } + + if(1 <= verbose) { fprintf(stderr, "Done.\n"); } + return 0; +} + + +static +int +_compare(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + saidx_t suf, saidx_t *match) { + saidx_t i, j; + saint_t r; + for(i = suf + *match, j = *match, r = 0; + (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } + *match = j; + return (r == 0) ? -(j != Psize) : r; +} + +/* Search for the pattern P in the string T. */ +saidx_t +sa_search(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + const saidx_t *SA, saidx_t SAsize, + saidx_t *idx) { + saidx_t size, lsize, rsize, half; + saidx_t match, lmatch, rmatch; + saidx_t llmatch, lrmatch, rlmatch, rrmatch; + saidx_t i, j, k; + saint_t r; + + if(idx != NULL) { *idx = -1; } + if((T == NULL) || (P == NULL) || (SA == NULL) || + (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } + if((Tsize == 0) || (SAsize == 0)) { return 0; } + if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } + + for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + match = MIN(lmatch, rmatch); + r = _compare(T, Tsize, P, Psize, SA[i + half], &match); + if(r < 0) { + i += half + 1; + half -= (size & 1) ^ 1; + lmatch = match; + } else if(r > 0) { + rmatch = match; + } else { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { + lmatch = MIN(llmatch, lrmatch); + r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); + if(r < 0) { + j += half + 1; + half -= (lsize & 1) ^ 1; + llmatch = lmatch; + } else { + lrmatch = lmatch; + } + } + + /* right part */ + for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { + 
rmatch = MIN(rlmatch, rrmatch); + r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); + if(r <= 0) { + k += half + 1; + half -= (rsize & 1) ^ 1; + rlmatch = rmatch; + } else { + rrmatch = rmatch; + } + } + + break; + } + } + + if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; +} + +/* Search for the character c in the string T. */ +saidx_t +sa_simplesearch(const sauchar_t *T, saidx_t Tsize, + const saidx_t *SA, saidx_t SAsize, + saint_t c, saidx_t *idx) { + saidx_t size, lsize, rsize, half; + saidx_t i, j, k, p; + saint_t r; + + if(idx != NULL) { *idx = -1; } + if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } + if((Tsize == 0) || (SAsize == 0)) { return 0; } + + for(i = j = k = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + p = SA[i + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r < 0) { + i += half + 1; + half -= (size & 1) ^ 1; + } else if(r == 0) { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for(half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { + p = SA[j + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r < 0) { + j += half + 1; + half -= (lsize & 1) ^ 1; + } + } + + /* right part */ + for(half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { + p = SA[k + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r <= 0) { + k += half + 1; + half -= (rsize & 1) ^ 1; + } + } + + break; + } + } + + if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; +} diff --git a/toys/np_rmat.py b/toys/np_rmat.py index 0618f3922e..f80abc24d7 100755 --- a/toys/np_rmat.py +++ b/toys/np_rmat.py @@ -52,8 +52,8 @@ def gen_rmat_edges(lgNv, Ne_per_v, p): print("jj = ", (jj.size, jj)) print("jj(min,max) = ", (jj.min(), jj.max())) -#plt.scatter(ii,jj) -#plt.show() +plt.scatter(ii,jj) +plt.show() df = pd.DataFrame({"ii":ii, "jj":jj})