From dcbba9a45ae2a190b31badec530ea54a58437606 Mon Sep 17 00:00:00 2001
From: Shoumik Palkar <shoumik@cs.stanford.edu>
Date: Sat, 4 Apr 2020 13:50:54 -0700
Subject: [PATCH] DataFrame foundations (#510)

---
 .gitignore                                    |   1 +
 weld-python/tests/grizzly/core/test_frame.py  | 100 ++++
 weld-python/tests/grizzly/core/test_series.py |  37 +-
 .../tests/grizzly/core/test_strings.py        |   8 +
 weld-python/tests/weld/core/test_lazy.py      |   2 +-
 weld-python/tests/weld/encoders/test_numpy.py |   5 +
 .../tests/weld/encoders/test_primitives.py    |   2 +-
 weld-python/weld/compile.py                   |   8 +-
 weld-python/weld/encoders/__init__.py         |   1 -
 weld-python/weld/encoders/numpy.py            |  37 +-
 weld-python/weld/encoders/primitives.py       |  45 +-
 weld-python/weld/encoders/struct.py           |  78 +++
 weld-python/weld/grizzly/__init__.py          |   1 +
 weld-python/weld/grizzly/core/forwarding.py   |  71 +++
 weld-python/weld/grizzly/core/frame.py        | 349 ++++++++++++
 weld-python/weld/grizzly/core/generic.py      | 109 ++++
 .../weld/grizzly/core/indexes/__init__.py     |   2 +
 weld-python/weld/grizzly/core/indexes/base.py |   9 +
 .../weld/grizzly/core/indexes/column.py       | 153 +++++
 weld-python/weld/grizzly/core/series.py       | 524 +++++++++++-------
 weld-python/weld/grizzly/weld/ops.py          |  16 +
 weld-python/weld/types.py                     |   3 +
 22 files changed, 1273 insertions(+), 288 deletions(-)
 create mode 100644 weld-python/tests/grizzly/core/test_frame.py
 create mode 100644 weld-python/weld/encoders/struct.py
 create mode 100644 weld-python/weld/grizzly/core/forwarding.py
 create mode 100644 weld-python/weld/grizzly/core/frame.py
 create mode 100644 weld-python/weld/grizzly/core/generic.py
 create mode 100644 weld-python/weld/grizzly/core/indexes/__init__.py
 create mode 100644 weld-python/weld/grizzly/core/indexes/base.py
 create mode 100644 weld-python/weld/grizzly/core/indexes/column.py

diff --git a/.gitignore b/.gitignore
index 2f64d6bdc..75a41b30d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ Cargo.lock
 .#*
 *~
 *.swp
+*.swo
 *.bc
 *.pyc
 *.o
diff --git a/weld-python/tests/grizzly/core/test_frame.py b/weld-python/tests/grizzly/core/test_frame.py
new file mode 100644
index 000000000..519b501fb
--- /dev/null
+++ b/weld-python/tests/grizzly/core/test_frame.py
@@ -0,0 +1,100 @@
+"""
+Test basic DataFrame functionality.
+
+"""
+
+import pandas as pd
+import pytest
+import weld.grizzly as gr
+
+def get_frames(cls, strings):
+    """
+    Returns two DataFrames for testing binary operators.
+
+    The DataFrames have columns of overlapping/different names, types, etc.
+
+    """
+    df1 = pd.DataFrame({
+        'name': ['Bob', 'Sally', 'Kunal', 'Deepak', 'James', 'Pratiksha'],
+        'lastName': ['Kahn', 'Lopez', 'Smith', 'Narayanan', 'Thomas', 'Thaker'],
+        'age': [20, 30, 35, 20, 50, 35],
+        'score': [20.0, 30.0, 35.0, 50.0, 35.0, 25.0]
+        })
+    df2 = pd.DataFrame({
+        'firstName': ['Bob', 'Sally', 'Kunal', 'Deepak', 'James', 'Pratiksha'],
+        'lastName': ['Kahn', 'Lopez', 'smith', 'narayanan', 'Thomas', 'thaker'],
+        'age': [25, 30, 45, 20, 60, 35],
+        'scores': [20.0, 30.0, 35.0, 50.0, 35.0, 25.0]
+        })
+    if not strings:
+        df1 = df1.drop(['name', 'lastName'], axis=1)
+        df2 = df2.drop(['firstName', 'lastName'], axis=1)
+    return (cls(df1), cls(df2))
+
+def _test_binop(pd_op, gr_op, strings=True):
+    """
+    Test a binary operator.
+
+    Binary operators align on column name. For columns that don't exist in both
+    DataFrames, the column is filled with NaN (for non-comparison operations) and
+    or False (for comparison operations).
+
+    If the RHS is a Series, the Series should be added to all columns.
+
+    """
+    df1, df2 = get_frames(pd.DataFrame, strings)
+    gdf1, gdf2 = get_frames(gr.GrizzlyDataFrame, strings)
+
+    expect = pd_op(df1, df2)
+    result = gr_op(gdf1, gdf2).to_pandas()
+    assert expect.equals(result)
+
+def test_evaluation():
+    # Test to make sure that evaluating a DataFrame once caches the result/
+    # doesn't cause another evaluation.
+    df1 = gr.GrizzlyDataFrame({
+        'age': [20, 30, 35, 20, 50, 35],
+        'score': [20.0, 30.0, 35.0, 50.0, 35.0, 25.0]
+        })
+    df2 = gr.GrizzlyDataFrame({
+        'age': [20, 30, 35, 20, 50, 35],
+        'scores': [20.0, 30.0, 35.0, 50.0, 35.0, 25.0]
+        })
+    df3 = (df1 + df2) * df2 + df1 / df2
+    assert not df3.is_value
+    df3.evaluate()
+    assert df3.is_value
+    weld_value = df3.weld_value
+    df3.evaluate()
+    # The same weld_value should be returned.
+    assert weld_value is df3.weld_value
+
+def test_add():
+    _test_binop(pd.DataFrame.add, gr.GrizzlyDataFrame.add, strings=False)
+
+def test_sub():
+    _test_binop(pd.DataFrame.sub, gr.GrizzlyDataFrame.sub, strings=False)
+
+def test_mul():
+    _test_binop(pd.DataFrame.mul, gr.GrizzlyDataFrame.mul, strings=False)
+
+def test_div():
+    _test_binop(pd.DataFrame.div, gr.GrizzlyDataFrame.div, strings=False)
+
+def test_eq():
+    _test_binop(pd.DataFrame.eq, gr.GrizzlyDataFrame.eq, strings=True)
+
+def test_ne():
+    _test_binop(pd.DataFrame.ne, gr.GrizzlyDataFrame.ne, strings=True)
+
+def test_le():
+    _test_binop(pd.DataFrame.le, gr.GrizzlyDataFrame.le, strings=False)
+
+def test_lt():
+    _test_binop(pd.DataFrame.lt, gr.GrizzlyDataFrame.lt, strings=False)
+
+def test_ge():
+    _test_binop(pd.DataFrame.ge, gr.GrizzlyDataFrame.ge, strings=False)
+
+def test_gt():
+    _test_binop(pd.DataFrame.gt, gr.GrizzlyDataFrame.gt, strings=False)
diff --git a/weld-python/tests/grizzly/core/test_series.py b/weld-python/tests/grizzly/core/test_series.py
index eaf603cf5..fe569ef58 100644
--- a/weld-python/tests/grizzly/core/test_series.py
+++ b/weld-python/tests/grizzly/core/test_series.py
@@ -85,25 +85,6 @@ def eval_expression(cls):
         yield a + b + c * d - e
     _compare_vs_pandas(eval_expression)
 
-def test_basic_fallback():
-    # Tests basic unsupported functionality.
-    # NOTE: This test will need to change as more features are added...
-    def eval_expression(cls):
-        a = cls([1, 2, 3])
-        b = cls([-4, 5, -6])
-        # Test 1: abs()
-        c = a + b
-        yield (c.abs() + a)
-        # Test 2: argmin()
-        c = a + b
-        yield cls(c.argmin())
-        # Test 3: reindex()
-        c = a + b
-        res = c.reindex(index=[2, 0, 1])
-        # Falls back to Pandas, since we don't support indices.
-        assert isinstance(res, pd.Series)
-    _compare_vs_pandas(eval_expression)
-
 def test_scalar():
     types = ['int8', 'uint8', 'int16', 'uint16', 'int32',\
             'uint32', 'int64', 'uint64', 'float32', 'float64']
@@ -129,6 +110,19 @@ def test_indexing():
     assert np.array_equal(x[x == 2].evaluate().values, np.array([2], dtype='int64'))
     assert np.array_equal(x[x < 0].evaluate().values, np.array([], dtype='int64'))
 
+def test_name():
+    # Test that names propagate after operations.
+    x = gr.GrizzlySeries([1,2,3], name="testname")
+    y = x + x
+    assert y.evaluate().name == "testname"
+    y = x.agg(['sum', 'count'])
+    assert y.evaluate().name == "testname"
+    y = x[:2]
+    assert y.evaluate().name == "testname"
+    y = x[x == 1]
+    assert y.evaluate().name == "testname"
+
+
 def test_unsupported_binop_error():
     # Test unsupported
     from weld.grizzly.core.error import GrizzlyError
@@ -136,3 +130,8 @@ def test_unsupported_binop_error():
         a = gr.GrizzlySeries([1,2,3])
         b = pd.Series([1,2,3])
         a.add(b)
+
+    with pytest.raises(TypeError):
+        a = gr.GrizzlySeries(["hello", "world"])
+        b = gr.GrizzlySeries(["hello", "world"])
+        a.divide(b)
diff --git a/weld-python/tests/grizzly/core/test_strings.py b/weld-python/tests/grizzly/core/test_strings.py
index e760c1cd0..c807bae3b 100644
--- a/weld-python/tests/grizzly/core/test_strings.py
+++ b/weld-python/tests/grizzly/core/test_strings.py
@@ -64,6 +64,14 @@ def test_get():
     pandas_result = pd.Series(expect)
     assert pandas_result.equals(grizzly_result)
 
+def test_eq():
+    left = ["hello", "world", "strings", "morestrings"]
+    right = ["hel", "world", "string", "morestrings"]
+    x = gr.GrizzlySeries(left)
+    y = gr.GrizzlySeries(right)
+    assert list(x.eq(y).evaluate().values) == [False, True, False, True]
+    assert list(x.ne(y).evaluate().values) == [True, False, True, False]
+
 def test_strip():
     compare_vs_pandas('strip', ["",
     "   hi   ",
diff --git a/weld-python/tests/weld/core/test_lazy.py b/weld-python/tests/weld/core/test_lazy.py
index 61e3b5164..cac6da22c 100644
--- a/weld-python/tests/weld/core/test_lazy.py
+++ b/weld-python/tests/weld/core/test_lazy.py
@@ -2,7 +2,7 @@
 Tests for constructing and evaluating lazy operations.
 """
 
-from weld.encoders import PrimitiveWeldEncoder, PrimitiveWeldDecoder
+from weld.encoders.primitives import PrimitiveWeldEncoder, PrimitiveWeldDecoder
 from weld.types import *
 from weld.lazy import *
 
diff --git a/weld-python/tests/weld/encoders/test_numpy.py b/weld-python/tests/weld/encoders/test_numpy.py
index c353453b4..2e1f91ef9 100644
--- a/weld-python/tests/weld/encoders/test_numpy.py
+++ b/weld-python/tests/weld/encoders/test_numpy.py
@@ -99,6 +99,11 @@ def test_float32_vec():
 def test_float64_vec():
     encdec(array('float64'), WeldVec(F64()))
 
+def test_struct_of_vecs():
+    arrays = (array('float32'), array('uint16'), array('uint32'))
+    ty = WeldStruct([WeldVec(F32()), WeldVec(U16()), WeldVec(U32())])
+    encdec(arrays, ty)
+
 def test_type_conversions():
     types = ['bool', 'int8', 'uint8', 'int16', 'uint16',
             'int32', 'uint32', 'int64', 'uint64', 'float32', 'float64']
diff --git a/weld-python/tests/weld/encoders/test_primitives.py b/weld-python/tests/weld/encoders/test_primitives.py
index 09e551075..8c1a9f1ee 100644
--- a/weld-python/tests/weld/encoders/test_primitives.py
+++ b/weld-python/tests/weld/encoders/test_primitives.py
@@ -5,7 +5,7 @@
 import ctypes
 
 from .helpers import encdec_factory
-from weld.encoders import PrimitiveWeldEncoder, PrimitiveWeldDecoder
+from weld.encoders.primitives import PrimitiveWeldEncoder, PrimitiveWeldDecoder
 from weld.types import *
 
 encdec = encdec_factory(PrimitiveWeldEncoder, PrimitiveWeldDecoder)
diff --git a/weld-python/weld/compile.py b/weld-python/weld/compile.py
index 8ef0586d0..d626e8d63 100644
--- a/weld-python/weld/compile.py
+++ b/weld-python/weld/compile.py
@@ -4,10 +4,10 @@
 
 """
 
-from .core import *
-from .encoders import WeldEncoder, WeldDecoder, PrimitiveWeldEncoder,\
-        PrimitiveWeldDecoder
-from .types import WeldType
+from weld.core import *
+from weld.encoders import WeldEncoder, WeldDecoder
+from weld.encoders.primitives import PrimitiveWeldEncoder, PrimitiveWeldDecoder
+from weld.types import WeldType
 
 import ctypes
 import logging
diff --git a/weld-python/weld/encoders/__init__.py b/weld-python/weld/encoders/__init__.py
index d1ced7685..79979cb27 100644
--- a/weld-python/weld/encoders/__init__.py
+++ b/weld-python/weld/encoders/__init__.py
@@ -1,3 +1,2 @@
 
 from .encoder_base import *
-from .primitives import PrimitiveWeldEncoder, PrimitiveWeldDecoder
diff --git a/weld-python/weld/encoders/numpy.py b/weld-python/weld/encoders/numpy.py
index 23852f946..e798df67e 100644
--- a/weld-python/weld/encoders/numpy.py
+++ b/weld-python/weld/encoders/numpy.py
@@ -17,7 +17,7 @@
 import ctypes
 import numpy as np
 
-from .encoder_base import *
+from weld.encoders.struct import StructWeldEncoder, StructWeldDecoder
 from weld.types import *
 
 # We just need this for the path.
@@ -118,15 +118,15 @@ def binop_output_type(left_ty, right_ty, truediv=False):
     Examples
     --------
     >>> binop_output_type(Bool(), Bool())
-    <weld.types.Bool object at ...>
+    bool
     >>> binop_output_type(I8(), U16())
-    <weld.types.I32 object at ...>
+    i32
     >>> binop_output_type(U8(), U16())
-    <weld.types.U16 object at ...>
+    u16
     >>> binop_output_type(F32(), U16())
-    <weld.types.F32 object at ...>
+    f32
     >>> binop_output_type(I8(), U64())
-    <weld.types.F64 object at ...>
+    f64
     """
     if not truediv and left_ty == right_ty:
         return left_ty
@@ -238,13 +238,13 @@ def dtype_to_weld_type(ty):
     Examples
     --------
     >>> dtype_to_weld_type('int32')
-    <weld.types.I32 object at 0x...>
+    i32
     >>> dtype_to_weld_type('float')
-    <weld.types.F64 object at 0x...>
+    f64
     >>> dtype_to_weld_type('i8')
-    <weld.types.I64 object at 0x...>
+    i64
     >>> dtype_to_weld_type(np.int16)
-    <weld.types.I16 object at 0x...>
+    i16
 
     Parameters
     ----------
@@ -295,7 +295,11 @@ def weld_string_array_to_numpy(arr):
         return result
 
 
-class NumPyWeldEncoder(WeldEncoder):
+class NumPyWeldEncoder(StructWeldEncoder):
+    """
+    Encodes NumPy arrays as Weld arrays.
+
+    """
 
     @staticmethod
     def _convert_1d_array(array, check_type=None):
@@ -353,7 +357,7 @@ def _is_string_array(obj):
             return False
         return True
 
-    def encode(self, obj, ty):
+    def encode_element(self, obj, ty):
         if NumPyWeldEncoder._is_string_array(obj):
             assert ty == WeldVec(WeldVec(I8()))
             return StringConversionFuncs.numpy_string_array_to_weld(obj)
@@ -365,9 +369,12 @@ def encode(self, obj, ty):
         else:
             raise TypeError("Unexpected type {} in NumPy encoder".format(type(obj)))
 
-class NumPyWeldDecoder(WeldDecoder):
-    """ Decodes an encoded Weld array into a NumPy array.
+class NumPyWeldDecoder(StructWeldDecoder):
+    """
+    Decodes an encoded Weld array into a NumPy array.
 
+    Examples
+    --------
     >>> arr = np.array([1,2,3], dtype='int32')
     >>> encoded = NumPyWeldEncoder().encode(arr, WeldVec(I32()))
     >>> NumPyWeldDecoder().decode(ctypes.pointer(encoded), WeldVec(I32()))
@@ -457,7 +464,7 @@ def _is_string_array(restype):
                     return True
         return False
 
-    def decode(self, obj, restype, context=None):
+    def decode_element(self, obj, restype, context=None):
         # A 1D NumPy array
         obj = obj.contents
         if NumPyWeldDecoder._is_string_array(restype):
diff --git a/weld-python/weld/encoders/primitives.py b/weld-python/weld/encoders/primitives.py
index 8c7a6f36b..fa0055158 100644
--- a/weld-python/weld/encoders/primitives.py
+++ b/weld-python/weld/encoders/primitives.py
@@ -6,15 +6,14 @@
 
 """
 
-from .encoder_base import *
-from ..types import *
+from weld.encoders.struct import StructWeldEncoder, StructWeldDecoder
+from weld.types import *
 
-class PrimitiveWeldEncoder(WeldEncoder):
-    """
-    A primitive encoder for booleans, integers and floats.
+import ctypes
 
-    Eventually, this will also support encoding for tuples (structs) of other
-    primitive types.
+class PrimitiveWeldEncoder(StructWeldEncoder):
+    """
+    A primitive encoder for booleans, integers, floats, and tuples thereof.
 
     Examples
     --------
@@ -33,24 +32,13 @@ class PrimitiveWeldEncoder(WeldEncoder):
     >>> s._1
     1
     """
-    def encode(self, obj, target_type):
+    def encode_element(self, obj, target_type):
         encoder = target_type.ctype_class
-        if isinstance(target_type, WeldStruct):
-            struct = encoder()
-            for (i, (field, weld_ty)) in enumerate(zip(\
-                    obj, target_type.field_types)):
-                encoded = self.encode(field, weld_ty)
-                setattr(struct, "_" + str(i), encoded)
-            return struct
-        else:
-            return encoder(obj)
+        return encoder(obj)
 
-class PrimitiveWeldDecoder(WeldDecoder):
+class PrimitiveWeldDecoder(StructWeldDecoder):
     """
-    A primitive encoder for booleans, integers, and floats.
-
-    Eventually, this will also support decoding for structs (tuples) of other
-    primitive types.
+    A primitive encoder for booleans, integers, floats, and tuples thereof.
 
     Examples
     --------
@@ -65,18 +53,9 @@ class PrimitiveWeldDecoder(WeldDecoder):
     >>> decoder.decode(ctypes.pointer(x), struct_type)
     (1, 1.0)
     """
-    def decode(self, obj, restype, context=None):
+
+    def decode_element(self, obj, restype, context=None):
         if isinstance(restype, Bool):
             return bool(obj.contents.value)
-        elif isinstance(restype, WeldStruct):
-            struct = obj.contents
-            ctype_class = restype.ctype_class
-            result = []
-            for (i, (weld_ty, (cfield, cty))) in enumerate(zip(\
-                    restype.field_types, ctype_class._fields_)):
-                ofs = getattr(ctype_class, cfield).offset
-                p = ctypes.pointer(cty.from_buffer(struct, ofs))
-                result.append(self.decode(p, weld_ty))
-            return tuple(result)
         else:
             return obj.contents.value
diff --git a/weld-python/weld/encoders/struct.py b/weld-python/weld/encoders/struct.py
new file mode 100644
index 000000000..047a813a1
--- /dev/null
+++ b/weld-python/weld/encoders/struct.py
@@ -0,0 +1,78 @@
+"""
+Implements an encoder mixin for tuples of values.
+
+Tuples are encoded as Weld Structs. Weld structs are decoded back into
+Python tuples. This encoder takes another encoder to encode or decode
+individual elements.
+
+"""
+
+from abc import abstractmethod
+from weld.encoders import WeldEncoder, WeldDecoder
+from weld.types import WeldStruct
+
+import ctypes
+
+class StructWeldEncoder(WeldEncoder):
+    """
+    An encoder mixin for structs, to be subclassed by another encoders.
+
+    Subclasses should encode their own types in 'encode_element()' instead
+    of 'encode()'.
+
+    """
+
+    @abstractmethod
+    def encode_element(self, obj, target_type):
+        """
+        Encodes a single non-struct element.
+
+        Subclasses' 'encode()' implementation should go here.
+
+        """
+        pass
+
+    def encode(self, obj, target_type):
+        if isinstance(target_type, WeldStruct):
+            encoder = target_type.ctype_class
+            struct = encoder()
+            for (i, (field, weld_ty)) in enumerate(zip(\
+                    obj, target_type.field_types)):
+                encoded = self.encode(field, weld_ty)
+                setattr(struct, "_" + str(i), encoded)
+            return struct
+        else:
+            return self.encode_element(obj, target_type)
+
+class StructWeldDecoder(WeldDecoder):
+    """
+    A decoder abstract class for structs.
+
+    Subclasses should encode their own types in 'decode_element()' instead
+    of 'decode()'.
+
+    """
+
+    @abstractmethod
+    def decode_element(self, obj, restype, context=None):
+        """
+        Decodes a single non-struct element.
+
+        Subclasses' 'decode()' implementation should go here.
+
+        """
+        pass
+
+    def decode(self, obj, restype, context=None):
+        if isinstance(restype, WeldStruct):
+            struct = obj.contents
+            ctype_class = restype.ctype_class
+            result = []
+            for (i, (weld_ty, (cfield, cty))) in enumerate(zip(\
+                    restype.field_types, ctype_class._fields_)):
+                ofs = getattr(ctype_class, cfield).offset
+                p = ctypes.pointer(cty.from_buffer(struct, ofs))
+                result.append(self.decode(p, weld_ty))
+            return tuple(result)
+        else:
+            return self.decode_element(obj, restype, context)
diff --git a/weld-python/weld/grizzly/__init__.py b/weld-python/weld/grizzly/__init__.py
index 963902a18..ab72d7cd0 100644
--- a/weld-python/weld/grizzly/__init__.py
+++ b/weld-python/weld/grizzly/__init__.py
@@ -1,2 +1,3 @@
 
+from weld.grizzly.core.frame import GrizzlyDataFrame
 from weld.grizzly.core.series import GrizzlySeries
diff --git a/weld-python/weld/grizzly/core/forwarding.py b/weld-python/weld/grizzly/core/forwarding.py
new file mode 100644
index 000000000..1c2a83e87
--- /dev/null
+++ b/weld-python/weld/grizzly/core/forwarding.py
@@ -0,0 +1,71 @@
+"""
+Implements method forwarding from one class to another.
+
+The `Forwarding` class can be used as a mixin.
+
+"""
+
+import inspect
+import warnings
+
+class Forwarding(object):
+    @classmethod
+    def _get_class_that_defined_method(cls, meth):
+        """
+        Returns the class that defines the requested method.
+
+        For methods that are defined outside of a particular set of
+        Grizzly-defined classes, Grizzly will first evaluate lazy results
+        before forwarding the data to the requested class.
+
+        """
+        if inspect.ismethod(meth):
+            for cls in inspect.getmro(meth.__self__.__class__):
+                if cls.__dict__.get(meth.__name__) is meth:
+                    return cls
+        if inspect.isfunction(meth):
+            return getattr(inspect.getmodule(meth),
+                           meth.__qualname__.split('.<locals>', 1)[0].rsplit('.', 1)[0])
+
+    @classmethod
+    def _requires_forwarding(cls, meth):
+        defined_in = cls._get_class_that_defined_method(meth)
+        if defined_in is not None and defined_in is not cls:
+            return True
+        else:
+            return False
+
+    @classmethod
+    def _forward(cls, to_cls):
+        from functools import wraps
+        def forward_decorator(func):
+            @wraps(func)
+            def forwarding_wrapper(self, *args, **kwargs):
+                self.evaluate()
+                result = func(self, *args, **kwargs)
+                # Unsupported functions will return Series -- try to
+                # switch back to GrizzlySeries.
+                if not isinstance(result, cls) and isinstance(result, to_cls):
+                    try_convert = cls(data=result.values, index=result.index)
+                    if not isinstance(try_convert, cls):
+                        warnings.warn("Unsupported operation '{}' produced unsupported Series: falling back to Pandas".format(
+                            func.__name__))
+                    return try_convert
+                return result
+            return forwarding_wrapper
+        return forward_decorator
+
+    @classmethod
+    def add_forwarding_methods(cls, to_cls):
+        """
+        Add forwarding methods from this class to `to_cls`.
+
+        """
+        methods = dir(cls)
+        for meth in methods:
+            if meth.startswith("_"):
+                # We only want to do this for API methods.
+                continue
+            attr = getattr(cls, meth)
+            if cls._requires_forwarding(attr):
+                setattr(cls, meth, cls._forward(to_cls)(attr))
diff --git a/weld-python/weld/grizzly/core/frame.py b/weld-python/weld/grizzly/core/frame.py
new file mode 100644
index 000000000..0ff1fd6d1
--- /dev/null
+++ b/weld-python/weld/grizzly/core/frame.py
@@ -0,0 +1,349 @@
+"""
+A Weld wrapper for pandas.DataFrame.
+
+"""
+
+import numpy as np
+import pandas as pd
+
+import weld.encoders.numpy as wenp
+import weld.grizzly.weld.ops as weldops
+
+from weld.lazy import WeldLazy, identity
+from weld.grizzly.core.forwarding import Forwarding
+from weld.grizzly.core.generic import GrizzlyBase
+from weld.grizzly.core.indexes import ColumnIndex
+from weld.grizzly.core.series import GrizzlySeries
+from weld.types import *
+
+class GrizzlyDataFrame(Forwarding, GrizzlyBase):
+    """
+    An API-compatible DataFrame backed by Weld.
+
+    DataFrames are dictionary-like containers of Series of the same length.
+    Each Series can be a different data type. Operations on DataFrames align on
+    column name. Unlike pandas, DataFrame operations do not align on row
+    indexes.
+
+    Examples
+    --------
+    >>> df = GrizzlyDataFrame({'name': ['mike', 'sam', 'sally'], 'age': [20, 22, 56]})
+    >>> df
+        name  age
+    0   mike   20
+    1    sam   22
+    2  sally   56
+    >>> df2 = GrizzlyDataFrame({'nom': ['jacques', 'kelly', 'marie'], 'age': [50, 60, 70]})
+    >>> df.add(df2).to_pandas()
+       age  name  nom
+    0   70   NaN  NaN
+    1   82   NaN  NaN
+    2  126   NaN  NaN
+
+    """
+
+    # Indicates that the length of a DataFrame is not known. Certain operations on DataFrames
+    # with this length are disallowed, since Grizzly (like Pandas) assumes each column in a DataFrame
+    # is of the same length.
+    UNKNOWN_LENGTH = -1
+
+    _encoder = wenp.NumPyWeldEncoder()
+    _decoder = wenp.NumPyWeldDecoder()
+
+    # ------------------- GrizzlyBase methods ----------------------
+
+    @property
+    def weld_value(self):
+        if hasattr(self, "weld_value_"):
+            return self.weld_value_
+
+        if len(self.data) == 0:
+            raise GrizzlyError("weld_value cannot be accessed on DataFrame with no data")
+        output_type = WeldStruct([col.weld_value.output_type for col in self.data])
+        self.weld_value_ = weldops.make_struct(*[col.weld_value for col in self.data])(output_type, GrizzlyDataFrame._decoder)
+        return self.weld_value_
+
+    @property
+    def is_value(self):
+        """
+        Returns whether this DataFrame is a physical value.
+
+        If this is True, evaluate() is guaranteed to be a no-op.
+
+        Examples
+        --------
+        >>> df = GrizzlyDataFrame({'name': ['sam', 'sally'], 'age': [25, 50]})
+        >>> df.is_value
+        True
+        >>> df = df.eq(df)
+        >>> df.is_value
+        False
+
+        """
+        return self.pandas_df is not None or\
+                all([child.is_identity for child in self.children])
+
+    def evaluate(self):
+        """
+        Evaluates this `GrizzlyDataFrame` and returns itself.
+
+        Evaluation reduces the currently stored computation to a physical value
+        by compiling and running a Weld program. If this `GrizzlyDataFrame` refers
+        to a physical value and no computation, no program is compiled, and this
+        method returns `self` unmodified.
+
+        Returns
+        -------
+        GrizzlyDataFrame
+
+        """
+        if not self.is_value:
+            if len(self.data) == 0:
+                # We're an empty DataFrame
+                self.pandas_df = pd.DataFrame()
+                return
+            # Collect each vector into a struct rather than evaluating each Series individually:
+            # this is more efficient so computations shared among vectors can be optimized.
+            result = self.weld_value.evaluate()
+            columns = result[0]
+            new_data = []
+            length = None
+            for column in columns:
+                data = column.copy2numpy()
+                column_length = len(data)
+                if length is not None:
+                    assert column_length == length, "invalid DataFrame produced after evaluation"
+                else:
+                    length = column_length
+                series = GrizzlySeries(data)
+                new_data.append(series)
+
+            # Columns is unchanged
+            self.pandas_df = None
+            self.data = new_data
+            self.length = length
+            # Reset the weld representation.
+            delattr(self, "weld_value_")
+        assert self.is_value
+        return self
+
+    def to_pandas(self, copy=False):
+        """
+        Evaluate and convert this GrizzlyDataFrame to a pandas DataFrame.
+
+        Parameters
+        ----------
+        copy : bool
+            whether to copy the data into the new DataFrame. This only guarantees
+            that a copy _will_ occur; it does not guarantee that a copy will not.
+
+        Returns
+        -------
+        pandas.DataFrame
+
+        """
+        self.evaluate()
+        if self.pandas_df is not None:
+            return self.pandas_df
+        col_to_data = dict()
+        for col in self.columns:
+            col_to_data[col] = self._col(col).values
+        self.pandas_df = pd.DataFrame(data=col_to_data, copy=copy)
+        return self.pandas_df
+
+    # ------------------- Initialization ----------------------
+
+    def __init__(self, data, columns=None, _length=UNKNOWN_LENGTH, _fastpath=False):
+        if _fastpath:
+            assert all([isinstance(d, GrizzlySeries) for d in data])
+            assert isinstance(columns, ColumnIndex)
+            self.data = data
+            self.columns = columns
+            self.length = _length
+            self.pandas_df = None
+            return
+
+        self.data = []
+        column_index = []
+        # Keep a reference to the Pandas DataFrame. This should not consume any extra memory, since GrizzlySeries
+        # just keep references ot the same values. The only exception is for string data, where a copy will be created
+        # to force use of the 'S' dtype.
+        #
+        # TODO(shoumik): Convert string data to dtype 'S' here so it doesn't get copied when wrapping the Series as
+        # GrizzlySeries.
+        if isinstance(data, pd.DataFrame):
+            self.pandas_df = data
+        else:
+            self.pandas_df = pd.DataFrame(data, columns=columns)
+        self.length = len(self.pandas_df)
+        for (i, col) in enumerate(self.pandas_df):
+            grizzly_series = GrizzlySeries(self.pandas_df[col], name=self.pandas_df[col].name)
+            if not isinstance(grizzly_series, GrizzlySeries):
+                raise TypeError("Unsupported Series in DataFrame: {}".format(self.pandas_df[col]))
+            self.data.append(grizzly_series)
+            column_index.append(col)
+
+        self.columns = ColumnIndex(column_index)
+
+    def _col(self, col):
+        """
+        Returns the column associated with the column name 'col'.
+
+        """
+        return self.data[self.columns[col]]
+
+    # ------------------- Getting and Setting Items ----------------------
+
+    # TODO!
+
+    # ------------------- Ops ----------------------
+
+    def explain(self):
+        """
+        Prints a string that describes the operations to compute each column.
+
+        If this DataFrame is a value, prints the data.
+
+        """
+        if self.pandas_df is not None:
+            print(self.pandas_df)
+        else:
+            for col in self.columns:
+                code = self._col(col).code
+                code = code.replace("\n", "\n\t")
+                print("{}: {}".format(col, code))
+
+
+    def _arithmetic_binop_impl(self, other, series_op, fill=np.nan):
+        """
+        Apply the binary operation between this DataFrame and other.
+
+        Parameters
+        ----------
+        other : DataFrame, scalar, GrizzlySeries
+            If other is a DataFrame, aligns on column name. If the binary
+            operator is not supported on any column, raises a TypeError.
+        series_op : func
+            The binary operation to apply.
+        compare : bool
+            whether this is a comparison operation.
+
+        Returns
+        -------
+        GrizzlyDataFrame
+
+        """
+        new_data = []
+        if not isinstance(other, GrizzlyDataFrame):
+            for data in self.data:
+                new_data.append(series_op(data, other))
+            return GrizzlyDataFrame(new_data,
+                    columns=copy.deepcopy(self.columns),
+                    _length=self.length,
+                    _fastpath=True)
+
+        new_cols = []
+        for (col, left_slot, right_slot) in self.columns.zip(other.columns):
+            new_cols.append(col)
+            if left_slot is None or right_slot is None:
+                # TODO(shoumik): Handle this case by making a lazy computation.
+                assert self.length != GrizzlyDataFrame.UNKNOWN_LENGTH
+                dtype = np.array([fill]).dtype
+                vals = np.empty(self.length, dtype=dtype)
+                vals[:] = fill
+                new_data.append(GrizzlySeries(vals))
+            else:
+                new_data.append(series_op(self.data[left_slot], other.data[right_slot]))
+        return GrizzlyDataFrame(new_data,
+                columns=ColumnIndex(new_cols),
+                _length=self.length,
+                _fastpath=True)
+
+    def add(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.add)
+
+    def sub(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.sub)
+
+    def mod(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.mod)
+
+    def mul(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.mul)
+
+    def truediv(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.truediv)
+
+    def divide(self, other):
+        return self.truediv(other)
+
+    def div(self, other):
+        return self.truediv(other)
+
+    def eq(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.eq, fill=False)
+
+    def ne(self, other):
+        # Fill with True on this one.
+        return self._arithmetic_binop_impl(other, GrizzlySeries.ne, fill=True)
+
+    def ge(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.ge, fill=False)
+
+    def gt(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.gt, fill=False)
+
+    def le(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.le, fill=False)
+
+    def lt(self, other):
+        return self._arithmetic_binop_impl(other, GrizzlySeries.lt, fill=False)
+
+    def __add__(self, other):
+        return self.add(other)
+
+    def __sub__(self, other):
+        return self.sub(other)
+
+    def __mul__(self, other):
+        return self.mul(other)
+
+    def __truediv__(self, other):
+        return self.truediv(other)
+
+    def __divmod__(self, other):
+        return self.divmod(other)
+
+    def __mod__(self, other):
+        return self.mod(other)
+
+    def __eq__(self, other):
+        return self.eq(other)
+
+    def __ne__(self, other):
+        return self.ne(other)
+
+    def __ge__(self, other):
+        return self.ge(other)
+
+    def __gt__(self, other):
+        return self.gt(other)
+
+    def __le__(self, other):
+        return self.le(other)
+
+    def __lt__(self, other):
+        return self.lt(other)
+
+    def __str__(self):
+        if self.pandas_df is not None:
+            return str(self.pandas_df)
+        else:
+            return repr(self)
+
+    def __repr__(self):
+        if self.pandas_df is not None:
+            return repr(self.pandas_df)
+        else:
+            return "GrizzlyDataFrame(lazy, {})".format([name for name in self.columns.columns])
+
diff --git a/weld-python/weld/grizzly/core/generic.py b/weld-python/weld/grizzly/core/generic.py
new file mode 100644
index 000000000..0ccaf7be1
--- /dev/null
+++ b/weld-python/weld/grizzly/core/generic.py
@@ -0,0 +1,109 @@
+"""
+Base class for Grizzly collection types.
+
+"""
+
+from abc import ABC, abstractmethod
+
+class GrizzlyBase(ABC):
+
+    @property
+    @abstractmethod
+    def weld_value(self):
+        """
+        Returns the WeldLazy represention of this object.
+
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def is_value(self):
+        """
+        Returns whether this collection wraps a physical value rather than
+        a computation.
+        """
+        pass
+
+    @abstractmethod
+    def evaluate(self):
+        """
+        Evaluates this collection and returns itself.
+
+        Evaluation reduces the currently stored computation to a physical value
+        by compiling and running a Weld program. If this collection refers
+        to a physical value and no computation, no program is compiled, and this
+        method returns `self` unmodified.
+
+        """
+        pass
+
+    @abstractmethod
+    def to_pandas(self, copy=False):
+        """
+        Evaluates this computation and coerces it into a pandas object.
+
+        This is guaranteed to be 0-copy if `self.is_value == True`. Otherwise,
+        some allocation may occur. Regardless, `self` and the returned value
+        will always have the same underlying data unless `copy = True`.
+
+        Parameters
+        ----------
+        copy : boolean, optional
+            Specifies whether the new collection should copy data from self
+
+        """
+        pass
+
+    @property
+    def children(self):
+        """
+        Returns the Weld children of this value.
+        """
+        return self.weld_value.children
+
+    @property
+    def output_type(self):
+        """
+        Returns the Weld output type of this collection.
+
+        The output type is always a `WeldVec` of some type.
+        """
+        return self.weld_value.output_type
+
+    @property
+    def code(self):
+        """
+        Returns the Weld code for this computation.
+        """
+        return self.weld_value.code
+
+    @classmethod
+    def scalar_ty(cls, value, cast_ty):
+        """
+        Returns the scalar Weld type of a scalar Python value. If the value is not a scalar,
+        returns None. For primitive 'int' values, returns 'cast_ty'.
+
+        This returns 'None' if the value type is not supported.
+
+        Parameters
+        ----------
+        value : any
+            value whose dtype to obtain.
+
+        Returns
+        -------
+        WeldType
+
+        """
+        if hasattr(value, 'dtype') and hasattr(value, 'shape') and value.shape == ():
+            return wenp.dtype_to_weld_type(value.dtype)
+        if isinstance(value, int):
+            return cast_ty
+        if isinstance(value, float):
+            return F64()
+        if isinstance(value, bool):
+            return Bool()
+
+
+
diff --git a/weld-python/weld/grizzly/core/indexes/__init__.py b/weld-python/weld/grizzly/core/indexes/__init__.py
new file mode 100644
index 000000000..1be09ae96
--- /dev/null
+++ b/weld-python/weld/grizzly/core/indexes/__init__.py
@@ -0,0 +1,2 @@
+
+from weld.grizzly.core.indexes.column import ColumnIndex
diff --git a/weld-python/weld/grizzly/core/indexes/base.py b/weld-python/weld/grizzly/core/indexes/base.py
new file mode 100644
index 000000000..f9c085c8c
--- /dev/null
+++ b/weld-python/weld/grizzly/core/indexes/base.py
@@ -0,0 +1,9 @@
+
+from abc import ABC
+
+class Index(ABC):
+    """
+    Base class for an index in Grizzly.
+
+    """
+    pass
diff --git a/weld-python/weld/grizzly/core/indexes/column.py b/weld-python/weld/grizzly/core/indexes/column.py
new file mode 100644
index 000000000..66801670d
--- /dev/null
+++ b/weld-python/weld/grizzly/core/indexes/column.py
@@ -0,0 +1,153 @@
+"""
+Index used for access columns in Grizzly.
+
+"""
+
+from weld.grizzly.core.indexes.base import Index
+
+class ColumnIndex(Index):
+    """
+    An index used for columns in a Grizzly DataFrame.
+
+    Each index value is a Python object. For operations between two DataFrames
+    with the same ColumnIndex, the result will also have the same index. For
+    operations between two DataFrames with different ColumnIndex, the output
+    will have a join of the two ColumnIndex, sorted by the index values.
+
+    Two ColumnIndex are equal if their index names are equal and have the same
+    order.
+
+    Parameters
+    ----------
+    columns : iterable
+        column names.
+    slots : iterable of int or None
+        slots associated with each column. If provided, the length must be
+        len(columns). This is used for underlying data access only; index
+        equality depends only on the column names and ordering.
+
+
+    Examples
+    --------
+    >>> ColumnIndex(["name", "age"])
+    ColumnIndex(['name', 'age'], [0, 1])
+    >>> ColumnIndex(["name", "age"], slots=[1, 0])
+    ColumnIndex(['name', 'age'], [1, 0])
+    >>> ColumnIndex(["name", "age"], slots=[1, 2])
+    Traceback (most recent call last):
+    ...
+    ValueError: slots must be contiguous starting at 0
+
+    """
+
+    def __init__(self, columns, slots=None):
+        if not isinstance(columns, list):
+            columns = list(columns)
+        if slots is not None:
+            assert len(columns) == len(slots)
+            sorted_slots = sorted(slots)
+            # Make sure each slot is occupied/there are no "holes".
+            if not sorted_slots == list(range(len(slots))):
+                raise ValueError("slots must be contiguous starting at 0")
+        else:
+            slots = range(len(columns))
+
+        # The original column order.
+        self.columns = columns
+        # The mapping from columns to slots.
+        self.index = dict(zip(columns, slots))
+
+    def __iter__(self):
+        """
+        Iterates over columns in the order in which they appear in a DataFrame.
+
+        Examples
+        --------
+        >>> x = ColumnIndex(["name", "age"], slots=[1, 0])
+        >>> [name for name in x]
+        ['name', 'age']
+
+        """
+        for col in self.columns:
+            yield col
+
+    def zip(self, other):
+        """
+        Zips this index with 'other', returning an iterator of `(name,
+        slot_in_self, slot_in_other)`. The slot may be `None` if the name does
+        not appear in either column.
+
+        The result columns are ordered in a way consistent with how DataFrame
+        columns should be be ordered (i.e., same order `self` if `self ==
+        other`, and sorted by the union of columns from `self` and `other`
+        otherwise).
+
+        Examples
+        --------
+        >>> a = ColumnIndex(["name", "age"])
+        >>> b = ColumnIndex(["name", "age"])
+        >>> list(a.zip(b))
+        [('name', 0, 0), ('age', 1, 1)]
+        >>> b = ColumnIndex(["income", "age", "name"])
+        >>> list(a.zip(b))
+        [('age', 1, 1), ('income', None, 0), ('name', 0, 2)]
+
+        """
+        if self == other:
+            for name in self.columns:
+                yield (name, self.index[name], other.index[name])
+        else:
+            columns = sorted(list(set(self.columns).union(other.columns)))
+            for name in columns:
+                yield (name, self.index.get(name), other.index.get(name))
+
+    def __getitem__(self, key):
+        """
+        Get the slot for a paritcular column name.
+
+        Examples
+        --------
+        >>> a = ColumnIndex(["name", "age"])
+        >>> a["age"]
+        1
+
+        """
+        return self.index[key]
+
+    def append(self, key):
+        """
+        Add a new column to the index. The slot is set to be `len(columns) - 1`.
+
+        Examples
+        --------
+        >>> a = ColumnIndex(["name", "age"])
+        >>> a.append("income")
+        >>> a["income"]
+        2
+
+        """
+        self.index[key] = len(self.columns)
+        self.columns.append(key)
+
+    def __eq__(self, other):
+        """
+        Compare equality depending on column names.
+
+        Examples
+        --------
+        >>> a = ColumnIndex(["name", "age"])
+        >>> a == ColumnIndex(["name", "age"])
+        True
+        >>> a == ColumnIndex(["age", "name"])
+        False
+        >>> a == ColumnIndex(["name", "age", "income"])
+        False
+
+        """
+        return isinstance(other, ColumnIndex) and self.columns == other.columns
+
+    def __str__(self):
+        return repr(self)
+
+    def __repr__(self):
+        return "ColumnIndex({}, {})".format(self.columns, [self.index[col] for col in self.columns])
diff --git a/weld-python/weld/grizzly/core/series.py b/weld-python/weld/grizzly/core/series.py
index 81a3e576d..a9abd2356 100644
--- a/weld-python/weld/grizzly/core/series.py
+++ b/weld-python/weld/grizzly/core/series.py
@@ -1,11 +1,10 @@
 """
 A Weld wrapper for pandas.Series.
+
 """
 
-import inspect
 import numpy as np
 import pandas as pd
-import warnings
 
 import weld.encoders.numpy as wenp
 import weld.grizzly.weld.agg as weldagg
@@ -13,20 +12,12 @@
 from weld.lazy import PhysicalValue, WeldLazy, WeldNode, identity
 from weld.grizzly.weld.ops import *
 from weld.grizzly.core.error import GrizzlyError
+from weld.grizzly.core.forwarding import Forwarding
+from weld.grizzly.core.generic import GrizzlyBase
 from weld.grizzly.core.strings import StringMethods
 from weld.types import *
 
-def _grizzlyseries_constructor_with_fallback(data=None, **kwargs):
-    """
-    A flexible constructor for Series._constructor, which needs to be able
-    to fall back to a Series (if a certain operation cannot produce GrizzlySeries).
-    """
-    try:
-        return GrizzlySeries(data=data, **kwargs)
-    except TypeError:
-        return pd.Series(data=data, **kwargs)
-
-class GrizzlySeries(pd.Series):
+class GrizzlySeries(Forwarding, GrizzlyBase):
     """
     A lazy `Series` object backed by a Weld computation.
 
@@ -36,6 +27,31 @@ class GrizzlySeries(pd.Series):
     regular pandas behavior. Similarly, `GrizzlySeries` that are initialized with
     features that Grizzly does not understand are initialized as `pd.Series`.
 
+    Implementation Notes
+    --------------------
+
+    A Series in Grizzly is a 1D array of a single data type. Transformations on
+    Series can result in scalar values, which are directly evaluated and
+    returned as scalars, as other Series that encapsulate lazy Weld
+    computations, or as DataFrames (currently unsupported).
+
+    A Series always has a known type: unlike pandas Series, which are wrappers
+    around Python object, Series with heterogeneous types (i.e., Series with
+    `dtype=object`) are disallowed. Series that are not evaluated/store a lazy
+    compuations also always have a known type.
+
+    Internally, most Series are backed by NumPy arrays, which themselves are
+    stored as contiguous C arrays. The main exception is string data. Grizzly
+    operates exclusively over UTF-8 encoded data, and most string operations
+    will create a copy of the input data upon evaluation. Strings are currently
+    encoded as NumPy arrays with the 'S' dtype: this may change in the future
+    (the 'S' dtype forces memory usage per string to match the length of the
+    largest string in the array, which is clearly suboptimal).
+
+    Unlike pandas Series, GrizzlySeries do not support alignment of index
+    values for performance reasons. To align the indices of two Series, consider
+    using a DataFrame with a join operator.
+
     Parameters
     ----------
 
@@ -43,65 +59,57 @@ class GrizzlySeries(pd.Series):
         Contains data stored in the series.
     dtype : numpy.dtype or str
         Data type of the values.
+    name : str
+        A name to give the series.
 
     Examples
     --------
-    >>> x = GrizzlySeries([1,2,3])
+    >>> x = GrizzlySeries([1,2,3], name="numbers")
     >>> x
     0    1
     1    2
     2    3
-    dtype: int64
+    Name: numbers, dtype: int64
     """
 
-    # TODO(shoumik): Does this need to be in _metadata instead?
-    _internal_names = pd.Series._internal_names + ['weld_value_']
-    _internal_names_set = set(_internal_names)
-
     # The encoders and decoders are stateless, so no need to instantiate them
     # for each computation.
     _encoder = wenp.NumPyWeldEncoder()
     _decoder = wenp.NumPyWeldDecoder()
 
-    # ---------------------- WeldNode abstract methods ---------------------
-
     @property
-    def children(self):
-        """
-        Returns the Weld children of this `GrizzlySeries.
-        """
-        return self.weld_value_.children
+    def weld_value(self):
+        return self.weld_value_
+
+    @weld_value.setter
+    def weld_value(self, value):
+        self.weld_value_ = value
 
     @property
-    def output_type(self):
+    def is_value(self):
         """
-        Returns the Weld output type of this `GrizzlySeries`.
-
-        The output type is always a `WeldVec` of some type.
+        Returns whether this collection wraps a physical value rather than
+        a computation.
         """
-        return self.weld_value_.output_type
+        return self.weld_value.is_identity or hasattr(self, "evaluating_")
 
     @property
     def elem_type(self):
         """
         Returns the element type of this `GrizzlySeries`.
         """
-        return self.weld_value_.output_type.elem_type
+        return self.output_type.elem_type
 
     @property
-    def is_value(self):
-        """
-        Returns whether this `GrizzlySeries` wraps a physical value rather than
-        a computation.
+    def dtype(self):
         """
-        return self.weld_value_.is_identity or hasattr(self, "evaluating_")
+        Returns the NumPy dtype of this Series.
 
-    @property
-    def code(self):
-        """
-        Returns the Weld code for this computation.
         """
-        return self.weld_value_.code
+        elem_type = self.elem_type
+        if elem_type == WeldVec(I8()):
+            return np.dtype('S')
+        return wenp.weld_type_to_dtype(elem_type)
 
     @property
     def values(self):
@@ -126,7 +134,7 @@ def values(self):
         """
         if not self.is_value:
             raise GrizzlyError("GrizzlySeries is not evaluated and does not have values. Try calling 'evaluate()' first.")
-        return super(GrizzlySeries, self).values
+        return self.values_
 
     def evaluate(self):
         """
@@ -139,16 +147,16 @@ def evaluate(self):
 
         """
         if not self.is_value:
-            result = self.weld_value_.evaluate()
+            result = self.weld_value.evaluate()
             # TODO(shoumik): it's unfortunate that this copy is needed, but
             # things are breaking without it (even if we hold a reference to
             # the WeldContext). DEBUG ME!
             if isinstance(result[0], wenp.weldbasearray):
-                super(GrizzlySeries, self).__init__(result[0].copy2numpy())
+                self.__init__(result[0].copy2numpy(), name=self.name)
             else:
-                super(GrizzlySeries, self).__init__(result[0])
+                self.__init__(result[0], name=self.name)
             setattr(self, "evaluating_", 0)
-            self.weld_value_ = identity(PhysicalValue(self.values,\
+            self.weld_value = identity(PhysicalValue(self.values,\
                     self.output_type, GrizzlySeries._encoder),
                     GrizzlySeries._decoder)
             delattr(self, "evaluating_")
@@ -190,33 +198,27 @@ def to_pandas(self, copy=False):
         """
         self.evaluate()
         assert self.is_value
-        return pd.Series(data=self.array,
-                index=self.index,
-                copy=copy,
-                fastpath=True)
-
-    # ------------- pd.Series methods required for subclassing ----------
-
-    @property
-    def _constructor(self):
-        return _grizzlyseries_constructor_with_fallback
-
-    @property
-    def _constructor_expanddim(self):
-        return NotImplemented
+        return pd.Series(data=self.values_, name=self.name, copy=copy)
 
     # ---------------------- Class methods ------------------------------
 
     @classmethod
-    def _supports_grizzly(cls, data):
+    def supported(cls, data):
         """
-        Returns the Weld type if data supports Grizzly operation, or returns
-        `None` otherwise.
+        Returns whether the given ndarray supports Grizzly operation.
 
         Parameters
         ----------
         data : np.array
 
+        Examples
+        -------
+        >>> GrizzlySeries.supported(np.array([1,2,3], dtype='int32'))
+        vec[i32]
+        >>> GrizzlySeries.supported(np.array(["foo","bar"], dtype='object'))
+        >>> GrizzlySeries.supported(np.array(["foo","bar"], dtype='S'))
+        vec[vec[i8]]
+
         """
         if not isinstance(data, np.ndarray) or data.ndim != 1:
             return None
@@ -228,13 +230,9 @@ def _supports_grizzly(cls, data):
 
     # ---------------------- Initialization ------------------------------
 
-    def __init__(self, data, dtype=None, index=None, **kwargs):
-        # Everything important is done in __new__.
-        pass
-
-    def __new__(cls, data, dtype=None, index=None, **kwargs):
+    def __init__(self, data, dtype=None, name=None):
         """
-        Internal initialization. Tests below are for internal visibility only.
+        Initialize a new GrizzlySeries.
 
         >>> x = GrizzlySeries([1,2,3])
         >>> x
@@ -242,64 +240,46 @@ def __new__(cls, data, dtype=None, index=None, **kwargs):
         1    2
         2    3
         dtype: int64
-        >>> x.__class__
-        <class 'weld.grizzly.core.series.GrizzlySeries'>
-        >>> x = GrizzlySeries(np.ones(5))
-        >>> x.__class__
-        <class 'weld.grizzly.core.series.GrizzlySeries'>
-        >>> y = GrizzlySeries(['hi', 'bye'])
-        >>> y.__class__
-        <class 'weld.grizzly.core.series.GrizzlySeries'>
-        >>> y = GrizzlySeries([1, 2, 3], index=[1, 0, 2]) # Unsupported
-        >>> y.__class__
-        <class 'pandas.core.series.Series'>
+
         """
+
         s = None
         if isinstance(data, WeldLazy):
-            self = super(GrizzlySeries, cls).__new__(cls)
-            super(GrizzlySeries, self).__init__(np.array([], dtype=dtype), **kwargs)
-            self.weld_value_ = data
-            return self
-
-        if index is not None and not isinstance(index, pd.RangeIndex):
-            # TODO(shoumik): This is probably incomplete, since we could have a
-            # RangeIndex that does not capture the full span of the data, has a
-            # non-zero step, etc.
-            return pd.Series(data, dtype=dtype, index=index, **kwargs)
-
-        if len(kwargs) != 0:
-            # Unsupported arguments present: bail for now.
-            return pd.Series(data, dtype=dtype, index=index, **kwargs)
+            self.name = name
+            self.values_ = None
+            self.weld_value = data
+            return
 
+        self.name = name
         if isinstance(data, list) and len(data) > 0 and isinstance(data[0], str):
             # Try to convert a list of strings into a supported Numpy array.
-            data = np.array(data, dtype='S')
-
-        if isinstance(data, pd.Series):
-            data = data.values
+            self.values_ = np.array(data, dtype='S')
+        elif isinstance(data, pd.Series):
+            if self.name is None:
+                self.name = data.name
+            if data.values.dtype == 'object' and len(data) > 0 and isinstance(data[0], str):
+                self.values_ = np.array(data, dtype='S')
+            else:
+                self.values_ = data.values
         elif not isinstance(data, np.ndarray):
-            # First, convert the input into a Series backed by an ndarray.
-             s = pd.Series(data, dtype=dtype, index=index, **kwargs)
-             data = s.values
+            # First, convert the input into a Numpy array.
+            self.values_ = np.array(data, dtype=dtype)
+        else:
+            self.values_ = data
 
         # Try to create a Weld type for the input.
-        weld_type = GrizzlySeries._supports_grizzly(data)
-        if weld_type is not None:
-            self = super(GrizzlySeries, cls).__new__(cls)
-            super(GrizzlySeries, self).__init__(data, dtype=dtype, **kwargs)
-            self.weld_value_ = identity(
-                    PhysicalValue(data, weld_type, GrizzlySeries._encoder),
+        weld_type = GrizzlySeries.supported(self.values_)
+        if weld_type:
+            self.weld_value = identity(
+                    PhysicalValue(self.values_, weld_type, GrizzlySeries._encoder),
                     GrizzlySeries._decoder)
-            return self
-
-        # Don't re-convert values if we did it once already -- it's expensive.
-        return s if s is not None else pd.Series(data, dtype=dtype, index=index, **kwargs)
+        else:
+            raise GrizzlyError("unsupported data type '{}'".format(self.values_.dtype))
 
     # ---------------------- StringMethods ------------------------------
 
     @property
     def str(self):
-        # TODO(shoumik.palkar): Use pandas.core.accessor.CachedAccessor?
         return StringMethods(self)
 
     # ---------------------- Aggregation ------------------------------
@@ -337,9 +317,9 @@ def agg(self, funcs):
             # Result is a primitive, so we can use the default primitive decoder.
             decoder = None
 
-        result_weld_value = weldagg.agg(self.weld_value_, self.elem_type, funcs)(output_type, decoder)
+        result_weld_value = weldagg.agg(self.weld_value, self.elem_type, funcs)(output_type, decoder)
         if decoder is not None:
-            return GrizzlySeries(result_weld_value, dtype=wenp.weld_type_to_dtype(output_elem_type))
+            return GrizzlySeries(result_weld_value, dtype=wenp.weld_type_to_dtype(output_elem_type), name=self.name)
         else:
             # TODO(shoumik.palkar): Do we want to evaluate here? For now, we will, but eventually it may
             # be advantageous to have a lazy scalar value that can be used elsewhere.
@@ -536,7 +516,7 @@ def __getitem__(self, key):
 
         """
         # If the key is a scalar
-        scalar_key = GrizzlySeries._scalar_ty(key, I64())
+        scalar_key = GrizzlySeries.scalar_ty(key, I64())
         if isinstance(scalar_key, I64):
             self.evaluate()
             return self.values[key]
@@ -552,7 +532,7 @@ def normalize_slice_arg(arg, default=None):
                     arg = default
                 else:
                     raise GrizzlyError("slice got 'None' when value where expected")
-            arg_ty = GrizzlySeries._scalar_ty(arg, I64())
+            arg_ty = GrizzlySeries.scalar_ty(arg, I64())
             if not isinstance(arg_ty, I64):
                 raise GrizzlyError("slices in __getitem__() must be integers")
             return int(arg)
@@ -565,8 +545,8 @@ def normalize_slice_arg(arg, default=None):
             start = normalize_slice_arg(key.start, default=0)
             stop = normalize_slice_arg(key.stop, default=None)
             # Weld's slice operator takes a size instead of a stopping index.
-            lazy = slice_expr(self.weld_value_, start, stop - start)(self.output_type, GrizzlySeries._decoder)
-            return GrizzlySeries(lazy, dtype=self.dtype)
+            lazy = slice_expr(self.weld_value, start, stop - start)(self.output_type, GrizzlySeries._decoder)
+            return GrizzlySeries(lazy, dtype=self.dtype, name=self.name)
 
         if not isinstance(key, GrizzlySeries):
             raise GrizzlyError("array-like key in __getitem__ must be a GrizzlySeries.")
@@ -599,47 +579,37 @@ def mask(self, bool_mask, other=None):
         """
         assert other is None
         if not isinstance(bool_mask, GrizzlySeries) or\
-                not isinstance(bool_mask.output_type.elem_type, Bool):
+                not isinstance(bool_mask.elem_type, Bool):
                     raise GrizzlyError("bool_mask must be a GrizzlySeries with dtype=bool")
 
-        lazy = mask(self.weld_value_, self.output_type.elem_type, bool_mask.weld_value_)(self.output_type, GrizzlySeries._decoder)
-        return GrizzlySeries(lazy, dtype=self.dtype)
+        lazy = mask(self.weld_value, self.elem_type, bool_mask.weld_value)(self.output_type, GrizzlySeries._decoder)
+        return GrizzlySeries(lazy, dtype=self.dtype, name=self.name)
 
     # ---------------------- Operators ------------------------------
 
-    @classmethod
-    def _scalar_ty(cls, value, cast_ty):
+    def _arithmetic_binop_impl(self, other, op, truediv=False, weld_elem_type=None):
         """
-        Returns the scalar Weld type of a scalar Python value. If the value is not a scalar,
-        returns None. For primitive 'int' values, returns 'cast_ty'.
-
-        This returns 'None' if the value type is not supported.
+        Performs the operation on two `Series` elementwise.
 
         Parameters
         ----------
-        value : any
-            value whose dtype to obtain.
+        other: scalar or GrizzlySeries
+            the RHS operand
+        op: str
+            the operator to apply.
+        truediv: bool, optional
+            is this a truediv?
+        weld_elem_type: WeldType or None
+            the element type produced by this operation. None
+            means it is inferred based on Numpy's type conversion rules.
 
         Returns
         -------
-        np.dtype
+        GrizzlySeries
 
         """
-        if hasattr(value, 'dtype') and hasattr(value, 'shape') and value.shape == ():
-            return wenp.dtype_to_weld_type(value.dtype)
-        if isinstance(value, int):
-            return cast_ty
-        if isinstance(value, float):
-            return F64()
-        if isinstance(value, bool):
-            return Bool()
-
-    def _arithmetic_binop_impl(self, other, op, truediv=False, weld_elem_type=None):
-        """
-        Performs the operation on two `Series` elementwise.
-        """
-        left_ty = self.output_type.elem_type
-        scalar_ty = GrizzlySeries._scalar_ty(other, left_ty)
+        left_ty = self.elem_type
+        scalar_ty = GrizzlySeries.scalar_ty(other, left_ty)
         if scalar_ty is not None:
             # Inline scalars directly instead of adding them
             # as dependencies.
@@ -650,19 +620,28 @@ def _arithmetic_binop_impl(self, other, op, truediv=False, weld_elem_type=None):
             # GrizzlySeries.
             if not isinstance(other, GrizzlySeries):
                 raise GrizzlyError("RHS of binary operator must be a GrizzlySeries")
-            right_ty = other.output_type.elem_type
-            rightval = other.weld_value_
+            right_ty = other.elem_type
+            rightval = other.weld_value
+
+        is_string = isinstance(left_ty, WeldVec)
+        if op != "==" and op != "!=" and is_string:
+            raise TypeError("Unsupported operand type(s) for '{}': {} and {}".format(op, left_ty, right_ty))
+
+        # Don't cast if we're dealing with strings.
+        if is_string:
+            cast_type = ""
+        else:
+            cast_type = wenp.binop_output_type(left_ty, right_ty, truediv)
 
-        cast_type = wenp.binop_output_type(left_ty, right_ty, truediv)
         output_type = cast_type if weld_elem_type is None else weld_elem_type
         lazy = binary_map(op,
                 left_type=str(left_ty),
                 right_type=str(right_ty),
-                leftval=self.weld_value_,
+                leftval=self.weld_value,
                 rightval=rightval,
                 scalararg=scalar_ty is not None,
                 cast_type=cast_type)(WeldVec(output_type), GrizzlySeries._decoder)
-        return GrizzlySeries(lazy, dtype=wenp.weld_type_to_dtype(output_type))
+        return GrizzlySeries(lazy, dtype=wenp.weld_type_to_dtype(output_type), name=self.name)
 
     def _compare_binop_impl(self, other, op):
         """
@@ -671,39 +650,211 @@ def _compare_binop_impl(self, other, op):
         return self._arithmetic_binop_impl(other, op, weld_elem_type=Bool())
 
     def add(self, other):
+        """
+        Performs an element-wise addition.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.add(10).evaluate()
+        0    20
+        1    30
+        2    40
+        dtype: int64
+
+        """
         return self._arithmetic_binop_impl(other, '+')
 
     def sub(self, other):
+        """
+        Performs an element-wise subtraction.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.sub(10).evaluate()
+        0     0
+        1    10
+        2    20
+        dtype: int64
+
+        """
         return self._arithmetic_binop_impl(other, '-')
 
     def mod(self, other):
+        """
+        Performs an element-wise modulo.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 25, 31])
+        >>> s.mod(10).evaluate()
+        0    0
+        1    5
+        2    1
+        dtype: int64
+
+        """
         return self._arithmetic_binop_impl(other, '%')
 
     def mul(self, other):
+        """
+        Performs an element-wise multiplication.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 25, 31])
+        >>> s.mul(10).evaluate()
+        0    100
+        1    250
+        2    310
+        dtype: int64
+
+        """
         return self._arithmetic_binop_impl(other, '*')
 
     def truediv(self, other):
+        """
+        Performs an element-wise "true" division.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.truediv(10).evaluate()
+        0    1.0
+        1    2.0
+        2    3.0
+        dtype: float64
+
+        """
         return self._arithmetic_binop_impl(other, '/', truediv=True)
 
     def divide(self, other):
+        """
+        Performs an element-wise "true" division.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.divide(10).evaluate()
+        0    1.0
+        1    2.0
+        2    3.0
+        dtype: float64
+
+        """
         return self.truediv(other)
 
     def div(self, other):
+        """
+        Performs an element-wise "true" division.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.div(10).evaluate()
+        0    1.0
+        1    2.0
+        2    3.0
+        dtype: float64
+
+        """
         return self.truediv(other)
 
     def eq(self, other):
+        """
+        Performs an element-wise equality comparison.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.eq(10).evaluate()
+        0     True
+        1    False
+        2    False
+        dtype: bool
+
+        """
         return self._compare_binop_impl(other, '==')
 
+    def ne(self, other):
+        """
+        Performs an element-wise not-equal comparison.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.ne(10).evaluate()
+        0    False
+        1     True
+        2     True
+        dtype: bool
+
+        """
+        return self._compare_binop_impl(other, '!=')
+
     def ge(self, other):
+        """
+        Performs an element-wise '>=' comparison.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.ge(20).evaluate()
+        0    False
+        1     True
+        2     True
+        dtype: bool
+
+        """
         return self._compare_binop_impl(other, '>=')
 
     def gt(self, other):
+        """
+        Performs an element-wise '>' comparison.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.gt(20).evaluate()
+        0    False
+        1    False
+        2     True
+        dtype: bool
+
+        """
         return self._compare_binop_impl(other, '>')
 
     def le(self, other):
+        """
+        Performs an element-wise '<=' comparison.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.le(20).evaluate()
+        0     True
+        1     True
+        2    False
+        dtype: bool
+
+        """
         return self._compare_binop_impl(other, '<=')
 
     def lt(self, other):
+        """
+        Performs an element-wise '<' comparison.
+
+        Examples
+        --------
+        >>> s = GrizzlySeries([10, 20, 30])
+        >>> s.lt(20).evaluate()
+        0     True
+        1    False
+        2    False
+        dtype: bool
+
+        """
         return self._compare_binop_impl(other, '<')
 
     def __add__(self, other):
@@ -727,6 +878,9 @@ def __mod__(self, other):
     def __eq__(self, other):
         return self.eq(other)
 
+    def __ne__(self, other):
+        return self.ne(other)
+
     def __ge__(self, other):
         return self.ge(other)
 
@@ -742,70 +896,12 @@ def __lt__(self, other):
     def __str__(self):
         if self.is_value:
             # This is guaranteed to be 0-copy.
-            return str(self.to_pandas())
-        return "GrizzlySeries({}, dtype: {}, deps: [{}])".format(
-                self.weld_value_.expression,
-                str(wenp.weld_type_to_dtype(self.output_type.elem_type)),
+            return str(self.to_pandas(copy=False))
+        return "GrizzlySeries({}, name: {}, dtype: {}, deps: [{}])".format(
+                self.weld_value.expression,
+                self.name,
+                str(self.dtype),
                 ", ".join([str(child.id) for child in self.children]))
 
     def __repr__(self):
         return str(self)
-
-    # ---------------------- Method forwarding setup ------------------------------
-
-    @classmethod
-    def _get_class_that_defined_method(cls, meth):
-        """
-        Returns the class that defines the requested method. For methods that are
-        defined outside of a particular set of Grizzly-defined classes, Grizzly will
-        first evaluate lazy results before forwarding the data to `pandas.Series`.
-        """
-        if inspect.ismethod(meth):
-            for cls in inspect.getmro(meth.__self__.__class__):
-                if cls.__dict__.get(meth.__name__) is meth:
-                    return cls
-        if inspect.isfunction(meth):
-            return getattr(inspect.getmodule(meth),
-                           meth.__qualname__.split('.<locals>', 1)[0].rsplit('.', 1)[0])
-
-    @classmethod
-    def _requires_forwarding(cls, meth):
-        defined_in = cls._get_class_that_defined_method(meth)
-        if defined_in is not None and defined_in is not cls:
-            return True
-        else:
-            return False
-
-    @classmethod
-    def forward(cls):
-        from functools import wraps
-        def forward_decorator(func):
-            @wraps(func)
-            def forwarding_wrapper(self, *args, **kwargs):
-                self.evaluate()
-                result = func(self, *args, **kwargs)
-                # Unsupported functions will return Series -- try to
-                # switch back to GrizzlySeries.
-                if not isinstance(result, GrizzlySeries) and isinstance(result, pd.Series):
-                    try_convert = GrizzlySeries(data=result.values, index=result.index)
-                    if not isinstance(try_convert, GrizzlySeries):
-                        warnings.warn("Unsupported operation '{}' produced unsupported Series: falling back to Pandas".format(
-                            func.__name__))
-                    return try_convert
-                return result
-            return forwarding_wrapper
-        return forward_decorator
-
-    @classmethod
-    def _add_forwarding_methods(cls):
-        methods = dir(cls)
-        for meth in methods:
-            if meth.startswith("_"):
-                # We only want to do this for API methods.
-                continue
-            attr = getattr(cls, meth)
-            if cls._requires_forwarding(attr):
-                setattr(cls, meth, cls.forward()(attr))
-
-# Wrap public API methods for forwarding
-GrizzlySeries._add_forwarding_methods()
diff --git a/weld-python/weld/grizzly/weld/ops.py b/weld-python/weld/grizzly/weld/ops.py
index 70e3b0bec..ae31686fd 100644
--- a/weld-python/weld/grizzly/weld/ops.py
+++ b/weld-python/weld/grizzly/weld/ops.py
@@ -35,6 +35,22 @@ def _binary_apply(op, leftval, rightval, cast_type, infix=True):
         return "{op}({cast_type}({leftval}), {cast_type}({rightval}))".format(
                 op=op, leftval=leftval, rightval=rightval, cast_type=cast_type)
 
+@weld.lazy.weldfunc
+def make_struct(*args):
+    """
+    Constructs a struct with the provided args.
+
+    Examples
+    --------
+    >>> make_struct("weldlazy1", "2", "3").code
+    '{weldlazy1, 2, 3}'
+    >>> make_struct("weldlazy1").code
+    '{weldlazy1}'
+
+    """
+    assert len(args) > 0
+    return "{" + ", ".join(args) + "}"
+
 @weld.lazy.weldfunc
 def unary_map(op, ty, value):
     """
diff --git a/weld-python/weld/types.py b/weld-python/weld/types.py
index 1bd29dc18..5ae6ca691 100644
--- a/weld-python/weld/types.py
+++ b/weld-python/weld/types.py
@@ -22,6 +22,9 @@ def __str__(self):
         """
         pass
 
+    def __repr__(self):
+        return str(self)
+
     def __eq__(self, other):
         return str(self) == str(other)