Skip to content

Commit

Permalink
Use Ecto.Enum for GTFS enum fields; convert corresponding CSV fields …
Browse files Browse the repository at this point in the history
…to int before casting
  • Loading branch information
jzimbel-mbta committed Oct 1, 2024
1 parent 9d355d2 commit 119887a
Show file tree
Hide file tree
Showing 14 changed files with 139 additions and 290 deletions.
4 changes: 3 additions & 1 deletion lib/arrow/gtfs.ex
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ defmodule Arrow.Gtfs do
end
rescue
error ->
Logger.warn("GTFS import failure message=#{Exception.message(error)}")
message = Exception.format(:error, error, __STACKTRACE__)
Logger.warn("GTFS import failure:")
Logger.warn(message)
:error
end
end
Expand Down
6 changes: 4 additions & 2 deletions lib/arrow/gtfs/calendar.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@ defmodule Arrow.Gtfs.Calendar do
field day, :boolean
end

field :start_date, Arrow.Gtfs.Types.Date
field :end_date, Arrow.Gtfs.Types.Date
field :start_date, :date
field :end_date, :date
end

def changeset(calendar, attrs) do
attrs = values_to_iso8601_datestamp(attrs, ~w[start_date end_date])

calendar
|> cast(
attrs,
Expand Down
9 changes: 7 additions & 2 deletions lib/arrow/gtfs/calendar_date.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ defmodule Arrow.Gtfs.CalendarDate do

schema "gtfs_calendar_dates" do
belongs_to :service, Arrow.Gtfs.Service, primary_key: true
field :date, Arrow.Gtfs.Types.Date, primary_key: true
field :exception_type, Arrow.Gtfs.Types.Enum, values: [added: 1, removed: 2]
field :date, :date, primary_key: true
field :exception_type, Ecto.Enum, values: [added: 1, removed: 2]
field :holiday_name, :string
end

def changeset(calendar_date, attrs) do
attrs =
attrs
|> values_to_iso8601_datestamp(~w[date])
|> values_to_int(~w[exception_type])

calendar_date
|> cast(attrs, ~w[service_id date exception_type holiday_name]a)
|> validate_required(~w[service_id date exception_type]a)
Expand Down
6 changes: 1 addition & 5 deletions lib/arrow/gtfs/direction.ex
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,7 @@ defmodule Arrow.Gtfs.Direction do
# Taking liberties:
# `direction` is inconsistently named--the human-readable name is
# "#{table}_desc" in all other tables.
|> Map.pop("direction")
|> then(fn
{nil, attrs} -> attrs
{desc, attrs} -> Map.put(attrs, "desc", desc)
end)
|> rename_key("direction", "desc")
|> remove_table_prefix("direction", except: ["direction_id"])

direction
Expand Down
9 changes: 6 additions & 3 deletions lib/arrow/gtfs/feed_info.ex
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ defmodule Arrow.Gtfs.FeedInfo do
field :publisher_name, :string
field :publisher_url, :string
field :lang, :string
field :start_date, Arrow.Gtfs.Types.Date
field :end_date, Arrow.Gtfs.Types.Date
field :start_date, :date
field :end_date, :date
field :version, :string
field :contact_email, :string
end

def changeset(feed_info, attrs) do
attrs = remove_table_prefix(attrs, "feed")
attrs =
attrs
|> remove_table_prefix("feed")
|> values_to_iso8601_datestamp(~w[start_date end_date])

feed_info
|> cast(
Expand Down
68 changes: 68 additions & 0 deletions lib/arrow/gtfs/import_helper.ex
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,74 @@ defmodule Arrow.Gtfs.ImportHelper do
end
end

@doc """
Renames the given `old_key` in `map` to `new_key`, if it exists.
Otherwise, returns the map unchanged.

The key is renamed whenever it is present, even if its value is `nil`.
If `new_key` is already present in `map`, its value is overwritten.

    iex> rename_key(%{foo: 5}, :foo, :bar)
    %{bar: 5}
    iex> rename_key(%{baz: 6}, :foo, :bar)
    %{baz: 6}
    iex> rename_key(%{foo: nil}, :foo, :bar)
    %{bar: nil}
"""
@spec rename_key(map, term, term) :: map
def rename_key(map, old_key, new_key) do
  # Branch on key *presence*, not on the popped value: `Map.pop/2` yields
  # `nil` both for an absent key and for a key whose value is `nil`, which
  # would silently drop the latter instead of renaming it.
  case Map.fetch(map, old_key) do
    {:ok, value} ->
      map
      |> Map.delete(old_key)
      |> Map.put(new_key, value)

    :error ->
      map
  end
end

@doc """
Converts the numeric-string values stored under `keys` in `map` to integers
using `String.to_integer/1`.

CSV fields backing `Ecto.Enum`-typed schema fields arrive as numeric strings,
while `Ecto.Enum.cast/2` expects either integer or (textual) string values, so
these fields need converting before the attrs are cast.

Keys that are absent from `map` are ignored, and empty-string values are left
as they are. Any other non-numeric string raises, as `String.to_integer/1` does.

    iex> values_to_int(%{"route_type" => "1", "other" => "value"}, ["route_type"])
    %{"route_type" => 1, "other" => "value"}

    iex> values_to_int(%{"route_type" => "1", "other" => "value"}, ["route_type", "exception_type"])
    %{"route_type" => 1, "other" => "value"}

    iex> values_to_int(%{"maybe_empty" => ""}, ["maybe_empty"])
    %{"maybe_empty" => ""}
"""
@spec values_to_int(map, Enumerable.t(term)) :: map
def values_to_int(map, keys) do
  for key <- keys, reduce: map do
    acc -> Map.replace_lazy(acc, key, &parse_int_or_blank/1)
  end
end

# Leaves a blank CSV cell untouched; parses any other non-empty value as an integer.
defp parse_int_or_blank(""), do: ""
defp parse_int_or_blank(raw) when byte_size(raw) > 0, do: String.to_integer(raw)

@doc """
Rewrites the GTFS datestamps (`YYYYMMDD`) stored under `keys` in `map` as
ISO8601 dates (`YYYY-MM-DD`).

CSV fields backing `:date`-typed schema fields need this preprocessing because
Ecto's date type expects incoming strings to be in ISO8601 format.

Keys that are absent from `map` are ignored, and empty-string values are left
as they are.

    iex> map = %{"start_date" => "20240925", "end_date" => "20240926", "blind_date" => "", "other" => "value"}
    iex> values_to_iso8601_datestamp(map, ~w[start_date end_date blind_date double_date])
    %{"start_date" => "2024-09-25", "end_date" => "2024-09-26", "blind_date" => "", "other" => "value"}
"""
@spec values_to_iso8601_datestamp(map, Enumerable.t(term)) :: map
def values_to_iso8601_datestamp(map, keys) do
  Enum.reduce(keys, map, fn key, acc ->
    Map.replace_lazy(acc, key, &hyphenate_datestamp/1)
  end)
end

# Leaves a blank CSV cell untouched; splits an 8-byte datestamp into
# hyphen-separated year, month and day parts.
defp hyphenate_datestamp(""), do: ""

defp hyphenate_datestamp(<<year::binary-size(4), month::binary-size(2), day::binary-size(2)>>) do
  year <> "-" <> month <> "-" <> day
end

@doc """
Strips metadata and association fields from an Ecto.Schema-defined struct, so
that it contains only the fields corresponding to its source table's columns.
Expand Down
12 changes: 8 additions & 4 deletions lib/arrow/gtfs/route.ex
Original file line number Diff line number Diff line change
Expand Up @@ -25,30 +25,34 @@ defmodule Arrow.Gtfs.Route do
network_id: String.t()
}

@route_type_values Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a)

schema "gtfs_routes" do
belongs_to :agency, Arrow.Gtfs.Agency
field :short_name, :string
field :long_name, :string
field :desc, :string

field :type, Arrow.Gtfs.Types.Enum,
values: Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a)
field :type, Ecto.Enum, values: @route_type_values

field :url, :string
field :color, :string
field :text_color, :string
field :sort_order, :integer
field :fare_class, :string
belongs_to :line, Arrow.Gtfs.Line
field :listed_route, Arrow.Gtfs.Types.Enum, values: Enum.with_index(~w[Included Excluded]a)
field :listed_route, Ecto.Enum, values: Enum.with_index(~w[Included Excluded]a)
field :network_id, :string

has_many :directions, Arrow.Gtfs.Direction
has_many :trips, Arrow.Gtfs.Trip
end

def changeset(route, attrs) do
attrs = remove_table_prefix(attrs, "route")
attrs =
attrs
|> remove_table_prefix("route")
|> values_to_int(~w[type listed_route])

route
|> cast(
Expand Down
16 changes: 5 additions & 11 deletions lib/arrow/gtfs/route_pattern.ex
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,19 @@ defmodule Arrow.Gtfs.RoutePattern do
has_many :directions, through: [:route, :directions]
field :name, :string
field :time_desc, :string
field :typicality, Arrow.Gtfs.Types.Enum, values: @typicality_values
field :typicality, Ecto.Enum, values: @typicality_values
field :sort_order, :integer
belongs_to :representative_trip, Arrow.Gtfs.Trip
has_many :trips, Arrow.Gtfs.Trip
field :canonical, Arrow.Gtfs.Types.Enum, values: @canonicality_values
field :canonical, Ecto.Enum, values: @canonicality_values
end

def changeset(route_pattern, attrs) do
attrs =
attrs
|> remove_table_prefix("route_pattern")
|> Map.pop("canonical_route_pattern")
|> then(fn
{nil, attrs} -> attrs
{canonical, attrs} -> Map.put(attrs, "canonical", canonical)
end)
|> rename_key("canonical_route_pattern", "canonical")
|> values_to_int(~w[typicality canonical])

route_pattern
|> cast(
Expand All @@ -69,10 +66,7 @@ defmodule Arrow.Gtfs.RoutePattern do
~w[id route_id direction_id name typicality sort_order representative_trip_id canonical]a
)
|> assoc_constraint(:route)

# No assoc_constraint for representative_trip_id because the relationship
# is circular and we populate this table before gtfs_trips.
# (DB has a deferred FK constraint for representative_trip_id, though)
|> assoc_constraint(:representative_trip)
end

@impl Arrow.Gtfs.Importable
Expand Down
4 changes: 3 additions & 1 deletion lib/arrow/gtfs/service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ defmodule Arrow.Gtfs.Service do
end

def changeset(service, attrs) do
cast(service, attrs, [:id])
service
|> cast(attrs, [:id])
|> validate_required(~w[id]a)
end

@impl Arrow.Gtfs.Importable
Expand Down
39 changes: 16 additions & 23 deletions lib/arrow/gtfs/stop.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ defmodule Arrow.Gtfs.Stop do
times: list(Arrow.Gtfs.StopTime.t()) | Ecto.Association.NotLoaded.t()
}

@location_type_values Enum.with_index(
~w[stop_platform parent_station entrance_exit generic_node boarding_area]a
)

@wheelchair_boarding_values Enum.with_index(
~w[no_info_inherit_from_parent accessible not_accessible]a
)

@vehicle_type_values Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a)

schema "gtfs_stops" do
field :code, :string
field :name, :string
Expand All @@ -43,40 +53,23 @@ defmodule Arrow.Gtfs.Stop do
field :address, :string
field :url, :string
belongs_to :level, Arrow.Gtfs.Level

field :location_type, Arrow.Gtfs.Types.Enum,
values:
Enum.with_index(
~w[stop_platform parent_station entrance_exit generic_node boarding_area]a
)

field :location_type, Ecto.Enum, values: @location_type_values
belongs_to :parent_station, Arrow.Gtfs.Stop

field :wheelchair_boarding,
Arrow.Gtfs.Types.Enum,
values: Enum.with_index(~w[no_info_inherit_from_parent accessible not_accessible]a)

field :wheelchair_boarding, Ecto.Enum, values: @wheelchair_boarding_values
field :municipality, :string
field :on_street, :string
field :at_street, :string

field :vehicle_type, Arrow.Gtfs.Types.Enum,
values: Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a)

field :vehicle_type, Ecto.Enum, values: @vehicle_type_values
has_many :times, Arrow.Gtfs.StopTime
end

def changeset(stop, attrs) do
attrs =
attrs
|> remove_table_prefix("stop")
# `parent_station` is inconsistently named--this changes the key to
# `parent_station_id` if it's set. (Which it should be!)
|> Map.pop("parent_station")
|> then(fn
{nil, attrs} -> attrs
{parent_station_id, attrs} -> Map.put(attrs, "parent_station_id", parent_station_id)
end)
# `parent_station` is inconsistently named--this changes the key to `parent_station_id`.
|> rename_key("parent_station", "parent_station_id")
|> values_to_int(~w[location_type wheelchair_boarding vehicle_type])

stop
|> cast(
Expand Down
16 changes: 11 additions & 5 deletions lib/arrow/gtfs/stop_time.ex
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,21 @@ defmodule Arrow.Gtfs.StopTime do

belongs_to :stop, Arrow.Gtfs.Stop
field :stop_headsign, :string
field :pickup_type, Arrow.Gtfs.Types.Enum, values: @pickup_drop_off_types
field :drop_off_type, Arrow.Gtfs.Types.Enum, values: @pickup_drop_off_types
field :timepoint, Arrow.Gtfs.Types.Enum, values: Enum.with_index(~w[approximate exact]a)
field :pickup_type, Ecto.Enum, values: @pickup_drop_off_types
field :drop_off_type, Ecto.Enum, values: @pickup_drop_off_types
field :timepoint, Ecto.Enum, values: Enum.with_index(~w[approximate exact]a)
belongs_to :checkpoint, Arrow.Gtfs.Checkpoint
field :continuous_pickup, Arrow.Gtfs.Types.Enum, values: @continuous_pickup_drop_off_types
field :continuous_drop_off, Arrow.Gtfs.Types.Enum, values: @continuous_pickup_drop_off_types
field :continuous_pickup, Ecto.Enum, values: @continuous_pickup_drop_off_types
field :continuous_drop_off, Ecto.Enum, values: @continuous_pickup_drop_off_types
end

def changeset(stop_time, attrs) do
attrs =
values_to_int(
attrs,
~w[pickup_type drop_off_type timepoint continuous_pickup continuous_drop_off]
)

stop_time
|> cast(
attrs,
Expand Down
11 changes: 7 additions & 4 deletions lib/arrow/gtfs/trip.ex
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,22 @@ defmodule Arrow.Gtfs.Trip do
field :block_id, :string
belongs_to :shape, Arrow.Gtfs.Shape
has_many :shape_points, through: [:shape, :points]
field :wheelchair_accessible, Arrow.Gtfs.Types.Enum, values: @wheelchair_accessibility_values
field :route_type, Arrow.Gtfs.Types.Enum, values: @route_type_values
field :wheelchair_accessible, Ecto.Enum, values: @wheelchair_accessibility_values
field :route_type, Ecto.Enum, values: @route_type_values
belongs_to :route_pattern, Arrow.Gtfs.RoutePattern

has_one :representing_route_pattern, Arrow.Gtfs.RoutePattern,
foreign_key: :representative_trip_id

field :bikes_allowed, Arrow.Gtfs.Types.Enum, values: @bike_boarding_values
field :bikes_allowed, Ecto.Enum, values: @bike_boarding_values
has_many :stop_times, Arrow.Gtfs.StopTime, preload_order: [:stop_sequence]
end

def changeset(trip, attrs) do
attrs = remove_table_prefix(attrs, "trip")
attrs =
attrs
|> remove_table_prefix("trip")
|> values_to_int(~w[wheelchair_accessible route_type bikes_allowed])

trip
|> cast(
Expand Down
34 changes: 0 additions & 34 deletions lib/arrow/gtfs/types/date.ex

This file was deleted.

Loading

0 comments on commit 119887a

Please sign in to comment.