diff --git a/lib/arrow/gtfs.ex b/lib/arrow/gtfs.ex index 47ab1500..7df15c21 100644 --- a/lib/arrow/gtfs.ex +++ b/lib/arrow/gtfs.ex @@ -49,7 +49,9 @@ defmodule Arrow.Gtfs do end rescue error -> - Logger.warn("GTFS import failure message=#{Exception.message(error)}") + message = Exception.format(:error, error, __STACKTRACE__) + Logger.warn("GTFS import failure:") + Logger.warn(message) :error end end diff --git a/lib/arrow/gtfs/calendar.ex b/lib/arrow/gtfs/calendar.ex index a463fe0e..0079a7a9 100644 --- a/lib/arrow/gtfs/calendar.ex +++ b/lib/arrow/gtfs/calendar.ex @@ -30,11 +30,13 @@ defmodule Arrow.Gtfs.Calendar do field day, :boolean end - field :start_date, Arrow.Gtfs.Types.Date - field :end_date, Arrow.Gtfs.Types.Date + field :start_date, :date + field :end_date, :date end def changeset(calendar, attrs) do + attrs = values_to_iso8601_datestamp(attrs, ~w[start_date end_date]) + calendar |> cast( attrs, diff --git a/lib/arrow/gtfs/calendar_date.ex b/lib/arrow/gtfs/calendar_date.ex index 1b14e235..cd8aa6f1 100644 --- a/lib/arrow/gtfs/calendar_date.ex +++ b/lib/arrow/gtfs/calendar_date.ex @@ -19,12 +19,17 @@ defmodule Arrow.Gtfs.CalendarDate do schema "gtfs_calendar_dates" do belongs_to :service, Arrow.Gtfs.Service, primary_key: true - field :date, Arrow.Gtfs.Types.Date, primary_key: true - field :exception_type, Arrow.Gtfs.Types.Enum, values: [added: 1, removed: 2] + field :date, :date, primary_key: true + field :exception_type, Ecto.Enum, values: [added: 1, removed: 2] field :holiday_name, :string end def changeset(calendar_date, attrs) do + attrs = + attrs + |> values_to_iso8601_datestamp(~w[date]) + |> values_to_int(~w[exception_type]) + calendar_date |> cast(attrs, ~w[service_id date exception_type holiday_name]a) |> validate_required(~w[service_id date exception_type]a) diff --git a/lib/arrow/gtfs/direction.ex b/lib/arrow/gtfs/direction.ex index d838d7b7..d9513619 100644 --- a/lib/arrow/gtfs/direction.ex +++ b/lib/arrow/gtfs/direction.ex @@ -30,11 +30,7 @@ 
defmodule Arrow.Gtfs.Direction do # Taking liberties: # `direction` is inconsistently named--the human-readable name is # "#{table}_desc" in all other tables. - |> Map.pop("direction") - |> then(fn - {nil, attrs} -> attrs - {desc, attrs} -> Map.put(attrs, "desc", desc) - end) + |> rename_key("direction", "desc") |> remove_table_prefix("direction", except: ["direction_id"]) direction diff --git a/lib/arrow/gtfs/feed_info.ex b/lib/arrow/gtfs/feed_info.ex index 04450ddd..477cbbf9 100644 --- a/lib/arrow/gtfs/feed_info.ex +++ b/lib/arrow/gtfs/feed_info.ex @@ -23,14 +23,17 @@ defmodule Arrow.Gtfs.FeedInfo do field :publisher_name, :string field :publisher_url, :string field :lang, :string - field :start_date, Arrow.Gtfs.Types.Date - field :end_date, Arrow.Gtfs.Types.Date + field :start_date, :date + field :end_date, :date field :version, :string field :contact_email, :string end def changeset(feed_info, attrs) do - attrs = remove_table_prefix(attrs, "feed") + attrs = + attrs + |> remove_table_prefix("feed") + |> values_to_iso8601_datestamp(~w[start_date end_date]) feed_info |> cast( diff --git a/lib/arrow/gtfs/import_helper.ex b/lib/arrow/gtfs/import_helper.ex index 5c1be2b0..30dba7b6 100644 --- a/lib/arrow/gtfs/import_helper.ex +++ b/lib/arrow/gtfs/import_helper.ex @@ -44,6 +44,74 @@ defmodule Arrow.Gtfs.ImportHelper do end end + @doc """ + Renames the given `old_key` in `map` to `new_key`, if it exists. + + Otherwise, returns the map unchanged. + + iex> rename_key(%{foo: 5}, :foo, :bar) + %{bar: 5} + + iex> rename_key(%{baz: 6}, :foo, :bar) + %{baz: 6} + """ + @spec rename_key(map, term, term) :: map + def rename_key(map, old_key, new_key) do + case Map.pop(map, old_key) do + {nil, map} -> map + {value, map} -> Map.put(map, new_key, value) + end + end + + @doc """ + Calls `String.to_integer/1` on the values of `keys` in `map`. 
+ + This is useful for preprocessing CSV fields corresponding to `Ecto.Enum`-typed schema fields-- + `Ecto.Enum.cast/2` expects either integer or (textual) string values, but the + values for these CSV fields come in as numeric strings. + + iex> values_to_int(%{"route_type" => "1", "other" => "value"}, ["route_type"]) + %{"route_type" => 1, "other" => "value"} + + iex> values_to_int(%{"route_type" => "1", "other" => "value"}, ["route_type", "exception_type"]) + %{"route_type" => 1, "other" => "value"} + + iex> values_to_int(%{"maybe_empty" => ""}, ["maybe_empty"]) + %{"maybe_empty" => ""} + """ + @spec values_to_int(map, Enumerable.t(term)) :: map + def values_to_int(map, keys) do + Enum.reduce(keys, map, fn k, m -> + Map.replace_lazy(m, k, fn + k when byte_size(k) > 0 -> String.to_integer(k) + "" -> "" + end) + end) + end + + @doc """ + Edits the GTFS-datestamp values under `keys` in `map` to be ISO8601-compliant. + + This is useful for preprocessing CSV fields corresponding to `:date`-typed schema fields-- + Ecto's date type expects incoming strings to be in ISO8601 format. + + iex> map = %{"start_date" => "20240925", "end_date" => "20240926", "blind_date" => "", "other" => "value"} + iex> values_to_iso8601_datestamp(map, ~w[start_date end_date blind_date double_date]) + %{"start_date" => "2024-09-25", "end_date" => "2024-09-26", "blind_date" => "", "other" => "value"} + """ + @spec values_to_iso8601_datestamp(map, Enumerable.t(term)) :: map + def values_to_iso8601_datestamp(map, keys) do + Enum.reduce(keys, map, fn k, m -> + Map.replace_lazy(m, k, fn + <<y::binary-size(4), m::binary-size(2), d::binary-size(2)>> -> + <<y::binary, ?-, m::binary, ?-, d::binary>> + + "" -> + "" + end) + end) + end + @doc """ Strips metadata and association fields from an Ecto.Schema-defined struct, so that it contains only the fields corresponding to its source table's columns. 
diff --git a/lib/arrow/gtfs/route.ex b/lib/arrow/gtfs/route.ex index 2f9dfd42..22ba7772 100644 --- a/lib/arrow/gtfs/route.ex +++ b/lib/arrow/gtfs/route.ex @@ -25,14 +25,15 @@ defmodule Arrow.Gtfs.Route do network_id: String.t() } + @route_type_values Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a) + schema "gtfs_routes" do belongs_to :agency, Arrow.Gtfs.Agency field :short_name, :string field :long_name, :string field :desc, :string - field :type, Arrow.Gtfs.Types.Enum, - values: Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a) + field :type, Ecto.Enum, values: @route_type_values field :url, :string field :color, :string @@ -40,7 +41,7 @@ defmodule Arrow.Gtfs.Route do field :sort_order, :integer field :fare_class, :string belongs_to :line, Arrow.Gtfs.Line - field :listed_route, Arrow.Gtfs.Types.Enum, values: Enum.with_index(~w[Included Excluded]a) + field :listed_route, Ecto.Enum, values: Enum.with_index(~w[Included Excluded]a) field :network_id, :string has_many :directions, Arrow.Gtfs.Direction @@ -48,7 +49,10 @@ defmodule Arrow.Gtfs.Route do end def changeset(route, attrs) do - attrs = remove_table_prefix(attrs, "route") + attrs = + attrs + |> remove_table_prefix("route") + |> values_to_int(~w[type listed_route]) route |> cast( diff --git a/lib/arrow/gtfs/route_pattern.ex b/lib/arrow/gtfs/route_pattern.ex index e2df92b8..c44ae493 100644 --- a/lib/arrow/gtfs/route_pattern.ex +++ b/lib/arrow/gtfs/route_pattern.ex @@ -43,22 +43,19 @@ defmodule Arrow.Gtfs.RoutePattern do has_many :directions, through: [:route, :directions] field :name, :string field :time_desc, :string - field :typicality, Arrow.Gtfs.Types.Enum, values: @typicality_values + field :typicality, Ecto.Enum, values: @typicality_values field :sort_order, :integer belongs_to :representative_trip, Arrow.Gtfs.Trip has_many :trips, Arrow.Gtfs.Trip - field :canonical, Arrow.Gtfs.Types.Enum, values: @canonicality_values + field :canonical, Ecto.Enum, values: 
@canonicality_values end def changeset(route_pattern, attrs) do attrs = attrs |> remove_table_prefix("route_pattern") - |> Map.pop("canonical_route_pattern") - |> then(fn - {nil, attrs} -> attrs - {canonical, attrs} -> Map.put(attrs, "canonical", canonical) - end) + |> rename_key("canonical_route_pattern", "canonical") + |> values_to_int(~w[typicality canonical]) route_pattern |> cast( @@ -69,10 +66,7 @@ defmodule Arrow.Gtfs.RoutePattern do ~w[id route_id direction_id name typicality sort_order representative_trip_id canonical]a ) |> assoc_constraint(:route) - - # No assoc_constraint for representative_trip_id because the relationship - # is circular and we populate this table before gtfs_trips. - # (DB has a deferred FK constraint for representative_trip_id, though) + |> assoc_constraint(:representative_trip) end @impl Arrow.Gtfs.Importable diff --git a/lib/arrow/gtfs/service.ex b/lib/arrow/gtfs/service.ex index 9a2d77fd..e50f7c45 100644 --- a/lib/arrow/gtfs/service.ex +++ b/lib/arrow/gtfs/service.ex @@ -22,7 +22,9 @@ defmodule Arrow.Gtfs.Service do end def changeset(service, attrs) do - cast(service, attrs, [:id]) + service + |> cast(attrs, [:id]) + |> validate_required(~w[id]a) end @impl Arrow.Gtfs.Importable diff --git a/lib/arrow/gtfs/stop.ex b/lib/arrow/gtfs/stop.ex index 81b73666..5bcf02b5 100644 --- a/lib/arrow/gtfs/stop.ex +++ b/lib/arrow/gtfs/stop.ex @@ -31,6 +31,16 @@ defmodule Arrow.Gtfs.Stop do times: list(Arrow.Gtfs.StopTime.t()) | Ecto.Association.NotLoaded.t() } + @location_type_values Enum.with_index( + ~w[stop_platform parent_station entrance_exit generic_node boarding_area]a + ) + + @wheelchair_boarding_values Enum.with_index( + ~w[no_info_inherit_from_parent accessible not_accessible]a + ) + + @vehicle_type_values Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a) + schema "gtfs_stops" do field :code, :string field :name, :string @@ -43,26 +53,13 @@ defmodule Arrow.Gtfs.Stop do field :address, :string field :url, :string 
belongs_to :level, Arrow.Gtfs.Level - - field :location_type, Arrow.Gtfs.Types.Enum, - values: - Enum.with_index( - ~w[stop_platform parent_station entrance_exit generic_node boarding_area]a - ) - + field :location_type, Ecto.Enum, values: @location_type_values belongs_to :parent_station, Arrow.Gtfs.Stop - - field :wheelchair_boarding, - Arrow.Gtfs.Types.Enum, - values: Enum.with_index(~w[no_info_inherit_from_parent accessible not_accessible]a) - + field :wheelchair_boarding, Ecto.Enum, values: @wheelchair_boarding_values field :municipality, :string field :on_street, :string field :at_street, :string - - field :vehicle_type, Arrow.Gtfs.Types.Enum, - values: Enum.with_index(~w[light_rail heavy_rail commuter_rail bus ferry]a) - + field :vehicle_type, Ecto.Enum, values: @vehicle_type_values has_many :times, Arrow.Gtfs.StopTime end @@ -70,13 +67,9 @@ defmodule Arrow.Gtfs.Stop do attrs = attrs |> remove_table_prefix("stop") - # `parent_station` is inconsistently named--this changes the key to - # `parent_station_id` if it's set. (Which it should be!) - |> Map.pop("parent_station") - |> then(fn - {nil, attrs} -> attrs - {parent_station_id, attrs} -> Map.put(attrs, "parent_station_id", parent_station_id) - end) + # `parent_station` is inconsistently named--this changes the key to `parent_station_id`. 
+ |> rename_key("parent_station", "parent_station_id") + |> values_to_int(~w[location_type wheelchair_boarding vehicle_type]) stop |> cast( diff --git a/lib/arrow/gtfs/stop_time.ex b/lib/arrow/gtfs/stop_time.ex index 5873aece..a994173d 100644 --- a/lib/arrow/gtfs/stop_time.ex +++ b/lib/arrow/gtfs/stop_time.ex @@ -50,15 +50,21 @@ defmodule Arrow.Gtfs.StopTime do belongs_to :stop, Arrow.Gtfs.Stop field :stop_headsign, :string - field :pickup_type, Arrow.Gtfs.Types.Enum, values: @pickup_drop_off_types - field :drop_off_type, Arrow.Gtfs.Types.Enum, values: @pickup_drop_off_types - field :timepoint, Arrow.Gtfs.Types.Enum, values: Enum.with_index(~w[approximate exact]a) + field :pickup_type, Ecto.Enum, values: @pickup_drop_off_types + field :drop_off_type, Ecto.Enum, values: @pickup_drop_off_types + field :timepoint, Ecto.Enum, values: Enum.with_index(~w[approximate exact]a) belongs_to :checkpoint, Arrow.Gtfs.Checkpoint - field :continuous_pickup, Arrow.Gtfs.Types.Enum, values: @continuous_pickup_drop_off_types - field :continuous_drop_off, Arrow.Gtfs.Types.Enum, values: @continuous_pickup_drop_off_types + field :continuous_pickup, Ecto.Enum, values: @continuous_pickup_drop_off_types + field :continuous_drop_off, Ecto.Enum, values: @continuous_pickup_drop_off_types end def changeset(stop_time, attrs) do + attrs = + values_to_int( + attrs, + ~w[pickup_type drop_off_type timepoint continuous_pickup continuous_drop_off] + ) + stop_time |> cast( attrs, diff --git a/lib/arrow/gtfs/trip.ex b/lib/arrow/gtfs/trip.ex index c65228e1..2a47d3b7 100644 --- a/lib/arrow/gtfs/trip.ex +++ b/lib/arrow/gtfs/trip.ex @@ -51,19 +51,22 @@ defmodule Arrow.Gtfs.Trip do field :block_id, :string belongs_to :shape, Arrow.Gtfs.Shape has_many :shape_points, through: [:shape, :points] - field :wheelchair_accessible, Arrow.Gtfs.Types.Enum, values: @wheelchair_accessibility_values - field :route_type, Arrow.Gtfs.Types.Enum, values: @route_type_values + field :wheelchair_accessible, Ecto.Enum, values: 
@wheelchair_accessibility_values + field :route_type, Ecto.Enum, values: @route_type_values belongs_to :route_pattern, Arrow.Gtfs.RoutePattern has_one :representing_route_pattern, Arrow.Gtfs.RoutePattern, foreign_key: :representative_trip_id - field :bikes_allowed, Arrow.Gtfs.Types.Enum, values: @bike_boarding_values + field :bikes_allowed, Ecto.Enum, values: @bike_boarding_values has_many :stop_times, Arrow.Gtfs.StopTime, preload_order: [:stop_sequence] end def changeset(trip, attrs) do - attrs = remove_table_prefix(attrs, "trip") + attrs = + attrs + |> remove_table_prefix("trip") + |> values_to_int(~w[wheelchair_accessible route_type bikes_allowed]) trip |> cast( diff --git a/lib/arrow/gtfs/types/date.ex b/lib/arrow/gtfs/types/date.ex deleted file mode 100644 index a9d949d4..00000000 --- a/lib/arrow/gtfs/types/date.ex +++ /dev/null @@ -1,34 +0,0 @@ -defmodule Arrow.Gtfs.Types.Date do - @moduledoc """ - Custom Ecto type to handle datestamps as they appear in the GTFS feed. - - e.g. "20240901" - """ - - use Ecto.Type - def type, do: :date - - def cast(<<year::binary-size(4), month::binary-size(2), day::binary-size(2)>>) do - with {year, ""} <- Integer.parse(year), - {month, ""} <- Integer.parse(month), - {day, ""} <- Integer.parse(day) do - Date.new(year, month, day) - else - _ -> :error - end - end - - def cast(_), do: :error - - def load(%Date{} = date), do: {:ok, date} - def load(_), do: :error - - def dump(%Date{} = date), do: {:ok, date} - def dump(_), do: :error - - def equal?(%Date{} = d1, %Date{} = d2) do - Date.compare(d1, d2) == :eq - end - - def equal?(_, _), do: false -end diff --git a/lib/arrow/gtfs/types/enum.ex b/lib/arrow/gtfs/types/enum.ex deleted file mode 100644 index e7d4f31a..00000000 --- a/lib/arrow/gtfs/types/enum.ex +++ /dev/null @@ -1,195 +0,0 @@ -# credo:disable-for-this-file -defmodule Arrow.Gtfs.Types.Enum do - @moduledoc """ - Ecto.Enum, except it accepts string-formatted integers when casting to an - integer-valued enum. 
- - Code is copied directly from Ecto.Enum @3.11.0, with the only - change being the expression bound to `on_cast`. - """ - - use Ecto.ParameterizedType - - @impl true - def type(params), do: params.type - - @impl true - def init(opts) do - values = opts[:values] - - {type, mappings} = - cond do - is_list(values) and Enum.all?(values, &is_atom/1) -> - validate_unique!(values) - {:string, Enum.map(values, fn atom -> {atom, to_string(atom)} end)} - - type = Keyword.keyword?(values) and infer_type(Keyword.values(values)) -> - validate_unique!(Keyword.keys(values)) - validate_unique!(Keyword.values(values)) - {type, values} - - true -> - raise ArgumentError, """ - Ecto.Enum types must have a values option specified as a list of atoms or a - keyword list with a mapping from atoms to either integer or string values. - - For example: - - field :my_field, Ecto.Enum, values: [:foo, :bar] - - or - - field :my_field, Ecto.Enum, values: [foo: 1, bar: 2, baz: 5] - """ - end - - on_load = Map.new(mappings, fn {key, val} -> {val, key} end) - on_dump = Map.new(mappings) - - # This is the only thing that's different from Ecto.Enum. - on_cast = - case type do - :integer -> - mappings - |> Enum.flat_map(fn {key, val} -> - [{Atom.to_string(key), key}, {Integer.to_string(val), key}] - end) - |> Map.new() - - _ -> - Map.new(mappings, fn {key, _} -> {Atom.to_string(key), key} end) - end - - embed_as = - case Keyword.get(opts, :embed_as, :values) do - :values -> - :self - - :dumped -> - :dump - - other -> - raise ArgumentError, """ - the `:embed_as` option for `Ecto.Enum` accepts either `:values` or `:dumped`, - received: `#{inspect(other)}` - """ - end - - %{ - on_load: on_load, - on_dump: on_dump, - on_cast: on_cast, - mappings: mappings, - embed_as: embed_as, - type: type - } - end - - defp validate_unique!(values) do - if length(Enum.uniq(values)) != length(values) do - raise ArgumentError, """ - Ecto.Enum type values must be unique. 
- - For example: - - field :my_field, Ecto.Enum, values: [:foo, :bar, :foo] - - is invalid, while - - field :my_field, Ecto.Enum, values: [:foo, :bar, :baz] - - is valid - """ - end - end - - defp infer_type(values) do - cond do - Enum.all?(values, &is_integer/1) -> :integer - Enum.all?(values, &is_binary/1) -> :string - true -> nil - end - end - - @impl true - def cast(nil, _params), do: {:ok, nil} - - def cast(data, params) do - case params do - %{on_load: %{^data => as_atom}} -> {:ok, as_atom} - %{on_dump: %{^data => _}} -> {:ok, data} - %{on_cast: %{^data => as_atom}} -> {:ok, as_atom} - _ -> :error - end - end - - @impl true - def load(nil, _, _), do: {:ok, nil} - - def load(data, _loader, %{on_load: on_load}) do - case on_load do - %{^data => as_atom} -> {:ok, as_atom} - _ -> :error - end - end - - @impl true - def dump(nil, _, _), do: {:ok, nil} - - def dump(data, _dumper, %{on_dump: on_dump}) do - case on_dump do - %{^data => as_string} -> {:ok, as_string} - _ -> :error - end - end - - @impl true - def equal?(a, b, _params), do: a == b - - @impl true - def embed_as(_, %{embed_as: embed_as}), do: embed_as - - @impl true - def format(%{mappings: mappings}) do - "#Ecto.Enum<values: #{inspect(Keyword.keys(mappings))}>" - end - - @doc "Returns the possible values for a given schema or types map and field" - @spec values(map | module, atom) :: [atom()] - def values(schema_or_types, field) do - schema_or_types - |> mappings(field) - |> Keyword.keys() - end - - @doc "Returns the possible dump values for a given schema or types map and field" - @spec dump_values(map | module, atom) :: [String.t()] | [integer()] - def dump_values(schema_or_types, field) do - schema_or_types - |> mappings(field) - |> Keyword.values() - end - - @doc "Returns the mappings between values and dumped values" - @spec mappings(map, atom) :: Keyword.t() - def mappings(types, field) when is_map(types) do - case types do - %{^field => {:parameterized, Ecto.Enum, %{mappings: mappings}}} -> mappings - %{^field => {_, {:parameterized, 
Ecto.Enum, %{mappings: mappings}}}} -> mappings - %{^field => _} -> raise ArgumentError, "#{field} is not an Ecto.Enum field" - %{} -> raise ArgumentError, "#{field} does not exist" - end - end - - @spec mappings(module, atom) :: Keyword.t() - def mappings(schema, field) do - try do - schema.__changeset__() - rescue - _ in UndefinedFunctionError -> - raise ArgumentError, "#{inspect(schema)} is not an Ecto schema or types map" - else - %{} = types -> mappings(types, field) - end - end -end