Skip to content

Commit

Permalink
Merge pull request #1671 from Logflare/fix/cte-table-alias-translation
Browse files Browse the repository at this point in the history
fix: cte table alias handling for compound identifier for translations
  • Loading branch information
Ziinc authored Aug 6, 2023
2 parents 0c8c7f6 + 3a5173e commit 880ea94
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 94 deletions.
204 changes: 111 additions & 93 deletions lib/logflare/sql.ex
Original file line number Diff line number Diff line change
Expand Up @@ -791,20 +791,29 @@ defmodule Logflare.Sql do
}}

"timestamp_trunc" ->
to_trunc = get_in(v, ["args", Access.at(0)])
to_trunc = get_in(v, ["args", Access.at(0), "Unnamed", "Expr"])

interval_type =
get_in(v, ["args", Access.at(1), "Unnamed", "Expr", "Identifier", "value"])
|> String.downcase()

field_arg =
if is_timestamp_identifier?(to_trunc) do
at_time_zone(to_trunc)
else
to_trunc
end

{k,
%{
v
| "args" => [
%{
"Unnamed" => %{"Expr" => %{"Value" => %{"SingleQuotedString" => interval_type}}}
},
bq_to_pg_convert_functions(to_trunc)
%{
"Unnamed" => %{"Expr" => field_arg}
}
],
"name" => [%{"quote_style" => nil, "value" => "date_trunc"}]
}}
Expand All @@ -828,6 +837,19 @@ defmodule Logflare.Sql do

defp bq_to_pg_convert_functions(kv), do: kv

# handle timestamp references in binary operations
defp pg_traverse_final_pass(
{"BinaryOp" = k,
%{
"left" => left,
"right" => right
} = v}
) do
updated_left = if is_timestamp_identifier?(left), do: at_time_zone(left), else: left
updated_right = if is_timestamp_identifier?(right), do: at_time_zone(right), else: right
{k, %{v | "left" => updated_left, "right" => updated_right} |> pg_traverse_final_pass()}
end

# convert backticks to double quotes
defp pg_traverse_final_pass({"quote_style" = k, "`"}), do: {k, "\""}
# drop cross join unnest
Expand Down Expand Up @@ -916,7 +938,7 @@ defmodule Logflare.Sql do
cte_aliases: cte_aliases,
cte_from_aliases: cte_from_aliases,
in_cte_tables_tree: false,
in_cast: false,
in_function_or_cast: false,
in_projection_tree: false,
from_table_aliases: [],
from_table_values: [],
Expand Down Expand Up @@ -947,98 +969,34 @@ defmodule Logflare.Sql do
]
)
when cte_aliases == %{} or in_cte_tables_tree == true do
%{
at_time_zone(%{
"Nested" => %{
"AtTimeZone" => %{
"time_zone" => "UTC",
"timestamp" => %{
"Function" => %{
"args" => [
%{
"Unnamed" => %{
"Expr" => %{
"BinaryOp" => %{
"left" => %{
"Cast" => %{
"data_type" => %{"BigInt" => nil},
"expr" => %{
"Nested" => %{
"JsonAccess" => %{
"left" => %{
"CompoundIdentifier" => [
%{"quote_style" => nil, "value" => table},
%{"quote_style" => nil, "value" => "body"}
]
},
"operator" => "LongArrow",
"right" => %{"Value" => %{"SingleQuotedString" => "timestamp"}}
}
}
}
}
},
"op" => "Divide",
"right" => %{"Value" => %{"Number" => ["1000000.0", false]}}
}
}
}
}
],
"distinct" => false,
"name" => [%{"quote_style" => nil, "value" => "to_timestamp"}],
"over" => nil,
"special" => false
}
}
"JsonAccess" => %{
"left" => %{
"CompoundIdentifier" => [
%{"quote_style" => nil, "value" => table},
%{"quote_style" => nil, "value" => "body"}
]
},
"operator" => "LongArrow",
"right" => %{"Value" => %{"SingleQuotedString" => "timestamp"}}
}
}
}
})
end

defp convert_keys_to_json_query(%{"Identifier" => %{"value" => "timestamp"}}, _data, "body") do
%{
at_time_zone(%{
"Nested" => %{
"AtTimeZone" => %{
"time_zone" => "UTC",
"timestamp" => %{
"Function" => %{
"args" => [
%{
"Unnamed" => %{
"Expr" => %{
"BinaryOp" => %{
"left" => %{
"Cast" => %{
"data_type" => %{"BigInt" => nil},
"expr" => %{
"Nested" => %{
"JsonAccess" => %{
"left" => %{
"Identifier" => %{"quote_style" => nil, "value" => "body"}
},
"operator" => "LongArrow",
"right" => %{"Value" => %{"SingleQuotedString" => "timestamp"}}
}
}
}
}
},
"op" => "Divide",
"right" => %{"Value" => %{"Number" => ["1000000.0", false]}}
}
}
}
}
],
"distinct" => false,
"name" => [%{"quote_style" => nil, "value" => "to_timestamp"}],
"over" => nil,
"special" => false
}
}
"JsonAccess" => %{
"left" => %{
"Identifier" => %{"quote_style" => nil, "value" => "body"}
},
"operator" => "LongArrow",
"right" => %{"Value" => %{"SingleQuotedString" => "timestamp"}}
}
}
}
})
end

defp convert_keys_to_json_query(
Expand All @@ -1055,7 +1013,8 @@ defmodule Logflare.Sql do
%{"quote_style" => nil, "value" => field}
]
},
"operator" => if(data.in_cast or data.in_binaryop, do: "LongArrow", else: "Arrow"),
"operator" =>
if(data.in_function_or_cast or data.in_binaryop, do: "LongArrow", else: "Arrow"),
"right" => %{"Value" => %{"SingleQuotedString" => key}}
}
}
Expand All @@ -1071,13 +1030,15 @@ defmodule Logflare.Sql do
"Nested" => %{
"JsonAccess" => %{
"left" => %{"Identifier" => %{"quote_style" => nil, "value" => base}},
"operator" => if(data.in_cast or data.in_binaryop, do: "LongArrow", else: "Arrow"),
"operator" =>
if(data.in_function_or_cast or data.in_binaryop, do: "LongArrow", else: "Arrow"),
"right" => %{"Value" => %{"SingleQuotedString" => key}}
}
}
}
end

# handle cross join aliases when there are different base field names as compared to what is referenced
defp convert_keys_to_json_query(
%{"CompoundIdentifier" => [%{"value" => _join_alias}, %{"value" => key} | _]},
data,
Expand All @@ -1091,7 +1052,10 @@ defmodule Logflare.Sql do
"JsonAccess" => %{
"left" => %{"Identifier" => %{"quote_style" => nil, "value" => base}},
"operator" =>
if(data.in_cast or data.in_binaryop, do: "HashLongArrow", else: "HashArrow"),
if(data.in_function_or_cast or data.in_binaryop,
do: "HashLongArrow",
else: "HashArrow"
),
"right" => %{"Value" => %{"SingleQuotedString" => path}}
}
}
Expand All @@ -1111,7 +1075,10 @@ defmodule Logflare.Sql do
"JsonAccess" => %{
"left" => %{"Identifier" => %{"quote_style" => nil, "value" => base}},
"operator" =>
if(data.in_cast or data.in_binaryop, do: "HashLongArrow", else: "HashArrow"),
if(data.in_function_or_cast or data.in_binaryop,
do: "HashLongArrow",
else: "HashArrow"
),
"right" => %{"Value" => %{"SingleQuotedString" => path}}
}
}
Expand All @@ -1127,7 +1094,8 @@ defmodule Logflare.Sql do
"Nested" => %{
"JsonAccess" => %{
"left" => %{"Identifier" => %{"quote_style" => nil, "value" => base}},
"operator" => if(data.in_cast or data.in_binaryop, do: "LongArrow", else: "Arrow"),
"operator" =>
if(data.in_function_or_cast or data.in_binaryop, do: "LongArrow", else: "Arrow"),
"right" => %{"Value" => %{"SingleQuotedString" => name}}
}
}
Expand Down Expand Up @@ -1276,8 +1244,8 @@ defmodule Logflare.Sql do
{k, traverse_convert_identifiers(v, data)}
end

defp traverse_convert_identifiers({"Cast" = k, v}, data) do
{k, traverse_convert_identifiers(v, Map.put(data, :in_cast, true))}
defp traverse_convert_identifiers({k, v}, data) when k in ["Function", "Cast"] do
{k, traverse_convert_identifiers(v, Map.put(data, :in_function_or_cast, true))}
end

# auto set the column alias if not set
Expand All @@ -1300,6 +1268,7 @@ defmodule Logflare.Sql do
{"CompoundIdentifier" = k, [%{"value" => head_val}, tail] = v},
data
) do
# dbg({v, data})
cond do
is_map_key(data.alias_path_mappings, head_val) and
length(data.alias_path_mappings[head_val || []]) > 1 ->
Expand All @@ -1318,6 +1287,12 @@ defmodule Logflare.Sql do
|> Map.to_list()
|> List.first()

# outside of a cte, referencing table alias
# preserve as is
head_val in data.from_table_aliases and data.in_cte_tables_tree == false and
data.cte_aliases != %{} ->
{k, v}

# first OR condition: outside of cte and non-cte
# second OR condition: inside a cte
head_val in data.from_table_aliases or
Expand Down Expand Up @@ -1392,4 +1367,47 @@ defmodule Logflare.Sql do
|> Map.to_list()
|> List.first()
end

defp is_timestamp_identifier?(%{"Identifier" => %{"value" => "timestamp"}}), do: true

defp is_timestamp_identifier?(%{"CompoundIdentifier" => [_head, %{"value" => "timestamp"}]}),
do: true

defp is_timestamp_identifier?(_), do: false

defp at_time_zone(identifier) do
%{
"Nested" => %{
"AtTimeZone" => %{
"time_zone" => "UTC",
"timestamp" => %{
"Function" => %{
"args" => [
%{
"Unnamed" => %{
"Expr" => %{
"BinaryOp" => %{
"left" => %{
"Cast" => %{
"data_type" => %{"BigInt" => nil},
"expr" => identifier
}
},
"op" => "Divide",
"right" => %{"Value" => %{"Number" => ["1000000.0", false]}}
}
}
}
}
],
"distinct" => false,
"name" => [%{"quote_style" => nil, "value" => "to_timestamp"}],
"over" => nil,
"special" => false
}
}
}
}
}
end
end
42 changes: 41 additions & 1 deletion test/logflare/sql_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ defmodule Logflare.SqlTest do
assert Sql.Parser.parse("postgres", translated) == Sql.Parser.parse("postgres", pg_query)
end

test "timestamp_trunc" do
test "timestamp_trunc without a field reference" do
bq_query = "select timestamp_trunc(current_timestamp(), day) as t"
pg_query = ~s|select date_trunc('day', current_timestamp) as t|
{:ok, translated} = Sql.translate(:bq_sql, :pg_sql, bq_query)
Expand Down Expand Up @@ -653,6 +653,14 @@ defmodule Logflare.SqlTest do
assert Sql.Parser.parse("postgres", translated) == Sql.Parser.parse("postgres", pg_query)
end

test "field references within a DATE_TRUNC() are converted to ->> syntax for string casting" do
bq_query = ~s|select DATE_TRUNC('day', col) as date from my_table|
pg_query = ~s|select DATE_TRUNC('day', (body ->> 'col')) as date from my_table|

{:ok, translated} = Sql.translate(:bq_sql, :pg_sql, bq_query)
assert Sql.Parser.parse("postgres", translated) == Sql.Parser.parse("postgres", pg_query)
end

test "field references in left-right operators are converted to ->> syntax" do
bq_query = ~s|select t.id = 'test' as value from my_table t|
pg_query = ~s|select (t.body ->> 'id') = 'test' as value from my_table t|
Expand Down Expand Up @@ -770,6 +778,38 @@ defmodule Logflare.SqlTest do
assert Sql.Parser.parse("postgres", translated) == Sql.Parser.parse("postgres", pg_query)
end

test "special handling of timestamp field and date_trunc : " do
bq_query = ~s"""
with edge_logs as (select t.timestamp from `cloudflare.logs.prod` t)
select timestamp_trunc(t.timestamp, day) as timestamp from edge_logs t
"""

pg_query = ~s"""
with edge_logs as ( select (t.body -> 'timestamp') as timestamp from "cloudflare.logs.prod" t )
SELECT date_trunc('day', (to_timestamp( t.timestamp::bigint / 1000000.0) AT TIME ZONE 'UTC') ) AS timestamp FROM edge_logs t
"""

{:ok, translated} = Sql.translate(:bq_sql, :pg_sql, bq_query)
assert Sql.Parser.parse("postgres", translated) == Sql.Parser.parse("postgres", pg_query)
end

test "special handling of timestamp field for binary ops" do
bq_query = ~s"""
with edge_logs as (select t.timestamp from `cloudflare.logs.prod` t)
select t.timestamp as timestamp from edge_logs t
where t.timestamp > '2023-08-05T09:00:00.000Z'
"""

pg_query = ~s"""
with edge_logs as ( select (t.body -> 'timestamp') as timestamp from "cloudflare.logs.prod" t )
SELECT t.timestamp AS timestamp FROM edge_logs t
where (to_timestamp(CAST(t.timestamp AS BIGINT) / 1000000.0) AT TIME ZONE 'UTC') > '2023-08-05T09:00:00.000Z'
"""

{:ok, translated} = Sql.translate(:bq_sql, :pg_sql, bq_query)
assert Sql.Parser.parse("postgres", translated) == Sql.Parser.parse("postgres", pg_query)
end

# functions metrics
# test "APPROX_QUANTILES is translated"
# tes "offset() and indexing is translated"
Expand Down
Loading

0 comments on commit 880ea94

Please sign in to comment.