From baa765942b46e90aeaa3dfcfc2929ef767fba742 Mon Sep 17 00:00:00 2001 From: Roman Bodavskiy Date: Wed, 4 Dec 2024 15:44:26 +0300 Subject: [PATCH] backend/feat: clickhouse subqueries --- .../src/Kernel/Storage/ClickhouseV2.hs | 1 + .../Internal/ClickhouseColumns.hs | 103 +++++++++--------- .../ClickhouseV2/Internal/ClickhouseQuery.hs | 41 ++++--- .../Storage/ClickhouseV2/Internal/Types.hs | 94 +++++++++++++++- .../Kernel/Storage/ClickhouseV2/Operators.hs | 83 +++++++++++--- .../Kernel/Storage/ClickhouseV2/Queries.hs | 16 ++- 6 files changed, 241 insertions(+), 97 deletions(-) diff --git a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2.hs b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2.hs index fdf35ca93..af7f4afe7 100644 --- a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2.hs +++ b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2.hs @@ -17,5 +17,6 @@ module Kernel.Storage.ClickhouseV2 (module Reexport) where import Kernel.Storage.ClickhouseV2.ClickhouseDb as Reexport import Kernel.Storage.ClickhouseV2.ClickhouseTable as Reexport import Kernel.Storage.ClickhouseV2.ClickhouseValue as Reexport +import Kernel.Storage.ClickhouseV2.Internal.ClickhouseColumns () import Kernel.Storage.ClickhouseV2.Operators as Reexport import Kernel.Storage.ClickhouseV2.Queries as Reexport diff --git a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseColumns.hs b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseColumns.hs index 0036da9f1..22050b0d4 100644 --- a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseColumns.hs +++ b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseColumns.hs @@ -11,6 +11,7 @@ General Public License along with this program. If not, see . -} +{-# OPTIONS_GHC -Wno-orphans #-} module Kernel.Storage.ClickhouseV2.Internal.ClickhouseColumns where @@ -23,18 +24,10 @@ import Kernel.Storage.ClickhouseV2.ClickhouseTable import Kernel.Storage.ClickhouseV2.ClickhouseValue import Kernel.Storage.ClickhouseV2.Internal.Types -data Select a db table cols gr ord where - Select :: (ClickhouseTable table, ClickhouseColumns a cols) => cols -> GroupBy a gr -> Q db table cols ord -> Select a db table cols gr ord - -class ClickhouseColumns (a :: IsAggregated) cols where - type ColumnsType a cols - showClickhouseColumns :: Proxy a -> cols -> String - parseColumns :: Proxy a -> cols -> A.Value -> Either String (ColumnsType a cols) - instance (FromJSON (ColumnsType 'NOT_AGG (Columns 'NOT_AGG t)), ClickhouseTable t) => ClickhouseColumns 'NOT_AGG (Columns 'NOT_AGG t) where type ColumnsType 'NOT_AGG (Columns 'NOT_AGG t) = t Identity - showClickhouseColumns _ _ = "*" - parseColumns _ _ = eitherResult . A.fromJSON + showClickhouseColumns _ _ _ = "*" + parseColumns _ _ val _ = eitherResult . A.fromJSON $ val -- should be all AGG columns or all NOT_AGG columns instance (ClickhouseValue v) => ClickhouseColumns a (Column a t v) where @@ -75,21 +68,23 @@ parseColumns1 :: ClickhouseValue v1 => Column a t v1 -> A.Value -> + SubQueryLevel -> Either String v1 -parseColumns1 c1 json = do +parseColumns1 c1 json l = do mapResult <- eitherResult . A.fromJSON @(A.KeyMap (Value NotSpecified)) $ json - parseValueFromMap @a @t @v1 1 c1 mapResult + parseValueFromMap @a @t @v1 1 c1 mapResult l parseColumns2 :: forall a t v1 v2. (C2 ClickhouseValue v1 v2) => T2 (Column a t) v1 v2 -> A.Value -> + SubQueryLevel -> Either String (v1, v2) -parseColumns2 (c1, c2) json = do +parseColumns2 (c1, c2) json l = do mapResult <- eitherResult . A.fromJSON @(A.KeyMap (Value NotSpecified)) $ json - v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult - v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult + v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult l + v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult l pure (v1, v2) parseColumns3 :: @@ -97,12 +92,13 @@ parseColumns3 :: (C3 ClickhouseValue v1 v2 v3) => T3 (Column a t) v1 v2 v3 -> A.Value -> + SubQueryLevel -> Either String (v1, v2, v3) -parseColumns3 (c1, c2, c3) json = do +parseColumns3 (c1, c2, c3) json l = do mapResult <- eitherResult . A.fromJSON @(A.KeyMap (Value NotSpecified)) $ json - v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult - v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult - v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult + v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult l + v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult l + v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult l pure (v1, v2, v3) parseColumns4 :: @@ -110,13 +106,14 @@ parseColumns4 :: (C4 ClickhouseValue v1 v2 v3 v4) => T4 (Column a t) v1 v2 v3 v4 -> A.Value -> + SubQueryLevel -> Either String (v1, v2, v3, v4) -parseColumns4 (c1, c2, c3, c4) json = do +parseColumns4 (c1, c2, c3, c4) json l = do mapResult <- eitherResult . A.fromJSON @(A.KeyMap (Value NotSpecified)) $ json - v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult - v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult - v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult - v4 <- parseValueFromMap @a @t @v4 4 c4 mapResult + v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult l + v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult l + v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult l + v4 <- parseValueFromMap @a @t @v4 4 c4 mapResult l pure (v1, v2, v3, v4) parseColumns5 :: @@ -124,14 +121,15 @@ parseColumns5 :: (C5 ClickhouseValue v1 v2 v3 v4 v5) => T5 (Column a t) v1 v2 v3 v4 v5 -> A.Value -> + SubQueryLevel -> Either String (v1, v2, v3, v4, v5) -parseColumns5 (c1, c2, c3, c4, c5) json = do +parseColumns5 (c1, c2, c3, c4, c5) json l = do mapResult <- eitherResult . A.fromJSON @(A.KeyMap (Value NotSpecified)) $ json - v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult - v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult - v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult - v4 <- parseValueFromMap @a @t @v4 4 c4 mapResult - v5 <- parseValueFromMap @a @t @v5 5 c5 mapResult + v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult l + v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult l + v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult l + v4 <- parseValueFromMap @a @t @v4 4 c4 mapResult l + v5 <- parseValueFromMap @a @t @v5 5 c5 mapResult l pure (v1, v2, v3, v4, v5) parseColumns6 :: @@ -139,53 +137,52 @@ parseColumns6 :: (C6 ClickhouseValue v1 v2 v3 v4 v5 v6) => T6 (Column a t) v1 v2 v3 v4 v5 v6 -> A.Value -> + SubQueryLevel -> Either String (v1, v2, v3, v4, v5, v6) -parseColumns6 (c1, c2, c3, c4, c5, c6) json = do +parseColumns6 (c1, c2, c3, c4, c5, c6) json l = do mapResult <- eitherResult . A.fromJSON @(A.KeyMap (Value NotSpecified)) $ json - v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult - v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult - v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult - v4 <- parseValueFromMap @a @t @v4 4 c4 mapResult - v5 <- parseValueFromMap @a @t @v5 5 c5 mapResult - v6 <- parseValueFromMap @a @t @v6 6 c6 mapResult + v1 <- parseValueFromMap @a @t @v1 1 c1 mapResult l + v2 <- parseValueFromMap @a @t @v2 2 c2 mapResult l + v3 <- parseValueFromMap @a @t @v3 3 c3 mapResult l + v4 <- parseValueFromMap @a @t @v4 4 c4 mapResult l + v5 <- parseValueFromMap @a @t @v5 5 c5 mapResult l + v6 <- parseValueFromMap @a @t @v6 6 c6 mapResult l pure (v1, v2, v3, v4, v5, v6) -- FIXME should parse Numbers also parseValueFromMap :: forall a t v. (ClickhouseValue v) => - Int -> + ColumnNumber -> Column a t v -> A.KeyMap (Value NotSpecified) -> + SubQueryLevel -> Either String v -parseValueFromMap n column mapResult = do +parseValueFromMap n column mapResult l = do let columnName = showColumn column - val <- case A.lookup (fromString $ getColumnSynonym n) mapResult of - Nothing -> Left $ "Key \"" <> getColumnSynonym n <> "\" for column \"" <> columnName <> "\" did not found" + val <- case A.lookup (fromString $ getColumnSynonym n l) mapResult of + Nothing -> Left $ "Key \"" <> getColumnSynonym n l <> "\" for column \"" <> columnName <> "\" did not found" Just val -> pure $ coerce @(Value NotSpecified) @(Value v) val - either (\err -> Left $ "Failed to parse key \"" <> getColumnSynonym n <> "\" for column \"" <> columnName <> "\": " <> err) pure $ + either (\err -> Left $ "Failed to parse key \"" <> getColumnSynonym n l <> "\" for column \"" <> columnName <> "\": " <> err) pure $ getExcept $ fromClickhouseValue @v val -zipColumnsWithSynonyms1 :: Column a t v1 -> String +zipColumnsWithSynonyms1 :: Column a t v1 -> SubQueryLevel -> String zipColumnsWithSynonyms1 c1 = zipColumns [showColumn c1] -zipColumnsWithSynonyms2 :: T2 (Column a t) v1 v2 -> String +zipColumnsWithSynonyms2 :: T2 (Column a t) v1 v2 -> SubQueryLevel -> String zipColumnsWithSynonyms2 (c1, c2) = zipColumns [showColumn c1, showColumn c2] -zipColumnsWithSynonyms3 :: T3 (Column a t) v1 v2 v3 -> String +zipColumnsWithSynonyms3 :: T3 (Column a t) v1 v2 v3 -> SubQueryLevel -> String zipColumnsWithSynonyms3 (c1, c2, c3) = zipColumns [showColumn c1, showColumn c2, showColumn c3] -zipColumnsWithSynonyms4 :: T4 (Column a t) v1 v2 v3 v4 -> String +zipColumnsWithSynonyms4 :: T4 (Column a t) v1 v2 v3 v4 -> SubQueryLevel -> String zipColumnsWithSynonyms4 (c1, c2, c3, c4) = zipColumns [showColumn c1, showColumn c2, showColumn c3, showColumn c4] -zipColumnsWithSynonyms5 :: T5 (Column a t) v1 v2 v3 v4 v5 -> String +zipColumnsWithSynonyms5 :: T5 (Column a t) v1 v2 v3 v4 v5 -> SubQueryLevel -> String zipColumnsWithSynonyms5 (c1, c2, c3, c4, c5) = zipColumns [showColumn c1, showColumn c2, showColumn c3, showColumn c4, showColumn c5] -zipColumnsWithSynonyms6 :: T6 (Column a t) v1 v2 v3 v4 v5 v6 -> String +zipColumnsWithSynonyms6 :: T6 (Column a t) v1 v2 v3 v4 v5 v6 -> SubQueryLevel -> String zipColumnsWithSynonyms6 (c1, c2, c3, c4, c5, c6) = zipColumns [showColumn c1, showColumn c2, showColumn c3, showColumn c4, showColumn c5, showColumn c6] -zipColumns :: [String] -> String -zipColumns columns = List.intercalate ", " $ zipWith (\n column -> column <> " as " <> getColumnSynonym n) [1 ..] columns - -getColumnSynonym :: Int -> String -getColumnSynonym n = "res" <> show n +zipColumns :: [String] -> SubQueryLevel -> String +zipColumns columns l = List.intercalate ", " $ zipWith (\n column -> column <> " as " <> getColumnSynonym n l) [1 ..] columns diff --git a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseQuery.hs b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseQuery.hs index d205f7c50..e9abaceff 100644 --- a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseQuery.hs +++ b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/ClickhouseQuery.hs @@ -11,8 +11,7 @@ General Public License along with this program. If not, see . -} -{-# LANGUAGE DerivingStrategies #-} -{-# LANGUAGE GeneralizedNewtypeDeriving #-} +{-# OPTIONS_GHC -Wno-orphans #-} module Kernel.Storage.ClickhouseV2.Internal.ClickhouseQuery ( ClickhouseQuery (toClickhouseQuery), @@ -27,35 +26,33 @@ import Kernel.Prelude import Kernel.Storage.ClickhouseV2.ClickhouseDb import Kernel.Storage.ClickhouseV2.ClickhouseTable import Kernel.Storage.ClickhouseV2.ClickhouseValue -import Kernel.Storage.ClickhouseV2.Internal.ClickhouseColumns import Kernel.Storage.ClickhouseV2.Internal.Types import Kernel.Utils.JSON (camelToSnakeCase) -newtype RawQuery = RawQuery {getRawQuery :: String} - deriving newtype (IsString, Semigroup, Monoid) - -class ClickhouseQuery expr where - toClickhouseQuery :: expr -> RawQuery - -instance (ClickhouseDb db, ClickhouseTable t, ClickhouseColumns a cols, ClickhouseQuery gr, ClickhouseQuery ord) => ClickhouseQuery (Select a db t cols gr ord) where +instance + ( ClickhouseDb db, + ClickhouseTable t, + ClickhouseColumns a cols, + ClickhouseQuery gr, + ClickhouseQuery ord, + ClickhouseQuery (AvailableColumns db t acols) + ) => + ClickhouseQuery (Select a db t cols gr ord acols) + where toClickhouseQuery (Select cols groupBy q) = do - -- should we add table name modifier? - let tableName = dropBeforeDot $ camelToSnakeCase . dropTSuffix . show $ typeRep (Proxy @t) let selectModifier = case getSelectModifier (Proxy @t) of NO_SELECT_MODIFIER -> "" SELECT_FINAL_MODIFIER -> " FINAL " "SELECT " - <> RawQuery (showClickhouseColumns @a @cols (Proxy @a) cols) + <> RawQuery (showClickhouseColumns @a @cols (Proxy @a) cols q.subQueryLevelQ) <> " FROM " - <> fromString tableName + <> toClickhouseQuery @(AvailableColumns db t acols) q.tableQ <> selectModifier - <> toClickhouseQuery @(Where t) (q.whereQ cols) + <> mkMaybeClause @(Where t) (q.whereQ <&> ($ cols)) <> toClickhouseQuery @(GroupBy a gr) groupBy <> mkMaybeClause @(OrderBy ord) (q.orderByQ <&> ($ cols)) <> mkMaybeClause @Limit q.limitQ <> mkMaybeClause @Offset q.offsetQ - where - dropTSuffix str = take (length str - 1) str mkMaybeClause :: forall expr. ClickhouseQuery expr => Maybe expr -> RawQuery mkMaybeClause = maybe mempty (toClickhouseQuery @expr) @@ -141,3 +138,13 @@ instance ClickhouseTable t => ClickhouseQuery (T5 (Column a t) v1 v2 v3 v4 v5) w instance ClickhouseTable t => ClickhouseQuery (T6 (Column a t) v1 v2 v3 v4 v5 v6) where toClickhouseQuery (c1, c2, c3, c4, c5, c6) = intercalate ", " [toClickhouseQuery c1, toClickhouseQuery c2, toClickhouseQuery c3, toClickhouseQuery c4, toClickhouseQuery c5, toClickhouseQuery c6] + +instance ClickhouseTable t => ClickhouseQuery (AvailableAllColumns db t) where + toClickhouseQuery _ = do + let tableName = dropBeforeDot $ camelToSnakeCase . dropTSuffix . show $ typeRep (Proxy @t) + fromString tableName + where + dropTSuffix str = take (length str - 1) str + +instance ClickhouseQuery (AvailableSubSelectColumns db t subcols) where + toClickhouseQuery (AvailableColumns (SubSelectColumns subSelect)) = addBrackets . toClickhouseQuery $ subSelect diff --git a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/Types.hs b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/Types.hs index cdde93719..44fc93acb 100644 --- a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/Types.hs +++ b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Internal/Types.hs @@ -11,9 +11,12 @@ General Public License along with this program. If not, see . -} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} module Kernel.Storage.ClickhouseV2.Internal.Types where +import qualified Data.Aeson.Types as A import Data.Kind (Constraint) import qualified Data.Time as Time import Kernel.Prelude @@ -21,6 +24,20 @@ import Kernel.Storage.ClickhouseV2.ClickhouseDb import Kernel.Storage.ClickhouseV2.ClickhouseTable import Kernel.Storage.ClickhouseV2.ClickhouseValue +data Select a db table cols gr ord acols where + Select :: (ClickhouseTable table, ClickhouseColumns a cols) => cols -> GroupBy a gr -> Q db table cols ord acols -> Select a db table cols gr ord acols + +class ClickhouseColumns (a :: IsAggregated) cols where + type ColumnsType a cols + showClickhouseColumns :: Proxy a -> cols -> SubQueryLevel -> String + parseColumns :: Proxy a -> cols -> A.Value -> SubQueryLevel -> Either String (ColumnsType a cols) + +newtype RawQuery = RawQuery {getRawQuery :: String} + deriving newtype (IsString, Semigroup, Monoid) + +class ClickhouseQuery expr where + toClickhouseQuery :: expr -> RawQuery + data IsAggregated = AGG | NOT_AGG class ClickhouseValue v => ClickhouseNum v @@ -32,6 +49,7 @@ instance {-# OVERLAPPING #-} (ClickhouseValue v, Num v) => ClickhouseNum (Maybe data Column (a :: IsAggregated) t v where Column :: (ClickhouseTable t) => FieldModification t v -> Column 'NOT_AGG t v -- initial column Group :: (ClickhouseTable t, ClickhouseValue v) => Column 'NOT_AGG t v -> Column 'AGG t v -- column from groupBy clause + SubColumn :: (ClickhouseTable t, ClickhouseValue v) => Column a t v -> ColumnNumber -> SubQueryLevel -> Column 'NOT_AGG t v -- column synonym will be generated based on these two guys SubQueryLevel and ColumnNumber Sum :: (ClickhouseTable t, ClickhouseNum v) => Column 'NOT_AGG t v -> Column 'AGG t v Count :: (ClickhouseTable t, ClickhouseValue v, ClickhouseValue Int) => Column 'NOT_AGG t v -> Column 'AGG t Int Distinct :: (ClickhouseTable t, ClickhouseValue v) => Column a t v -> Column a t v -- should not be used in where clause @@ -43,6 +61,7 @@ data Column (a :: IsAggregated) t v where ValColumn :: (ClickhouseTable t, ClickhouseValue v) => v -> Column a t v If :: (ClickhouseTable t, ClickhouseValue v) => Column a t Bool -> Column a t v -> Column a t v -> Column a t v EqColumn :: (ClickhouseTable t, ClickhouseValue v) => Column a t v -> Column a t v -> Column a t Bool + ArgMax :: (ClickhouseTable t, ClickhouseValue v1, ClickhouseValue v2) => Column 'NOT_AGG t v1 -> Column 'NOT_AGG t v2 -> Column 'AGG t v1 mkTableColumns :: ClickhouseTable t => FieldModifications t -> Columns 'NOT_AGG t mkTableColumns = mapTable Column @@ -98,6 +117,35 @@ instance (ClickhouseTable t, C6 ClickhouseValue v1 v2 v3 v4 v5 v6) => IsGroupCol type GroupColumnsType (T6 (Column 'NOT_AGG t) v1 v2 v3 v4 v5 v6) = (T6 (Column 'AGG t) v1 v2 v3 v4 v5 v6) groupColumns (c1, c2, c3, c4, c5, c6) = (Group @t @v1 c1, Group @t @v2 c2, Group @t @v3 c3, Group @t @v4 c4, Group @t @v5 c5, Group @t @v6 c6) +-- we need to reset group columns for subqueries, thus we can group twice, first time in subquery, second time in main query +class MkSubColumns cols where + type SubColumnsType cols + subColumnsValue :: cols -> SubQueryLevel -> SubColumnsType cols + +instance (ClickhouseTable t, ClickhouseValue v) => MkSubColumns (Column a t v) where + type SubColumnsType (Column a t v) = Column 'NOT_AGG t v + subColumnsValue c l = SubColumn @t @v c 1 l + +instance (ClickhouseTable t, C2 ClickhouseValue v1 v2) => MkSubColumns (T2 (Column a t) v1 v2) where + type SubColumnsType (T2 (Column a t) v1 v2) = (T2 (Column 'NOT_AGG t) v1 v2) + subColumnsValue (c1, c2) l = (SubColumn @t @v1 c1 1 l, SubColumn @t @v2 c2 2 l) + +instance (ClickhouseTable t, C3 ClickhouseValue v1 v2 v3) => MkSubColumns (T3 (Column a t) v1 v2 v3) where + type SubColumnsType (T3 (Column a t) v1 v2 v3) = (T3 (Column 'NOT_AGG t) v1 v2 v3) + subColumnsValue (c1, c2, c3) l = (SubColumn @t @v1 c1 1 l, SubColumn @t @v2 c2 2 l, SubColumn @t @v3 c3 3 l) + +instance (ClickhouseTable t, C4 ClickhouseValue v1 v2 v3 v4) => MkSubColumns (T4 (Column a t) v1 v2 v3 v4) where + type SubColumnsType (T4 (Column a t) v1 v2 v3 v4) = (T4 (Column 'NOT_AGG t) v1 v2 v3 v4) + subColumnsValue (c1, c2, c3, c4) l = (SubColumn @t @v1 c1 1 l, SubColumn @t @v2 c2 2 l, SubColumn @t @v3 c3 3 l, SubColumn @t @v4 c4 4 l) + +instance (ClickhouseTable t, C5 ClickhouseValue v1 v2 v3 v4 v5) => MkSubColumns (T5 (Column a t) v1 v2 v3 v4 v5) where + type SubColumnsType (T5 (Column a t) v1 v2 v3 v4 v5) = (T5 (Column 'NOT_AGG t) v1 v2 v3 v4 v5) + subColumnsValue (c1, c2, c3, c4, c5) l = (SubColumn @t @v1 c1 1 l, SubColumn @t @v2 c2 2 l, SubColumn @t @v3 c3 3 l, SubColumn @t @v4 c4 4 l, SubColumn @t @v5 c5 5 l) + +instance (ClickhouseTable t, C6 ClickhouseValue v1 v2 v3 v4 v5 v6) => MkSubColumns (T6 (Column a t) v1 v2 v3 v4 v5 v6) where + type SubColumnsType (T6 (Column a t) v1 v2 v3 v4 v5 v6) = (T6 (Column 'NOT_AGG t) v1 v2 v3 v4 v5 v6) + subColumnsValue (c1, c2, c3, c4, c5, c6) l = (SubColumn @t @v1 c1 1 l, SubColumn @t @v2 c2 2 l, SubColumn @t @v3 c3 3 l, SubColumn @t @v4 c4 4 l, SubColumn @t @v5 c5 5 l, SubColumn @t @v6 c6 6 l) + data NotGrouped data NoColumns @@ -128,10 +176,11 @@ instance (ClickhouseTable t, C5 ClickhouseValue v1 v2 v3 v4 v5) => IsOrderColumn instance (ClickhouseTable t, C6 ClickhouseValue v1 v2 v3 v4 v5 v6) => IsOrderColumns (T6 (Column a t) v1 v2 v3 v4 v5 v6) -data Q db table cols ord = (ClickhouseDb db) => +data Q db table cols ord acols = (ClickhouseDb db) => Q - { tableQ :: Columns 'NOT_AGG table, - whereQ :: cols -> Where table, + { tableQ :: AvailableColumns db table acols, + subQueryLevelQ :: SubQueryLevel, + whereQ :: Maybe (cols -> Where table), limitQ :: Maybe Limit, offsetQ :: Maybe Offset, orderByQ :: Maybe (cols -> OrderBy ord) @@ -143,12 +192,40 @@ newtype Limit = Limit Int data Order = Asc | Desc +class HasAvailableColumns (cols :: Type) where + type AvailableColumnsType cols + availableColumnsValue :: cols -> AvailableColumnsType cols + +instance HasAvailableColumns (AllColumns db table) where + type AvailableColumnsType (AllColumns db table) = Columns 'NOT_AGG table + availableColumnsValue (AllColumns cols) = cols + +instance HasAvailableColumns (SubSelectColumns db table subcols) where + type AvailableColumnsType (SubSelectColumns db table subcols) = SubColumnsType subcols + availableColumnsValue (SubSelectColumns (Select subcols _ q)) = subColumnsValue subcols q.subQueryLevelQ + +getAvailableColumnsValue :: + AvailableColumns db table acols -> + AvailableColumnsType acols +getAvailableColumnsValue (AvailableColumns acols) = availableColumnsValue acols + +data AvailableColumns db table acols where + AvailableColumns :: (ClickhouseDb db, ClickhouseTable table, HasAvailableColumns acols) => acols -> AvailableColumns db table acols + data AllColumns db table where AllColumns :: (ClickhouseDb db, ClickhouseTable table) => Columns 'NOT_AGG table -> AllColumns db table +data SubSelectColumns db table subcols where + SubSelectColumns :: (ClickhouseDb db, ClickhouseTable table, ClickhouseQuery (Select a db table subcols gr ord acols), MkSubColumns subcols) => Select a db table subcols gr ord acols -> SubSelectColumns db table subcols + +type AvailableAllColumns db table = AvailableColumns db table (AllColumns db table) + +type AvailableSubSelectColumns db table subcols = AvailableColumns db table (SubSelectColumns db table subcols) + showColumn :: Column a t v -> String showColumn (Column column) = getFieldModification column showColumn (Group column) = showColumn column +showColumn (SubColumn _column n l) = getColumnSynonym n l showColumn (Sum column) = "SUM" <> addBrackets' (showColumn column) showColumn (Count column) = "COUNT" <> addBrackets' (showColumn column) showColumn (Distinct column) = "DISTINCT" <> addBrackets' (showColumn column) @@ -160,6 +237,7 @@ showColumn (TimeDiff column1 column2) = "timeDiff" <> addBrackets' (showColumn c showColumn (ValColumn v) = valToString . toClickhouseValue $ v showColumn (If cond v1 v2) = "if" <> addBrackets' (showColumn cond <> ", " <> showColumn v1 <> ", " <> showColumn v2) showColumn (EqColumn column1 column2) = addBrackets' $ showColumn column1 <> "=" <> showColumn column2 +showColumn (ArgMax arg value) = "argMax" <> addBrackets' (showColumn arg <> ", " <> showColumn value) addBrackets' :: String -> String addBrackets' rq = "(" <> rq <> ")" @@ -183,3 +261,13 @@ type C4 (c :: Type -> Constraint) x1 x2 x3 x4 = (c x1, c x2, c x3, c x4) type C5 (c :: Type -> Constraint) x1 x2 x3 x4 x5 = (c x1, c x2, c x3, c x4, c x5) type C6 (c :: Type -> Constraint) x1 x2 x3 x4 x5 x6 = (c x1, c x2, c x3, c x4, c x5, c x6) + +newtype SubQueryLevel = SubQueryLevel {getSubQueryLevel :: Int} + deriving newtype (Show, Num, Eq) + +newtype ColumnNumber = ColumnNumber {getColumnNumber :: Int} + deriving newtype (Show, Num, Enum, Eq) + +getColumnSynonym :: ColumnNumber -> SubQueryLevel -> String +getColumnSynonym n 0 = "res" <> show n +getColumnSynonym n l = "res" <> show n <> "_sub" <> show l diff --git a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Operators.hs b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Operators.hs index be633250b..bfd6328b6 100644 --- a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Operators.hs +++ b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Operators.hs @@ -19,7 +19,7 @@ import Kernel.Prelude import Kernel.Storage.ClickhouseV2.ClickhouseDb import Kernel.Storage.ClickhouseV2.ClickhouseTable import Kernel.Storage.ClickhouseV2.ClickhouseValue -import Kernel.Storage.ClickhouseV2.Internal.ClickhouseColumns +import Kernel.Storage.ClickhouseV2.Internal.ClickhouseColumns () import Kernel.Storage.ClickhouseV2.Internal.Types (==.) :: forall a table value. (ClickhouseTable table, ClickhouseValue value) => Column a table value -> value -> Clause table @@ -70,23 +70,37 @@ isNull column = Is column NullTerm isNotNull :: forall a table value. (ClickhouseTable table, ClickhouseValue value) => Column a table (Maybe value) -> Clause table isNotNull column = Is column NotNullTerm -select :: forall db table ord. ClickhouseTable table => Q db table (Columns 'NOT_AGG table) ord -> Select 'NOT_AGG db table (Columns 'NOT_AGG table) NotGrouped ord -select q = Select q.tableQ NotGrouped q - -select_ :: forall a db table cols gr ord. (ClickhouseTable table, ClickhouseColumns a cols) => (Columns 'NOT_AGG table -> (cols, GroupBy a gr)) -> Q db table cols ord -> Select a db table cols gr ord +select :: + forall db table ord. + ClickhouseTable table => + Q db table (Columns 'NOT_AGG table) ord (AllColumns db table) -> + Select 'NOT_AGG db table (Columns 'NOT_AGG table) NotGrouped ord (AllColumns db table) +select q = Select (getAvailableColumnsValue q.tableQ) NotGrouped q + +select_ :: + forall a db table cols gr ord acols. + (ClickhouseTable table, ClickhouseColumns a cols) => + (AvailableColumnsType acols -> (cols, GroupBy a gr)) -> + Q db table cols ord acols -> + Select a db table cols gr ord acols select_ colsClause q = do - let (cols, gr) = colsClause q.tableQ + let (cols, gr) = colsClause (getAvailableColumnsValue q.tableQ) Select cols gr q -- FIXME Integer -limit_ :: Int -> Q db table cols ord -> Q db table cols ord +limit_ :: Int -> Q db table cols ord subsel -> Q db table cols ord subsel limit_ limitVal q = q {limitQ = Just $ Limit limitVal} -offset_ :: Int -> Q db table cols ord -> Q db table cols ord +offset_ :: Int -> Q db table cols ord subsel -> Q db table cols ord subsel offset_ offsetVal q = q {offsetQ = Just $ Offset offsetVal} -orderBy_ :: forall db table cols ord. ClickhouseTable table => (Columns 'NOT_AGG table -> cols -> OrderBy ord) -> Q db table cols NotOrdered -> Q db table cols ord -orderBy_ orderByClause q = q {orderByQ = Just $ orderByClause (tableQ q)} +orderBy_ :: + forall db table cols ord acols. + ClickhouseTable table => + (AvailableColumnsType acols -> cols -> OrderBy ord) -> + Q db table cols NotOrdered acols -> + Q db table cols ord acols +orderBy_ orderByClause q = q {orderByQ = Just $ orderByClause $ getAvailableColumnsValue (tableQ q)} asc :: forall ord. IsOrderColumns ord => ord -> OrderBy ord asc = OrderBy Asc @@ -107,14 +121,44 @@ all_ :: forall db table. (ClickhouseDb db, ClickhouseTable table) => FieldModifications table -> - AllColumns db table -all_ tableMod = AllColumns (mkTableColumns @table tableMod) + (AvailableAllColumns db table, SubQueryLevel) +all_ tableMod = (AvailableColumns $ AllColumns (mkTableColumns @table tableMod), 0) + +subSelect_ :: + forall a db table subcols gr ord acols. + ( ClickhouseDb db, + ClickhouseTable table, + ClickhouseQuery (Select a db table subcols gr ord acols), + MkSubColumns subcols + ) => + Select a db table subcols gr ord acols -> + (AvailableSubSelectColumns db table subcols, SubQueryLevel) +subSelect_ s@(Select _cols _gr q) = (AvailableColumns . SubSelectColumns $ s, q.subQueryLevelQ + 1) + +filter_ :: + ClickhouseDb db => + (AvailableColumnsType acols -> cols -> Clause table) -> + (AvailableColumns db table acols, SubQueryLevel) -> + Q db table cols NotOrdered acols +filter_ filterClause (table, level) = + Q + { tableQ = table, + subQueryLevelQ = level, + whereQ = Just $ Where . filterClause (getAvailableColumnsValue table), + limitQ = Nothing, + offsetQ = Nothing, + orderByQ = Nothing + } -filter_ :: (Columns 'NOT_AGG table -> cols -> Clause table) -> AllColumns db table -> Q db table cols NotOrdered -filter_ filterClause (AllColumns table) = +emptyFilter :: + ClickhouseDb db => + (AvailableColumns db table acols, SubQueryLevel) -> + Q db table cols NotOrdered acols +emptyFilter (table, level) = Q { tableQ = table, - whereQ = Where . filterClause table, + subQueryLevelQ = level, + whereQ = Nothing, limitQ = Nothing, offsetQ = Nothing, orderByQ = Nothing @@ -158,3 +202,12 @@ if_ = If (==..) = EqColumn infix 4 ==.. + +-- | Calculates the 'arg' value for a maximum 'val' value. +-- If there are multiple rows with equal 'val' being the maximum, which of the associated 'arg' is returned is not deterministic +argMax :: + (ClickhouseTable t, ClickhouseValue v1, ClickhouseValue v2) => + Column 'NOT_AGG t v1 -> -- 'arg' + Column 'NOT_AGG t v2 -> -- 'val' + Column 'AGG t v1 +argMax = ArgMax diff --git a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Queries.hs b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Queries.hs index 10c832f83..021b2f7ef 100644 --- a/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Queries.hs +++ b/lib/mobility-core/src/Kernel/Storage/ClickhouseV2/Queries.hs @@ -29,19 +29,17 @@ import qualified EulerHS.Language as L import Kernel.Prelude import Kernel.Storage.Clickhouse.Config import Kernel.Storage.ClickhouseV2.ClickhouseDb -import Kernel.Storage.ClickhouseV2.ClickhouseTable -import Kernel.Storage.ClickhouseV2.Internal.ClickhouseColumns import Kernel.Storage.ClickhouseV2.Internal.ClickhouseQuery +import Kernel.Storage.ClickhouseV2.Internal.Types import Kernel.Utils.Common hiding (Limit, Offset) --- should we throw error if query fails? findAll :: - forall a db t m cols gr ord. - (HasClickhouseEnv db m, ClickhouseTable t, ClickhouseColumns a cols, ClickhouseQuery gr, ClickhouseQuery ord) => - Select a db t cols gr ord -> + forall a db t m cols gr ord acols. + (HasClickhouseEnv db m, ClickhouseQuery (Select a db t cols gr ord acols)) => + Select a db t cols gr ord acols -> m [ColumnsType a cols] -findAll selectClause@(Select cols _ _) = do - let rawQuery = toClickhouseQuery @(Select a db t cols gr ord) selectClause +findAll selectClause@(Select cols _ q) = do + let rawQuery = toClickhouseQuery @(Select a db t cols gr ord acols) selectClause logDebug $ "clickhouse raw query v2: " <> T.pack rawQuery.getRawQuery resJSON <- runRawQuery @db @A.Value @m (Proxy @db) rawQuery case resJSON of @@ -50,7 +48,7 @@ findAll selectClause@(Select cols _ _) = do pure [] Right val@(A.Array xs) -> do logDebug $ "clickhouse raw query v2 json result: " <> show val - case mapM (parseColumns @a @cols (Proxy @a) cols) xs of + case mapM (\val' -> parseColumns @a @cols (Proxy @a) cols val' q.subQueryLevelQ) xs of Left err -> do logError $ "Clickhouse parsing result error: " <> T.pack err pure []