Skip to content

Commit

Permalink
[ADD] Hive data management | tables | clustered_by draft 4
Browse files Browse the repository at this point in the history
  • Loading branch information
MoustafaAMahmoud committed Oct 23, 2023
1 parent 867a5c7 commit 46ce32f
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 69 deletions.
155 changes: 94 additions & 61 deletions Ch03-HadoopMR/Ch03-HadoopMR.tex
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,11 @@ \subsection{Hive Architecture}
\end{frame}

\begin{frame}{Abstract Components of Apache Hive}
\begin{figure}
\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Architecture.pdf}
\caption{Abstract Components of Apache Hive}
\end{figure}

\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Architecture.pdf}


\end{frame}
Expand Down Expand Up @@ -384,7 +387,11 @@ \subsection{Hive Architecture}
\subsubsection{Job Execution Flow in Hive}
\begin{frame}{Job Execution Flow in Hive}

\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Query_Flow.pdf}
\begin{figure}
\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Query_Flow.pdf}
\caption{Hive Job execution flow}
\end{figure}


% \begin{itemize}
% \item Receive SQL Query.
Expand Down Expand Up @@ -538,7 +545,7 @@ \subsubsection{Hive Database}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{Hive Warehouse Structure}
% %\vspace{-0.5cm}
%\vspace{-0.5cm}
\begin{figure}
\includegraphics[width=\textwidth,height=.75\textheight]{./Figures/chapter-03/mermaid-diagram-hive_db.png}

Expand All @@ -548,7 +555,7 @@ \subsubsection{Hive Database}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{Hive Warehouse Structure | continued}


%\vspace{.6cm}
\begin{figure}
\includegraphics[width=\textwidth,height=.65\textheight]{./Figures/chapter-03/mermaid-diagram-retail_db.png}

Expand All @@ -558,7 +565,7 @@ \subsubsection{Hive Database}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{Hive Warehouse Structure | continued}


%\vspace{.6cm}
\begin{figure}
\includegraphics[width=\textwidth,height=.65\textheight,keepaspectratio]{./Figures/chapter-03/Screenshot_retail_db.png}

Expand Down Expand Up @@ -632,7 +639,7 @@ \subsubsection{Hive Database}
\subsubsection{Hive Tables}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]{Creating Tables in Hive}
\vspace{-.7cm}
\vspace{-.2cm}
\begin{lstlisting}[caption={Create Table Commands},language=SQL]
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... [constraint_specification])]
Expand All @@ -645,8 +652,7 @@ \subsubsection{Hive Tables}
[
[ROW FORMAT row_format]
[STORED AS file_format]
| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]
]
|STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]]
[LOCATION hdfs_path]
[TBLPROPERTIES (property_name=property_value, ...)]
[AS select_statement];
Expand Down Expand Up @@ -836,7 +842,7 @@ \subsubsection{Hive Tables}
\frametitle{CREATE TABLE in Hive | continued}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Table Partitions]
\small
\begin{lstlisting}[caption={SQL},language=SQL]
\begin{lstlisting}[caption={Simple SQL statement filtering the sales table on year and region},language=SQL]
SELECT * FROM sales WHERE year=2021 AND region='US';
\end{lstlisting}
\vspace{-0.5cm}
Expand Down Expand Up @@ -870,11 +876,18 @@ \subsubsection{Hive Tables}
\begin{lstlisting}[caption={Explain Plan Command for Non-Partitioned Table },language=SQL]
EXPLAIN SELECT * FROM sales_non_partitioned WHERE year=2021 AND region='US';
\end{lstlisting}
\vspace{-0.5cm}
\begin{figure}
\includegraphics[width=.7\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/non_partitioned_explain.png}
\caption{Non-Partitioned Table Execution Plan Summary}
\end{figure}
\vspace{-0.5cm}
\begin{lstlisting}[caption={Explain Plan Output for Non-Partitioned Table},language=SQL]
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: sales
Filter Operator
predicate: (year = 2021 and region = 'US')

\end{lstlisting}
\end{tcolorbox}

\end{frame}
Expand All @@ -885,22 +898,29 @@ \subsubsection{Hive Tables}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Table Partitions]
\small
\vspace{-0.3cm}
\begin{lstlisting}[caption={Explain Plan Command for Partitioned Table},language=SQL]
\begin{lstlisting}[caption={Explain Plan Command for Partitioned Table },language=SQL]
EXPLAIN SELECT * FROM sales_partitioned WHERE year=2021 AND region='US';
\end{lstlisting}
\vspace{-0.5cm}
\begin{figure}
\includegraphics[width=.7\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/partitioned_explain.png}
\caption{Partitioned Table Execution Plan Summary}
\end{figure}
\begin{lstlisting}[caption={Explain Plan Output for Partitioned Table},language=SQL]
STAGE PLANS:
Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
alias: sales_partitioned
Filter Operator
predicate: (year = 2021 and region = 'US')
Partition Pruning: (year = 2021 and region = 'US')
\end{lstlisting}
\end{tcolorbox}

\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Table Partitions]

\vspace{.5cm}
\begin{table}[h!]
\centering
\resizebox{\textwidth}{!}{%
Expand Down Expand Up @@ -943,10 +963,11 @@ \subsubsection{Hive Tables}
\frametitle{CREATE TABLE in Hive | continued}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | CLUSTERED BY]
\small
\vspace{.2cm}
\begin{table}[h!]
\centering
\resizebox{\textwidth}{!}{%
\begin{tabular}{|p{4cm}|p{5cm}|p{5cm}|p{5cm}|}
\begin{tabular}{|p{2cm}|p{2cm}|p{2cm}|p{2cm}|}
\hline
\rowcolor{Gray}
ID & Name & Department & Salary \\
Expand All @@ -960,6 +981,7 @@ \subsubsection{Hive Tables}
\hline
\end{tabular}
}
\caption{Sample data for employee table}
\end{table}
\end{tcolorbox}
\end{frame}
Expand Down Expand Up @@ -1045,11 +1067,13 @@ \subsubsection{Hive Tables}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.3cm}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | CLUSTERED BY]
\small
\vspace{-0.3cm}
\begin{lstlisting}[caption={Simplified Explain Plan for Table Without CLUSTERED BY},style=my-yamll]
ExplainPlan:
Stage:
Stage:
- Name: "Stage-1"
Type: "Map Reduce"
Operations:
Expand All @@ -1062,12 +1086,13 @@ \subsubsection{Hive Tables}
\end{frame}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.3cm}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | CLUSTERED BY]
\small
\vspace{-0.3cm}
\begin{lstlisting}[caption={Simplified Explain Plan for Table with CLUSTERED BY},style=my-yamll]
ExplainPlan:
Stage:
Stage:
- Name: "Stage-1"
Type: "Map Reduce"
Operations:
Expand Down Expand Up @@ -1127,53 +1152,61 @@ \subsubsection{Hive Tables}


\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.5cm}
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.65cm}

\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | SORTED BY]
\small
\begin{lstlisting}[caption={Explain Plan Query},language=SQL]
EXPLAIN SELECT * FROM Employee WHERE Department = 'HR';
\end{lstlisting}
\end{tcolorbox}
\end{frame}

\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.65cm}

\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | SORTED BY]
\small
\vspace{-0.35cm}
\begin{lstlisting}[caption={Simplified Explain Plan With only CLUSTERED BY},style=my-yamll]
Query:
Text: "EXPLAIN SELECT * FROM Employee WHERE Department = 'HR'"
ExplainPlan:
Stage:
- Name: "Stage-1"
Type: "Map Reduce"
Operations:
- TableScan:
TableName: "Employee"
Bucketing:
PruningEnabled: true
RelevantBuckets: "1/3"
- Filter:
Condition: "Department='HR'"
Stage:
- Name: "Stage-1"
Type: "Map Reduce"
Operations:
- TableScan:
TableName: "Employee"
Bucketing:
PruningEnabled: true
RelevantBuckets: "1/3"
- Filter:
Condition: "Department='HR'"
\end{lstlisting}
\end{tcolorbox}
\end{frame}

\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.5cm}
\vspace{-0.65cm}

\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | SORTED BY]
\small
\vspace{-0.35cm}
\begin{lstlisting}[caption={Simplified Explain Plan: With CLUSTERED BY and SORTED BY},style=my-yamll]
Query:
Text: "EXPLAIN SELECT * FROM Employee WHERE Department = 'HR'"
ExplainPlan:
Stage:
- Name: "Stage-1"
Type: "Map Reduce"
Operations:
- TableScan:
TableName: "Employee"
Bucketing:
PruningEnabled: true
RelevantBuckets: "1/3"
- Filter:
Condition: "Department='HR'"
- Sort:
Columns: "ID"
Stage:
- Name: "Stage-1"
Type: "Map Reduce"
Operations:
- TableScan:
TableName: "Employee"
Bucketing:
PruningEnabled: true
RelevantBuckets: "1/3"
- Filter:
Condition: "Department='HR'"
- Sort:
Columns: "ID"
\end{lstlisting}
\end{tcolorbox}
\end{frame}
Expand Down Expand Up @@ -1233,7 +1266,7 @@ \subsubsection{Hive Tables}
\begin{frame}[fragile]
\frametitle{CREATE TABLE in Hive | continued}
\vspace{-0.5cm}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | Sort By: When to Use]
\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | Conclusion]
\begin{itemize}
\item Understand the query and consumption patterns.
\end{itemize}
Expand All @@ -1252,7 +1285,7 @@ \subsubsection{Hive Tables}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{CREATE TABLE in Hive | continued}
\begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Data Skewing]
\begin{tcolorbox}[colback=white,colframe=black,title= Part 7: Data Skewing]
\small
\begin{itemize}
\item \texttt{[SKEWED BY (col\_name, ...) ON ((col\_value, ...), ...) [STORED AS DIRECTORIES]]}
Expand Down
Binary file modified main.pdf
Binary file not shown.
10 changes: 4 additions & 6 deletions preamble/code_listing.tex
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@

% Redefine the lstlisting format to remove the unwanted prefix
\DeclareCaptionFormat{mylst}{#1#2#3}
\DeclareCaptionFont{mycolor}{\color{red}}
\renewcommand\lstlistingname{Code Snippet:}
\renewcommand\lstlistlistingname{Code Snippet:}
%\DeclareCaptionStyle{listing} [justification=raggedright,indention=0pt, labelfont=bf]{}
%\captionsetup[lstlisting]{style=listing, labelsep=none}
\DeclareCaptionFont{mycolor}{\color{blue}}
\renewcommand\lstlistingname{Code C:}
\renewcommand\lstlistlistingname{Code C:}

\captionsetup[lstlisting]{format=mylst,labelfont={color=harvardcrimson},labelsep=space,justification=raggedright}
\captionsetup[lstlisting]{skip=2pt, font=footnotesize, labelfont={color=blue,footnotesize,bf}, format=mylst,labelsep=colon,justification=raggedright}

\lstset{%
frame=tb,
Expand Down
10 changes: 8 additions & 2 deletions preamble/preamble.tex
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@
\usepackage[inkscapelatex=false]{svg}
\usepackage{tcolorbox}
\usepackage{graphicx}
\usepackage{etoolbox}
%\BeforeBeginEnvironment{figure}{\vspace{-1em}}
%\BeforeBeginEnvironment{lstlisting}{\vspace{-1em}}
\BeforeBeginEnvironment{table}{\vspace{-1em}}

%\usepackage{enumitem}
\usepackage{caption}
\DeclareCaptionLabelFormat{nospace}{#1#2}
\captionsetup[table]{skip=0pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Table T-,labelformat=nospace,labelsep=colon}
\captionsetup[figure]{skip=0pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Figure F-,labelformat=nospace,labelsep=colon}
\captionsetup[table]{skip=1pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Table T-,labelformat=nospace,labelsep=colon}
\captionsetup[figure]{skip=1pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Figure F-,labelformat=nospace,labelsep=colon}



%\overfullrule=2cm
Expand Down

0 comments on commit 46ce32f

Please sign in to comment.