diff --git a/Ch03-HadoopMR/Ch03-HadoopMR.tex b/Ch03-HadoopMR/Ch03-HadoopMR.tex index 3bc909f..72804b9 100644 --- a/Ch03-HadoopMR/Ch03-HadoopMR.tex +++ b/Ch03-HadoopMR/Ch03-HadoopMR.tex @@ -151,8 +151,11 @@ \subsection{Hive Architecture} \end{frame} \begin{frame}{Abstract Components of Apache Hive} +\begin{figure} +\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Architecture.pdf} +\caption{Abstract Components of Apache Hive} +\end{figure} -\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Architecture.pdf} \end{frame} @@ -384,7 +387,11 @@ \subsection{Hive Architecture} \subsubsection{Job Execution Flow in Hive} \begin{frame}{Job Execution Flow in Hive} -\includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Query_Flow.pdf} +\begin{figure} + \includegraphics[width=\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/Hive_Query_Flow.pdf} + \caption{Hive Job execution flow} +\end{figure} + % \begin{itemize} % \item Receive SQL Query. 
@@ -538,7 +545,7 @@ \subsubsection{Hive Database} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile]{Hive Warehouse Structure} -% %\vspace{-0.5cm} +%\vspace{-0.5cm} \begin{figure} \includegraphics[width=\textwidth,height=.75\textheight]{./Figures/chapter-03/mermaid-diagram-hive_db.png} @@ -548,7 +555,7 @@ \subsubsection{Hive Database} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile]{Hive Warehouse Structure | continued} - +%\vspace{.6cm} \begin{figure} \includegraphics[width=\textwidth,height=.65\textheight]{./Figures/chapter-03/mermaid-diagram-retail_db.png} @@ -558,7 +565,7 @@ \subsubsection{Hive Database} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile]{Hive Warehouse Structure | continued} - +%\vspace{.6cm} \begin{figure} \includegraphics[width=\textwidth,height=.65\textheight,keepaspectratio]{./Figures/chapter-03/Screenshot_retail_db.png} @@ -632,7 +639,7 @@ \subsubsection{Hive Database} \subsubsection{Hive Tables} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile]{Creating Tables in Hive} -\vspace{-.7cm} +\vspace{-.2cm} \begin{lstlisting}[caption={Create Table Commands},language=SQL] CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name [(col_name data_type [column_constraint_specification] [COMMENT col_comment], ... 
[constraint_specification])] @@ -645,8 +652,7 @@ \subsubsection{Hive Tables} [ [ROW FORMAT row_format] [STORED AS file_format] -| STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)] -] +|STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]] [LOCATION hdfs_path] [TBLPROPERTIES (property_name=property_value, ...)] [AS select_statement]; @@ -836,7 +842,7 @@ \subsubsection{Hive Tables} \frametitle{CREATE TABLE in Hive | continued} \begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Table Partitions] \small -\begin{lstlisting}[caption={SQL},language=SQL] +\begin{lstlisting}[caption={Simple SQL query on the sales table filtered by year and region},language=SQL] SELECT * FROM sales WHERE year=2021 AND region='US'; \end{lstlisting} \vspace{-0.5cm} @@ -870,11 +876,18 @@ \subsubsection{Hive Tables} \begin{lstlisting}[caption={Explain Plan Command for Non-Partitioned Table },language=SQL] EXPLAIN SELECT * FROM sales_non_partitioned WHERE year=2021 AND region='US'; \end{lstlisting} - \vspace{-0.5cm} - \begin{figure} - \includegraphics[width=.7\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/non_partitioned_explain.png} - \caption{Non-Partitioned Table Execution Plan Summary} - \end{figure} +\vspace{-0.5cm} +\begin{lstlisting}[caption={Non-Partitioned Table Execution Plan Summary},language=SQL] +STAGE PLANS: +Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sales_non_partitioned + Filter Operator + predicate: (year = 2021 and region = 'US') + +\end{lstlisting} \end{tcolorbox} \end{frame} @@ -885,14 +898,21 @@ \subsubsection{Hive Tables} \begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Table Partitions] \small \vspace{-0.3cm} -\begin{lstlisting}[caption={Explain Plan Command for Partitioned Table},language=SQL] +\begin{lstlisting}[caption={Explain Plan Command for Partitioned Table },language=SQL] EXPLAIN SELECT * FROM sales_partitioned WHERE year=2021 AND region='US'; \end{lstlisting} 
\vspace{-0.5cm} - \begin{figure} - \includegraphics[width=.7\textwidth,height=\textheight,keepaspectratio]{./Figures/chapter-03/partitioned_explain.png} -\caption{Partitioned Table Execution Plan Summary} -\end{figure} +\begin{lstlisting}[caption={Partitioned Table Execution Plan Summary},language=SQL] + STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: sales_partitioned + Filter Operator + predicate: (year = 2021 and region = 'US') + Partition Pruning: (year = 2021 and region = 'US') +\end{lstlisting} \end{tcolorbox} \end{frame} @@ -900,7 +920,7 @@ \subsubsection{Hive Tables} \begin{frame}[fragile] \frametitle{CREATE TABLE in Hive | continued} \begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Table Partitions] - +\vspace{.5cm} \begin{table}[h!] \centering \resizebox{\textwidth}{!}{% @@ -943,10 +963,11 @@ \subsubsection{Hive Tables} \frametitle{CREATE TABLE in Hive | continued} \begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | CLUSTERED BY] \small + \vspace{.2cm} \begin{table}[h!] 
\centering \resizebox{\textwidth}{!}{% - \begin{tabular}{|p{4cm}|p{5cm}|p{5cm}|p{5cm}|} + \begin{tabular}{|p{2cm}|p{2cm}|p{2cm}|p{2cm}|} \hline \rowcolor{Gray} ID & Name & Department & Salary \\ @@ -960,6 +981,7 @@ \subsubsection{Hive Tables} \hline \end{tabular} } + \caption{Sample data for employee table} \end{table} \end{tcolorbox} \end{frame} @@ -1045,11 +1067,13 @@ \subsubsection{Hive Tables} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] \frametitle{CREATE TABLE in Hive | continued} +\vspace{-0.3cm} \begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | CLUSTERED BY] \small +\vspace{-0.3cm} \begin{lstlisting}[caption={Simplified Explain Plan for Table Without CLUSTERED BY},style=my-yamll] ExplainPlan: -Stage: +Stage: - Name: "Stage-1" Type: "Map Reduce" Operations: @@ -1062,12 +1086,13 @@ \subsubsection{Hive Tables} \end{frame} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}[fragile] -\frametitle{CREATE TABLE in Hive | continued} +\frametitle{CREATE TABLE in Hive | continued} +\vspace{-0.3cm} \begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | CLUSTERED BY] \small +\vspace{-0.3cm} \begin{lstlisting}[caption={Simplified Explain Plan for Table with CLUSTERED BY},style=my-yamll] -ExplainPlan: -Stage: +Stage: - Name: "Stage-1" Type: "Map Reduce" Operations: @@ -1127,53 +1152,61 @@ \subsubsection{Hive Tables} \begin{frame}[fragile] -\frametitle{CREATE TABLE in Hive | continued} -\vspace{-0.5cm} +\frametitle{CREATE TABLE in Hive | continued} +\vspace{-0.65cm} + +\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | SORTED BY] + \small +\begin{lstlisting}[caption={Explain Plan Query},language=SQL] +EXPLAIN SELECT * FROM Employee WHERE Department = 'HR' +\end{lstlisting} +\end{tcolorbox} +\end{frame} + +\begin{frame}[fragile] +\frametitle{CREATE TABLE in Hive | continued} +\vspace{-0.65cm} 
\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | SORTED BY] \small +\vspace{-0.35cm} \begin{lstlisting}[caption={Simplified Explain Plan With only CLUSTERED BY},style=my-yamll] -Query: - Text: "EXPLAIN SELECT * FROM Employee WHERE Department = 'HR'" - ExplainPlan: - Stage: - - Name: "Stage-1" - Type: "Map Reduce" - Operations: - - TableScan: - TableName: "Employee" - Bucketing: - PruningEnabled: true - RelevantBuckets: "1/3" - - Filter: - Condition: "Department='HR'" +Stage: + - Name: "Stage-1" + Type: "Map Reduce" + Operations: + - TableScan: + TableName: "Employee" + Bucketing: + PruningEnabled: true + RelevantBuckets: "1/3" + - Filter: + Condition: "Department='HR'" \end{lstlisting} \end{tcolorbox} \end{frame} - + \begin{frame}[fragile] \frametitle{CREATE TABLE in Hive | continued} -\vspace{-0.5cm} +\vspace{-0.65cm} \begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | SORTED BY] \small + \vspace{-0.35cm} \begin{lstlisting}[caption={Simplified Explain Plan: With CLUSTERED BY and SORT BY},style=my-yamll] -Query: -Text: "EXPLAIN SELECT * FROM Employee WHERE Department = 'HR'" -ExplainPlan: - Stage: - - Name: "Stage-1" - Type: "Map Reduce" - Operations: - - TableScan: - TableName: "Employee" - Bucketing: - PruningEnabled: true - RelevantBuckets: "1/3" - - Filter: - Condition: "Department='HR'" - - Sort: - Columns: "ID" +Stage: + - Name: "Stage-1" + Type: "Map Reduce" + Operations: + - TableScan: + TableName: "Employee" + Bucketing: + PruningEnabled: true + RelevantBuckets: "1/3" + - Filter: + Condition: "Department='HR'" + - Sort: + Columns: "ID" \end{lstlisting} \end{tcolorbox} \end{frame} @@ -1233,7 +1266,7 @@ \subsubsection{Hive Tables} \begin{frame}[fragile] \frametitle{CREATE TABLE in Hive | continued} \vspace{-0.5cm} -\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: Clustering and Sorting | Sort By: When to Use] +\begin{tcolorbox}[colback=white,colframe=black,title= Part 6: 
Clustering and Sorting | Conclusion] \begin{itemize} \item Understand the query and consumption patterns. \end{itemize} @@ -1252,7 +1285,7 @@ \subsubsection{Hive Tables} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{frame}{CREATE TABLE in Hive | continued} - \begin{tcolorbox}[colback=white,colframe=black,title= Part 5: Data Skewing] + \begin{tcolorbox}[colback=white,colframe=black,title= Part 7: Data Skewing] \small \begin{itemize} \item \texttt{[SKEWED BY (col\_name, ...) ON ((col\_value, ...), ...) [STORED AS DIRECTORIES]]} diff --git a/main.pdf b/main.pdf index 647ede0..78f3936 100644 Binary files a/main.pdf and b/main.pdf differ diff --git a/preamble/code_listing.tex b/preamble/code_listing.tex index 0cafb7d..65f61f3 100644 --- a/preamble/code_listing.tex +++ b/preamble/code_listing.tex @@ -10,13 +10,11 @@ % Redefine the lstlisting format to remove the unwanted prefix \DeclareCaptionFormat{mylst}{#1#2#3} -\DeclareCaptionFont{mycolor}{\color{red}} -\renewcommand\lstlistingname{Code Snippet:} -\renewcommand\lstlistlistingname{Code Snippet:} -%\DeclareCaptionStyle{listing} [justification=raggedright,indention=0pt, labelfont=bf]{} -%\captionsetup[lstlisting]{style=listing, labelsep=none} +\DeclareCaptionFont{mycolor}{\color{blue}} +\renewcommand\lstlistingname{Code C:} +\renewcommand\lstlistlistingname{Code C:} -\captionsetup[lstlisting]{format=mylst,labelfont={color=harvardcrimson},labelsep=space,justification=raggedright} +\captionsetup[lstlisting]{skip=2pt, font=footnotesize, labelfont={color=blue,footnotesize,bf}, format=mylst,labelsep=colon,justification=raggedright} \lstset{% frame=tb, diff --git a/preamble/preamble.tex b/preamble/preamble.tex index efb3892..d9ef0a4 100644 --- a/preamble/preamble.tex +++ b/preamble/preamble.tex @@ -40,11 +40,17 @@ \usepackage[inkscapelatex=false]{svg} \usepackage{tcolorbox} \usepackage{graphicx} +\usepackage{etoolbox} 
+%\BeforeBeginEnvironment{figure}{\vspace{-1em}} +%\BeforeBeginEnvironment{lstlisting}{\vspace{-1em}} +\BeforeBeginEnvironment{table}{\vspace{-1em}} + %\usepackage{enumitem} \usepackage{caption} \DeclareCaptionLabelFormat{nospace}{#1#2} -\captionsetup[table]{skip=0pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Table T-,labelformat=nospace,labelsep=colon} -\captionsetup[figure]{skip=0pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Figure F-,labelformat=nospace,labelsep=colon} +\captionsetup[table]{skip=1pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Table T-,labelformat=nospace,labelsep=colon} +\captionsetup[figure]{skip=1pt,font=footnotesize,labelfont={color=blue,footnotesize,bf},name=Figure F-,labelformat=nospace,labelsep=colon} + %\overfullrule=2cm