-
Notifications
You must be signed in to change notification settings - Fork 0
/
hive_operations.sh
40 lines (32 loc) · 919 Bytes
/
hive_operations.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/bin/bash
#
# hive_operations.sh
#
# Creates a Hive database and table, loads a CSV file that already resides
# in HDFS into that table, and prints the first 10 rows as a sanity check.
#
# Requires: the `hive` CLI on PATH.

# Stop at the first failing step so later steps never run against a
# database/table that was not created, and so the script's exit status
# reflects the failure instead of only the last command's status.
set -euo pipefail

# Define variables
readonly HIVE_DB="population_db"
readonly HIVE_TABLE="p_data"
readonly HDFS_FILE_PATH="/user/hadoop/population_data/sub-est2021_1.csv"

# Fail early with a clear message rather than a bare "command not found".
if ! command -v hive >/dev/null 2>&1; then
  printf 'error: hive CLI not found on PATH\n' >&2
  exit 1
fi

# Step 1: Create a Hive database
hive -e "CREATE DATABASE IF NOT EXISTS $HIVE_DB;"

# Step 2: Create a Hive table based on the CSV file structure
# NOTE(review): if the CSV starts with a header row, that row is loaded as
# data too; consider TBLPROPERTIES ('skip.header.line.count'='1') once the
# file layout is confirmed.
hive -e "
USE $HIVE_DB;
CREATE TABLE IF NOT EXISTS $HIVE_TABLE (
SUMLEV INT,
STATE INT,
COUNTY INT,
PLACE INT,
COUSUB INT,
CONCIT INT,
PRIMGEO_FLAG INT,
PFUNCSTAT STRING,
NAME STRING,
STNAME STRING,
ESTIMATESBASE2020 INT,
POPESTIMATE2020 INT,
POPESTIMATE2021 INT
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE;"

# Step 3: Load data from HDFS into the Hive table
# LOAD DATA INPATH (without LOCAL) moves the source file into the table's
# storage location, so the file is no longer at HDFS_FILE_PATH afterwards
# and a second run of this script will fail here unless it is re-uploaded.
hive -e "USE $HIVE_DB; LOAD DATA INPATH '$HDFS_FILE_PATH' INTO TABLE $HIVE_TABLE;"

# Step 4: Query the Hive table to verify the data
hive -e "USE $HIVE_DB; SELECT * FROM $HIVE_TABLE LIMIT 10;"