博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
基于Hadoop生态圈的数据仓库实践 —— 环境搭建(三)笔记
阅读量:6535 次
发布时间:2019-06-24

本文共 6496 字,大约阅读时间需要 21 分钟。

-- 建立源库表

USE source;

-- 建立客户表

CREATE TABLE customer (
customer_number INT NOT NULL AUTO_INCREMENT PRIMARY KEY comment '客户编号,主键',
customer_name VARCHAR(50) comment '客户名称',
customer_street_address VARCHAR(50) comment '客户住址',
customer_zip_code INT comment '邮编',
customer_city VARCHAR(30) comment '所在城市',
customer_state VARCHAR(2) comment '所在省份' );

-- 建立产品表

CREATE TABLE product (
product_code INT NOT NULL AUTO_INCREMENT PRIMARY KEY comment '产品编码,主键',
product_name VARCHAR(30) comment '产品名称',
product_category VARCHAR(30) comment '产品类型' );

-- 建立销售订单表

CREATE TABLE sales_order (
order_number INT NOT NULL AUTO_INCREMENT PRIMARY KEY comment '订单号,主键',
customer_number INT comment '客户编号',
product_code INT comment '产品编码',
order_date DATETIME comment '订单日期',
entry_date DATETIME comment '登记日期',
order_amount DECIMAL(10 , 2 ) comment '销售金额',
foreign key (customer_number)
references customer (customer_number)
on delete cascade on update cascade,
foreign key (product_code)
references product (product_code)
on delete cascade on update cascade );

-- 新增客户数据 

INSERT INTO customer (customer_name, customer_street_address, customer_zip_code, customer_city, customer_state) VALUES ('Really Large Customers', '7500 Louise Dr.',17050, 'Mechanicsburg','PA'), ('Small Stores', '2500 Woodland St.',17055, 'Pittsburgh','PA'), ('Medium Retailers','1111 Ritter Rd.',17055,'Pittsburgh','PA'), ('Good Companies','9500 Scott St.',17050,'Mechanicsburg','PA'), ('Wonderful Shops','3333 Rossmoyne Rd.',17050,'Mechanicsburg','PA'), ('Loyal Clients','7070 Ritter Rd.',17055,'Pittsburgh','PA'), ('Distinguished Partners','9999 Scott St.',17050,'Mechanicsburg','PA');

-- 生成产品表测试数据

INSERT INTO product (product_name, product_category ) VALUES ('Hard Disk Drive', 'Storage'), ('Floppy Drive', 'Storage'), ('LCD Panel', 'Monitor');

-- 生成100条销售订单表测试数据
DROP PROCEDURE IF EXISTS generate_sales_order_data;

delimiter //

DROP PROCEDURE IF EXISTS generate_sales_order_data;
create procedure generate_sales_order_data()
begin
DROP TABLE IF EXISTS temp_sales_order_data;
CREATE TABLE temp_sales_order_data AS SELECT * FROM sales_order WHERE 1=0;
SET @start_date := unix_timestamp('2016-03-01');
SET @end_date := unix_timestamp('2016-07-01');
SET @i := 1;
while @i<=100 do
SET @customer_number := floor(1 + rand() * 6);
SET @product_code := floor(1 + rand() * 2);
SET @order_date := from_unixtime(@start_date + rand() * (@end_date - @start_date));
SET @amount := floor(1000 + rand() * 9000);
INSERT INTO temp_sales_order_data VALUES (@i,@customer_number,@product_code,@order_date,@order_date,@amount);
SET @i:=@i+1;
END while;
TRUNCATE TABLE sales_order;
INSERT INTO sales_order SELECT NULL,customer_number,product_code,order_date,entry_date,order_amount FROM temp_sales_order_data ORDER BY order_date;
select * from sales_order;
COMMIT;
end//

delimiter ;
call generate_sales_order_data();

-- 建立RDS库表
USE rds;
-- 建立客户过渡表
CREATE TABLE customer ( customer_number INT comment 'number', customer_name VARCHAR(30) comment 'name', customer_street_address VARCHAR(30) comment 'address', customer_zip_code INT comment 'zipcode', customer_city VARCHAR(30) comment 'city', customer_state VARCHAR(2) comment 'state');
-- 建立产品过渡表
CREATE TABLE product ( product_code INT comment 'code', product_name VARCHAR(30) comment 'name', product_category VARCHAR(30) comment 'category' );
-- 建立销售订单过渡表
CREATE TABLE sales_order ( order_number INT comment 'order number', customer_number INT comment 'customer number', product_code INT comment 'product code', order_date TIMESTAMP comment 'order date', entry_date TIMESTAMP comment 'entry date', order_amount DECIMAL(10 , 2 ) comment 'order amount');
-- 建立数据仓库表
USE dw;
-- 建立客户维度表
CREATE TABLE customer_dim ( customer_sk INT comment 'surrogate key', customer_number INT comment 'number', customer_name VARCHAR(50) comment 'name', customer_street_address VARCHAR(50) comment 'address', customer_zip_code INT comment 'zipcode', customer_city VARCHAR(30) comment 'city', customer_state VARCHAR(2) comment 'state', version INT comment 'version', effective_date DATE comment 'effective date', expiry_date DATE comment 'expiry date' )CLUSTERED BY (customer_sk) INTO 8 BUCKETS STORED AS ORC ;
-- 建立产品维度表
CREATE TABLE product_dim ( product_sk INT comment 'surrogate key', product_code INT comment 'code', product_name VARCHAR(30) comment 'name', product_category VARCHAR(30) comment 'category', version INT comment 'version', effective_date DATE comment 'effective date', expiry_date DATE comment 'expiry date' )CLUSTERED BY (product_sk) INTO 8 BUCKETS STORED AS ORC;
-- 建立订单维度表
CREATE TABLE order_dim ( order_sk INT comment 'surrogate key', order_number INT comment 'number', version INT comment 'version', effective_date DATE comment 'effective date', expiry_date DATE comment 'expiry date' )CLUSTERED BY (order_sk) INTO 8 BUCKETS STORED AS ORC ;
-- 建立销售订单事实表
CREATE TABLE sales_order_fact ( order_sk INT comment 'order surrogate key', customer_sk INT comment 'customer surrogate key', product_sk INT comment 'product surrogate key', order_date_sk INT comment 'date surrogate key', order_amount DECIMAL(10 , 2 ) comment 'order amount' )CLUSTERED BY (order_sk) INTO 8 BUCKETS STORED AS ORC;

#########date_dim_generate.sh########

date1="$1"
date2="$2"
tempdate=`date -d "$date1" +%F`
tempdateSec=`date -d "$date1" +%s`
enddateSec=`date -d "$date2" +%s`
min=1
max=`expr \( $enddateSec - $tempdateSec \) / \( 24 \* 60 \* 60 \) + 1`
cat /dev/null > ./date_dim.csv

#echo max

while [ $min -le $max ]
do
month=`date -d "$tempdate" +%m`
month_name=`date -d "$tempdate" +%B`
quarter=`echo $month | awk '{print int (($0-1)/3)+1}'`
year=`date -d "$tempdate" +%Y`
echo ${min}","${tempdate}","${month}","${month_name}","${quarter}","${year} >> ./date_dim.csv
tempdate=`date -d "+$min day $date1" +%F`
tempdateSec=`date -d "+$min day $date1" +%s`
min=`expr $min + 1`
done

##########create_table_date_dim.sql#############
drop table if exists date_dim;
create table date_dim (
date_sk int comment 'surrogate key',
date date comment 'date,yyyy-mm-dd',
month tinyint comment 'month',
month_name varchar(9) comment 'month name',
quarter tinyint comment 'quarter',
year smallint comment 'year')comment 'date dimension table'row format delimited fields terminated by ',' stored as textfile;

 

./date_dim_generate.sh 2000-01-01 2020-12-31

beeline -u jdbc:hive2://localhost:10000/dw -f create_table_date_dim.sql --silent

hdfs dfs -put -f date_dim.csv /user/hive/warehouse/dw.db/date_dim/

 

转载:

 

转载于:https://www.cnblogs.com/chengjunhao/p/9734967.html

你可能感兴趣的文章
PHPExcel 导出 excel
查看>>
npm 安装
查看>>
WebSocket在spring messagemapping下获取httpsession
查看>>
堆栈的简单实现,以及简单操作
查看>>
一套海量在线用户的移动端IM架构设计实践分享(含详细图文)
查看>>
工作日志
查看>>
运用 myeclipse 自动生成 映射文件时 需要注意的问题
查看>>
spark中的动态executor分配
查看>>
iOS--在TableViewCell中创建UICollectionView
查看>>
Hadoop & HDFS & Hive & HBase关系图
查看>>
IOS 百度地图点聚合使用
查看>>
此地址使用了一个通常用于网络浏览以外的端口。出于安全原因,Firefox 取消了该请求...
查看>>
webpack开发jquery插件——开发环境准备
查看>>
《SQL With ties 的理解 与 》
查看>>
linux常用命令之用户及用户组操作
查看>>
判断图片url是否存在图片
查看>>
企业CIO如何做好免费ERP系统的选型
查看>>
旧版IDEA下载地址
查看>>
【CSS】-----div 和 span的区别
查看>>
Joda-Time 简介
查看>>