定义: 采用了关系模型来组织数据的数据库
代表产品:MySQL、Oracle、SQL Server
大数据时代下的瓶颈:
定义:NoSQL数据库,并非"no sql(没有SQL)",而是"Not Only SQL(不仅仅是SQL)",即非关系型(non-relational)数据库。
代表产品:
官网:http://hbase.apache.org/
列簇示意图
image.png
官网指南:http://hbase.apache.org/book.html#quickstart
历史版本:https://archive.apache.org/dist/hbase/
下载上传Hbase文件
#解压hbase到指定目录
tar -zxvf software/hbase-0.98.6-hadoop2-bin.tar.gz -C modules/
#切换到配置文件目录
cd /root/modules/hbase-0.98.6-hadoop2/conf
vim hbase-env.sh
#配置环境变量
export JAVA_HOME=/root/modules/jdk1.7.0_67
#是否使用HBase自带的zookeeper(false表示使用独立部署的外部zookeeper)
export HBASE_MANAGES_ZK=false
#创建临时文件夹
mkdir -p data/tmp
vim hbase-site.xml
vim regionservers
bigguider22.com
考虑jar包的兼容性问题,需要替换jar包
替换jar包
启动服务:
cd /root/modules/hbase-0.98.6-hadoop2
bin/hbase-daemon.sh start master
bin/hbase-daemon.sh start regionserver
jps
6604 HRegionServer
6441 HMaster
访问网址:http://bigguider22.com:60010/
运行
cd /root/modules/hbase-0.98.6-hadoop2
Hbase架构
bin/hbase shell
hbase(main):001:0>
#列出能用到的命令
help
#创建namespace:create_namespace '命名空间名字'
create_namespace 'ns1'
create_namespace 'ns2'
#查看namespace: list_namespace
list_namespace
NAMESPACE
default
hbase
ns1
ns2
#描述namespace: describe_namespace '命名空间名字'
describe_namespace 'ns1'
#修改namespace: alter_namespace
#删除namespace: drop_namespace '命名空间名字' 必须是空的
drop_namespace 'ns2'
#创建表:create
create 'ns1:t1',{NAME => 'f1',VERSIONS => 1},{NAME => 'f2'} ,{NAME => 'f3'}
create 'ns1:t2' , 'f1' , 'f2' , 'f3'
#列出所有表:list
#描述表:describe 'ns1:t1'
desc 'ns1:t1'
desc 'ns1:t2'
#修改表:alter
alter 'ns1:t1',{NAME => 'f1',VERSIONS => 3}
#禁用表:disable 或 disable_all
disable 'ns1:t1'
disable_all 'ns1:.*'
#删除表:drop 或 drop_all
drop 'ns1:t1'
#启用表:enable 或 enable_all
enable 'ns1:t2'
#查看表是否存在:exists '命名空间:表名'
exists 'ns1:t1'
#查看表是否被禁用:is_disabled '命名空间:表名'
is_disabled 'ns1:t1'
#查看表是否可用:is_enabled '命名空间:表名'
is_enabled 'ns1:t2'
# 添加数据:put '表名' , '行标识' , '组名:字段名','值'
put 'ns1:t2' , '2018_1001' , 'f1:name' , 'jacks'
put 'ns1:t2' , '2018_1001' , 'f1:age' , '18'
put 'ns1:t2' , '2018_1001' , 'f1:sex' , 'male'
put 'ns1:t2' , '2018_1002' , 'f1:name' , 'wite'
put 'ns1:t2' , '2018_1002' , 'f1:age' , '19'
put 'ns1:t2' , '2018_1002' , 'f1:sex' , 'male'
put 'ns1:t2' , '2018_1003' , 'f1:name' , 'zt'
put 'ns1:t2' , '2018_1003' , 'f1:age' , '25'
put 'ns1:t2' , '2018_1003' , 'f1:sex' , 'male'
#查询数据:get查询某行数据,scan查询所有数据
scan 'ns1:t2'
scan 'ns1:t2', {STARTROW => '2018_1002' }
scan 'ns1:t2', {STARTROW => '2018_1002' , STOPROW => '2018_1003'}
get 'ns1:t2' , '2018_1001' , 'f1:name'
get 'ns1:t2' , '2018_1001' , 'f1'
get 'ns1:t2' , '2018_1001'
#删除数据:
delete 'ns1:t2' , '2018_1003' , 'f1:name'
#修改数据:
put 'ns1:t2' , '2018_1001' , 'f1:name' , 'jacks'
#统计数据:
count 'ns1:t2'
Hbase架构
Hbase表模型
region类似于关系型数据库中的分区或者分片
region与regionserver
region是分布式存储的最小单元
region与regionserver
不同的region分布到不同的regionserver上
region拆分
region拆分后,原来的region就没有了
region的结构
适用于0.96版本之前
新的寻址方式
cd /root/modules/zookeeper-3.4.10
bin/zkCli.sh
[zk: localhost:2181(CONNECTED) 0] ls /
[hbase, hadoop-ha, zookeeper]
ls /hbase
[meta-region-server, backup-masters, table, draining, region-in-transition, table-lock, running, master, namespace, hbaseid, online-snapshot, replication, splitWAL, recovering-regions, rs]
quit
默认拆分策略:
if region=1 then: flush size * 2 else : MaxRegionFileSize
进行拆分的方式
拆分方式
HLog结构
Hbase存储流程
导出配置文件
pom.xml添加依赖
编写java文件
package com.guider.hadoop.hbase;
Hbase与mapreducer关联
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Small CRUD demo client for the HBase 0.98 native Java API.
 * Operates on table "ns1:t2" created earlier via the HBase shell.
 */
public class HBaseClient {

    /**
     * Builds an HTable handle for the given table name.
     * Configuration is loaded via HBaseConfiguration (reads hbase-site.xml
     * from the classpath).
     *
     * @param tname table name, e.g. "ns1:t2"
     * @return an open HTable; the caller is responsible for closing it
     */
    public static HTable getTable(String tname) throws Exception {
        // Fixed: original line had a garbled identifier "cOnf=" that did not compile.
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, tname);
        return table;
    }

    /**
     * Gets one row by rowkey (restricted to column family "f1") and prints
     * every cell as family***qualifier***value***timestamp.
     */
    public static void getData(HTable table, String rowkey) throws Exception {
        // A Get targets exactly one rowkey.
        Get get = new Get(Bytes.toBytes(rowkey));
        // To fetch a single column instead of the whole family:
        // get.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("age"));
        get.addFamily(Bytes.toBytes("f1"));
        Result rs = table.get(get);
        for (Cell cell : rs.rawCells()) {
            System.out.println(cellToString(cell));
            System.out.println("------------------");
        }
    }

    /**
     * Puts a fixed demo cell (f1:sex = male) under the given rowkey,
     * then prints the row back for verification.
     */
    public static void putData(HTable table, String rowkey) throws Exception {
        Put put = new Put(Bytes.toBytes(rowkey));
        // Put.add(family, qualifier, value) is the 0.98-era API.
        put.add(getBytes("f1"), getBytes("sex"), getBytes("male"));
        table.put(put);
        getData(table, rowkey);
    }

    /**
     * Deletes the f1:sex column of the given row, then prints the row back.
     */
    public static void deleteData(HTable table, String rowkey) throws Exception {
        Delete del = new Delete(getBytes(rowkey));
        del.deleteColumn(getBytes("f1"), getBytes("sex"));
        // To drop the whole column family instead:
        // del.deleteFamily(getBytes("f1"));
        table.delete(del);
        getData(table, rowkey);
    }

    /**
     * Full-table scan; prints every cell of every row, with a separator
     * line after each row.
     */
    public static void scanData(HTable table) throws Exception {
        Scan scan = new Scan();
        ResultScanner rescan = table.getScanner(scan);
        for (Result rs : rescan) {
            for (Cell cell : rs.rawCells()) {
                System.out.println(cellToString(cell));
            }
            System.out.println("----------------------------");
        }
    }

    /**
     * Range scan from rowkey 2018_1002 (inclusive) to 2018_1003 (exclusive).
     */
    public static void rangeData(HTable table) throws Exception {
        Scan scan = new Scan();
        // To restrict the scan to one column:
        // scan.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"));
        scan.setStartRow(Bytes.toBytes("2018_1002")); // inclusive
        scan.setStopRow(Bytes.toBytes("2018_1003"));  // exclusive
        ResultScanner rsscan = table.getScanner(scan);
        for (Result rs : rsscan) {
            for (Cell cell : rs.rawCells()) {
                System.out.println(cellToString(cell));
            }
            System.out.println("---------------------------");
        }
    }

    /** Formats one cell as family***qualifier***value***timestamp. */
    private static String cellToString(Cell cell) {
        StringBuilder buffer = new StringBuilder();
        buffer.append(Bytes.toString(CellUtil.cloneFamily(cell)))
                .append("***")
                .append(Bytes.toString(CellUtil.cloneQualifier(cell)))
                .append("***")
                .append(Bytes.toString(CellUtil.cloneValue(cell)))
                .append("***")
                .append(cell.getTimestamp());
        return buffer.toString();
    }

    /** Convenience wrapper around Bytes.toBytes for string values. */
    public static byte[] getBytes(String value) {
        return Bytes.toBytes(value);
    }

    public static void main(String[] args) throws Exception {
        HTable table = getTable("ns1:t2");
        try {
            // get one row
            // getData(table, "2018_1001");

            // put a demo cell
            // putData(table, "2018_1004");

            // delete a cell
            // deleteData(table, "2018_1004");

            // full-table scan
            scanData(table);

            // range scan
            // rangeData(table);
        } finally {
            // Fixed: original leaked the table handle (never closed).
            table.close();
        }
    }
}
hadoop中需要关联Hbase相关jar包
#hbase相关jar包路径:/root/modules/hbase-0.98.6-hadoop2/lib
yarn jar hbase-server-0.98.6-hadoop2.jar rowcounter ns1:t2
#查看hbase所需要的jar包
cd /root/modules/hbase-0.98.6-hadoop2
bin/hbase mapredcp
/root/modules/hbase-0.98.6-hadoop2/lib/htrace-core-2.04.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/hbase-protocol-0.98.6-hadoop2.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/hbase-common-0.98.6-hadoop2.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/hbase-client-0.98.6-hadoop2.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/protobuf-java-2.5.0.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/hbase-hadoop-compat-0.98.6-hadoop2.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/high-scale-lib-1.1.1.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/hbase-server-0.98.6-hadoop2.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/guava-12.0.1.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/netty-3.6.6.Final.jar
:/root/modules/hbase-0.98.6-hadoop2/lib/zookeeper-3.4.5.jar
export HBASE_HOME=/root/modules/hbase-0.98.6-hadoop2
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:`$HBASE_HOME/bin/hbase mapredcp`
修改配置文件(关联Hbase与mapreducer之间的jar包)
cd /root/modules/hadoop-2.5.0-cdh5.3.6/etc/hadoop
vim hadoop-env.sh
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
if [ "$HADOOP_CLASSPATH" ]; then
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
else
export HADOOP_CLASSPATH=$f
fi
done
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:/root/modules/hbase-0.98.6-hadoop2/lib/*
#创建测试数据
create 'ns1:new_stu_info' , 'basicinfo'
create 'ns1:stu_info' , 'basicinfo' , 'secondaryinfo' , 'other'
put 'ns1:stu_info' , '20180610_001' , 'secondaryinfo:education' , 'undergraduate'
put 'ns1:stu_info' , '20180610_001' , 'basicinfo:age' , '18'
put 'ns1:stu_info' , '20180610_001' , 'basicinfo:sex' , 'male'
put 'ns1:stu_info' , '20180610_001' , 'basicinfo:name' , 'zhao'
put 'ns1:stu_info' , '20180610_001' , 'secondaryinfo:work' , 'worker'
put 'ns1:stu_info' , '20180610_001' , 'other:number' , '110'
put 'ns1:stu_info' , '20180610_002' , 'secondaryinfo:education' , 'highschool'
put 'ns1:stu_info' , '20180610_002' , 'basicinfo:age' , '22'
put 'ns1:stu_info' , '20180610_002' , 'basicinfo:sex' , 'female'
put 'ns1:stu_info' , '20180610_002' , 'basicinfo:name' , 'qian'
put 'ns1:stu_info' , '20180610_003' , 'basicinfo:age' , '22'
put 'ns1:stu_info' , '20180610_003' , 'basicinfo:sex' , 'male'
put 'ns1:stu_info' , '20180610_003' , 'basicinfo:name' , 'sun'
put 'ns1:stu_info' , '20180610_004' , 'basicinfo:age' , '18'
put 'ns1:stu_info' , '20180610_004' , 'basicinfo:name' , 'li'
put 'ns1:stu_info' , '20180610_005' , 'basicinfo:age' , '19'
put 'ns1:stu_info' , '20180610_005' , 'basicinfo:name' , 'zhou'
编写处理Hbase的mapreduce(直接运行)
package com.guider.hadoop.hbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.IOException;
public class HBaseMapReduce extends Configured implements Tool {
/*
* hbase -> hbase , 提取name这一列
*/
public static class HBaseMapper extends TableMapper
@Override
protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
//数据的筛选,通过操作我们封装的put来进行
Put put = new Put(key.get());
for (Cell cell : value.rawCells()){
//在这里筛选出basicinfo:name这一列
if ("basicinfo".equals(Bytes.toString(CellUtil.cloneFamily(cell)))) {
if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
put.add(cell);
}
}
}
context.write(key,put);
}
}
//driver:任务相关设置
@Override
public int run(String[] strings) throws Exception {
Configuration cOnf= this.getConf();
Job job = new Job(conf,"hbase-mapreduce");
job.setJarByClass(HBaseMapReduce.class); // class that contains mapper and reducer
Scan scan = new Scan();
TableMapReduceUtil.initTableMapperJob(
"ns1:stu_info", //输入的表
scan, // Scan instance to control CF and attribute selection
HBaseMapper.class, // mapper class
ImmutableBytesWritable.class, // mapper output key
Put.class, // mapper output value
job
);
TableMapReduceUtil.initTableReducerJob(
"ns1:new_stu_info",
null,
job
);
job.setNumReduceTasks(1); // at least one, adjust as required
boolean isSuccess = job.waitForCompletion(true);
return isSuccess?0:1;
}
public static void main(String[] args) throws Exception{
Configuration cOnf= HBaseConfiguration.create();
//将任务跑起来
//int statas = new WordCountMapReduce().run(args);
int statas = ToolRunner.run(conf, new HBaseMapReduce(), args);
//关闭我们的job
System.exit(statas);
}
}
或者运行jar包
#导出jar包
#运行jar包
#yarn jar 自己写的jar
#查看运行结果
scan 'ns1:new_stu_info'
ROW COLUMN+CELL
20180610_001 column=basicinfo:name, timestamp=1529829239386, value=zhao
20180610_002 column=basicinfo:name, timestamp=1529829265497, value=qian
20180610_003 column=basicinfo:name, timestamp=1529829277812, value=sun
20180610_004 column=basicinfo:name, timestamp=1529829296669, value=li
20180610_005 column=basicinfo:name, timestamp=1529829303928, value=zhou
5 row(s) in 0.0270 seconds
#创建测试数据(自定义分割符,默认分割符是制表符)
完全分布式环境搭建
cd /root/datas/hbase
vim hbase_stu_info.tsv
#将测试数据上传到HDFS文件系统中
hdfs dfs -mkdir -p /user/root/hbase
hdfs dfs -put hbase_stu_info.tsv /user/root/hbase
#创建对应表
create 'ns1:stu_info' , 'basicinfo' , 'secondaryinfo' , 'other'
#将测试数据导入到Hbase表中
yarn jar /root/modules/hbase-0.98.6-hadoop2/lib/hbase-server-0.98.6-hadoop2.jar \
importtsv \
-Dimporttsv.separator=, \
-Dimporttsv.columns=HBASE_ROW_KEY,'basicinfo:name','basicinfo:sex','basicinfo:age' \
'ns1:stu_info' \
/user/root/hbase/hbase_stu_info.tsv
#查看数据是否导入成功
scan 'ns1:stu_info'
#第二种方法
#创建测试数据(自定义分割符,默认分割符是制表符)
cd /root/datas/hbase
vim hbase_stu_info_01.tsv
20180610_008,fei,male,19
20180610_009,huo,female,22
#将测试数据上传到HDFS文件系统中
hdfs dfs -mkdir -p /user/root/hbase
hdfs dfs -put hbase_stu_info_01.tsv /user/root/hbase
#创建对应表
create 'ns1:stu_info' , 'basicinfo' , 'secondaryinfo' , 'other'
#将测试数据转换为指定格式放到HBaseFile
yarn jar /root/modules/hbase-0.98.6-hadoop2/lib/hbase-server-0.98.6-hadoop2.jar \
importtsv \
-Dimporttsv.separator=, \
-Dimporttsv.columns=HBASE_ROW_KEY,'basicinfo:name','basicinfo:sex','basicinfo:age' \
-Dimporttsv.bulk.output=/HBaseFile \
'ns1:stu_info' \
/user/root/hbase/hbase_stu_info_01.tsv
#将转换后的数据移动到Hbase表中
yarn jar /root/modules/hbase-0.98.6-hadoop2/lib/hbase-server-0.98.6-hadoop2.jar \
completebulkload \
/HBaseFile \
'ns1:stu_info'
#查看数据是否导入成功
scan 'ns1:stu_info'
vim hbase-site.xml
vim regionservers
bigguider22.com
bigguider23.com
bigguider24.com
scp -r hbase-0.98.6-hadoop2/ bigguider23.com:/root/modules/
scp -r hbase-0.98.6-hadoop2/ bigguider24.com:/root/modules/
/root/modules/hbase-0.98.6-hadoop2/bin/hbase-daemon.sh start master
/root/modules/hbase-0.98.6-hadoop2/bin/hbase-daemon.sh start regionserver
# http://bigguider22.com:60010/master-status
# http://bigguider23.com:60010/master-status
# http://bigguider24.com:60010/master-status