1、HBase、Hive 使用 S3 存储数据
2、使用多主节点(勾选“多主控支持”);但由于所有主节点都位于同一个 AZ,无法做到跨 AZ 容灾
3、Hive 元数据推荐存储于 AWS Glue Data Catalog,稳定且可共享
4、优化 HBase、Hadoop(HDFS)、Hive、Presto 参数(请查看附件 JSON,内容如下)
[{
"classification": "hbase-site",
"properties": {
"hbase.client.scanner.timeout.period": "1200000",
"hbase.ipc.server.callqueue.handler.factor": "0.2",
"hbase.regionserver.thread.compaction.large": "1",
"hbase.client.write.buffer": "5242880",
"hbase.regionserver.hlog.blocksize": "536870912",
"hbase.regionserver.hlog.splitlog.writer.threads": "10",
"hbase.hstore.blockingStoreFiles": "10",
"hbase.rootdir": "s3://prdhbasedb20201107/",
"zookeeper.session.timeout": "600000",
"hbase.rest.threads.max": "400",
"phoenix.coprocessor.maxServerCacheTimeToLiveMs": "1800000",
"hbase.regionserver.lease.period": "1200000",
"hbase.regionserver.optionallogflushinterval": "10000",
"hbase.rpc.timeout": "1800000",
"hbase.bucketcache.size": "8192",
"hbase.regionserver.handler.count": "150",
"hbase.regionserver.thread.compaction.small": "1",
"hbase.hregion.memstore.flush.size": "134217728",
"hbase.hlog.asyncer.number": "10",
"hbase.hstore.flusher.count": "8",
"hbase.hstore.blockingWaitTime": "30000",
"hbase.hregion.max.filesize": "10737418240",
"hbase.hregion.memstore.block.multiplier": "4"
},
"configurations": []
}, {
"classification": "hdfs-site",
"properties": {
"dfs.datanode.max.transfer.threads": "16384"
},
"configurations": []
}, {
"classification": "hbase",
"properties": {
"hbase.emr.storageMode": "s3"
},
"configurations": []
}, {
"classification": "hive-site",
"properties": {
"hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory",
"hive.blobstore.optimizations.enabled": "true",
"hive.blobstore.use.blobstore.as.scratchdir": "false",
"hive.exec.max.dynamic.partitions.pernode": "1000",
"hive.exec.dynamic.partition.mode": "nonstrict",
"hive.compute.query.using.stats": "false"
},
"configurations": []
}, {
"classification": "presto-connector-hive",
"properties": {
"hive.metastore.glue.datacatalog.enabled": "true"
},
"configurations": []
}]
5、EMR 主节点不要先 Stop 再 Start:这样会重置公网 IP,而公网 IP 重置后整个集群会下线。如需重启,可以 Reboot 服务器,该操作不会重置公网 IP。