HBase Java API

JAR dependencies

hadoop-hdfs
hbase-client

<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.7.4</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.hbase/hbase-client -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.2</version>
</dependency>

DDL operations: basic workflow

  1. Open a connection
    Connection conn = ConnectionFactory.createConnection(conf);
  2. Get an Admin (org.apache.hadoop.hbase.client.Admin)
    Admin admin = conn.getAdmin();

  3. Assemble the table metadata with an HTableDescriptor
    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(tablename));
    tableDesc.addFamily(new HColumnDescriptor(columnFamily));

  4. Run the DDL operation with the Admin and the HTableDescriptor
    admin.createTable(tableDesc);
  5. Release resources
    admin.close();
    conn.close();
    public static void createTable(String tablename, String columnFamily) throws Exception {
        // conf is assumed to be a shared Configuration built with HBaseConfiguration.create()
        Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        TableName tableNameObj = TableName.valueOf(tablename);
        if (admin.tableExists(tableNameObj)) {
            System.out.println("Table exists!");
        } else {
            HTableDescriptor tableDesc = new HTableDescriptor(tableNameObj);
            tableDesc.addFamily(new HColumnDescriptor(columnFamily));
            admin.createTable(tableDesc);
            System.out.println("create table success!");
        }
        admin.close();
        conn.close();
    }
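
The same Admin-based flow covers the other DDL operations. Below is a minimal sketch (not from the original notes, assuming the same static conf as createTable) of dropping a table; HBase requires the table to be disabled before it can be deleted, mirroring the shell's disable/drop sequence shown later.

    public static void deleteTable(String tablename) throws Exception {
        Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        TableName tableNameObj = TableName.valueOf(tablename);
        if (admin.tableExists(tableNameObj)) {
            admin.disableTable(tableNameObj); // a table must be disabled before it can be dropped
            admin.deleteTable(tableNameObj);
            System.out.println("delete table success!");
        } else {
            System.out.println("Table does not exist!");
        }
        admin.close();
        conn.close();
    }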

DML operations: basic workflow

  1. Open a connection
    Connection conn = ConnectionFactory.createConnection(conf);
  2. Get a Table (org.apache.hadoop.hbase.client.Table)
    Table table = connection.getTable(TableName.valueOf(tableName));
  3. Assemble a Put or Get object (one addColumn call per cell; a Get sketch follows the code below)
    Put put = new Put(Bytes.toBytes(rowKey));
    put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
  4. Execute the Put (or Get) against the Table
    table.put(put);
  5. Release resources
    table.close();
    connection.close();
    public static void addRecord(String tableName, String rowKey, String family, String qualifier, String value) {
        try {
            Connection connection = ConnectionFactory.createConnection(conf);
            Table table = connection.getTable(TableName.valueOf(tableName));
            Put put = new Put(Bytes.toBytes(rowKey));
            put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
            table.put(put);
            table.close();
            connection.close();
            System.out.println("insert record " + rowKey + " to table " + tableName + " ok.");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
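
Step 3 of the workflow also mentions Get. As a minimal sketch (again assuming the same static conf; the method name is just illustrative), this is how a single cell can be read back:

    public static String getRecord(String tableName, String rowKey, String family, String qualifier) throws IOException {
        Connection connection = ConnectionFactory.createConnection(conf);
        Table table = connection.getTable(TableName.valueOf(tableName));
        Get get = new Get(Bytes.toBytes(rowKey));
        get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier)); // only fetch this one column
        Result result = table.get(get);
        byte[] value = result.getValue(Bytes.toBytes(family), Bytes.toBytes(qualifier));
        table.close();
        connection.close();
        return value == null ? null : Bytes.toString(value);
    }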

Resources

[Windows + Eclipse + Maven + HBase 1.2.4 development environment setup](http://blog.csdn.net/chengyuqiang/article/details/69568496)

Using Hive from Java

Versions

hadoop 2.7.4
hive 1.2.2

Start the Hive remote service

HiveServer2 listens on port 10000 by default.

hiveserver2
hive --service hiveserver2
# verify that HiveServer2 started successfully
netstat -ntulp |grep 10000

Required JAR dependencies

java.lang.ClassNotFoundException: org.apache.hive.jdbc.HiveDriver

Cause: the hive-jdbc dependency is missing.

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-jdbc</artifactId>
    <version>${hive.version}</version>
</dependency>

java.lang.NoClassDefFoundError: org/apache/hadoop/conf/Configuration

Cause: the hadoop-common dependency is missing.

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>${hadoop.version}</version>
</dependency>

JDBC utility class

package siys16877.HiveDemo.utils;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class JDBCUtils {

    private static String driver = "org.apache.hive.jdbc.HiveDriver";
    private static String url = "jdbc:hive2://127.0.0.1:10000/default";

    // register the driver
    static {
        try {
            Class.forName(driver);
            System.out.println("driver loaded successfully");
        } catch (ClassNotFoundException e) {
            System.out.println("failed to load driver");
            throw new ExceptionInInitializerError(e);
        }
    }

    // obtain a connection
    public static Connection getConnection() {
        try {
            System.out.println("getting connection succeeded");
            return DriverManager.getConnection(url);
        } catch (SQLException e) {
            System.out.println("failed to get connection");
            e.printStackTrace();
        }
        return null;
    }

    // release resources
    public static void release(Connection conn, Statement st, ResultSet rs) {
        if (rs != null) {
            try {
                rs.close();
                System.out.println("release rs done!");
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                rs = null;
            }
        }
        if (st != null) {
            try {
                st.close();
                System.out.println("release st done!");
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                st = null;
            }
        }
        if (conn != null) {
            try {
                conn.close();
                System.out.println("release conn done!");
            } catch (SQLException e) {
                e.printStackTrace();
            } finally {
                conn = null;
            }
        }
    }
}

hive-jdbc demo test class

package siys16877.HiveDemo.hive;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import siys16877.HiveDemo.utils.JDBCUtils;

public class Demo {
    public static void main(String[] args) {
        try {
            String sql = "select * from test1";
            Connection con = JDBCUtils.getConnection();
            Statement statement = con.createStatement();
            ResultSet rs = statement.executeQuery(sql);
            while (rs.next()) {
                String name = rs.getString(2); // second column of test1
                System.out.println(name);
            }
            JDBCUtils.release(con, statement, rs);
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
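
One caveat with the demo above: if the query throws, JDBCUtils.release is never reached. A small sketch of a safer variant (same classes, hypothetical method name) moves the cleanup into a finally block:

    public static void queryTest1() {
        Connection con = null;
        Statement st = null;
        ResultSet rs = null;
        try {
            con = JDBCUtils.getConnection();
            st = con.createStatement();
            rs = st.executeQuery("select * from test1");
            while (rs.next()) {
                System.out.println(rs.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            JDBCUtils.release(con, st, rs); // always runs, even when the query fails
        }
    }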

Hive notes: UDF and UDAF

Required JAR: hive-exec

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>${hive.version}</version>
</dependency>

udfDemo

package siys16877.HiveDemo.hive;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class udfDemo extends UDF {
    public Text evaluate(Text a, Text b) {
        return new Text(a.toString() + "------" + b.toString());
    }
}

Hive queries are translated into MapReduce jobs, so the Text type here must be Hadoop's (org.apache.hadoop.io.Text); a plain Java String is not what Hadoop works with in this context.
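
As a small, purely illustrative refinement of udfDemo (the class name below is hypothetical): SQL NULL column values arrive as null Text references, so evaluate can guard against them instead of throwing a NullPointerException.

package siys16877.HiveDemo.hive;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class udfDemoNullSafe extends UDF {
    public Text evaluate(Text a, Text b) {
        if (a == null || b == null) {
            return null; // propagate SQL NULL
        }
        return new Text(a.toString() + "------" + b.toString());
    }
}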

use udfDemo

hive> add jar /home/shuai/udfDemo.jar;
Added [/home/shuai/udfDemo.jar] to class path
Added resources: [/home/shuai/udfDemo.jar]
-- the function name can be anything; it does not have to match the class name
create temporary function udfDemo as 'siys16877.HiveDemo.hive.udfDemo';
select udfDemo(tid, tname) from test1;

Extending the UDF use case

Most examples online are simple string concatenation, which has little practical value.
Here the query result is wrapped into a JSON string instead.

Wrapping the result as JSON

package siys16877.HiveDemo.hive;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.Text;

public class udfJson extends UDF {
    public Text evaluate(Text a, Text b) {
        // wrap the two columns as a simple JSON object
        return new Text("{ \"tid\": \"" + a.toString() + "\", \"tname\": \"" + b.toString() + "\" }");
    }
}

Getting a table's column names

package siys16877.HiveDemo.hive;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;

import siys16877.HiveDemo.utils.JDBCUtils;

public class ColumnName {
    public static void main(String[] args) throws SQLException {
        String sql = "select * from test1";
        Connection conn = JDBCUtils.getConnection();
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery(sql);
        ResultSetMetaData rsmt = rs.getMetaData();
        String col1 = rsmt.getColumnName(1);
        String col2 = rsmt.getColumnName(2);
        System.out.println(col1 + '\t' + col2);
        JDBCUtils.release(conn, st, rs);
    }
}

UDAF example: computing an average

A UDAF class bundles a state class (the data model) and an evaluator, and extends UDAF.

The evaluator implements the UDAFEvaluator interface and provides:

  • init
  • iterate
  • terminatePartial
  • merge
  • terminate
package org.apache.hadoop.hive.contrib.udaf.example;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * This is a simple UDAF that calculates average.
 *
 * It should be very easy to follow and can be used as an example for writing
 * new UDAFs.
 *
 * Note that Hive internally uses a different mechanism (called GenericUDAF) to
 * implement built-in aggregation functions, which are harder to program but
 * more efficient.
 */
@Description(name = "example_avg",
    value = "_FUNC_(col) - Example UDAF to compute average")
public final class UDAFExampleAvg extends UDAF {

  public static class UDAFAvgState {
    private long mCount;
    private double mSum;
  }

  /**
   * The actual class for doing the aggregation. Hive will automatically look
   * for all internal classes of the UDAF that implements UDAFEvaluator.
   */
  public static class UDAFExampleAvgEvaluator implements UDAFEvaluator {

    UDAFAvgState state;

    public UDAFExampleAvgEvaluator() {
      super();
      state = new UDAFAvgState();
      init();
    }

    /**
     * Reset the state of the aggregation.
     */
    public void init() {
      state.mSum = 0;
      state.mCount = 0;
    }

    /**
     * Iterate through one row of original data.
     *
     * The number and type of arguments need to the same as we call this UDAF
     * from Hive command line.
     *
     * This function should always return true.
     */
    public boolean iterate(Double o) {
      if (o != null) {
        state.mSum += o;
        state.mCount++;
      }
      return true;
    }

    /**
     * Terminate a partial aggregation and return the state. If the state is a
     * primitive, just return primitive Java classes like Integer or String.
     */
    public UDAFAvgState terminatePartial() {
      // This is SQL standard - average of zero items should be null.
      return state.mCount == 0 ? null : state;
    }

    /**
     * Merge with a partial aggregation.
     *
     * This function should always have a single argument which has the same
     * type as the return value of terminatePartial().
     */
    public boolean merge(UDAFAvgState o) {
      if (o != null) {
        state.mSum += o.mSum;
        state.mCount += o.mCount;
      }
      return true;
    }

    /**
     * Terminates the aggregation and return the final result.
     */
    public Double terminate() {
      // This is SQL standard - average of zero items should be null.
      return state.mCount == 0 ? null : Double.valueOf(state.mSum / state.mCount);
    }
  }

  private UDAFExampleAvg() {
    // prevent instantiation
  }
}

UDAF example: UDAFExampleMaxMinNUtil

package org.apache.hadoop.hive.contrib.udaf.example;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * The utility class for UDAFMaxN and UDAFMinN.
 */
public final class UDAFExampleMaxMinNUtil {

  /**
   * This class stores the information during an aggregation.
   *
   * Note that this class has to have a public constructor, so that Hive can
   * serialize/deserialize this class using reflection.
   */
  public static class State {
    ArrayList<Double> a; // This ArrayList holds the max/min N
    int n; // This is the N
  }

  /**
   * The base class of the UDAFEvaluator for UDAFMaxN and UDAFMinN.
   * We just need to override the getAscending function to make it work.
   */
  public abstract static class Evaluator implements UDAFEvaluator {

    private State state;

    public Evaluator() {
      state = new State();
      init();
    }

    /**
     * Reset the state.
     */
    public void init() {
      state.a = new ArrayList<Double>();
      state.n = 0;
    }

    /**
     * Returns true in UDAFMaxN, and false in UDAFMinN.
     */
    protected abstract boolean getAscending();

    /**
     * Iterate through one row of original data.
     * This function will update the internal max/min buffer if the internal buffer is not full,
     * or the new row is larger/smaller than the current max/min n.
     */
    public boolean iterate(Double o, int n) {
      boolean ascending = getAscending();
      state.n = n;
      if (o != null) {
        boolean doInsert = state.a.size() < n;
        if (!doInsert) {
          Double last = state.a.get(state.a.size() - 1);
          if (ascending) {
            doInsert = o < last;
          } else {
            doInsert = o > last;
          }
        }
        if (doInsert) {
          binaryInsert(state.a, o, ascending);
          if (state.a.size() > n) {
            state.a.remove(state.a.size() - 1);
          }
        }
      }
      return true;
    }

    /**
     * Get partial aggregation results.
     */
    public State terminatePartial() {
      // This is SQL standard - max_n of zero items should be null.
      return state.a.size() == 0 ? null : state;
    }

    /**
     * Two pointers are created to track the maximal elements in both o and MaxNArray.
     * The smallest element is added into tempArrayList.
     * Consider the sizes of o and MaxNArray may be different.
     */
    public boolean merge(State o) {
      if (o != null) {
        state.n = o.n;
        state.a = sortedMerge(o.a, state.a, getAscending(), o.n);
      }
      return true;
    }

    /**
     * Terminates the max N lookup and return the final result.
     */
    public ArrayList<Double> terminate() {
      // This is SQL standard - return state.MaxNArray, or null if the size is zero.
      return state.a.size() == 0 ? null : state.a;
    }
  }

  /**
   * Returns a comparator based on whether the order is ascending or not.
   * Has a dummy parameter to make sure generics can infer the type correctly.
   */
  static <T extends Comparable<T>> Comparator<T> getComparator(boolean ascending, T dummy) {
    Comparator<T> comp;
    if (ascending) {
      comp = new Comparator<T>() {
        public int compare(T o1, T o2) {
          return o1.compareTo(o2);
        }
      };
    } else {
      comp = new Comparator<T>() {
        public int compare(T o1, T o2) {
          return o2.compareTo(o1);
        }
      };
    }
    return comp;
  }

  /**
   * Insert an element into an ascending/descending array, and keep the order.
   * @param ascending
   *   if true, the array is sorted in ascending order,
   *   otherwise it is in descending order.
   */
  static <T extends Comparable<T>> void binaryInsert(List<T> list, T value, boolean ascending) {
    int position = Collections.binarySearch(list, value, getComparator(ascending, (T) null));
    if (position < 0) {
      position = (-position) - 1;
    }
    list.add(position, value);
  }

  /**
   * Merge two ascending/descending array and keep the first n elements.
   * @param ascending
   *   if true, the array is sorted in ascending order,
   *   otherwise it is in descending order.
   */
  static <T extends Comparable<T>> ArrayList<T> sortedMerge(List<T> a1, List<T> a2,
      boolean ascending, int n) {
    Comparator<T> comparator = getComparator(ascending, (T) null);
    int n1 = a1.size();
    int n2 = a2.size();
    int p1 = 0; // The current element in a1
    int p2 = 0; // The current element in a2
    ArrayList<T> output = new ArrayList<T>(n);
    while (output.size() < n && (p1 < n1 || p2 < n2)) {
      if (p1 < n1) {
        if (p2 == n2 || comparator.compare(a1.get(p1), a2.get(p2)) < 0) {
          output.add(a1.get(p1++));
        }
      }
      if (output.size() == n) {
        break;
      }
      if (p2 < n2) {
        if (p1 == n1 || comparator.compare(a2.get(p2), a1.get(p1)) < 0) {
          output.add(a2.get(p2++));
        }
      }
    }
    return output;
  }

  // No instantiation.
  private UDAFExampleMaxMinNUtil() {
  }
}

Installing Snappy compression for Hadoop

Check whether Hadoop supports Snappy

hadoop checknative -a

Check whether HBase supports Snappy

See the compression section of the official HBase documentation.

hbase --config ~/conf_hbase org.apache.hadoop.util.NativeLibraryChecker

A brief overview of compression codecs

The compression codecs commonly used with Hadoop are bzip2, gzip, LZO, and Snappy; LZO and Snappy require native libraries to be installed on the operating system.
According to fairly official benchmarks, different codecs suit different situations: bzip2 and gzip are CPU-intensive and give the highest compression ratios, but gzip output cannot be split for parallel processing. Snappy and LZO are roughly comparable, with Snappy slightly ahead, and both use less CPU than gzip.
In practice, when you want to balance CPU against I/O, Snappy and LZO are the usual choices.
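
A quick way to confirm from Java that Snappy actually works is a round trip through Hadoop's codec API. This is a minimal sketch (not from the original notes; the class name is hypothetical). It assumes hadoop-common on the classpath and the native Snappy library installed, and it fails at runtime if native Snappy support is missing.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.SnappyCodec;

public class SnappyCheck {
    public static void main(String[] args) throws IOException {
        SnappyCodec codec = new SnappyCodec();
        codec.setConf(new Configuration()); // the codec is Configurable

        byte[] original = "hello snappy".getBytes("UTF-8");

        // compress into an in-memory buffer
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        CompressionOutputStream out = codec.createOutputStream(compressed);
        out.write(original);
        out.close();

        // decompress and print the restored text
        CompressionInputStream in =
                codec.createInputStream(new ByteArrayInputStream(compressed.toByteArray()));
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        IOUtils.copyBytes(in, restored, 4096, true); // also closes the streams
        System.out.println(new String(restored.toByteArray(), "UTF-8"));
    }
}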

Build and install Snappy

Version: snappy 1.1.7

git clone https://github.com/google/snappy.git
cd snappy
mkdir build
cd build
cmake ..
make
# installs to /usr/local/lib by default
sudo make install

Build and install protobuf

BUILDING.txt in the Hadoop 2.7.4 source tree shows that protobuf 2.5 is required:

Requirements:
* Unix System
* JDK 1.7+
* Maven 3.0 or later
* Findbugs 1.3.9 (if running findbugs)
* ProtocolBuffer 2.5.0
* CMake 2.6 or newer (if compiling native code), must be 3.0 or newer on Mac
* Zlib devel (if compiling native code)
* openssl devel ( if compiling native hadoop-pipes and to get the best HDFS encryption performance )
* Linux FUSE (Filesystem in Userspace) version 2.6 or above ( if compiling fuse_dfs )
* Internet connection for first build (to fetch all Maven and Hadoop dependencies)

Download the source package

https://github.com/google/protobuf/releases

./autogen.sh
./configure
make
make check
sudo make install
sudo ldconfig # refresh shared library cache
# after installation, check the version with protoc --version

Build Hadoop 2.7.4 from source with Snappy support

See the earlier notes on building the Hadoop source from scratch.

mvn package -Pdist,native -DskipTests -Dtar -Drequire.snappy
#mvn clean package -Pdist,native -DskipTests -Dtar -Drequire.snappy -Dbundle.snappy -Dsnappy.lib=/usr/local/lib

Reference: installing Snappy for Hadoop

After the build finishes, replace the original $HADOOP_HOME/lib/native with the newly built native libraries.

HBase notes: basic operations

Start HBase

start-hbase.sh

hbase shell

hbase shell

DDL

Create a table

Syntax: create '<table name>', {NAME => '<column family>', VERSIONS => <n>}, ...
Example: create 'product', {NAME => 'computer', VERSIONS => 5}, {NAME => 'food', VERSIONS => 3}

List tables

list

Describe a table

describe '<table name>'
describe 'product'

Alter a table

To change a table's structure, first disable it, apply the change, then enable it again:

disable 'product'
alter 'product',{NAME => 'food',VERSIONS=> 3}
enable 'product'

Drop a table

disable 'product'
drop 'product'

DML

Insert data

put

Syntax: put '<table name>', '<row key>', '<column family:qualifier>', '<value>'
Example: put 'product', 'rowkey001', 'computer:name', 'ThinkPad E550'

scan: scan a table

get: fetch data by row key

count: count the rows in a table

delete: delete a specific cell (one column value) in a row

deleteall: delete an entire row

truncate: delete all data in a table

Interview experience: technical interview at company XX

Which Hive features did you use?

Data migration (Oracle to Hive)
SQL scripts

Give examples of UDFs and UDAFs (Hive)

A UDF extends UDF and overrides evaluate.
A UDF's input parameters usually correspond to table columns and are processed one row at a time.

To implement a UDAF in Hive or Spark SQL, what do you need to extend, and which parts of the logic do you implement yourself?

HBase row key hotspotting

I dug myself into a hole here: I said I had added a random prefix. If you add a random prefix, how do you then query by row key?
A random prefix destroys the row key's structure and makes it useless for lookups. A better approach is a random suffix, so rows can still be found with a prefix filter.

Row keys should be short.
Row keys should be unique.
Row keys should be well distributed (hashed).

Row key design example (see the sketch below):

userid|timestamp|hashCode
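
A minimal sketch (purely illustrative, not from the interview) of assembling such a composite row key; the choice of hashing the userId for the trailing component is an assumption:

    public static byte[] buildRowKey(String userId, long timestamp) {
        // userid|timestamp|hashCode layout from the example above
        String key = userId + "|" + timestamp + "|" + Integer.toHexString(userId.hashCode());
        return Bytes.toBytes(key); // org.apache.hadoop.hbase.util.Bytes
    }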

With Kafka + Spark Streaming, how do you guarantee each message is consumed exactly once after an interruption?

Recovery after an interruption relies on checkpoints; how does that recovery work, and how does Spark Streaming keep track of offsets?
One option is to read Kafka from Spark Streaming and maintain the offsets manually.

HBase compression codecs

From the HBase 1.2 official documentation:

Hadoop codec   HBase algorithm   Shell identifier
-              none              'none'
zlib           gzip              'GZ'
lz4            LZ4               'LZ4'
snappy         Snappy            'SNAPPY'
bzip2          -                 -
-              LZO               'LZO'

Enabling Compression on a ColumnFamily of an Existing Table using the HBase Shell

Altering an existing table:

hbase> disable 'test'
hbase> alter 'test', {NAME => 'cf', COMPRESSION => 'GZ'}
hbase> enable 'test'

Creating a New Table with Compression on a ColumnFamily

Specify the compression codec when creating the table:

create 'test2', { NAME => 'cf2', COMPRESSION => 'SNAPPY' }
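
The same thing can be done from the Java client API used earlier. A minimal sketch (assuming an existing Connection conn; Compression is org.apache.hadoop.hbase.io.compress.Compression), setting SNAPPY on the column family descriptor before creating the table:

    public static void createCompressedTable(Connection conn) throws IOException {
        Admin admin = conn.getAdmin();
        HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf("test2"));
        HColumnDescriptor cf = new HColumnDescriptor("cf2");
        cf.setCompressionType(Compression.Algorithm.SNAPPY); // same effect as COMPRESSION => 'SNAPPY'
        tableDesc.addFamily(cf);
        admin.createTable(tableDesc);
        admin.close();
    }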

Verifying a ColumnFamily's Compression Settings

Check a column family's compression setting:

hbase> desc 'test'
{NAME => 'cf', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'GZ', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
{NAME => 'info', BLOOMFILTER => 'ROW', VERSIONS => '1', IN_MEMORY => 'false', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', TTL => 'FOREVER', COMPRESSION => 'NONE', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '65536', REPLICATION_SCOPE => '0'}
2 row(s) in 0.0090 seconds

HBase queries (filters)

HBase filter operations in the shell
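
As a Java counterpart to the shell filters (and to the prefix-query idea from the row key discussion above), here is a minimal sketch of a scan with PrefixFilter; the method name and parameters are placeholders, and the filter classes come from org.apache.hadoop.hbase.filter:

    public static void scanByPrefix(Connection conn, String tableName, String prefix) throws IOException {
        Table table = conn.getTable(TableName.valueOf(tableName));
        Scan scan = new Scan();
        scan.setFilter(new PrefixFilter(Bytes.toBytes(prefix))); // only rows whose key starts with the prefix
        ResultScanner scanner = table.getScanner(scan);
        for (Result result : scanner) {
            System.out.println(Bytes.toString(result.getRow()));
        }
        scanner.close();
        table.close();
    }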

What has been your biggest takeaway since getting into big data?

(After I fumbled through a series of answers:) Which part are you best at?

Interview experience: 凌志软件

Format, interviewer, round

Phone interview, technical manager, first round

Walk me through your work experience

Best to cover your own technical highlights
University background

What work were you responsible for in your projects?

Describe some of the ETL work

Data ingestion + data filtering + report development
What architecture was used for data ingestion? Flume.

Tell me about the report development part

Time-dimension reports: daily, weekly, monthly, quarterly, yearly
Trade settlement reports
Share-of-total analysis (top N)

Joining data across non-relational databases

Normalize the shared fields into keys and join on them.

Do you manage the cluster yourself? What management software do you use?

Yes, Cloudera Manager (CM).
(What does cluster management cover? Administration, monitoring, diagnostics, and integration.)

Spark tuning

Do you have any questions for me?

Team situation (head count), travel, work location, whether the work is outsourced

Salary expectations

Based on the usual workload