写点什么

OrientDB etl 工具 导入 rdbms 数据

用户头像
wkq2786130
关注
发布于: 2020 年 07 月 22 日



原文 http://weikeqin.com/2018/09/30/orientdb-etl-csv/



这儿使用 用户 - 群组 举例。(可以认为是QQ用户和QQ群的关系)

一个用户有多个群组,一个群组有多个用户,用户和群组是多对多关系。

<!--

假如一个QQ用户想找对象,目标在在QQ好友的好友的好友的范围内。也就是关系在3度及以内。这个时候用rdbms就比较吃力,图数据库可以很方便的解决这个问题。

-->

这儿使用OrientDb官方自带的etl rdbms导入数据。

吐槽:OrientDb ETL 工具 对多对多的关系支持不好,还需要自己写代码处理。(总感觉OrientDb是程序员思维的产品,功能可以用)

Neo4j支持的就特别好,一个语句就解决了,瞬间感觉Neo4j好灵活。



1. 准备环境

下载MySQL的jar包放到OrientDb的lib目录下。

启动OrientDb

准备MySQL数据



<!--more-->

-- ----------------------------
-- Table structure for group
-- ----------------------------
DROP TABLE IF EXISTS `group`;
CREATE TABLE `group` (
`group_id` int(11) NOT NULL AUTO_INCREMENT,
`group_name` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`group_id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 5 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Records of group
-- ----------------------------
INSERT INTO `group` VALUES (1, 'group1');
INSERT INTO `group` VALUES (2, 'group2');
INSERT INTO `group` VALUES (3, 'group3');
INSERT INTO `group` VALUES (4, '组织4');
-- ----------------------------
-- Table structure for person
-- ----------------------------
DROP TABLE IF EXISTS `person`;
CREATE TABLE `person` (
`person_id` int(11) NOT NULL AUTO_INCREMENT,
`person_name` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`person_id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 5 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Records of person
-- ----------------------------
INSERT INTO `person` VALUES (1, 'person1');
INSERT INTO `person` VALUES (2, 'person2');
INSERT INTO `person` VALUES (3, 'person3');
INSERT INTO `person` VALUES (4, '人4');
-- ----------------------------
-- Table structure for person_group
-- ----------------------------
DROP TABLE IF EXISTS `person_group`;
CREATE TABLE `person_group` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`person_id` int(11) NULL DEFAULT NULL,
`person_name` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
`group_id` int(11) NULL DEFAULT NULL,
`group_name` varchar(8) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 16 CHARACTER SET = utf8mb4 COLLATE = utf8mb4_general_ci ROW_FORMAT = Dynamic;
-- ----------------------------
-- Records of person_group
-- ----------------------------
INSERT INTO `person_group` VALUES (1, 1, 'person1', 1, NULL);
INSERT INTO `person_group` VALUES (2, 1, NULL, 2, NULL);
INSERT INTO `person_group` VALUES (3, 1, NULL, 3, NULL);
INSERT INTO `person_group` VALUES (4, 1, NULL, 4, NULL);
INSERT INTO `person_group` VALUES (5, 2, NULL, 1, NULL);
INSERT INTO `person_group` VALUES (6, 2, NULL, 2, NULL);
INSERT INTO `person_group` VALUES (7, 2, NULL, 3, NULL);
INSERT INTO `person_group` VALUES (8, 2, NULL, 4, NULL);
INSERT INTO `person_group` VALUES (9, 3, NULL, 1, NULL);
INSERT INTO `person_group` VALUES (10, 3, NULL, 2, NULL);
INSERT INTO `person_group` VALUES (11, 3, NULL, 3, NULL);
INSERT INTO `person_group` VALUES (12, 3, NULL, 4, NULL);
INSERT INTO `person_group` VALUES (13, 4, NULL, 1, NULL);
INSERT INTO `person_group` VALUES (14, 4, NULL, 2, NULL);
INSERT INTO `person_group` VALUES (15, 4, NULL, 3, NULL);



2. 导入用户(Person)节点



oetl.bat D:/WorkSpaces/data/orientdb/orientdb-etl-rdbms-Person.json



{
"config": {
"log": "debug"
},
"extractor": {
"jdbc": {
"driver": "com.mysql.jdbc.Driver",
"url": "jdbc:mysql://localhost:3306/test?useUnicode=true",
"userName": "root",
"userPassword": "root",
"query": " select person_id, person_name from `person` ; "
}
},
"transformers": [
{
"vertex": {
"class": "Person"
}
}
],
"loader": {
"orientdb": {
"dbURL": "remote:localhost/person_group",
"dbType": "graph",
"dbUser": "root",
"dbPassword": "root",
"serverUser": "root",
"serverPassword": "root",
"classes": [
{
"name": "Person",
"extends": "V"
}
],
"indexes": [
{
"class": "Person",
"fields": [
"person_id:integer"
],
"type": "NOTUNIQUE"
},
{
"class": "Person",
"fields": [
"person_name:string"
],
"type": "NOTUNIQUE"
}
]
}
}
}




3. 导入群组(Group)节点



oetl.bat D:/WorkSpaces/data/orientdb/orientdb-etl-rdbms-Group.json



{
"config": {
"log": "debug"
},
"extractor": {
"jdbc": {
"driver": "com.mysql.jdbc.Driver",
"url": "jdbc:mysql://localhost:3306/test?useUnicode=true",
"userName": "root",
"userPassword": "root",
"query": " select group_id, group_name from `group` ; "
}
},
"transformers": [
{
"vertex": {
"class": "Group"
}
}
],
"loader": {
"orientdb": {
"dbURL": "remote:localhost/person_group",
"dbType": "graph",
"dbUser": "root",
"dbPassword": "root",
"serverUser": "root",
"serverPassword": "root",
"classes": [
{
"name": "Group",
"extends": "V"
}
],
"indexes": [
{
"class": "Group",
"fields": [
"group_id:integer"
],
"type": "NOTUNIQUE"
},
{
"class": "Group",
"fields": [
"group_name:string"
],
"type": "NOTUNIQUE"
}
]
}
}
}




4. 建立关系



在建立多对多关系这块,OrientDb做的不好,etl工具不能直接建立多对多的关系,需要自己想办法处理。

或者多对多关系自己写代码解决。

我是先用节点保存关系,然后再写代码根据PersonGroup建立Person和Group关系,最后删除PersonGroup节点。曲线建关系,生产环境不推荐这么做。



oetl.bat D:/WorkSpaces/data/orientdb/orientdb-etl-rdbms-PersonGroup.json

{
"config": {
"log": "debug"
},
"extractor": {
"jdbc": {
"driver": "com.mysql.jdbc.Driver",
"url": "jdbc:mysql://localhost:3306/test?useUnicode=true",
"userName": "root",
"userPassword": "root",
"query": " select id, person_id, group_id from person_group ; "
}
},
"transformers": [
{
"vertex": {
"class": "PersonGroup"
}
},
{
"edge": {
"class": "Belong",
"joinFieldName": "person_id",
"lookup": "Person.person_id",
"direction": "in",
"unresolvedLinkAction":"CREATE"
}
},
{
"edge": {
"class": "Has",
"joinFieldName": "group_id",
"lookup": "Group.group_id",
"direction": "out",
"unresolvedLinkAction":"CREATE"
}
}
],
"loader": {
"orientdb": {
"dbURL": "remote:localhost/person_group",
"dbType": "graph",
"dbUser": "root",
"dbPassword": "root",
"serverUser": "root",
"serverPassword": "root",
"classes": [
{
"name": "PersonGroup",
"extends": "V"
},
{
"name": "Belong",
"extends": "E"
},
{
"name": "Has",
"extends": "E"
}
],
"indexes": [
{
"class": "PersonGroup",
"fields": [
"id:integer"
],
"type": "UNIQUE"
},
{
"class": "PersonGroup",
"fields": [
"person_id:integer"
],
"type": "NOTUNIQUE"
},
{
"class": "PersonGroup",
"fields": [
"group_id:integer"
],
"type": "NOTUNIQUE"
}
]
}
}
}




发布于: 2020 年 07 月 22 日阅读数: 57
用户头像

wkq2786130

关注

hello 2018.09.28 加入

http://weikeqin.com/

评论

发布
暂无评论
OrientDB etl 工具 导入 rdbms数据