Commit 28ec8004 authored by Álex Cortiñas

Initial version

root = true
[*]
indent_style = tab
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.md]
trim_trailing_whitespace = false
[*.json]
indent_style = space
indent_size = 2
[*.{yaml,yml}]
indent_style = space
indent_size = 2
.gradle
.classpath
.project
.settings
bin
build
.idea
*.un~
# storage-system
Takes geographic events from Kafka and stores them in different datastores (PostgreSQL, and Druid via Tranquility)
## Requirements
* Java 8
## Usage
* Change the configuration and Kafka topic in `datasource.json` and in `config.properties` (a sketch of the expected keys follows the run commands). To replace the topic easily, run `rpl -R old-topic new-topic .` in the project folder.
* Run
```sh
./gradlew jar
java -jar build/libs/storage-system.jar
```
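
The keys below are the ones `StorageSystem.java` actually reads from `config.properties`; the values are placeholders only, so adjust them to your deployment.

```properties
# All values below are illustrative; only the key names come from the source.
# Druid datasource name (also used as the Postgres table name)
datasource=events
# Tranquility spec file: filesystem path or classpath resource
druidSpec=datasource.json
# Kafka bootstrap servers and topic
kafka-ip-port=localhost:9092
topic=events-topic
# Consumer group prefix ("-consumer" is appended by KafkaReader)
groupId=storage-system
# Whether Kafka auto-commits offsets
commit=true
# Events older than this window (milliseconds) are discarded
kafka-period=3600000
# PostgreSQL connection settings
pg-ip-port=localhost:5432
pg-database=gis
pg-user=postgres
pg-pass=postgres
```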
apply plugin: 'java'
apply plugin: 'application'
version = '1.0.0'
sourceCompatibility = 1.8
targetCompatibility = 1.8
repositories {
jcenter()
}
dependencies {
// logging
compile 'org.slf4j:slf4j-api:1.7.21'
compile 'org.slf4j:slf4j-log4j12:1.7.21'
compile 'org.apache.logging.log4j:log4j-core:2.6.2'
// kafka consumer
compile 'org.apache.kafka:kafka-clients:0.10.0.0'
// postgres
compile 'org.postgresql:postgresql:9.4.1209'
// tranquility
compile 'io.druid:tranquility-core_2.11:0.8.1'
// jackson
compile 'com.fasterxml.jackson.core:jackson-core:2.8.1'
}
mainClassName = 'es.udc.lbd.gis.storagesystem.StorageSystem'
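// Bundle all compile dependencies into the jar (fat jar) so `java -jar` works standalone.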
jar {
doFirst {
from { configurations.compile.collect { it.isDirectory() ? it : zipTree(it) } }
}
exclude 'META-INF/*.RSA', 'META-INF/*.SF','META-INF/*.DSA'
manifest {
attributes(
'Main-Class': 'es.udc.lbd.gis.storagesystem.StorageSystem'
)
}
}
#Fri Feb 10 14:18:11 CET 2017
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-2.14.1-all.zip
#!/usr/bin/env bash
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
warn ( ) {
echo "$*"
}
die ( ) {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Split up the JVM_OPTS and GRADLE_OPTS values into an array, following the shell quoting and substitution rules
function splitJvmOpts() {
JVM_OPTS=("$@")
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto init
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:init
@rem Get command-line arguments, handling Windows variants
if not "%OS%" == "Windows_NT" goto win9xME_args
if "%@eval[2+2]" == "4" goto 4NT_args
:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2
:win9xME_args_slurp
if "x%~1" == "x" goto execute
set CMD_LINE_ARGS=%*
goto execute
:4NT_args
@rem Get arguments from the 4NT Shell from JP Software
set CMD_LINE_ARGS=%$
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega
rootProject.name = 'storage-system'
package es.udc.lbd.gis.storagesystem;
import es.udc.lbd.gis.storagesystem.conectors.DruidWriter;
import es.udc.lbd.gis.storagesystem.conectors.KafkaReader;
import es.udc.lbd.gis.storagesystem.conectors.PostgresWriter;
import es.udc.lbd.gis.storagesystem.config.MyProperties;
import es.udc.lbd.gis.storagesystem.model.Event;
import es.udc.lbd.gis.storagesystem.model.ModelMapper;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
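/**
 * Entry point: reads geographic events from Kafka in batches, discards stale
 * ones, and writes each batch to PostgreSQL and to Druid (through Tranquility).
 */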
public class StorageSystem {
public static void main(String[] args) {
MyProperties prop = new MyProperties("/config.properties");
PostgresWriter pg = new PostgresWriter(
prop.get("datasource"),
prop.get("pg-ip-port"),
prop.get("pg-database"),
prop.get("pg-user"),
prop.get("pg-pass"));
DruidWriter druidWriter = new DruidWriter(
prop.get("druidSpec"),
prop.get("datasource"));
KafkaReader kafka = new KafkaReader(
prop.get("kafka-ip-port"),
prop.get("groupId"),
Boolean.parseBoolean(prop.get("commit")),
prop.get("topic"));
Function<Long, Boolean> notOld = (time) ->
time > System.currentTimeMillis() - Long.parseLong(prop.get("kafka-period"));
try {
kafka.read((List<ConsumerRecord<String, String>> buffer) -> {
List<Event> events = buffer
.stream()
.map(record -> ModelMapper.toEvent(record.value()).complete())
.filter(m -> notOld.apply(m.getTime()))
.collect(Collectors.toList());
if (!events.isEmpty()) {
pg.insert(events);
druidWriter.insert(events);
}
return null;
});
} finally {
pg.disconnect();
druidWriter.disconnect();
kafka.disconnect();
}
}
}
package es.udc.lbd.gis.storagesystem.conectors;
import com.metamx.tranquility.config.DataSourceConfig;
import com.metamx.tranquility.config.PropertiesBasedConfig;
import com.metamx.tranquility.config.TranquilityConfig;
import com.metamx.tranquility.druid.DruidBeams;
import com.metamx.tranquility.tranquilizer.MessageDroppedException;
import com.metamx.tranquility.tranquilizer.Tranquilizer;
import com.twitter.util.FutureEventListener;
import es.udc.lbd.gis.storagesystem.StorageSystem;
import es.udc.lbd.gis.storagesystem.model.Event;
import es.udc.lbd.gis.storagesystem.model.ModelMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.runtime.BoxedUnit;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
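/**
 * Pushes events into Druid through Tranquility, keeping running counts of
 * delivered and dropped messages.
 */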
public class DruidWriter {
private static final Logger logger = LoggerFactory.getLogger(DruidWriter.class);
private static DataSourceConfig<PropertiesBasedConfig> config = null;
private static long startTime = System.currentTimeMillis();
private static long count = 0;
private static long dropped = 0;
private String datasource;
private static Tranquilizer<Map<String, Object>> sender;
private double auxTime;
public DruidWriter(String specFile, String datasource) {
this.datasource = datasource;
initConfig(specFile);
connect();
}
private void initConfig(String specFile) {
if (config != null)
return;
InputStream input = null;
try {
try {
input = new FileInputStream(specFile);
} catch (IOException ex) {
logger.warn("Druid spec file not found at {}, falling back to the classpath", specFile);
input = StorageSystem.class.getClassLoader().getResourceAsStream(specFile);
}
TranquilityConfig<PropertiesBasedConfig> tConfig = TranquilityConfig.read(input);
config = tConfig.getDataSource(datasource);
} finally {
if (input != null) {
try {
input.close();
} catch (IOException ex) {
logger.error("Closing input stream of druid spec file", ex);
}
}
}
}
public void insert(List<Event> stream) {
stream.stream().map(ModelMapper::toMap).forEach(this::insertItem);
}
private void printState() {
auxTime = (System.currentTimeMillis() - startTime) / 1.0E03;
logger.info("DruidWriter: Total: {} events in {} seconds | Events per second: {} | Dropped: {}",
count, auxTime, (int) (count / auxTime), dropped);
}
private void insertItem(Map<String, Object> m) {
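// The send is asynchronous; Tranquility invokes the listener on success or failure.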
sender.send(m).addEventListener(new FutureEventListener<BoxedUnit>() {
@Override
public void onSuccess(BoxedUnit boxedUnit) {
count++;
if (count % 10000 == 0) {
printState();
}
}
@Override
public void onFailure(Throwable e) {
if (e instanceof MessageDroppedException) {
logger.debug(String.format("Dropped message: %s", m.toString()), e);
dropped++;
if (dropped % 1000 == 0) {
printState();
}
} else {
logger.error(String.format("Failed to send message: %s", m.toString()), e);
}
}
});
}
private void connect() {
logger.info("Connecting");
sender = DruidBeams.fromConfig(config).buildTranquilizer(config.tranquilizerBuilder());
sender.start();
}
public void disconnect() {
if (sender != null) {
sender.stop();
sender.close();
sender = null;
}
}
}
package es.udc.lbd.gis.storagesystem.conectors;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.function.Function;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
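/**
 * Subscribes to a Kafka topic and hands batches of records to a
 * caller-supplied callback once a minimum batch size has accumulated.
 */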
public class KafkaReader {
private static final Logger logger = LoggerFactory.getLogger(KafkaReader.class);
private static final int maxPoll = 10000;
private static final int intervalCommit = 60000;
private static Properties props = null;
private KafkaConsumer<String, String> consumer = null;
private String ipPort;
private String topic;
private String groupId;
private boolean commit;
private long count = 0;
public KafkaReader() {
}
public KafkaReader(String ipPort, String groupId, boolean commit, String topic) {
this.ipPort = ipPort;
this.groupId = groupId + "-consumer";
this.commit = commit;
this.topic = topic;
init();
connect();
}
private void init() {
if (props != null)
return;
props = new Properties();
props.put("bootstrap.servers", ipPort);
props.put("group.id", groupId);
if (commit) {
props.put("enable.auto.commit", "true");
} else {
props.put("enable.auto.commit", "false");
}
props.put("max.poll.records", maxPoll);
props.put("auto.commit.interval.ms", intervalCommit);
props.put("session.timeout.ms", "30000");
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
}
private void connect() {
consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(topic));
}
public void disconnect() {
if (consumer != null){
consumer.close();
consumer = null;
}
}
public void read(Function<List<ConsumerRecord<String, String>>, Void> store) {
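// Accumulate polled records until minBatchSize is reached, then hand the batch off.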
final int minBatchSize = 200;
List<ConsumerRecord<String, String>> buffer = new ArrayList<>();
while (true) {
ConsumerRecords<String, String> records = consumer.poll(100);
for (ConsumerRecord<String, String> record : records) {
buffer.add(record);
}
if (buffer.size() >= minBatchSize) {
count += buffer.size();
logger.info("Readed from Kafka: " + count);
store.apply(buffer);
buffer.clear();
}
}
}
}
package es.udc.lbd.gis.storagesystem.conectors;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.List;
import es.udc.lbd.gis.storagesystem.model.Event;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
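/**
 * Inserts events into a PostgreSQL table over plain JDBC.
 */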
public class PostgresWriter {
private static final Logger logger = LoggerFactory.getLogger(PostgresWriter.class);
private String tableName;
private String ipPort;
private String database;
private String user;
private String pass;
private Connection conn = null;
private long startTime = System.currentTimeMillis();
private double auxTime;
private long count = 0;
public PostgresWriter(String tableName, String ipPort, String database, String user, String pass) {
this.tableName = tableName;
this.ipPort = ipPort;
this.database = database;