Commit 2ab59af0 authored by Daniel García's avatar Daniel García

Scripts used

parent ff8c87ed
import csv
import sys
def anomaly_detection(inp, out, th_lat, th_long, th_timestamp, th_sensor, th_alt, type):
    """Copy rows of a sensor CSV from inp to out, dropping anomalous rows.

    A data row is kept only when every check passes: GPS latitude/longitude/
    altitude increments strictly below their thresholds, timestamp shorter
    than th_timestamp characters and not an epoch-reset value ('1970...'),
    and z-axis magnetometer value strictly below th_sensor.  Dropped rows are
    reported on stdout.  The header row (first column == "id") is always
    copied through.  Both files are closed before returning.

    Parameters:
        inp -- readable file-like object with the raw CSV (closed on exit)
        out -- writable file-like object for the clean CSV (closed on exit)
        th_lat, th_long, th_alt -- thresholds for the GPS increment columns
        th_timestamp -- maximum accepted timestamp string length
        th_sensor -- threshold for the z-axis magnetometer value
        type -- sensor kind: 'acc', 'gyro', 'magn' or 'gps' (parameter name
                kept for backward compatibility although it shadows the
                builtin)
    """
    writer = csv.writer(out, delimiter=",")
    for row in csv.reader(inp):
        if row[0] != "id":
            if type == 'gps':
                # GPS files carry the increment columns at indices 3..5.
                gps_lat_increment = float(row[3])
                gps_long_increment = float(row[4])
                gps_alt_increment = float(row[5])
            else:
                # Non-GPS sensors: neutral values so the GPS checks pass.
                gps_lat_increment = 0.0
                gps_long_increment = 0.0
                gps_alt_increment = 0.0
            if type == 'magn':
                magn_z = float(row[5])
            else:
                magn_z = 0.0
            timestamp = row[2]
            if (gps_lat_increment < th_lat) and (gps_long_increment < th_long) and \
                    (len(timestamp) < th_timestamp) and (not timestamp.startswith('1970')) and \
                    (magn_z < th_sensor) and (gps_alt_increment < th_alt):
                writer.writerow(row)
            else:
                # Report every failed check for the discarded row.
                if gps_lat_increment >= th_lat:
                    print("GPS Latitude increment too high: " + str(gps_lat_increment))
                if gps_long_increment >= th_long:
                    print("GPS Longitude increment too high: " + str(gps_long_increment))
                if gps_alt_increment >= th_alt:
                    print("GPS Altitude increment too high: " + str(gps_alt_increment))
                if len(timestamp) >= th_timestamp or timestamp.startswith('1970'):
                    print("Wrong timestamp: " + timestamp)
                # BUG FIX: was `magn_z > th_sensor`, so a row with
                # magn_z == th_sensor was dropped without any message;
                # use `>=` to mirror the keep-filter above.
                if magn_z >= th_sensor:
                    print("Wrong sensor value: " + str(magn_z))
                print("")
        else:
            # Header row is always copied to the clean file.
            writer.writerow(row)
    inp.close()
    out.close()
if __name__ == '__main__':
    # Threshold values used in the experiments: 0.2, 0.2, 27, 2000, 500
    lat_th = float(sys.argv[1])
    long_th = float(sys.argv[2])
    ts_len_th = int(sys.argv[3])
    magn_th = int(sys.argv[4])
    alt_th = float(sys.argv[5])

    # Open every raw input first, then every clean output, then run the
    # detector once per sensor (same order as before: acc, gyro, magn, gps).
    sensor_names = ('acc', 'gyro', 'magn', 'gps')
    raw_files = [open('sensoringData_' + name + '.csv', 'r') for name in sensor_names]
    clean_files = [open('sensoringData_' + name + '_clean.csv', 'w', newline="") for name in sensor_names]

    for name, raw, clean in zip(sensor_names, raw_files, clean_files):
        anomaly_detection(raw, clean, lat_th, long_th, ts_len_th, magn_th, alt_th, name)
\ No newline at end of file
This diff is collapsed.
import csv
import sys
def sensor_split(input, writers, row_count):
    """Distribute the rows of a prepared sensor CSV over several writers.

    Rows are buffered per (username, activity_id) session and each whole
    session is emitted to a single writer, so sessions are never cut in half.
    Once roughly row_count rows have gone to the current writer, output
    advances to the next writer.  The header row (first column == "id") is
    replicated to every writer.

    Parameters:
        input -- readable file-like object with the prepared CSV (parameter
                 name kept for backward compatibility although it shadows
                 the builtin)
        writers -- list of csv.writer objects, one per output part
        row_count -- approximate number of rows per part (may be a float)
    """
    counter = 0
    row_counter = 0
    username = ""
    activity_id = 0
    row_list = []
    n_div = 0
    writer_now = writers[0]
    for row in csv.reader(input):
        if row[0] != "id":
            if counter == 0:
                # Start of a newly buffered session: remember its identity.
                username = row[1]
                activity_id = row[6]
            if row_count <= row_counter:
                # Current part reached its quota; advance to the next writer.
                row_counter = 0
                n_div += 1
                # BUG FIX: clamp the index so an oversized final part cannot
                # raise IndexError by stepping past the last writer.
                writer_now = writers[min(n_div, len(writers) - 1)]
            username_now = row[1]
            activity_id_now = row[6]
            if username_now != username or activity_id_now != activity_id:
                # Session boundary: flush the buffered session as one unit.
                for list_row in row_list:
                    writer_now.writerow(list_row)
                row_list.clear()
                counter = 0
            else:
                counter += 1
            row_list.append(row)
            row_counter += 1
        else:
            # Header row: replicate to every output part.
            for writer in writers:
                writer.writerow(row)
    # BUG FIX: the original never flushed the buffer after the loop, so the
    # last session of every file was silently dropped; write it out here.
    for list_row in row_list:
        writer_now.writerow(list_row)
def split_data(n_seconds, n_div):
    """Split every prepared sensor CSV into n_div part files.

    For each sensor (acc, gyro, magn, gps) the file
    'sensoringData_<sensor>_prepared_<n_seconds>.csv' is read twice: once to
    count its rows and once to distribute them over n_div output files named
    'sensoringData_<sensor>_prepared_<n_seconds>_<i>.csv' (i = 1..n_div).

    Parameters:
        n_seconds -- window-size tag embedded in the file names
        n_div -- number of parts to create per sensor
    """
    for sensor in ('acc', 'gyro', 'magn', 'gps'):
        base_name = 'sensoringData_' + sensor + '_prepared_' + str(n_seconds) + '.csv'
        # BUG FIX: the original opened each input twice and never closed the
        # first handle; `with` releases the counting pass's handle promptly.
        with open(base_name, 'r') as counting_pass:
            total_rows = sum(1 for _ in csv.reader(counting_pass))
        # Float division, exactly as before: sensor_split compares with `<=`.
        rows_per_part = total_rows / n_div
        part_files = [open('sensoringData_' + sensor + '_prepared_' + str(n_seconds) + '_' + str(i) + '.csv',
                           'w', newline="")
                      for i in range(1, n_div + 1)]
        part_writers = [csv.writer(f, delimiter=",") for f in part_files]
        with open(base_name, 'r') as source:
            sensor_split(source, part_writers, rows_per_part)
        # BUG FIX: close the part files explicitly so buffered rows are
        # flushed deterministically instead of relying on interpreter exit.
        for f in part_files:
            f.close()
if __name__ == '__main__':
    # argv[1]: window size in seconds, argv[2]: number of divisions.
    window_size = int(sys.argv[1])
    divisions = int(sys.argv[2])
    split_data(window_size, divisions)
This diff is collapsed.
This diff is collapsed.
**************************************************
*The expected flow to use these scripts is the one that follows:
1.- Anomaly_Detector.py
Used to preprocess data and detect possible outliers
that might corrupt the model training. Parameters:
- GPS latitude increments threshold (0.2).
- GPS longitude increments threshold (0.2).
- GPS altitude increments threshold (500).
- Timestamp value length (27).
- Z-axis magnetometer value threshold (2000).
2.- Data_Adapter.py.
This script cuts the first and final X seconds from
each activity session. It also deletes the extra part
of each session, depending on the window size indicated.
It also replicates GPS data, in order to have at least
one observation from this sensor in each sliding window.
A validSessions file is also created to speed up the
feature extraction process by skipping sessions that did
not record any GPS observation. Parameters:
- Window size, in seconds (20).
- Seconds to be cut from the first and final part of
each session (5).
3.- Data_Splitter.py
Script used to split the data obtained from the previous
script into X parts in order to speed up the feature
extraction process. Parameters:
- Window size used, in seconds (20).
- Number of divisions to be applied (8).
4.- Feature_Extraction.py
Here the feature computation for each window size is
made. For each sliding window, we compute mean, var,
mad, max, min and iqr functions over related data. This
creates a file for each of the sets defined:
0 - Acc + GPS (all users)
1 - Acc + Magn + GPS (all users but the ones missing
magnetometer)
2 - Acc + Gyro + Magn + GPS (all users but the ones
missing gyroscope and magnetometer)
It is coded in a Slurm way to be executed as a job array
(one job for every data split). Parameters:
- Window size used, in seconds (20).
- Overlap between windows, in seconds (19).
- Number of seconds set to cut the first and final part of
session (5).
- Number of divisions applied over data (8).
- Slurm job array index, from 1 to the number of divisions
specified before (or -1 to join all data).
5.- SVM.py
Script used to train and test the SVM model proposed and
obtaining the results. It is coded in a Slurm way to be
executed as a job array (one job for every fold computed
over the data). Parameters:
- String formed by the window size, overlap size and
corresponding set, divided by low bars (20_19.0_2).
- Slurm job array index, from 1 to 100.
*Note: the value in parentheses for each parameter is the one used in our work.
**************************************************
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment