Commit de6718f7 authored by dagal
Browse files

Merge remote-tracking branch 'origin/master'

parents 9fef7bf5 7fa038d0
......@@ -6,8 +6,8 @@ CC = g++
## Uncomment for using a dictionary represented as a bitmap-RRR
#export CXXFLAGS = -std=c++11 -O9 -m64 -DNDEBUG -DDICTIONARY_RRR -I libcds/includes/
export CXXFLAGS = -std=c++11 -O9 -m64 -DNDEBUG -DEXPERIMENTS -fPIC -DDICTIONARY_RRR -I libcds/includes/
#export CXXFLAGS = -std=c++11 -g -O0 -m64 -Wall -DEXPERIMENTS -fPIC -DDICTIONARY_RRR -I libcds/includes/
#export CXXFLAGS = -std=c++11 -O9 -m64 -DNDEBUG -DEXPERIMENTS -fPIC -DDICTIONARY_RRR -I libcds/includes/
export CXXFLAGS = -std=c++11 -g -O0 -m64 -Wall -DEXPERIMENTS -fPIC -DDICTIONARY_RRR -I libcds/includes/
#######################################################################
......
This diff is collapsed.
......@@ -33,10 +33,16 @@ def main(argv):
# sys.exit(0)
# starts with x, ends with y
fw = open(path + "10k-starts-with-x-ends-with-y.txt", "w+")
# fw = open(path + "10k-starts-with-x-ends-with-y.txt", "w+")
# for s in random.sample(sys.stdin.readlines(), Q):
# nodes = filter(None, re.split(',|:|\n', s))
# gen_q("%s:%s %s:%s" % (nodes[0], nodes[1], nodes[-3], nodes[-2]), fw, 15)
# fw.close()
fw = open(path + "10k-ends-with-x.txt", "w+")
for s in random.sample(sys.stdin.readlines(), Q):
nodes = filter(None, re.split(',|:|\n', s))
gen_q("%s:%s %s:%s" % (nodes[0], nodes[1], nodes[-3], nodes[-2]), fw, 15)
gen_q("%s:%s" % (nodes[-3], nodes[-2]), fw, 1)
fw.close()
def main2(argv):
......
......@@ -35,9 +35,12 @@ mkdir indexes
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines32 "sPsi=32; nsHuff=16;psiSF=1; bTimes=RG32;bLines=RG32"
mv indexes/madrid_lines32.l indexes/madrid_lines_RG32.l
mv indexes/madrid_lines32.times indexes/madrid_lines_RG32.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines128 "sPsi=128; nsHuff=16;psiSF=1; bTimes=RG32;bLines=RRR128"
mv indexes/madrid_lines128.l indexes/madrid_lines_RRR128_1.l
mv indexes/madrid_lines128.times indexes/madrid_lines_RRR128_1.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines128 "sPsi=128; nsHuff=16;psiSF=1; bTimes=RRR32;bLines=RRR32"
mv indexes/madrid_lines128.l indexes/madrid_lines_RRR32.l
mv indexes/madrid_lines128.times indexes/madrid_lines_RRR32.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines512 "sPsi=512; nsHuff=16;psiSF=1; bTimes=RRR64;bLines=RRR64"
mv indexes/madrid_lines512.l indexes/madrid_lines_RRR64.l
mv indexes/madrid_lines512.times indexes/madrid_lines_RRR64.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines512 "sPsi=512; nsHuff=16;psiSF=1; bTimes=RRR128;bLines=RRR128"
mv indexes/madrid_lines512.l indexes/madrid_lines_RRR128_2.l
mv indexes/madrid_lines512.times indexes/madrid_lines_RRR128_2.times
mv indexes/madrid_lines512.l indexes/madrid_lines_RRR128.l
mv indexes/madrid_lines512.times indexes/madrid_lines_RRR128.times
......@@ -75,13 +75,52 @@
# ./goQuery.sh madrid_lines 512 WMRRR128
./goQuery.sh madrid_lines 32 RG32
./goQuery.sh madrid_lines 32 RRR128_1
./goQuery.sh madrid_lines 32 RRR128_2
./goQuery.sh madrid_lines 128 RG32
./goQuery.sh madrid_lines 128 RRR128_1
./goQuery.sh madrid_lines 128 RRR128_2
./goQuery.sh madrid_lines 512 RG32
./goQuery.sh madrid_lines 512 RRR128_1
./goQuery.sh madrid_lines 512 RRR128_2
./goQuery.sh madrid_lines 32 RG32 RG32
./goQuery.sh madrid_lines 32 RG32 RRR32
./goQuery.sh madrid_lines 32 RG32 RRR64
./goQuery.sh madrid_lines 32 RG32 RRR128
./goQuery.sh madrid_lines 32 RRR32 RG32
./goQuery.sh madrid_lines 32 RRR32 RRR32
./goQuery.sh madrid_lines 32 RRR32 RRR64
./goQuery.sh madrid_lines 32 RRR32 RRR128
./goQuery.sh madrid_lines 32 RRR64 RG32
./goQuery.sh madrid_lines 32 RRR64 RRR32
./goQuery.sh madrid_lines 32 RRR64 RRR64
./goQuery.sh madrid_lines 32 RRR64 RRR128
./goQuery.sh madrid_lines 32 RRR128 RG32
./goQuery.sh madrid_lines 32 RRR128 RRR32
./goQuery.sh madrid_lines 32 RRR128 RRR64
./goQuery.sh madrid_lines 32 RRR128 RRR128
./goQuery.sh madrid_lines 128 RG32 RG32
./goQuery.sh madrid_lines 128 RG32 RRR32
./goQuery.sh madrid_lines 128 RG32 RRR64
./goQuery.sh madrid_lines 128 RG32 RRR128
./goQuery.sh madrid_lines 128 RRR32 RG32
./goQuery.sh madrid_lines 128 RRR32 RRR32
./goQuery.sh madrid_lines 128 RRR32 RRR64
./goQuery.sh madrid_lines 128 RRR32 RRR128
./goQuery.sh madrid_lines 128 RRR64 RG32
./goQuery.sh madrid_lines 128 RRR64 RRR32
./goQuery.sh madrid_lines 128 RRR64 RRR64
./goQuery.sh madrid_lines 128 RRR64 RRR128
./goQuery.sh madrid_lines 128 RRR128 RG32
./goQuery.sh madrid_lines 128 RRR128 RRR32
./goQuery.sh madrid_lines 128 RRR128 RRR64
./goQuery.sh madrid_lines 128 RRR128 RRR128
./goQuery.sh madrid_lines 512 RG32 RG32
./goQuery.sh madrid_lines 512 RG32 RRR32
./goQuery.sh madrid_lines 512 RG32 RRR64
./goQuery.sh madrid_lines 512 RG32 RRR128
./goQuery.sh madrid_lines 512 RRR32 RG32
./goQuery.sh madrid_lines 512 RRR32 RRR32
./goQuery.sh madrid_lines 512 RRR32 RRR64
./goQuery.sh madrid_lines 512 RRR32 RRR128
./goQuery.sh madrid_lines 512 RRR64 RG32
./goQuery.sh madrid_lines 512 RRR64 RRR32
./goQuery.sh madrid_lines 512 RRR64 RRR64
./goQuery.sh madrid_lines 512 RRR64 RRR128
./goQuery.sh madrid_lines 512 RRR128 RG32
./goQuery.sh madrid_lines 512 RRR128 RRR32
./goQuery.sh madrid_lines 512 RRR128 RRR64
./goQuery.sh madrid_lines 512 RRR128 RRR128
echo ''
......@@ -142,8 +142,10 @@
#../benchmark indexes/madrid_lines indexes/madrid_lines ./queries/madrid/top-100.txt 0 1000
echo 'TTCTR (psi='$2' times='$3')'
../benchmark indexes/$1$2 indexes/$1_$3 ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
#echo 'TTCTR (psi='$2' lines='$3' times='$4')'
#../benchmark indexes/$1$2 indexes/$1_$3 indexes/$1_$4 ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
#../benchmark indexes/$1$2 indexes/$1_$3 ./queries/madrid/10k-starts-with-x.txt 0 1000
#../benchmark indexes/madrid_lines indexes/madrid_lines ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
\ No newline at end of file
../benchmark indexes/madrid_lines indexes/madrid_lines_RG32 indexes/madrid_lines_RG32 ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
#../benchmark indexes/madrid_lines indexes/madrid_lines indexes/madrid_lines ./queries/madrid/10k-starts-with-x.txt 0 10000
#../benchmark indexes/madrid_lines indexes/madrid_lines indexes/madrid_lines ./queries/madrid/10k-ends-with-x.txt 0 10000
......@@ -28,7 +28,7 @@ pode haber repetidos)
QueryType queryTypes[16] = {
{0, 2, false, get_starts_with_x},
{1, 1, false, get_ends_with_x},
{1, 2, false, get_ends_with_x},
{2, 1, false, get_x_in_the_middle},
{3, 2, false, get_from_x_to_y},
{4, 1, true, get_top_k},
......@@ -173,6 +173,7 @@ int main(int argc, char ** argv) {
//FILE * flog = fopen("deactivewcsa.log","w");
char * fileName;
char * linesFile;
char * timesFile;
//@@ struct tgs index;
......@@ -181,13 +182,14 @@ int main(int argc, char ** argv) {
unsigned int * gotreslist;
uint gotres = 0;
if (argc < 4) {
printf("Usage: %s <infexfile> <timesfile> <queryfile> [ignore_times] [loops]\n", argv[0]);
if (argc < 5) {
printf("Usage: %s <infexfile> <linesfile> <timesfile> <queryfile> [ignore_times] [loops]\n", argv[0]);
exit(1);
}
fileName = argv[1];
timesFile = argv[2];
linesFile = argv[2];
timesFile = argv[3];
//@@ f.open(fileName, ios::binary);
......@@ -195,7 +197,7 @@ int main(int argc, char ** argv) {
//@@ f.close();
ulong Index_size, Text_length;
int error= load_index (fileName, timesFile, &index);
int error= load_index (fileName, linesFile, timesFile, &index);
IFERROR (error);
//printInfo(index);
......@@ -218,16 +220,16 @@ int main(int argc, char ** argv) {
gotreslist = (uint*)malloc(sizeof(unsigned int)*BUFFER);
int ignore_times = 0;
if (argc > 4) {
ignore_times = atoi(argv[4]);
if (argc > 5) {
ignore_times = atoi(argv[5]);
}
int nqueries = 0;
TimeQuery * queries = readQueries(index, argv[3], &nqueries, ignore_times);
TimeQuery * queries = readQueries(index, argv[4], &nqueries, ignore_times);
int executed_queries = LOOPS;
if (argc > 5) {
executed_queries = atoi(argv[5]);
if (argc > 6) {
executed_queries = atoi(argv[6]);
}
int i,j;
......@@ -236,7 +238,8 @@ int main(int argc, char ** argv) {
printf("We are checking the results... Experiments mode off.\n");
#endif
for (j = 0; j < (queries[0].type->type == 15 ? XY_TOTAL : 1); j++) {
//for (j = 0; j < (queries[0].type->type == 15 ? XY_TOTAL : 1); j++) {
for (j = 0; j < XY_TOTAL; j++) {
totalres = 0;
startClockTime();
for (i = 0; i < executed_queries; i++) {
......
......@@ -418,9 +418,9 @@ int save_index (void *index, char *filename) {
/** Loads index from one or more file(s) named filename, possibly
adding the proper extensions. */
int load_index(char *filename, char *timesFile, void **index){
int load_index(char *filename, char *linesFile, char *timesFile, void **index){
twcsa *wcsa;
wcsa = loadWCSA (filename, timesFile);
wcsa = loadWCSA (filename, linesFile, timesFile);
(*index) = (void *) wcsa;
#ifdef DICTIONARY_HUFFRLE
......@@ -438,11 +438,11 @@ int load_index(char *filename, char *timesFile, void **index){
return 0;
}
int loadTimeIndex(twcsa *wcsa, char *basename) {
int loadTimeIndex(twcsa *wcsa, char *linesFile, char *timesFile) {
char filename[1024];
{
strcpy(filename, basename);
strcpy(filename, linesFile);
strcat(filename, ".");
strcat(filename, LINES_FILE_EXT);
std::ifstream ifs(filename, std::ifstream::in);
......@@ -457,7 +457,7 @@ int loadTimeIndex(twcsa *wcsa, char *basename) {
}
{
strcpy(filename, basename);
strcpy(filename, timesFile);
strcat(filename, ".");
strcat(filename, TIMES_FILE_EXT);
std::ifstream ifs(filename, std::ifstream::in);
......@@ -1059,7 +1059,7 @@ int loadBaseline(twcsa *wcsa, char *basename) {
//wcsa->baseline = new tbaseline{NULL, NULL, NULL, fromXtoY};
}
twcsa *loadWCSA(char *filename, char *timesFile) {
twcsa *loadWCSA(char *filename, char *linesFile, char *timesFile) {
twcsa *wcsa;
wcsa = (twcsa *) my_malloc (sizeof (twcsa) * 1);
......@@ -1067,7 +1067,7 @@ twcsa *loadWCSA(char *filename, char *timesFile) {
loadIntIndex(filename, (void **)&wcsa->myicsa);
loadStructs(wcsa,filename);
loadTimeIndex(wcsa,timesFile);
loadTimeIndex(wcsa, linesFile, timesFile);
// loadBaseline(wcsa, filename);
......@@ -1261,7 +1261,7 @@ uint inline encodeStop(twcsa *g, uint lineId, uint stopId) {
}
int get_starts_with_x(void *index, TimeQuery *query) {
if (query->subtype & (XY_LINE_START | XY_TIME_START) != query->subtype) {
if ((query->subtype & (XY_LINE_START | XY_TIME_START)) != query->subtype) {
return 0;
}
......@@ -1311,45 +1311,57 @@ int get_starts_with_x(void *index, TimeQuery *query) {
}
int get_ends_with_x(void *index, TimeQuery *query) {
if ((query->subtype & (XY_LINE_END | XY_TIME_END)) != query->subtype) {
return 0;
}
twcsa *g = (twcsa *)index;
uint u = mapID(g, query->values[0], NODE);
ulong numocc=0, lu=0, ru=0;
pair<int, int> res;
const auto lineId = query->values[0];
const auto stopId = query->values[1];
uint u = mapID(g, stopId, NODE);
uint pattern[2] = {u, 0};
ulong numocc, lu, ru;
const auto linesWM = dynamic_cast<std::vector<WaveletMatrix *>*>((std::vector<WaveletMatrix *>*)g->linesIndex);
if (g->baseline) {
const auto u = mapID(g, query->values[0], NODE);
const auto &times = g->baseline->endsX->at(u);
if (linesWM == NULL) {
std::cerr << "Either use WM or implement the trackUp operation in the WT!" << std::endl;
throw std::bad_cast();
}
if (query->time) {
numocc = 0;
if (query->time->h_end < query->time->h_start) {
for (auto i = times.begin(); i != times.upper_bound(query->time->h_end); i++) {
numocc += i->second;
}
countIntIndex(g->myicsa, pattern, 2, &numocc, &lu, &ru);
assert(numocc < g->n);
for (auto i = times.lower_bound(query->time->h_start); i != times.end(); i++) {
numocc += i->second;
}
} else {
for (auto i = times.lower_bound(query->time->h_start); i != times.upper_bound(query->time->h_end); i++) {
numocc += i->second;
}
}
if (numocc && query->subtype) {
const auto stop_offset = selectStop(g, u);
assert(lu >= stop_offset);
const auto start_time = query->time->h_start;
const auto end_time = query->time->h_end;
std::vector<uint16_t> lines;
numocc = 0;
ulong n = 0;
if ((query->subtype & (XY_LINE_END | XY_TIME_END)) == XY_TIME_END) {
query->subtype |= XY_LINE_END;
lines = g->stopLines->at(query->values[1]);
} else {
numocc = times.size() > 0 ? times.begin()->second : 0;
lines.push_back(query->values[0]);
}
return numocc;
}
countIntIndex(g->myicsa, pattern, 2, &numocc, &lu, &ru);
for (const auto &line : lines) {
n = getRange(linesWM->at(u), lu-stop_offset, ru-stop_offset, line, line, &res);
printf("%lu %lu\n", lu-stop_offset, ru-stop_offset);
printf("%lu %lu %lu\n", res.first, res.second, n);
assert(n == res.second-res.first+1);
assert(stop_offset+res.second < g->n);
// printf("%lu %lu %lu\n", numocc, lu, ru);
if (n && query->subtype & XY_TIME_END) {
const auto jcodes = getJCodes(g, line, query->values[1], start_time, end_time);
n = getTimeRange(g, stop_offset+res.first, stop_offset+res.second, jcodes.first, jcodes.second);
}
if (query->time && numocc) {
numocc = getRange(g, lu, ru,
query->time->h_start, query->time->h_end);
numocc += n;
}
}
return numocc;
......
......@@ -270,7 +270,7 @@ int printInfo(void *index);
//loading/freeing the data structures into memory.
void loadStructs(twcsa *wcsa, char *basename);
twcsa *loadWCSA(char *filename, char *timesFile);
twcsa *loadWCSA(char *filename, char *linesFile, char *timesFile);
// Definitions of PRIVATE functions
//Auxiliary functions
......
......@@ -142,7 +142,7 @@ int save_index (void *index, char *filename);
/* Loads index from one or more file(s) named filename, possibly
adding the proper extensions. */
int load_index (char *filename, char *timesFile, void **index);
int load_index (char *filename, char *linesFile, char *timesFile, void **index);
//@@void tgs_load(struct tgs *a, ifstream & f);
/* Frees the memory occupied by index. */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment