Commit 763902ba authored by dagal
Browse files

Moar WMs!

parent a382e4de
......@@ -119,9 +119,7 @@ Size of initialTimes: 450719 bytes, 2.121 bps (6.63% compression)
**** iCSA size = 30927069 bytes
## Building time (**parsing into integers + present_layer: 69.703 secs
## Building time (**parsing into integers + present_layer: 35.780 secs
out file set to : ./indexes/madrid_lines128
Read weeks=0
......@@ -170,7 +168,7 @@ Processing 355.7%
[0 11021 10555 ]
[0 ]
parameters: "sPsi=128; nsHuff=16;psiSF=1; bTimes=RG32;bLines=RRR128"
parameters: "sPsi=128; nsHuff=16;psiSF=1; bTimes=RRR32;bLines=RRR32"
Number of nodes = 11022
Number of times = 9980
......@@ -214,39 +212,160 @@ MALLOC FOR 35573340
Saving structures to disk: ./indexes/madrid_lines128.*Index saved !!
Size of int index: 25467738 bytes, 5.727 bps (40.91% compression)
Size of lines index: 9351388 bytes, 2.103 bps (19.12% compression)
Size of lines index: 10395976 bytes, 2.338 bps (21.25% compression)
Size of times index: 64200082 bytes, 14.438 bps (103.13% compression)
Size of times index: 59671620 bytes, 13.419 bps (95.85% compression)
Size of lineStops: 121972 bytes
Size of stopLines: 144548 bytes
Size of avgTimes: 65182 bytes
Size of initialTimes: 450719 bytes, 2.121 bps (6.63% compression)
Index occupied 99801629 bytes
Index occupied 96317755 bytes
Size of int index: 25467738 bytes, 5.727 bps (40.91% compression)
Size of lines index: 9351388 bytes, 2.103 bps (19.12% compression)
Size of lines index: 10395976 bytes, 2.338 bps (21.25% compression)
Size of times index: 64200082 bytes, 14.438 bps (103.13% compression)
Size of times index: 59671620 bytes, 13.419 bps (95.85% compression)
Size of lineStops: 121972 bytes
Size of stopLines: 144548 bytes
Size of avgTimes: 65182 bytes
Size of initialTimes: 450719 bytes, 2.121 bps (6.63% compression)
[destroying index] ...Freed 99801629 bytes... RAM
[destroying index] ...Freed 96317755 bytes... RAM
[destroying iCSA: compressed PSI structure] ...Freed 20994820 bytes... RAM
[destroying iCSA: D vector] ...Freed 4446668 bytes... RAM
**** [the whole iCSA ocuppied ... 25462829 bytes... RAM
**** iCSA size = 25462829 bytes
## Building time (**parsing into integers + present_layer: 73.984 secs
## Building time (**parsing into integers + present_layer: 36.836 secs
out file set to : ./indexes/madrid_lines512
Read weeks=0
Processing 355.7%
UNSORTED: 4937 > 7720 (record j=3)
INPUT RECORDS ARE #not# SORTED INCREASINGLY
UNSORTED: 2436 > 10332 (record j=2)
SORTED RECORDS ARE #not# SORTED INCREASINGLY
10000001 trajectories read, max nodes = 11022, max-time = 9979
[0 1 2436 340 1 ]
[0 1 10332 4058 342 ]
[0 1 2436 345 ]
[0 1 2436 340 346 ]
[0 1 10824 344 353 ]
[0 1 2435 410 ]
[0 1 1981 ]
[0 1 1981 ]
[0 1 1981 ]
[0 1 1981 ]
...
...
[0 5404 7575 8322 7315 ]
[0 5404 5003 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
[0 5404 5008 7329 ]
...
...
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 11021 10555 ]
[0 ]
parameters: "sPsi=512; nsHuff=16;psiSF=1; bTimes=RRR64;bLines=RRR64"
Number of nodes = 11022
Number of times = 9980
real nodes 11022
map_size 11022 Number of lines = 1049
map and unmap vocabulary arrays created sucessfully
**** CREATING CSA-bottom-layer *****
parameters for iCSA: samplePsi=512
: nsHuff=16384, psiSearchFactor = 1 --> jump = 512
*BUILDING THE SUFFIX ARRAY over 35573340 integers... (with sais)
...... ended.
Creating compressed Psi...
Creating compressed Psi HUFFMAN RLE...
MALLOC FOR 16384
MALLOC FOR 35573340
[3] diffsHT.total = 125754163 bits
[3]streamSize = 152627094 , index = 16384
psi: pointersize = 28 bits, sampleSize=26 bits
espacio para Sample-values-psi = 225812 bytes
espacio para Sample-values-psi** = 225812 bytes
espacio para Sample-pointers-psi = 243184 bytes
espacio para stream-psi = 19078388 bytes
@@@@@@@@@ psi samaplePeriod= 512, ns=16384
@@@@@@@@@ psi size= [samples = 225812] bytes
@@@@@@@@@ psi size= [pointers = 243184] bytes
@@@@@@@@@ psi size= [totalsize diffsHt.total = 125754163] bits
@@@@@@@@@ psi size= [streamsize+largevalues =19078388] bytes
@@@@@@@@@ psi size= [sizeHuff tree = 65624] bytes
**** [iCSA built on 35573340 integers. Size = 24081145 bytes... RAM
Test MAP/UNMAP (compressDictionary RRR) passed *OK*,
Building WM Indices...
Done.
Saving structures to disk: ./indexes/madrid_lines512.*Index saved !!
Size of int index: 24086054 bytes, 5.417 bps (38.69% compression)
Size of lines index: 9693728 bytes, 2.180 bps (19.82% compression)
Size of times index: 56614500 bytes, 12.732 bps (90.94% compression)
Size of lineStops: 121972 bytes
Size of stopLines: 144548 bytes
Size of avgTimes: 65182 bytes
Size of initialTimes: 450719 bytes, 2.121 bps (6.63% compression)
Index occupied 91176703 bytes
Size of int index: 24086054 bytes, 5.417 bps (38.69% compression)
Size of lines index: 9693728 bytes, 2.180 bps (19.82% compression)
Size of times index: 56614500 bytes, 12.732 bps (90.94% compression)
Size of lineStops: 121972 bytes
Size of stopLines: 144548 bytes
Size of avgTimes: 65182 bytes
Size of initialTimes: 450719 bytes, 2.121 bps (6.63% compression)
[destroying index] ...Freed 91176703 bytes... RAM
[destroying iCSA: compressed PSI structure] ...Freed 19613136 bytes... RAM
[destroying iCSA: D vector] ...Freed 4446668 bytes... RAM
**** [the whole iCSA ocuppied ... 24081145 bytes... RAM
**** iCSA size = 24081145 bytes
## Building time (**parsing into integers + present_layer: 36.689 secs
out file set to : ./indexes/madrid_lines512
Read weeks=0
......@@ -369,8 +488,7 @@ Size of initialTimes: 450719 bytes, 2.121 bps (6.63% compression)
**** iCSA size = 24081145 bytes
## Building time (**parsing into integers + present_layer: 81.453 secs
## Building time (**parsing into integers + present_layer: 37.160 secs
......
......@@ -35,9 +35,12 @@ mkdir indexes
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines32 "sPsi=32; nsHuff=16;psiSF=1; bTimes=RG32;bLines=RG32"
mv indexes/madrid_lines32.l indexes/madrid_lines_RG32.l
mv indexes/madrid_lines32.times indexes/madrid_lines_RG32.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines128 "sPsi=128; nsHuff=16;psiSF=1; bTimes=RG32;bLines=RRR128"
mv indexes/madrid_lines128.l indexes/madrid_lines_RRR128_1.l
mv indexes/madrid_lines128.times indexes/madrid_lines_RRR128_1.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines128 "sPsi=128; nsHuff=16;psiSF=1; bTimes=RRR32;bLines=RRR32"
mv indexes/madrid_lines128.l indexes/madrid_lines_RRR32.l
mv indexes/madrid_lines128.times indexes/madrid_lines_RRR32.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines512 "sPsi=512; nsHuff=16;psiSF=1; bTimes=RRR64;bLines=RRR64"
mv indexes/madrid_lines512.l indexes/madrid_lines_RRR64.l
mv indexes/madrid_lines512.times indexes/madrid_lines_RRR64.times
cat ./texts/madrid_lines.zst | zstd -d | ../BUILDALLwcsa stdin ./indexes/madrid_lines512 "sPsi=512; nsHuff=16;psiSF=1; bTimes=RRR128;bLines=RRR128"
mv indexes/madrid_lines512.l indexes/madrid_lines_RRR128_2.l
mv indexes/madrid_lines512.times indexes/madrid_lines_RRR128_2.times
mv indexes/madrid_lines512.l indexes/madrid_lines_RRR128.l
mv indexes/madrid_lines512.times indexes/madrid_lines_RRR128.times
......@@ -75,13 +75,52 @@
# ./goQuery.sh madrid_lines 512 WMRRR128
./goQuery.sh madrid_lines 32 RG32
./goQuery.sh madrid_lines 32 RRR128_1
./goQuery.sh madrid_lines 32 RRR128_2
./goQuery.sh madrid_lines 128 RG32
./goQuery.sh madrid_lines 128 RRR128_1
./goQuery.sh madrid_lines 128 RRR128_2
./goQuery.sh madrid_lines 512 RG32
./goQuery.sh madrid_lines 512 RRR128_1
./goQuery.sh madrid_lines 512 RRR128_2
./goQuery.sh madrid_lines 32 RG32 RG32
./goQuery.sh madrid_lines 32 RG32 RRR32
./goQuery.sh madrid_lines 32 RG32 RRR64
./goQuery.sh madrid_lines 32 RG32 RRR128
./goQuery.sh madrid_lines 32 RRR32 RG32
./goQuery.sh madrid_lines 32 RRR32 RRR32
./goQuery.sh madrid_lines 32 RRR32 RRR64
./goQuery.sh madrid_lines 32 RRR32 RRR128
./goQuery.sh madrid_lines 32 RRR64 RG32
./goQuery.sh madrid_lines 32 RRR64 RRR32
./goQuery.sh madrid_lines 32 RRR64 RRR64
./goQuery.sh madrid_lines 32 RRR64 RRR128
./goQuery.sh madrid_lines 32 RRR128 RG32
./goQuery.sh madrid_lines 32 RRR128 RRR32
./goQuery.sh madrid_lines 32 RRR128 RRR64
./goQuery.sh madrid_lines 32 RRR128 RRR128
./goQuery.sh madrid_lines 128 RG32 RG32
./goQuery.sh madrid_lines 128 RG32 RRR32
./goQuery.sh madrid_lines 128 RG32 RRR64
./goQuery.sh madrid_lines 128 RG32 RRR128
./goQuery.sh madrid_lines 128 RRR32 RG32
./goQuery.sh madrid_lines 128 RRR32 RRR32
./goQuery.sh madrid_lines 128 RRR32 RRR64
./goQuery.sh madrid_lines 128 RRR32 RRR128
./goQuery.sh madrid_lines 128 RRR64 RG32
./goQuery.sh madrid_lines 128 RRR64 RRR32
./goQuery.sh madrid_lines 128 RRR64 RRR64
./goQuery.sh madrid_lines 128 RRR64 RRR128
./goQuery.sh madrid_lines 128 RRR128 RG32
./goQuery.sh madrid_lines 128 RRR128 RRR32
./goQuery.sh madrid_lines 128 RRR128 RRR64
./goQuery.sh madrid_lines 128 RRR128 RRR128
./goQuery.sh madrid_lines 512 RG32 RG32
./goQuery.sh madrid_lines 512 RG32 RRR32
./goQuery.sh madrid_lines 512 RG32 RRR64
./goQuery.sh madrid_lines 512 RG32 RRR128
./goQuery.sh madrid_lines 512 RRR32 RG32
./goQuery.sh madrid_lines 512 RRR32 RRR32
./goQuery.sh madrid_lines 512 RRR32 RRR64
./goQuery.sh madrid_lines 512 RRR32 RRR128
./goQuery.sh madrid_lines 512 RRR64 RG32
./goQuery.sh madrid_lines 512 RRR64 RRR32
./goQuery.sh madrid_lines 512 RRR64 RRR64
./goQuery.sh madrid_lines 512 RRR64 RRR128
./goQuery.sh madrid_lines 512 RRR128 RG32
./goQuery.sh madrid_lines 512 RRR128 RRR32
./goQuery.sh madrid_lines 512 RRR128 RRR64
./goQuery.sh madrid_lines 512 RRR128 RRR128
echo ''
......@@ -142,8 +142,8 @@
#../benchmark indexes/madrid_lines indexes/madrid_lines ./queries/madrid/top-100.txt 0 1000
echo 'TTCTR (psi='$2' times='$3')'
../benchmark indexes/$1$2 indexes/$1_$3 ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
echo 'TTCTR (psi='$2' lines='$3' times='$4')'
../benchmark indexes/$1$2 indexes/$1_$3 indexes/$1_$4 ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
#../benchmark indexes/$1$2 indexes/$1_$3 ./queries/madrid/10k-starts-with-x.txt 0 1000
#../benchmark indexes/madrid_lines indexes/madrid_lines ./queries/madrid/10k-starts-with-x-ends-with-y.txt 0 10000
\ No newline at end of file
......@@ -173,6 +173,7 @@ int main(int argc, char ** argv) {
//FILE * flog = fopen("deactivewcsa.log","w");
char * fileName;
char * linesFile;
char * timesFile;
//@@ struct tgs index;
......@@ -181,13 +182,14 @@ int main(int argc, char ** argv) {
unsigned int * gotreslist;
uint gotres = 0;
if (argc < 4) {
printf("Usage: %s <infexfile> <timesfile> <queryfile> [ignore_times] [loops]\n", argv[0]);
if (argc < 5) {
printf("Usage: %s <infexfile> <linesfile> <timesfile> <queryfile> [ignore_times] [loops]\n", argv[0]);
exit(1);
}
fileName = argv[1];
timesFile = argv[2];
linesFile = argv[2];
timesFile = argv[3];
//@@ f.open(fileName, ios::binary);
......@@ -195,7 +197,7 @@ int main(int argc, char ** argv) {
//@@ f.close();
ulong Index_size, Text_length;
int error= load_index (fileName, timesFile, &index);
int error= load_index (fileName, linesFile, timesFile, &index);
IFERROR (error);
//printInfo(index);
......@@ -218,16 +220,16 @@ int main(int argc, char ** argv) {
gotreslist = (uint*)malloc(sizeof(unsigned int)*BUFFER);
int ignore_times = 0;
if (argc > 4) {
ignore_times = atoi(argv[4]);
if (argc > 5) {
ignore_times = atoi(argv[5]);
}
int nqueries = 0;
TimeQuery * queries = readQueries(index, argv[3], &nqueries, ignore_times);
TimeQuery * queries = readQueries(index, argv[4], &nqueries, ignore_times);
int executed_queries = LOOPS;
if (argc > 5) {
executed_queries = atoi(argv[5]);
if (argc > 6) {
executed_queries = atoi(argv[6]);
}
int i,j;
......
......@@ -418,9 +418,9 @@ int save_index (void *index, char *filename) {
/** Loads index from one or more file(s) named filename, possibly
adding the proper extensions. */
int load_index(char *filename, char *timesFile, void **index){
int load_index(char *filename, char *linesFile, char *timesFile, void **index){
twcsa *wcsa;
wcsa = loadWCSA (filename, timesFile);
wcsa = loadWCSA (filename, linesFile, timesFile);
(*index) = (void *) wcsa;
#ifdef DICTIONARY_HUFFRLE
......@@ -438,11 +438,11 @@ int load_index(char *filename, char *timesFile, void **index){
return 0;
}
int loadTimeIndex(twcsa *wcsa, char *basename) {
int loadTimeIndex(twcsa *wcsa, char *linesFile, char *timesFile) {
char filename[1024];
{
strcpy(filename, basename);
strcpy(filename, linesFile);
strcat(filename, ".");
strcat(filename, LINES_FILE_EXT);
std::ifstream ifs(filename, std::ifstream::in);
......@@ -457,7 +457,7 @@ int loadTimeIndex(twcsa *wcsa, char *basename) {
}
{
strcpy(filename, basename);
strcpy(filename, timesFile);
strcat(filename, ".");
strcat(filename, TIMES_FILE_EXT);
std::ifstream ifs(filename, std::ifstream::in);
......@@ -1059,7 +1059,7 @@ int loadBaseline(twcsa *wcsa, char *basename) {
//wcsa->baseline = new tbaseline{NULL, NULL, NULL, fromXtoY};
}
twcsa *loadWCSA(char *filename, char *timesFile) {
twcsa *loadWCSA(char *filename, char *linesFile, char *timesFile) {
twcsa *wcsa;
wcsa = (twcsa *) my_malloc (sizeof (twcsa) * 1);
......@@ -1067,7 +1067,7 @@ twcsa *loadWCSA(char *filename, char *timesFile) {
loadIntIndex(filename, (void **)&wcsa->myicsa);
loadStructs(wcsa,filename);
loadTimeIndex(wcsa,timesFile);
loadTimeIndex(wcsa, linesFile, timesFile);
// loadBaseline(wcsa, filename);
......
......@@ -270,7 +270,7 @@ int printInfo(void *index);
//loading/freeing the data structures into memory.
void loadStructs(twcsa *wcsa, char *basename);
twcsa *loadWCSA(char *filename, char *timesFile);
twcsa *loadWCSA(char *filename, char *linesFile, char *timesFile);
// Definitions of PRIVATE functions
//Auxiliary functions
......
......@@ -142,7 +142,7 @@ int save_index (void *index, char *filename);
/* Loads index from one or more file(s) named filename, possibly
adding the proper extensions. */
int load_index (char *filename, char *timesFile, void **index);
int load_index (char *filename, char *linesFile, char *timesFile, void **index);
//@@void tgs_load(struct tgs *a, ifstream & f);
/* Frees the memory occupied by index. */
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment