Commit 885d0c61 authored by Adrián Gómez Brandón
Browse files

Initial commit

parent eb585db2
cmake_minimum_required(VERSION 2.8.7)

# Make the project's own helper modules (AppendCompilerFlags, CheckSSE)
# visible to include().
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
include(AppendCompilerFlags)

project(runs-vectors)
set(PROJECT_VENDOR "Adrian Gomez Brandon")
set(CMAKE_PROJECT_NAME "runs-vectors")
set(PROJECT_CONTACT "adrian.gbrandon@udc.es")

# Fall back to a Debug build only when nobody picked a build type.
# An unconditional set() here would shadow a -DCMAKE_BUILD_TYPE=... given on
# the command line, so the requested configuration would never be used.
# Multi-config generators (non-empty CMAKE_CONFIGURATION_TYPES) choose the
# configuration at build time, so they are left alone.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE "Debug")
endif()
# C++11 compiler check: refuse GCC older than 4.7 and Clang older than 3.2.
if(NOT CMAKE_CXX_COMPILER_VERSION) # workaround for CMake versions older than 2.8.10
  # Ask the compiler itself; strip the trailing newline so the value is a
  # clean version string.
  execute_process(COMMAND "${CMAKE_CXX_COMPILER}" -dumpversion
                  OUTPUT_VARIABLE CMAKE_CXX_COMPILER_VERSION
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
if(CMAKE_CXX_COMPILER MATCHES ".*clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  set(CMAKE_COMPILER_IS_CLANGXX 1)
endif()
# The version is quoted so that an empty value still yields a well-formed
# if() expression instead of a configure-time syntax error.
if( (CMAKE_COMPILER_IS_GNUCXX AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 4.7) OR
    (CMAKE_COMPILER_IS_CLANGXX AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 3.2))
  message(FATAL_ERROR "Your C++ compiler does not support C++11. Please install g++ 4.7 (or greater) or clang 3.2 (or greater)")
else()
  message(STATUS "Compiler is recent enough to support C++11.")
endif()
# Compiler flags. Each call appends its flag string to the named variable only
# if a trivial test program compiles with it. Every compiler that is not GCC
# takes the "CLANG" branch.
if(NOT CMAKE_COMPILER_IS_GNUCXX)
  # Flags for all configurations.
  append_cxx_compiler_flags("-std=c++11" "CLANG" CMAKE_CXX_FLAGS)
  append_cxx_compiler_flags("-stdlib=libc++" "CLANG" CMAKE_CXX_FLAGS)
  # Extra flags for Release builds.
  append_cxx_compiler_flags("-msse4.2 -O3 -ffast-math -funroll-loops -DNDEBUG" "CLANG" CMAKE_CXX_FLAGS_RELEASE)
else()
  # Flags for all configurations.
  append_cxx_compiler_flags("-std=c++11 -Wall -Wextra " "GCC" CMAKE_CXX_FLAGS)
  # Extra flags for Release builds (debug info and frame pointers are kept).
  append_cxx_compiler_flags("-msse4.2 -O3 -ffast-math -funroll-loops -fno-omit-frame-pointer -g" "GCC" CMAKE_CXX_FLAGS_RELEASE)
endif()
# Probe the host for SSE support (FindSSE is provided by the CheckSSE module)
# and request -msse4.2 for every configuration when SSE4.2 is reported.
include(CheckSSE)
FindSSE()
if(NOT SSE4_2_FOUND)
  message(STATUS "CPU does NOT support SSE4.2")
else()
  # The label only selects the name of the cached compile-check result.
  if(CMAKE_COMPILER_IS_GNUCXX)
    set(sse_flag_label "GCC")
  else()
    set(sse_flag_label "CLANG")
  endif()
  append_cxx_compiler_flags("-msse4.2" "${sse_flag_label}" CMAKE_CXX_FLAGS)
  message(STATUS "CPU does support SSE4.2.")
endif()
# Build the bundled sdsl-lite library as part of this project.
add_subdirectory(external/sdsl-lite)

# Header search path, in lookup order. STXXL_INCLUDE_DIRS is not set in this
# file; it contributes nothing unless something else defines it.
set(runs_vectors_include_dirs
  "${PROJECT_SOURCE_DIR}/external/googletest/include"
  ${CMAKE_HOME_DIRECTORY}/include
  ${CMAKE_BINARY_DIR}/external/sdsl-lite/include
  ${CMAKE_BINARY_DIR}/external/sdsl-lite/external/libdivsufsort/include/
  ${CMAKE_HOME_DIRECTORY}/external/googletest/include
  ${CMAKE_HOME_DIRECTORY}/external/sdsl-lite/external/FastPFor/headers/
  ${STXXL_INCLUDE_DIRS}
)
include_directories(${runs_vectors_include_dirs})
# One executable per program; each is built from src/<name>.cpp and linked
# against sdsl and both divsufsort libraries.
foreach(program
    main
    error
    test_hybrid_succ
    test_sd_succ
    test_zombit_succ
    test_zombit_rec_succ
    test_succ)
  add_executable(${program} src/${program}.cpp)
  target_link_libraries(${program} sdsl divsufsort divsufsort64)
endforeach()
\ No newline at end of file
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)
# append_c_compiler_flags(<flags> <name> <result>)
#
# Appends to the variable <result> every item of <flags> that the C compiler
# accepts. Items are separated by ';'; a space-separated string is a single
# item and is therefore accepted or rejected as a whole. The outcome of each
# probe is cached as HAVE_<NAME>_<FLAG>, both parts upper-cased with the
# characters "-+/ " turned into "_".
macro(append_c_compiler_flags _flags _name _result)
  # Saved so the caller's CMAKE_REQUIRED_FLAGS can be restored afterwards.
  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})

  # Identifier-safe, upper-case form of <name>.
  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
  string(TOUPPER "${cname}" cname)

  foreach(flag ${_flags})
    # Drop the leading dashes, then make the rest identifier-safe.
    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
    string(TOUPPER "${flagname}" flagname)
    set(have_flag "HAVE_${cname}_${flagname}")

    # Compile an empty program with just this flag.
    set(CMAKE_REQUIRED_FLAGS "${flag}")
    check_c_source_compiles("int main() { return 0; }" ${have_flag})
    if(${have_flag})
      set(${_result} "${${_result}} ${flag}")
    endif()
  endforeach()

  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
endmacro()
# append_cxx_compiler_flags(<flags> <name> <result>)
#
# C++ counterpart of append_c_compiler_flags: appends to the variable <result>
# every item of <flags> that the C++ compiler accepts. Items are separated by
# ';'; a space-separated string is a single item and is therefore accepted or
# rejected as a whole. The outcome of each probe is cached as
# HAVE_<NAME>_<FLAG>, both parts upper-cased with the characters "-+/ "
# turned into "_".
macro(append_cxx_compiler_flags _flags _name _result)
  # Saved so the caller's CMAKE_REQUIRED_FLAGS can be restored afterwards.
  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})

  # Identifier-safe, upper-case form of <name>.
  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
  string(TOUPPER "${cname}" cname)

  foreach(flag ${_flags})
    # Drop the leading dashes, then make the rest identifier-safe.
    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
    string(TOUPPER "${flagname}" flagname)
    set(have_flag "HAVE_${cname}_${flagname}")

    # Compile an empty program with just this flag.
    set(CMAKE_REQUIRED_FLAGS "${flag}")
    check_cxx_source_compiles("int main() { return 0; }" ${have_flag})
    if(${have_flag})
      set(${_result} "${${_result}} ${flag}")
    endif()
  endforeach()

  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
endmacro()
# Check if SSE instructions are available on the machine where
# the project is compiled.
#
# FindSSE() takes no arguments. It defines the BOOL cache entries
#   SSE2_FOUND  SSE3_FOUND  SSSE3_FOUND  SSE4_1_FOUND  SSE4_2_FOUND
# (all marked as advanced) and prints a status line for every level that was
# not found.
#
# Detection:
#   Linux   - searches the text of /proc/cpuinfo
#   Darwin  - searches the output of "sysctl -n machdep.cpu.features"
#   others  - assumes SSE2 only
# Values already in the cache are kept; only the "GCC older than 4.2" rule
# below overwrites them.
#
# The CPU description is always passed to string() as ONE quoted argument, so
# empty output or output containing ';' cannot change the argument count.
macro(FindSSE)
  if(CMAKE_SYSTEM_NAME MATCHES "Linux")
    execute_process(COMMAND cat /proc/cpuinfo
                    OUTPUT_VARIABLE CPUINFO
                    ERROR_QUIET)

    string(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
    if(SSE2_TRUE)
      set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    else()
      set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
    endif()

    # /proc/cpuinfo apparently omits sse3 :(
    string(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE)
    if(NOT SSE3_TRUE)
      # No sse3 flag listed: accept the "T2300" model string instead.
      string(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE "${CPUINFO}")
      string(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE)
    endif()

    string(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE)
    # SSSE3 support is also taken as evidence of SSE3.
    if(SSE3_TRUE OR SSSE3_TRUE)
      set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
    else()
      set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
    endif()
    if(SSSE3_TRUE)
      set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
    else()
      set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
    endif()

    string(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
    if(SSE41_TRUE)
      set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
    else()
      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
    endif()

    string(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE)
    if(SSE42_TRUE)
      set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
    else()
      set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
    endif()
  elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
                    OUTPUT_VARIABLE CPUINFO
                    ERROR_QUIET)

    string(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
    if(SSE2_TRUE)
      set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    else()
      set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
    endif()

    string(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE)
    if(SSE3_TRUE)
      set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
    else()
      set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
    endif()

    string(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE)
    if(SSSE3_TRUE)
      set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
    else()
      set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
    endif()

    string(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
    if(SSE41_TRUE)
      set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
    else()
      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
    endif()

    string(REGEX REPLACE "^.*(SSE4.2).*$" "\\1" SSE_THERE "${CPUINFO}")
    string(COMPARE EQUAL "SSE4.2" "${SSE_THERE}" SSE42_TRUE)
    if(SSE42_TRUE)
      set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
    else()
      set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
    endif()
  elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
    # TODO
    set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
    set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
    set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
    set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
  else()
    # Unknown system: same assumption as on Windows.
    set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
    set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
    set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
    set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
    set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
  endif()

  # With GCC older than 4.2 the SSE4 results are forced off, whatever the CPU
  # reported.
  if(CMAKE_COMPILER_IS_GNUCXX)
    execute_process(COMMAND "${CMAKE_CXX_COMPILER}" -dumpversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(GCC_VERSION VERSION_LESS 4.2)
      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host" FORCE)
      set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host" FORCE)
    endif()
  endif()

  if(NOT SSE2_FOUND)
    message(STATUS "Could not find support for SSE2 on this machine.")
  endif()
  if(NOT SSE3_FOUND)
    message(STATUS "Could not find support for SSE3 on this machine.")
  endif()
  if(NOT SSSE3_FOUND)
    message(STATUS "Could not find support for SSSE3 on this machine.")
  endif()
  if(NOT SSE4_1_FOUND)
    message(STATUS "Could not find support for SSE4.1 on this machine.")
  endif()
  if(NOT SSE4_2_FOUND)
    message(STATUS "Could not find support for SSE4.2 on this machine.")
  endif()

  mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND SSE4_2_FOUND)
endmacro()
#!/bin/sh
# Deletes the "build" directory found in the current working directory.
printf '%s\n' "Removing folders..."
rm -rf ./build
printf '%s\n' "DONE!!!"
\ No newline at end of file
#!/bin/sh
# Full rebuild, run from the directory that contains clean.sh:
# clean, fetch the git submodules, configure a Release build in ./build,
# then run make. Exits with status 1 as soon as the build directory cannot
# be entered or CMake/make fails, so "DONE!!!" is only printed on success.
echo "Clean project"
sh ./clean.sh
echo "Download external projects"
git submodule init
git submodule update --init --recursive
echo "Create folder build"
mkdir -p build
# Without this check cmake and make would run in the wrong directory.
cd build || { echo "Cannot enter folder build" >&2; exit 1; }
echo "Run CMake"
cmake -DCMAKE_BUILD_TYPE=Release .. || { echo "CMake failed" >&2; exit 1; }
echo "Run make"
make || { echo "make failed" >&2; exit 1; }
echo "DONE!!!"
\ No newline at end of file
#ifndef __APPLOG_HPP__
#define __APPLOG_HPP__
#include <math.h>
#include <stdio.h>
#define FP 16 //fixed point
#define SFP (1<<FP)
/**
 * Lookup table for an approximate base-2 logarithm in fixed point.
 *
 * Entry i (1 <= i < mapSize) holds log2(mapSize / i) scaled by SFP (2^FP);
 * entry 0 holds 0. get(numer, denomi) maps the ratio numer/denomi to an entry
 * and so approximates log2(denomi / numer) * SFP.
 */
class appLog{
public:
    /**
     * Builds the table with _mapSize entries.
     * Allocation failure is reported by new[] throwing std::bad_alloc.
     */
    appLog(const uint _mapSize):logMap(NULL),mapSize(_mapSize){
        logMap = new uint [mapSize];
        if (mapSize == 0) return; // empty table: there is no entry 0 to write
        logMap[0] = 0;
        for (uint i = 1; i < mapSize; i++){
            logMap[i] = (uint)(log((float)mapSize/i) / log(2.f) * SFP);
        }
    }
    ~appLog(){
        delete[] logMap;
    }
    /**
     * Returns the table entry for the ratio numer/denomi.
     *
     * @param numer  numerator of the ratio
     * @param denomi denominator of the ratio
     * @return the fixed-point table value; 0 when the ratio maps exactly to
     *         the end of the table (numer/denomi rounds to 1, whose logarithm
     *         is 0); 0 plus a message on stdout when denomi is 0 or the ratio
     *         maps beyond the table.
     */
    uint get(const uint numer ,const uint denomi) const{
        if (denomi == 0){
            printf("error in appLog::get() numer:%u denomi:%u mapSize:%u\n",numer,denomi,mapSize);
            return 0;
        }
        // 64-bit product: numer*mapSize can exceed the range of a 32-bit uint.
        const unsigned long long pos = (unsigned long long)numer * mapSize / denomi;
        if (pos > mapSize){
            printf("error in appLog::get() numer:%u denomi:%u pos:%llu mapSize:%u\n",numer,denomi,pos,mapSize);
            return 0;
        }
        // pos == mapSize is one past the last entry; log2(1) == 0.
        if (pos == mapSize) return 0;
        return logMap[pos];
    }
private:
    uint* logMap;   // mapSize fixed-point entries
    uint mapSize;   // number of entries in logMap
};
#endif //__APPLOG_HPP__
\ No newline at end of file
#ifndef __BITVECTOR_HPP__
#define __BITVECTOR_HPP__
#include <stdio.h>
#include "common.hpp"
#include "verticalCode.hpp"
using namespace std;
#define BLOCKSIZE 32
class bitVector{
public:
bitVector(uint _size = 0): size(_size), blockSize((_size + BLOCKSIZE - 1)/BLOCKSIZE), B(NULL){
if (size){
B = new uint [blockSize];
if (B == NULL){
printf("cannot allocate for B\n");
throw;
}
memset(B,0,sizeof(uint)*blockSize);
}
}
~bitVector(){
if (B) delete[] B;
}
void resize(uint n){
uint newBlockSize = (n+BLOCKSIZE-1)/BLOCKSIZE;
uint* newB = new uint [newBlockSize];
if (newB == NULL){
printf("cannot allocate at bitVector::resize()");
throw;
}
memset(newB,0,sizeof(uint)*newBlockSize);
uint copySize = min(newBlockSize,blockSize);
for (uint i = 0; i < copySize; i++){
newB[i] = B[i];
}
if (B) delete[] B;
B = newB;
size = n;
blockSize = newBlockSize;
}
void setBit(uint pos, uint x){
uint blockPos = pos / BLOCKSIZE;
uint offset = pos % BLOCKSIZE;
if (x == 0) B[blockPos] &= (~(1U << offset));
else if (x == 1) B[blockPos] |= (1U << offset);
else {
fprintf(stderr,"unexpected value in setBit. x = %d\n",x);
throw;
}
}
void setBits(uint pos, uint width, uint x){
for (uint i = 0; i < width; i++){
setBit(i+pos,(x >> i)&1);
}
}
uint getBit(uint pos)const {
uint blockPos = pos / BLOCKSIZE;
uint offset = pos % BLOCKSIZE;
if (pos >= size){
fprintf(stderr,"error in bitVector::getBit() pos:%d size:%d\n",pos,size);
exit(1);
}
return (B[blockPos] >> offset) & 1;
}
uint getBits(uint pos, uint width) const{
if (width >= 32){
fprintf(stderr,"getBits error. width:%d\n",width);
exit(1);
}
if (pos+width > size){
fprintf(stderr,"getBits error. pos:%d width:%d size:%d\n",pos,width,size);
exit(1);
}
if (width == 0) return 0;
/*
uint x = 0;
for (uint i = 0; i < width; i++){
x += getBit(pos+i) << i;
}
*/
//block version
const uint endPos = pos+width-1;
const uint blockPos_1 = pos / BLOCKSIZE;
const uint blockPos_2 = endPos / BLOCKSIZE;
const uint offset_1 = pos % BLOCKSIZE;
if (blockPos_1 == blockPos_2){
return (B[blockPos_1] >> offset_1) & ((1U << width) - 1);
}
else {
const uint offset_2 = endPos % BLOCKSIZE;
return (B[blockPos_1] >> offset_1) + ((B[blockPos_2] & ((1U << (offset_2+1))-1)) << (32-offset_1));
}
}
uint* getB() const { return B; }
uint getBlock(uint i) const { return B[i]; }
uint getSize() const{
return size;
}
uint getBlockSize() const{
return blockSize;
}
void printBits(){
for (uint i = 0; i < size; i++){
if (getBit(i)) putchar('1');
else putchar('0');
if ((i+1) % 8 == 0) putchar(' ');
if ((i+1) % 32 == 0) putchar('\n');
}
}
void copy(bitVector& from, uint bitPos, uint bitNum){
for (uint i = 0; i < bitNum; i++){
uint x = from.getBit(i);
setBit(bitPos+i,x);
}
}
void write(FILE* outfp){
fwrite(&blockSize,sizeof(uint),1,outfp);
fwrite(B,sizeof(uint),blockSize,outfp);
}
void read(FILE* infp){
uint _blockSize = 0;
fread(&_blockSize,sizeof(uint),1,infp);
resize(_blockSize * 32);
fread(B,sizeof(uint),blockSize,infp);
blockSize = _blockSize;
}
//---: use rank -----
#define LEVELA (1 << 8)
#define LEVELB (1 << 5)
#define B_LEVELA (LEVELA/BLOCKSIZE) //32
#define B_LEVELB (LEVELB/BLOCKSIZE) //4
uint _popCount(const uint x) const{
return popCount[ x >> 24 ] +
popCount[(x >> 16) & 0xFF] +
popCount[(x >> 8 ) & 0xFF] +
popCount[x & 0xFF];
}
void build(){
uint levelASize = size / LEVELA + 1;
uint levelBSize = size / LEVELB + 1;
levelA = new uint[levelASize];
levelB = new uchar[levelBSize];
for (uint i = 0; i < levelASize; i++){
levelA[i] = 0;
}
for (uint i = 0; i < levelBSize; i++){
levelB[i] = 0;
}
uint r = 0;
for (uint ia = 0; ia <= size; ia += LEVELA){
levelA[ia/LEVELA] = r;
for (uint ib = 0; ib < LEVELA && (ia+ib) <= size; ib += LEVELB){
levelB[(ia+ib)/LEVELB] = r - levelA[ia/LEVELA];
r += _popCount(B[(ia+ib)/LEVELB]);
}
}
}
uint rank(uint pos) const{
const uint remain = pos % BLOCKSIZE;
const uint x = B[pos/BLOCKSIZE] & ((1 << remain) - 1);
return levelA[pos/LEVELA] +
levelB[pos/LEVELB] +
_popCount(x);
}
public:
uint getAllocSize() const{
return size + (sizeof(uint)*(size / LEVELA + 1) + sizeof(uchar)*(size / LEVELB + 1))*8;
}
private:
uint* B;
uint size;
uint blockSize;
uint* levelA;
uchar* levelB;
};
#endif
\ No newline at end of file
#define INTSIZE 32
#define ENUINTSIZEDECTBL 65536
#include <stdio.h>
#include "common.hpp"
#define USEDECODETABLE
class comb{
public:
/**
 * Builds the binomial table and the decode tables.
 *
 * nCk[k][n] holds C(n, k) for 0 <= k <= n <= size. An entry is left at 0
 * when either of the two values it would be summed from is 0.
 * nCkDec[k] lists, for every code below nCk[k][INTSIZE], the word that
 * _enumDecode(k, INTSIZE, code) returns; it is NULL when that count is 0 or
 * larger than ENUINTSIZEDECTBL.
 *
 * NOTE(review): the decode-table loop reads column and rows up to INTSIZE,
 * so this presumably expects _size >= INTSIZE - confirm against callers.
 */
comb(uint _size): size(_size), nCk(NULL){
    // Allocate the (size+1) x (size+1) table.
    nCk = new uint* [size+1];
    for (uint row = 0; row <= size; row++){
        nCk[row] = new uint [size+1];
    }
    // Clear it.
    for (uint row = 0; row <= size; row++){
        for (uint col = 0; col <= size; col++){
            nCk[row][col] = 0;
        }
    }
    // Borders: C(n, 0) = 1 and C(n, n) = 1.
    for (uint n = 0; n <= size; n++){
        nCk[0][n] = 1;
    }
    for (uint n = 0; n <= size; n++){
        nCk[n][n] = 1;
    }
    // Interior: C(n, k) = C(n-1, k-1) + C(n-1, k).
    for (uint n = 1; n <= size; n++){
        for (uint k = 1; k < n; k++){
            const uint takeOne = nCk[k-1][n-1];
            const uint skipOne = nCk[k][n-1];
            if (takeOne == 0 || skipOne == 0) continue;
            nCk[k][n] = takeOne + skipOne;
        }
    }
    // Decode tables for INTSIZE-bit words, one per number of set bits.
    for (uint ones = 0; ones <= INTSIZE; ones++){
        const uint count = nCk[ones][INTSIZE];
        if (count == 0 || count > ENUINTSIZEDECTBL){
            nCkDec[ones] = NULL;
            continue;
        }
        nCkDec[ones] = new uint[count];
        for (uint code = 0; code < count; code++){
            nCkDec[ones][code] = _enumDecode(ones,INTSIZE,code);
        }
    }
}
private:
/**
 * Decodes code x into a word with oneNum set bits among num positions.
 *
 * Positions are visited from the first (written to bit INTSIZE-1) downwards.
 * At step k the code is compared with nCk[oneNum][num-k-1]: if it is at
 * least that value, the value is subtracted and the bit is set.
 * Returns 0 when a bit would have to be set although oneNum is already 0.
 */
uint _enumDecode(uint oneNum, uint num, uint x) const{
    uint word = 0;
    uint k = 0;
    while (k < num){
        const uint below = nCk[oneNum][num-k-1];
        if (x >= below){
            if (oneNum == 0){
                return 0;
            }
            x -= below;
            --oneNum;
            word |= (1U << (INTSIZE - 1 - k)); //32 should be replaced
        }
        ++k;
    }
    return word;
}
public:
uint enumDecode(uint oneNum, uint num, uint x) const{