/* * ASCLITE * Author: Jerome Ajot, Jon Fiscus, Nicolas Radde, Chris Laprun * * This software was developed at the National Institute of Standards and Technology by * employees of the Federal Government in the course of their official duties. Pursuant * to title 17 Section 105 of the United States Code this software is not subject to * copyright protection and is in the public domain. ASCLITE is an experimental system. * NIST assumes no responsibility whatsoever for its use by other parties, and makes no * guarantees, expressed or implied, about its quality, reliability, or any other * characteristic. We would appreciate acknowledgement if the software is used. * * THIS SOFTWARE IS PROVIDED "AS IS." With regard to this software, NIST MAKES NO EXPRESS * OR IMPLIED WARRANTY AS TO ANY MATTER WHATSOEVER, INCLUDING MERCHANTABILITY, * OR FITNESS FOR A PARTICULAR PURPOSE. */ /** * Represent the Levenshtein Distance Matrix with compression */ #include "compressedlevenshteinmatrix.h" Logger* CompressedLevenshteinMatrix::m_pLogger = Logger::getLogger(); CompressedLevenshteinMatrix::CompressedLevenshteinMatrix(const size_t& _NbrDimensions, size_t* _TabDimensionDeep) { m_MaxMemoryKBProp = static_cast(ceil(1024*1024*atof(Properties::GetProperty("recording.maxnbofgb").c_str()))); m_BlockSizeKB = static_cast(atoi(Properties::GetProperty("align.memorycompressionblock").c_str())); if(m_BlockSizeKB > 1048576) m_BlockSizeKB = 1048575; /* LZMA Properties ; see lzma/LzmaLib.h */ m_lzmaLevel = 4; m_lzmaDictionarySize = 1 << 24; m_lzmaLc = 3; m_lzmaLp = 0; m_lzmaPb = 2; m_lzmaFb = 32; m_lzmaNumberThreads = 2; m_lzmaPropertiesSize = LZMA_PROPS_SIZE; m_NbrDimensions = _NbrDimensions; m_TabDimensionDeep = new size_t[m_NbrDimensions]; m_MultiplicatorDimension = new ullint[m_NbrDimensions]; m_TabBlockDivider = new size_t[m_NbrDimensions]; m_TabBlockDimensionDeep = new size_t[m_NbrDimensions]; m_MultiplicatorDimension[0] = 1; m_TabDimensionDeep[0] = _TabDimensionDeep[0] - 1; m_MaxSize = m_TabDimensionDeep[0]; for(size_t i=1; i= decomp_lengh) || (outPropsize > m_lzmaPropertiesSize) ) { //Incompressible data LOG_DEBUG(m_pLogger, "Compression: Incompressible block ignoring compression!"); free(m_TabStartByteCompressed[block_index]); m_TabStartByteCompressed[block_index] = NULL; } else { free(m_TabStartByte[block_index]); m_TabStartByte[block_index] = NULL; m_TabSizes[block_index] = comp_lengh + m_lzmaPropertiesSize; m_TabbIsCompressed[block_index] = true; m_CurrentMemorySize += comp_lengh + m_lzmaPropertiesSize - decomp_lengh; ++m_Compressions; ++m_NbrCompressedBlocks; --m_NbrDecompressedBlocks; } } } bool CompressedLevenshteinMatrix::DecompressBlock(const size_t& block_index) { CreateBlock(block_index); bool decomp = false; if(decomp = m_TabbIsCompressed[block_index]) { // Block is compressed, then decompress it; size_t comp_lengh = m_TabSizes[block_index] - m_lzmaPropertiesSize; size_t decomp_lengh = m_BaseLengthIn; m_TabStartByte[block_index] = (int*) malloc(m_BaseLengthIn); if(LzmaUncompress( (unsigned char*) m_TabStartByte[block_index], &decomp_lengh, (unsigned char*) m_TabStartByteCompressed[block_index] + m_lzmaPropertiesSize, &comp_lengh, (unsigned char*) m_TabStartByteCompressed[block_index], m_lzmaPropertiesSize) != SZ_OK) { LOG_FATAL(m_pLogger, "Decompression: 'LzmaUncompress()' failed!"); exit(EXIT_FAILURE); } free(m_TabStartByteCompressed[block_index]); m_TabStartByteCompressed[block_index] = NULL; m_TabSizes[block_index] = decomp_lengh; m_TabbIsCompressed[block_index] = false; m_CurrentMemorySize += decomp_lengh - comp_lengh; ++m_Decompressions; --m_NbrCompressedBlocks; ++m_NbrDecompressedBlocks; } TouchBlock(block_index); return decomp; } void CompressedLevenshteinMatrix::GarbageCollection() { char buffer[BUFFER_SIZE]; sprintf(buffer, "Compressed Levenshtein Matrix: Current: %lu KB, Limit: %lu KB, CompressedBlocks: %lu, UncompressedBlocks: %lu", m_CurrentMemorySize/1024, static_cast(m_UsableMemoryKB), m_NbrCompressedBlocks, m_NbrDecompressedBlocks); LOG_DEBUG(m_pLogger, buffer); if(isCallGarbageCollector()) { bool found = false; ulint count = 0; do { if(found = ForcedGarbageCollection()) ++count; } while(found && !isStopGarbageCollector()); char buffer[BUFFER_SIZE]; sprintf(buffer, "Garbage collection: %lu blocks compressed", count); LOG_DEBUG(m_pLogger, buffer); } } bool CompressedLevenshteinMatrix::ForcedGarbageCollection() { ulint mintouch = ULONG_MAX; size_t min_index = 0; // Do the ugly Java GC bool found = false; for(size_t i=0; i* PrimeDiv = new vector [m_NbrDimensions]; size_t* tmpDivider = new size_t[m_NbrDimensions]; size_t* tmpBlockDimensions = new size_t[m_NbrDimensions]; size_t blocksize = m_BlockSizeKB*256; // Computation // Initialization for(size_t i=0; i= 1) && ((m_TabDimensionDeep[i]+1) % 2 == 0) && ((m_TabDimensionDeep[i]+1) % j == 0) ) || ( (levelopt >= 2) && ((m_TabDimensionDeep[i]+levelopt) % j == 0) ) ) PrimeDiv[i].push_back(j); Cursor[i] = 0; } // End Initialization // Main research bool finished = false; size_t closestsize = ULONG_MAX; do { if(Cursor[0] == PrimeDiv[0].size()) { finished = true; } else { size_t size = 1; for(size_t i=0; i 0) && (Cursor[currdim] == PrimeDiv[currdim].size()) ) { Cursor[currdim] = 0; --currdim; ++(Cursor[currdim]); } } } while(!finished); // Main research m_BlockSizeElts = 1; m_NbrCompressedTabs = 1; for(size_t i=0; i