A fuzzy logic C++ library
|
Holds information on a given data file: what columns are numeric, what columns are strings, etc. More...
#include <datafile_info.hpp>
Public Member Functions | |
DATAFILE_INFO (std::string fn) | |
DATAFILE_INFO () | |
void | Clear () |
void | GetFileInfo () |
Returns information on file: how many fields, what type, ... | |
void | AssignDataDescription (const DATA_DESCR &descr) |
Assigns description of requested data to the information on file and checks consistency. | |
bool | HasDescription () const |
const DATA_DESCR & | GetDescription () const |
const DATA_DESCR & | GetPostReadingDescr () const |
const accessors | |
void | Print (FILE *f) const |
bool | HasAttribNames () const |
bool | IsSet () const |
EN_DF_TYPE | GetFileType () const |
Returns type of file, for string identification, see GetString( EN_DF_TYPE ) | |
size_t | GetNbDataPts () const |
size_t | GetNbNumericFields () const |
size_t | GetNeededNbFields () const |
Returns the number of fields needed for a datapoint, can be either the *real* value, extracted from file, or the requested value, as given in description. | |
size_t | GetTotNbFields () const |
size_t | GetNbStringFields () const |
char | GetDelimChar () const |
EN_DATA_FIELD_TYPE | GetFieldType (size_t idx) const |
Returns field type for index idx. | |
std::string | GetAttribName (size_t idx) const |
bool | FieldIsRequested (size_t idx) const |
Returns true if field idx (0-based) needs to be loaded from the data file. | |
size_t | GetFirstNumeric () const |
Returns the first numeric field. | |
std::string | GetFileName () const |
file related | |
void | OpenFile () |
bool | FileIsGood () |
bool | FileIsOpen () |
void | CloseFile () |
Static Public Member Functions | |
static void | SetCSVDelim (char sep) |
Private Member Functions | |
std::string | P_ReadLine () |
void | P_GetFileInfo_arff () |
Returns information on arff file: how many fields, what type. | |
void | P_GetFileInfo_csv () |
Returns information on csv file, assuming commented lines start with '#' and fields are separated with ';'. | |
bool | P_FetchArffCommands (const std::string &buf) |
Fetches ARFF commands from line buf; returns true if finished (i.e. if we encounter a DATA command)
Private Attributes | |
std::string | _input_fn |
file name | |
std::ifstream | _datafile |
the input data file | |
std::vector< PAIR_ATTRIB_NT > | _vAttribNameType |
attributes names and type | |
size_t | _NbDataPts |
Nb of points. | |
size_t | _NbNumeric |
Nb of numeric values in the columns. | |
size_t | _NbString |
Nb of string values in the columns. | |
EN_DF_TYPE | _FileType |
arff, csv, or other | |
bool | _IsSet |
flag that gets true once the type of file, nb of attributes and names are known | |
bool | _HasStringAttr |
true if file has at least one field of string type | |
bool | _HasAttribNames |
true if data file has attribute names (always true for arff) | |
std::vector< size_t > | _vStringColumns |
indexes of columns that hold a string attribute. | |
std::vector< std::vector < std::string > > | _vvStringNames |
set of possible values for columns holding a string attribute | |
DATA_DESCR | _descr_Original |
DATA_DESCR | _descr_PostReading |
bool | _dfi_HasDescription |
Static Private Attributes | |
static char | s_CSV_sep = ';' |
CSV-file separator, see SetCSVDelim()
static char | s_buf [512] |
Friends | |
class | DATA_SET |
class | DATA_POINT |
Holds information on a given data file: what columns are numeric, what columns are strings, etc.
Usage:
DATAFILE_INFO dfi( "myfile.csv" ); dfi.GetFileInfo();
slifis::DATAFILE_INFO::DATAFILE_INFO | ( | std::string | fn | ) | [inline] |
References Clear().
slifis::DATAFILE_INFO::DATAFILE_INFO | ( | ) | [inline] |
References Clear().
void slifis::DATAFILE_INFO::Clear | ( | ) |
References slifis::DFT_UNKNOWN.
Referenced by DATAFILE_INFO().
void slifis::DATAFILE_INFO::GetFileInfo | ( | ) |
Returns information on file: how many fields, what type, ...
Actually, this function opens the file and reads it, but only keeps the metadata.
References __IN__, __OUT__, slifis::DFT_ARFF, slifis::DFT_CSV, slifis::DFT_UNKNOWN, slifis::DT_NUMERIC, slifis::DT_STRING, slifis::ERR_IO_ERROR, SLIFIS_ERROR_2, and SWITCH_ERROR.
Referenced by main().
void slifis::DATAFILE_INFO::AssignDataDescription | ( | const DATA_DESCR & | descr | ) |
Assigns description of requested data to the information on file and checks consistency.
References slifis::DATA_DESCR::ComputeIndexesAfterLoading(), slifis::ERR_DATA_BAD_INDEX, slifis::DATA_DESCR::GetInputIndex(), slifis::DATA_DESCR::GetNbInputs(), slifis::DATA_DESCR::GetOutputIndex(), SLIFIS_ERROR, and SLIFIS_ERROR_LOG.
Referenced by main().
bool slifis::DATAFILE_INFO::HasDescription | ( | ) | const [inline] |
References _dfi_HasDescription.
Referenced by slifis::DATA_SET::ReadData().
const DATA_DESCR & slifis::DATAFILE_INFO::GetDescription | ( | ) | const |
References slifis::ERR_DATA_DESCR_INVALID, SLIFIS_ERROR, and SLIFIS_ERROR_LOG.
Referenced by slifis::DATA_SET::ReadData().
const DATA_DESCR & slifis::DATAFILE_INFO::GetPostReadingDescr | ( | ) | const |
References slifis::ERR_DATA_DESCR_INVALID, SLIFIS_ERROR, and SLIFIS_ERROR_LOG.
Referenced by main().
void slifis::DATAFILE_INFO::SetCSVDelim | ( | char | sep | ) | [inline, static] |
References s_CSV_sep.
void slifis::DATAFILE_INFO::Print | ( | FILE * | f | ) | const |
References slifis::DFT_ARFF, slifis::DFT_CSV, slifis::GetChar_FieldType(), and SWITCH_ERROR.
Referenced by main().
bool slifis::DATAFILE_INFO::HasAttribNames | ( | ) | const [inline] |
References __IN__, __OUT__, _HasAttribNames, _IsSet, slifis::ERR_DATA_INFO_INVALID, and SLIFIS_ERROR.
Referenced by main().
bool slifis::DATAFILE_INFO::IsSet | ( | ) | const [inline] |
References _IsSet.
Referenced by slifis::DATA_SET::ReadData().
EN_DF_TYPE slifis::DATAFILE_INFO::GetFileType | ( | ) | const [inline] |
Returns type of file, for string identification, see GetString( EN_DF_TYPE )
References __IN__, __OUT__, _FileType, _IsSet, slifis::ERR_DATA_INFO_INVALID, and SLIFIS_ERROR.
Referenced by slifis::DATA_SET::ReadData().
size_t slifis::DATAFILE_INFO::GetNbDataPts | ( | ) | const [inline] |
References __IN__, __OUT__, _IsSet, _NbDataPts, slifis::ERR_DATA_INFO_INVALID, and SLIFIS_ERROR.
Referenced by main().
size_t slifis::DATAFILE_INFO::GetNbNumericFields | ( | ) | const [inline] |
References __IN__, __OUT__, _IsSet, _NbNumeric, slifis::ERR_DATA_INFO_INVALID, and SLIFIS_ERROR.
Referenced by main().
size_t slifis::DATAFILE_INFO::GetNeededNbFields | ( | ) | const [inline] |
Returns the number of fields needed for a datapoint, can be either the *real* value, extracted from file, or the requested value, as given in description.
References _descr_Original, _dfi_HasDescription, slifis::DATA_DESCR::GetNbInputs(), and GetTotNbFields().
Referenced by slifis::DATA_POINT::DATA_POINT(), and slifis::DATA_POINT::ReadDataFields().
size_t slifis::DATAFILE_INFO::GetTotNbFields | ( | ) | const [inline] |
References __IN__, __OUT__, _IsSet, _vAttribNameType, slifis::ERR_DATA_INFO_INVALID, and SLIFIS_ERROR.
Referenced by GetNeededNbFields(), slifis::DATA_SET::ReadData(), and slifis::DATA_POINT::ReadDataFields().
size_t slifis::DATAFILE_INFO::GetNbStringFields | ( | ) | const [inline] |
References __IN__, __OUT__, _IsSet, _NbString, slifis::ERR_DATA_INFO_INVALID, and SLIFIS_ERROR.
Referenced by slifis::DATA_SET::ReadData().
char slifis::DATAFILE_INFO::GetDelimChar | ( | ) | const [inline] |
References __IN__, __OUT__, _FileType, _IsSet, slifis::DFT_ARFF, slifis::DFT_CSV, slifis::ERR_DATA_BAD_TYPE, slifis::ERR_DATA_INFO_INVALID, s_CSV_sep, and SLIFIS_ERROR.
Referenced by slifis::DATA_POINT::ReadDataFields().
EN_DATA_FIELD_TYPE slifis::DATAFILE_INFO::GetFieldType | ( | size_t | idx | ) | const |
Returns field type for index idx.
References __IN__, __OUT__, slifis::ERR_DATA_BAD_INDEX, and SLIFIS_ERROR_2.
Referenced by slifis::DATA_POINT::ReadDataFields().
std::string slifis::DATAFILE_INFO::GetAttribName | ( | size_t | idx | ) | const |
References __IN__, __OUT__, slifis::ERR_DATA_BAD_INDEX, and SLIFIS_ERROR_2.
Referenced by main().
bool slifis::DATAFILE_INFO::FieldIsRequested | ( | size_t | idx | ) | const |
Returns true if field idx (0-based) needs to be loaded from the data file.
Referenced by slifis::DATA_POINT::ReadDataFields().
size_t slifis::DATAFILE_INFO::GetFirstNumeric | ( | ) | const |
Returns the first numeric field.
References __IN__, __OUT__, slifis::DT_NUMERIC, slifis::ERR_DATA_NO_NUMERIC, and SLIFIS_ERROR.
std::string slifis::DATAFILE_INFO::GetFileName | ( | ) | const [inline] |
References _input_fn.
Referenced by slifis::DATA_POINT::ReadDataFields().
void slifis::DATAFILE_INFO::OpenFile | ( | ) |
References __IN__, __OUT__, slifis::ERR_IO_ERROR, and SLIFIS_ERROR_1.
Referenced by main(), and slifis::DATA_SET::ReadData().
bool slifis::DATAFILE_INFO::FileIsGood | ( | ) | [inline] |
References _datafile.
Referenced by main(), and slifis::DATA_SET::ReadData().
bool slifis::DATAFILE_INFO::FileIsOpen | ( | ) | [inline] |
References _datafile.
Referenced by slifis::DATA_POINT::ReadDataFields().
void slifis::DATAFILE_INFO::CloseFile | ( | ) |
References __IN__, __OUT__, slifis::ERR_IO_ERROR, and SLIFIS_ERROR_1.
Referenced by main(), and slifis::DATA_SET::ReadData().
std::string slifis::DATAFILE_INFO::P_ReadLine | ( | ) | [private] |
References SLIFIS_IO_BUF_SIZE.
Referenced by slifis::DATA_POINT::ReadDataFields().
void slifis::DATAFILE_INFO::P_GetFileInfo_arff | ( | ) | [private] |
Returns information on arff file: how many fields, what type.
(… line, but this function also counts the number of data points)
References __IN__, __OUT__, slifis::LineHasContent(), and slifis::TrimCR().
void slifis::DATAFILE_INFO::P_GetFileInfo_csv | ( | ) | [private] |
Returns information on csv file, assuming commented lines start with '#' and fields are separated with ';'.
References __IN__, __OUT__, slifis::DT_NUMERIC, slifis::DT_STRING, slifis::ERR_IO_ERROR, slifis::LineHasContent(), SLIFIS_ERROR_2, SLIFIS_ERROR_LOG, slifis::TokensList(), and slifis::TrimCR().
bool slifis::DATAFILE_INFO::P_FetchArffCommands | ( | const std::string & | buf | ) | [private] |
Fetches ARFF commands from line buf; returns true if finished (i.e. if we encounter a DATA command).
Fills _vAttribNameType with names and types of attributes found in the file.
References __IN__, __OUT__, slifis::DT_DATE, slifis::DT_DEFAULT, slifis::DT_NUMERIC, slifis::DT_STRING, slifis::ERR_IO_ERROR, SLIFIS_ERROR_2, and slifis::TokensList().
friend class DATA_SET [friend] |
friend class DATA_POINT [friend] |
std::string slifis::DATAFILE_INFO::_input_fn [private] |
file name
Referenced by GetFileName().
std::ifstream slifis::DATAFILE_INFO::_datafile [private] |
the input data file
Referenced by FileIsGood(), and FileIsOpen().
std::vector<PAIR_ATTRIB_NT> slifis::DATAFILE_INFO::_vAttribNameType [private] |
attributes names and type
Referenced by GetTotNbFields().
size_t slifis::DATAFILE_INFO::_NbDataPts [private] |
Nb of points.
Referenced by GetNbDataPts(), and slifis::DATA_SET::ReadData().
size_t slifis::DATAFILE_INFO::_NbNumeric [private] |
Nb of numeric values in the columns.
Referenced by GetNbNumericFields().
size_t slifis::DATAFILE_INFO::_NbString [private] |
Nb of string values in the columns.
Referenced by GetNbStringFields().
EN_DF_TYPE slifis::DATAFILE_INFO::_FileType [private] |
arff, csv, or other
Referenced by GetDelimChar(), and GetFileType().
bool slifis::DATAFILE_INFO::_IsSet [private] |
flag that gets true once the type of file, nb of attributes and names are known
Referenced by GetDelimChar(), GetFileType(), GetNbDataPts(), GetNbNumericFields(), GetNbStringFields(), GetTotNbFields(), HasAttribNames(), and IsSet().
bool slifis::DATAFILE_INFO::_HasStringAttr [private] |
true if file has at least one field of string type
bool slifis::DATAFILE_INFO::_HasAttribNames [private] |
true if data file has attribute names (always true for arff)
Referenced by HasAttribNames().
std::vector<size_t> slifis::DATAFILE_INFO::_vStringColumns [private] |
indexes of columns that hold a string attribute.
std::vector< std::vector<std::string> > slifis::DATAFILE_INFO::_vvStringNames [private] |
set of possible values for columns holding a string attribute
Referenced by GetNeededNbFields().
bool slifis::DATAFILE_INFO::_dfi_HasDescription [private] |
Referenced by GetNeededNbFields(), and HasDescription().
char slifis::DATAFILE_INFO::s_CSV_sep = ';' [static, private] |
CSV-file separator, see SetCSVDelim()
Referenced by GetDelimChar(), and SetCSVDelim().
char slifis::DATAFILE_INFO::s_buf [static, private] |