00001
00002
00003
00004
00005
00006
00007
00008
00009
00014 #include "Input_Output_Manager.hpp"
00015
00016 Input_Output_Manager::Input_Output_Manager( ifstream& basket_file,
00017 const char* output_file_name ):
00018 ofstream(output_file_name), basket_file(basket_file)
00019 {
00020 }
00021
00026 int Input_Output_Manager::read_in_a_line( vector<itemtype>& basket )
00027 {
00028 if( basket_file.eof() ) return 0;
00029 char c;
00030 itemtype pos;
00031
00032 basket.clear();
00033 do
00034 {
00035 int item = 0;
00036 pos = 0;
00037 basket_file.get(c);
00038 while(basket_file.good() && (c >= '0') && (c <= '9'))
00039 {
00040 item *= 10;
00041 item += int(c)-int('0');
00042 basket_file.get(c);
00043 pos++;
00044 }
00045 if( pos ) basket.push_back( (itemtype) item );
00046 }
00047 while( !basket_file.eof() && c != '\n' );
00048 return 1;
00049 }
00050
00058 unsigned long Input_Output_Manager::find_frequent_items(
00059 const unsigned long min_supp, vector<unsigned long>& support_of_items )
00060 {
00061 unsigned long basket_number = 0;
00062 vector<itemtype> basket;
00063 vector< unsigned long > temp_counter_vector;
00064
00066 vector<itemtype>::iterator it_basket;
00067 while( read_in_a_line( basket ) )
00068 {
00069 if( !basket.empty() )
00070 {
00071 basket_number++;
00072 for( it_basket = basket.begin(); it_basket != basket.end();
00073 it_basket++ )
00074 {
00075 if( *it_basket + 1 > temp_counter_vector.size() )
00076 temp_counter_vector.resize( *it_basket + 1, 0 );
00077 temp_counter_vector[*it_basket]++;
00078 }
00079 }
00080 }
00081
00083 vector<unsigned long>::size_type edgeIndex;
00084
00085 set< pair<unsigned long, itemtype> > temp_set;
00086 for( itemtype edgeIndex = 0; edgeIndex < temp_counter_vector.size();
00087 edgeIndex++ )
00088 if( temp_counter_vector[edgeIndex] >= min_supp )
00089 temp_set.insert(
00090 pair<unsigned long, itemtype>(temp_counter_vector[edgeIndex],
00091 edgeIndex));
00092
00093 new_code_inverse.clear();
00094 support_of_items.clear();
00095 for(set< pair<unsigned long, itemtype> >::iterator it = temp_set.begin();
00096 it != temp_set.end(); it++)
00097 {
00098 new_code_inverse.push_back((*it).second);
00099 support_of_items.push_back((*it).first);
00100 }
00101
00102
00103 vector<itemtype>(new_code_inverse).swap(new_code_inverse);
00104 vector<unsigned long >(support_of_items).swap(support_of_items);
00105
00106 new_code.reserve( temp_counter_vector.size() + 1 );
00107 new_code.resize( temp_counter_vector.size() + 1, 0 );
00108 for( edgeIndex = 0; edgeIndex < new_code_inverse.size(); edgeIndex++ )
00109 new_code[new_code_inverse[edgeIndex]] = edgeIndex+1;
00110 return basket_number;
00111 }
00116 void Input_Output_Manager::basket_recode(
00117 const vector<itemtype>& original_basket, vector<itemtype>& new_basket )
00118 {
00119 new_basket.clear();
00120 for( vector<itemtype>::const_iterator it_basket = original_basket.begin();
00121 it_basket != original_basket.end(); it_basket++ )
00122 if( new_code[*it_basket] ) new_basket.push_back( new_code[*it_basket]-1 );
00123 sort( new_basket.begin(), new_basket.end() );
00124 }
00125
00126 void Input_Output_Manager::write_out_basket(const set<itemtype>& basket)
00127 {
00128 for( set<itemtype>::const_iterator it_item = basket.begin();
00129 it_item != basket.end(); it_item++)
00130 {
00131 operator<<( new_code_inverse[*it_item] );
00132 put(' ');
00133 }
00134 }
00135
00136 void Input_Output_Manager::write_out_basket_and_counter(
00137 const set<itemtype>& itemset, const unsigned long counter)
00138 {
00139 for( set<itemtype>::const_iterator it_item = itemset.begin();
00140 it_item != itemset.end(); it_item++)
00141 {
00142 operator<<( new_code_inverse[*it_item] );
00143 put(' ');
00144 }
00145 put('(');
00146 operator<<(counter);
00147 write(")\n",2);
00148 }
00149
00150 void Input_Output_Manager::rewind()
00151 {
00152 basket_file.clear();
00153 basket_file.seekg(0, ios::beg);
00154 }
00155
00156 Input_Output_Manager::~Input_Output_Manager()
00157 {
00158 close();
00159 }