00001
00002
00003
00004
00005
00006
00007
00008
00009 #include <cstdlib>
00010 #include <iostream>
00011 #include <iterator>
00012 #include <numeric>
00013 #include "Apriori_Trie.hpp"
00014
00019 Apriori_Trie::Apriori_Trie(const unsigned long counter_of_emptyset ):
00020 main_trie(counter_of_emptyset)
00021 {
00022 nr_of_freq_itemsets.push_back(1);
00023 }
00024
00029 void Apriori_Trie::insert_frequent_items(
00030 const vector<unsigned long>& counters )
00031 {
00032 for(vector<unsigned long>::size_type item_index = 0;
00033 item_index < counters.size(); item_index++)
00034 main_trie.add_empty_state( item_index, counters[item_index] );
00035 nr_of_freq_itemsets.push_back(counters.size());
00036 }
00037
00042 void Apriori_Trie::candidate_generation( const itemtype& frequent_size,
00043 Input_Output_Manager&
00044 input_output_manager )
00045 {
00046 if( frequent_size == 1 ) candidate_generation_two();
00047 else
00048 {
00049 set<itemtype> maybe_candidate;
00050 candidate_generation_assist( &main_trie, frequent_size-1,
00051 maybe_candidate, input_output_manager );
00052 }
00053 }
00054
00062 void Apriori_Trie::find_candidate( const vector<itemtype>& basket,
00063 const itemtype candidate_size,
00064 const unsigned long counter_incr)
00065 {
00066 if( candidate_size != 2 )
00067 if ( candidate_size<basket.size()+1 )
00068 main_trie.find_candidate( basket.end()-candidate_size+1,
00069 candidate_size,
00070 basket.begin(), counter_incr );
00071 else;
00072 else find_candidate_two( basket, counter_incr );
00073 }
00074
00079 void Apriori_Trie::delete_infrequent( const unsigned long min_occurrence,
00080 const itemtype candidate_size )
00081 {
00082 nr_of_freq_itemsets.push_back(0);
00083 if( candidate_size != 2 )
00084 main_trie.delete_infrequent( min_occurrence,
00085 candidate_size - 1,
00086 nr_of_freq_itemsets[candidate_size]);
00087 else delete_infrequent_two( min_occurrence );
00088 if( !nr_of_freq_itemsets.back() )
00089 nr_of_freq_itemsets.pop_back();
00090 }
00091
00092 bool Apriori_Trie::is_there_any_candidate() const
00093 {
00094 return !main_trie.edgevector.empty();
00095 }
00096
00097 void Apriori_Trie::statistics() const
00098 {
00099 cout << accumulate(nr_of_freq_itemsets.begin(),
00100 nr_of_freq_itemsets.end(), 0) << '\n';
00101 copy(nr_of_freq_itemsets.begin(),
00102 nr_of_freq_itemsets.end(),
00103 ostream_iterator<unsigned long>(cout, "\n"));
00104 }
00105
00106 Apriori_Trie::~Apriori_Trie()
00107 {
00108 }
00109
00114 bool Apriori_Trie::is_all_subset_frequent(
00115 const set<itemtype>& maybe_candidate ) const
00116 {
00117 if( maybe_candidate.size() < 3) return true;
00118
00119 else
00120 {
00121 set<itemtype> temp_itemset(maybe_candidate);
00122 set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00123 do
00124 {
00125 item_it--;
00126 temp_itemset.erase( *item_it );
00127 if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) )
00128 return false;
00129 temp_itemset.insert( *item_it );
00130 }
00131 while ( item_it != maybe_candidate.begin() );
00132 return true;
00133 }
00134 }
00135
00136 void Apriori_Trie::candidate_generation_two()
00137 {
00138 if( !main_trie.edgevector.empty() )
00139 {
00140 temp_counter_array.reserve(main_trie.edgevector.size()-1);
00141 temp_counter_array.resize(main_trie.edgevector.size()-1);
00142 for( vector<Edge>::size_type stateIndex = 0;
00143 stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
00144 {
00145 temp_counter_array[stateIndex].reserve(
00146 main_trie.edgevector.size()-1-stateIndex );
00147 temp_counter_array[stateIndex].resize(
00148 main_trie.edgevector.size()-1-stateIndex, 0);
00149 }
00150 }
00151 }
00152
00153 void Apriori_Trie::candidate_generation_assist(
00154 Trie* trie,
00155 const itemtype distance_from_generator,
00156 set<itemtype>& maybe_candidate,
00157 Input_Output_Manager& input_output_manager)
00158 {
00159 vector<Edge>::iterator itEdge = trie->edgevector.begin();
00160 if( distance_from_generator )
00161 {
00162 while( itEdge != trie->edgevector.end() )
00163 {
00164 maybe_candidate.insert((*itEdge).label);
00165 candidate_generation_assist(
00166 (*itEdge).subtrie, distance_from_generator - 1,
00167 maybe_candidate, input_output_manager );
00168 maybe_candidate.erase((*itEdge).label);
00169 if((*itEdge).subtrie->edgevector.empty())
00170 {
00171 delete (*itEdge).subtrie;
00172 itEdge = trie->edgevector.erase(itEdge);
00173 }
00174 else itEdge++;
00175
00176 }
00177 }
00178 else
00179 {
00180 vector<Edge>::iterator itEdge2;
00181 Trie* toExtend;
00182 while( itEdge != trie->edgevector.end() )
00183 {
00184 maybe_candidate.insert((*itEdge).label);
00185 toExtend = (*itEdge).subtrie;
00186 input_output_manager.write_out_basket_and_counter( maybe_candidate, toExtend->counter );
00187 for( itEdge2 = itEdge + 1;
00188 itEdge2 != trie->edgevector.end(); itEdge2++ )
00189 {
00190 maybe_candidate.insert( (*itEdge2).label );
00191 if( is_all_subset_frequent( maybe_candidate) )
00192 toExtend->add_empty_state( (*itEdge2).label );
00193 maybe_candidate.erase( (*itEdge2).label );
00194 }
00195 (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);
00196 maybe_candidate.erase((*itEdge).label);
00197 if( toExtend->edgevector.empty())
00198 {
00199 delete (*itEdge).subtrie;
00200 itEdge = trie->edgevector.erase(itEdge);
00201 }
00202 else itEdge++;
00203
00204
00205 }
00206
00207 }
00208 }
00209
00216 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket,
00217 const unsigned long counter )
00218 {
00219 if( basket.size() > 1)
00220 {
00221 vector<itemtype>::const_iterator it1_basket,
00222 it2_basket;
00223
00224 for( it1_basket = basket.begin(); it1_basket != basket.end()-1;
00225 it1_basket++)
00226 for( it2_basket = it1_basket+1; it2_basket != basket.end();
00227 it2_basket++)
00228 temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1]
00229 += counter;
00230 }
00231 }
00232
00236 void Apriori_Trie::delete_infrequent_two( const unsigned long min_occurrence )
00237 {
00238 vector<Edge>::size_type stateIndex_1,
00239 stateIndex_2;
00240 for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1;
00241 stateIndex_1++ )
00242 {
00243 for( stateIndex_2 = 0;
00244 stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1;
00245 stateIndex_2++ )
00246 {
00247 if( temp_counter_array[stateIndex_1][stateIndex_2] >= min_occurrence )
00248 {
00249 main_trie.edgevector[stateIndex_1].subtrie->add_empty_state(
00250 stateIndex_1 + stateIndex_2 + 1,
00251 temp_counter_array[stateIndex_1][stateIndex_2] );
00252 nr_of_freq_itemsets[2]++;
00253 }
00254 }
00255 temp_counter_array[stateIndex_1].clear();
00257 vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);
00258 }
00259 temp_counter_array.clear();
00261 vector< vector<unsigned long> >().swap(temp_counter_array);
00262 vector<Edge>::iterator it= main_trie.edgevector.begin();
00263 while( it!=main_trie.edgevector.end() )
00264 if((*it).subtrie->edgevector.empty())
00265 {
00266 delete (*it).subtrie;
00267 it = main_trie.edgevector.erase(it);
00268 }
00269 else it++;
00270 }