Main Page | Namespace List | Class List | File List | Class Members | File Members

Apriori_Trie.cpp

Go to the documentation of this file.
00001 /***************************************************************************
00002                           Apriori_Trie.cpp  -  description
00003                              -------------------
00004     begin                : cs dec 26 2002
00005     copyright            : (C) 2002 by Ferenc Bodon
00006     email                : bodon@mit.bme.hu
00007  ***************************************************************************/
00008 
00009 #include <cstdlib>
00010 #include <iostream>
00011 #include <iterator>
00012 #include <numeric>
00013 #include "Apriori_Trie.hpp"
00014 
00019 Apriori_Trie::Apriori_Trie(const unsigned long counter_of_emptyset ):
00020    main_trie(counter_of_emptyset)
00021 {
00022    nr_of_freq_itemsets.push_back(1);
00023 }
00024 
00029 void Apriori_Trie::insert_frequent_items(
00030    const vector<unsigned long>& counters )
00031 {
00032    for(vector<unsigned long>::size_type item_index = 0; 
00033        item_index < counters.size(); item_index++)
00034       main_trie.add_empty_state( item_index, counters[item_index] );
00035    nr_of_freq_itemsets.push_back(counters.size());
00036 }
00037 
00042 void Apriori_Trie::candidate_generation( const itemtype& frequent_size,
00043                                          Input_Output_Manager& 
00044                                          input_output_manager )
00045 {
00046    if( frequent_size == 1 ) candidate_generation_two();
00047    else
00048    {
00049       set<itemtype> maybe_candidate;
00050       candidate_generation_assist( &main_trie, frequent_size-1, 
00051                                    maybe_candidate, input_output_manager );
00052    }
00053 }
00054 
00062 void Apriori_Trie::find_candidate( const vector<itemtype>& basket, 
00063                                    const itemtype candidate_size, 
00064                                    const unsigned long counter_incr)
00065 {
00066    if( candidate_size != 2 ) 
00067       if ( candidate_size<basket.size()+1 )
00068          main_trie.find_candidate( basket.end()-candidate_size+1, 
00069                                    candidate_size, 
00070                                    basket.begin(), counter_incr );
00071       else;
00072    else find_candidate_two( basket, counter_incr );    
00073 }
00074 
00079 void Apriori_Trie::delete_infrequent( const unsigned long min_occurrence, 
00080                                       const itemtype candidate_size )
00081 {
00082    nr_of_freq_itemsets.push_back(0);
00083    if( candidate_size != 2 ) 
00084       main_trie.delete_infrequent( min_occurrence, 
00085                                    candidate_size - 1,
00086                                    nr_of_freq_itemsets[candidate_size]);
00087    else delete_infrequent_two( min_occurrence );
00088    if( !nr_of_freq_itemsets.back() ) 
00089       nr_of_freq_itemsets.pop_back();
00090 }
00091 
00092 bool Apriori_Trie::is_there_any_candidate() const
00093 {
00094    return !main_trie.edgevector.empty();
00095 }
00096 
00097 void Apriori_Trie::statistics() const
00098 {
00099    cout << accumulate(nr_of_freq_itemsets.begin(), 
00100                       nr_of_freq_itemsets.end(), 0) << '\n';
00101    copy(nr_of_freq_itemsets.begin(), 
00102         nr_of_freq_itemsets.end(), 
00103         ostream_iterator<unsigned long>(cout, "\n"));
00104 }
00105 
00106 Apriori_Trie::~Apriori_Trie()
00107 {
00108 }
00109 
00114 bool Apriori_Trie::is_all_subset_frequent( 
00115    const set<itemtype>& maybe_candidate ) const
00116 {
00117    if( maybe_candidate.size() < 3) return true; // because of the 
00118                                                 // candidate generation method!
00119    else
00120    {
00121       set<itemtype>                 temp_itemset(maybe_candidate);
00122       set<itemtype>::const_iterator item_it = --(--maybe_candidate.end());
00123       do
00124       {
00125          item_it--;
00126          temp_itemset.erase( *item_it );
00127          if( !main_trie.is_included( temp_itemset, temp_itemset.begin() ) ) 
00128             return false;
00129          temp_itemset.insert( *item_it );
00130       }
00131       while ( item_it != maybe_candidate.begin() );
00132       return true;
00133    }
00134 }
00135 
00136 void Apriori_Trie::candidate_generation_two()
00137 {
00138    if( !main_trie.edgevector.empty() )
00139    {
00140       temp_counter_array.reserve(main_trie.edgevector.size()-1);
00141       temp_counter_array.resize(main_trie.edgevector.size()-1);
00142       for( vector<Edge>::size_type stateIndex = 0; 
00143            stateIndex < main_trie.edgevector.size()-1; stateIndex++ )
00144       {
00145          temp_counter_array[stateIndex].reserve(
00146             main_trie.edgevector.size()-1-stateIndex );
00147          temp_counter_array[stateIndex].resize(
00148             main_trie.edgevector.size()-1-stateIndex, 0);
00149       }
00150    }
00151 }
00152 
00153 void Apriori_Trie::candidate_generation_assist( 
00154    Trie* trie, 
00155    const itemtype distance_from_generator,
00156    set<itemtype>& maybe_candidate,
00157    Input_Output_Manager& input_output_manager)
00158 {
00159    vector<Edge>::iterator itEdge = trie->edgevector.begin();
00160    if( distance_from_generator )
00161    {
00162       while( itEdge != trie->edgevector.end() )
00163       {
00164          maybe_candidate.insert((*itEdge).label);
00165          candidate_generation_assist(
00166             (*itEdge).subtrie, distance_from_generator - 1, 
00167             maybe_candidate, input_output_manager );
00168          maybe_candidate.erase((*itEdge).label);
00169          if((*itEdge).subtrie->edgevector.empty())
00170          {
00171             delete (*itEdge).subtrie;
00172             itEdge = trie->edgevector.erase(itEdge);
00173          }
00174          else itEdge++;
00175 
00176       }
00177    }
00178    else
00179    {
00180       vector<Edge>::iterator itEdge2;
00181       Trie* toExtend;
00182       while( itEdge != trie->edgevector.end() )
00183       {
00184          maybe_candidate.insert((*itEdge).label);
00185          toExtend = (*itEdge).subtrie;
00186          input_output_manager.write_out_basket_and_counter( maybe_candidate, toExtend->counter );
00187          for( itEdge2 = itEdge + 1; 
00188               itEdge2 != trie->edgevector.end(); itEdge2++ )
00189          {
00190             maybe_candidate.insert( (*itEdge2).label );
00191             if( is_all_subset_frequent( maybe_candidate) )
00192                toExtend->add_empty_state( (*itEdge2).label );
00193             maybe_candidate.erase( (*itEdge2).label );
00194          }
00195          (vector<Edge>(toExtend->edgevector)).swap(toExtend->edgevector);  
00196           maybe_candidate.erase((*itEdge).label);
00197          if( toExtend->edgevector.empty())
00198          {
00199             delete (*itEdge).subtrie;
00200             itEdge = trie->edgevector.erase(itEdge);
00201          }    
00202          else itEdge++;
00203   // we know that state toExtend will not have any more children!
00204 
00205       }
00206 
00207    }
00208 }
00209 
00216 void Apriori_Trie::find_candidate_two( const vector<itemtype>& basket, 
00217                                        const unsigned long counter )
00218 {
00219    if( basket.size() > 1)
00220    {
00221       vector<itemtype>::const_iterator it1_basket,
00222                                        it2_basket;
00223 
00224       for( it1_basket = basket.begin(); it1_basket != basket.end()-1; 
00225            it1_basket++)
00226          for( it2_basket = it1_basket+1; it2_basket != basket.end(); 
00227               it2_basket++)
00228             temp_counter_array[*it1_basket][*it2_basket-*it1_basket-1] 
00229                += counter;
00230    }
00231 }
00232 
00236 void Apriori_Trie::delete_infrequent_two( const unsigned long min_occurrence )
00237 {
00238    vector<Edge>::size_type stateIndex_1,
00239                             stateIndex_2;
00240    for( stateIndex_1 = 0; stateIndex_1 < main_trie.edgevector.size()-1; 
00241         stateIndex_1++ )
00242    {
00243       for( stateIndex_2 = 0; 
00244            stateIndex_2 < main_trie.edgevector.size() - 1 - stateIndex_1; 
00245            stateIndex_2++ )
00246       {
00247         if( temp_counter_array[stateIndex_1][stateIndex_2] >= min_occurrence )
00248         {
00249            main_trie.edgevector[stateIndex_1].subtrie->add_empty_state( 
00250               stateIndex_1 + stateIndex_2 + 1,
00251               temp_counter_array[stateIndex_1][stateIndex_2] );
00252            nr_of_freq_itemsets[2]++;
00253         }
00254       }
00255       temp_counter_array[stateIndex_1].clear();
00257       vector<unsigned long>().swap(temp_counter_array[stateIndex_1]);  
00258    }
00259    temp_counter_array.clear();
00261    vector< vector<unsigned long> >().swap(temp_counter_array);
00262    vector<Edge>::iterator it= main_trie.edgevector.begin();
00263    while( it!=main_trie.edgevector.end() )
00264       if((*it).subtrie->edgevector.empty())
00265       {
00266          delete (*it).subtrie;
00267          it = main_trie.edgevector.erase(it);
00268       }
00269       else it++;
00270 }

Generated on Fri Sep 3 17:23:51 2004 for APRIORI algorithm by doxygen 1.3.5