Dynamic Itemset Counting Algorithm

dic.cpp

#include <iostream>
#include <fstream>
#include <vector>
#include <set>
#include <map>
#include <unordered_map>
#include <sstream>
#include <algorithm>

using namespace std;

typedef set<int> Itemset;

vector<Itemset> generateCandidates(const vector<Itemset>& frequentItemsets, int k) {
    vector<Itemset> candidates;
    for (size_t i = 0; i < frequentItemsets.size(); ++i) {
        for (size_t j = i + 1; j < frequentItemsets.size(); ++j) {
            Itemset candidate(frequentItemsets[i]);
            candidate.insert(frequentItemsets[j].begin(), frequentItemsets[j].end());
            if (candidate.size() == k) {
                candidates.push_back(candidate);
            }
        }
    }
    return candidates;
}

map<Itemset, int> countSupport(const vector<Itemset>& transactions, const vector<Itemset>& candidates, int start, int end) {
    map<Itemset, int> itemsetCounts;
    for (int i = start; i < end; ++i) {
        for (const auto& candidate : candidates) {
            if (includes(transactions[i].begin(), transactions[i].end(), candidate.begin(), candidate.end())) {
                itemsetCounts[candidate]++;
            }
        }
    }
    return itemsetCounts;
}

vector<Itemset> filterBySupport(const map<Itemset, int>& candidateCounts, int minSupport) {
    vector<Itemset> frequentItemsets;
    for (const auto& candidateCount : candidateCounts) {
        if (candidateCount.second >= minSupport) {
            frequentItemsets.push_back(candidateCount.first);
        }
    }
    return frequentItemsets;
}

void DIC(const vector<Itemset>& transactions, int globalMinSupport, int M, const string& outputFilename) {
    ofstream outputFile(outputFilename);

    // Initialize pass and itemset tracking
    vector<Itemset> frequentItemsets;

    // First pass, start with frequent 1-itemsets
    map<int, int> itemCounts;
    int numTransactions = transactions.size();
    int intervalSize = numTransactions / M;

    for (int interval = 0; interval < M; ++interval) {
        int start = interval * intervalSize;
        int end = (interval == M - 1) ? numTransactions : (interval + 1) * intervalSize;

        // Count support for 1-itemsets
        for (int i = start; i < end; ++i) {
            for (int item : transactions[i]) {
                itemCounts[item]++;
            }
        }

        // Filter 1-itemsets by support
        for (const auto& itemCount : itemCounts) {
            if (itemCount.second >= globalMinSupport) {
                frequentItemsets.push_back({ itemCount.first });
            }
        }
    }

    // Output frequent 1-itemsets to file
    outputFile << "Frequent 1-itemsets:\n";
    for (const auto& itemset : frequentItemsets) {
        for (int item : itemset) {
            outputFile << item << " ";
        }
        outputFile << endl;
    }

    // Dynamic itemset introduction for k > 1
    int k = 2;
    while (!frequentItemsets.empty()) {
        vector<Itemset> candidates = generateCandidates(frequentItemsets, k);
        map<Itemset, int> candidateCounts;

        for (int interval = 0; interval < M; ++interval) {
            int start = interval * intervalSize;
            int end = (interval == M - 1) ? numTransactions : (interval + 1) * intervalSize;

            // Count support for candidates in the current interval
            map<Itemset, int> intervalCounts = countSupport(transactions, candidates, start, end);

            // Accumulate candidate counts across intervals
            for (const auto& intervalCount : intervalCounts) {
                candidateCounts[intervalCount.first] += intervalCount.second;
            }

            // Filter candidates by support at each interval dynamically
            frequentItemsets = filterBySupport(candidateCounts, globalMinSupport);

            // Output frequent k-itemsets if found
            if (!frequentItemsets.empty()) {
                outputFile << "\nFrequent " << k << "-itemsets (after interval " << interval + 1 << "):\n";
                for (const auto& itemset : frequentItemsets) {
                    for (int item : itemset) {
                        outputFile << item << " ";
                    }
                    outputFile << endl;
                }
            }
        }

        k++;
    }

    outputFile.close();
    cout << "Results saved to " << outputFilename << endl;
}

// Function to read transactions from a text file
vector<Itemset> readTransactions(const string& filename) {
    ifstream file(filename);
    vector<Itemset> transactions;
    string line;

    if (file.is_open()) {
        while (getline(file, line)) {
            stringstream ss(line);
            Itemset transaction;
            int item;
            while (ss >> item) {
                transaction.insert(item);
            }
            transactions.push_back(transaction);
        }
        file.close();
    }
    else {
        cerr << "Unable to open file" << endl;
    }

    return transactions;
}

int main() {
    string inputFilename = "td.txt";
    string outputFilename = "output.txt";
    int globalMinSupport = 10; 
    int M = 3;  // Number of intervals

    vector<Itemset> transactions = readTransactions(inputFilename);

    DIC(transactions, globalMinSupport, M, outputFilename);

    return 0;
}