#include <iostream>
#include <fstream>
#include <string.h>
#include <ctype.h>
#include <map>
#include <vector>
#include <dirent.h>
#include <cstdarg>
#include <algorithm>
#include <time.h>
#include <iomanip>
#include <cmath>

using namespace std;

typedef vector<int> Itemset;
typedef vector<Itemset > PrefixNode;
typedef vector<PrefixNode > PrefixLevel;
typedef vector<PrefixLevel > PrefixTree;

typedef vector<float> PrefixNodeSup;
typedef vector<PrefixNodeSup > PrefixLevelSup;

int MAX_ITEMS; // number of items
int MAX_TRANS; //number of transactions
float ** TRANS;  // Transactions matrix
float * SUP_ONE; // support for one-item sets
float * MAX_ABS_ITEM; // maximum abs value in each item
int * COVERAGE;
float max_neg =0, max_pos =0;
int num_pos_trans, num_neg_trans, threshold;
float val_pos_trans, val_neg_trans;
int single_vote = 0;
string * ITEM_LABEL; // label of item
string * TRANS_LABEL; // label of trans
int * sup_trans;
float support, h_conf,rs, min_box_val, max_box_val;
float sup_val = 0,hconf_val = 0;

int calculate_support(Itemset item, int print)
{
	int size = (int) item.size();
	int *items = new int[size];
	float *item_vals = new float[size];
	float *sup_one_vals = new float[size];
	float *max_abs_vals = new float[size];
	int return_val;
	int pos_count;
	int neg_count;
	sup_val = 0;
	hconf_val;
	int neg_flag = 0;
	min_box_val = -1;
	max_box_val = -1;
	for(int i=0; i<size; i++)
	{	
		items[i] = item[i];
		//cout<<items[i]<<" ";
		sup_one_vals[i] = SUP_ONE[items[i]];
		max_abs_vals[i] = MAX_ABS_ITEM[items[i]];
	}
	//cout<<endl;

	num_pos_trans=0; num_neg_trans =0;
	val_pos_trans = 0, val_neg_trans = 0;

	sort(sup_one_vals, sup_one_vals+size);
	sort(max_abs_vals, max_abs_vals+size);

	for(int i=0; i<MAX_TRANS; i++)
	{	
		sup_trans[i]=0;
		neg_flag = 0;
		pos_count = 0;
		neg_count = 0;
		for(int j=0; j<size; j++) item_vals[j] = TRANS[i][items[j]];	
		for(int j=0; j<size; j++)
		{
			if(item_vals[j]>0) pos_count++;
			if(item_vals[j]<0) neg_count++;

		}
		if(pos_count!=0 && neg_count!=0) continue;
		if(pos_count==0)
		{
			for(int j=0; j<size; j++) item_vals[j] = fabs(item_vals[j]);
			neg_flag = 1;
		}
		
		sort(item_vals, item_vals+size);			
		if((item_vals[size-1] - item_vals[0]) <= rs*item_vals[0] && item_vals[0] > 0) 
		{
			if(!single_vote) sup_val += item_vals[0];
			else 
			{
				//if(neg_flag) sup_val += 1 + (3*(item_vals[0] - threshold))/abs(max_neg) ;
				//else sup_val += 1 + (3*(item_vals[0] - threshold))/max_pos;
				sup_val += 1;
			}
			if(min_box_val == -1) min_box_val = item_vals[0];
			if(max_box_val < item_vals[size-1]) max_box_val = item_vals[size-1];
			if(min_box_val > item_vals[0]) min_box_val = item_vals[0];
			if(neg_flag)
			{
				num_neg_trans += 1;
				val_neg_trans += item_vals[0];
			}	
			else
			{
				num_pos_trans += 1;
				val_pos_trans += item_vals[0];

			}
			sup_trans[i] = 1;
		}
		//cout<<item_vals[0]<<" ";				
			
	}
	//cout<<endl;


	hconf_val = sup_val/sup_one_vals[size-1];
	//cout<<sup_sum<<" "<<sup_one_vals[size-1]<<" "<<hconf_val<<endl;

	if(sup_val > support && hconf_val > h_conf) 
	{
		return_val= 1;
		if(print == 0)
		{ 
			for(int i=0; i<size; i++)
			{	
 				COVERAGE[items[i]] = 1;
			}
		}

	}
	else return_val= 0;
/*
	if(size==2 )
	{
		if(sup_val > support && hconf_val > 0.967) return_val= 1;
		else return_val= 0;
	}
	else if(size==3)
	{
		if(sup_val > support && hconf_val > 0.967) return_val= 1;
		else return_val= 0;

	}
	else
	{
		if(sup_val > support && hconf_val > h_conf) return_val= 1;
		else return_val= 0;
	}
*/
	if(size == 1) 
	{
		hconf_val = 1;
	}
	delete [] items;
	delete [] item_vals;
	delete [] sup_one_vals;
	return return_val;
		
	
}


void find_trans_item_sizes(string filename)
{
	int sz_items = 0;
	int sz_trans = -1;
	ifstream myfile;
	myfile.open(filename.c_str());

	if(myfile.is_open()) 
        {
		string line1;
		int flag=0;
                while(!myfile.eof())
                {	
			getline(myfile,line1);
			
			sz_trans++;
			if(flag==0)
			{	
				int pos = line1.find(',');
				while (pos != string::npos)
				{
					pos = line1.find(',',pos+1);
					sz_items++;
				}
				flag=1;
			
			}
			
		}

		
		//sz_items +=1;


	}
	myfile.close();
//	cout<<"items:"<<sz_items<<endl;
//	cout<<"trans:"<<sz_trans<<endl;

	MAX_ITEMS = sz_items;
	MAX_TRANS = sz_trans;
}

void read_transactions(string filename)
{
	ifstream myfile;
	myfile.open(filename.c_str());
	
	int i = 0;
	int j = 0;
	if(myfile.is_open()) 
        {
		string line1;
                while(!myfile.eof())
                {	
			
			getline(myfile,line1);
			if(i>=MAX_TRANS) continue;

			int pos = line1.find(',');
			int prev_pos;	
//			cout << strtof(line1.substr(0,pos).c_str(),NULL) << "#";
			TRANS[i][j++] = strtof(line1.substr(0,pos).c_str(),NULL);
			while (pos != string::npos)
			{	
				prev_pos = pos;
				pos = line1.find(',',pos+1);
//				cout << strtof(line1.substr(prev_pos+1,pos-prev_pos-1).c_str(),NULL) << "#";
				TRANS[i][j++] = strtof(line1.substr(prev_pos+1,pos-prev_pos-1).c_str(),NULL);

			}

			i++;	
			j = 0;		
		}

	}
	myfile.close();

}

void read_trans_item_labels(string trans_label_filename, string item_label_filename)
{
	ifstream trans_file;
	trans_file.open(trans_label_filename.c_str());
	ifstream item_file;
	item_file.open(item_label_filename.c_str());

	string line1;
	int i = 0;	
	if(trans_file.is_open()) 
        {

                while(!trans_file.eof())
                {	
			getline(trans_file,line1);
			if(i>=MAX_TRANS) continue;
			TRANS_LABEL[i++] = line1;
		}

	}
	trans_file.close();

	i = 0;	
	if(item_file.is_open()) 
        {

                while(!item_file.eof())
                {	
			getline(item_file,line1);
			if(i>=MAX_ITEMS) continue;
			ITEM_LABEL[i++] = line1;
				
		}

	}
	item_file.close();

}


int covers(Itemset current, Itemset last)
{
	int count = 0;
	int size = (int) last.size();

	for(int i=0; i < size; i++)
	{
 		for(int j=0; j < current.size();j++)
		if(current[j] == last[i]) count ++;
	}
	
	if(count == size) return 1;
	else return 0;

}



int is_not_closed(PrefixLevel new_level, Itemset last, float sup_iset, PrefixLevelSup new_level_sup)
{
	int result = 0;
	Itemset current;
//	Itemset last = lastnode[0];
	
	int size = (int) last.size();
	
	PrefixNode new_node;
       	PrefixNodeSup new_node_sup;
	if(!new_level.empty())
	{
		for(int i=0; i<new_level.size(); i++)
		{
			new_node = new_level[i];
			new_node_sup = new_level_sup[i];
			for(int j=0; j<new_node.size(); j++)
			{
				current = new_node[j];
				if(covers(current, last) && new_node_sup[j] == sup_iset)
					return 1;
				
					
			}
		}
	}


	return result;


}





int main(int argc, char *argv[])
{
	cout<<"--------------------------------------------------------------"<<endl;
	cout<<"RAP:"<<endl;
	cout<<"rap <data file> <min support> <range> <transaction labels> <item labels>"<<endl;
        cout<<"outputs include only closed patterns"<<endl;
	cout<<"--------------------------------------------------------------"<<endl;	

//get the data and label files
	string filename = argv[1];
	string trans_label_filename = argv[4];
	string item_label_filename = argv[5];
	single_vote = 0;	


//initialize support and hconfidence thresholds

	support = (float) atof(argv[2]);
	//h_conf = (float) atof(argv[3]);
	h_conf = 0;
	rs = (float) atof(argv[3]);
	//string sparse_outfilename = argv[5];
	//string trans_outfilename = argv[6];
	//string max_outfilename = argv[7];
	//ofstream outfile, maximal, sparse, hconf, transfile;
	ofstream result;	
	
	string result_file;
	result_file.append("results_closed_");
	result_file.append(filename);
	result_file.append("_");
	result_file.append(argv[2]);
	//result_file.append("_");
	//result_file.append(argv[3]);	
	result_file.append("_");
	result_file.append(argv[3]);
	//if(single_vote==1)
	//	result_file.append("_binary");
	//else 
	//	result_file.append("_real");
  	//outfile.open(result_file.c_str());
  	//maximal.open(maxml_file.c_str());
  		//sparse.open(sparse_file.c_str());
	//hconf.open(hconf_file.c_str());
		//transfile.open(trans_file.c_str());
	//transfile.open(trans_outfilename.c_str());
  	//sparse.open(sparse_outfilename.c_str());
  	//maximal.open(max_outfilename.c_str());
		
	result.open(result_file.c_str());

//track time

	time_t start,end;
	float diff_time;

//find the number of rows (transactions) and cols (items)
	find_trans_item_sizes(filename);

//create the necessary matrix	
	TRANS = new float*[MAX_TRANS];
				
	
	for (int i = 0; i < MAX_TRANS; i++) 
	{
		TRANS[i] = new float[MAX_ITEMS];
  	}
	
	SUP_ONE = new float[MAX_ITEMS];
	MAX_ABS_ITEM = new float[MAX_ITEMS];
	COVERAGE = new int[MAX_ITEMS];
	if (argc>4)
	{
		ITEM_LABEL = new string[MAX_ITEMS];
		TRANS_LABEL = new string[MAX_TRANS];

		read_trans_item_labels(trans_label_filename, item_label_filename);
	}
	sup_trans = new int[MAX_TRANS]; //for writing supporting transactions

	read_transactions(filename);
	
	cout<<"Number of Items: "<<MAX_ITEMS<<endl;
	cout<<"Number of Transaction: "<<MAX_TRANS<<endl;
	cout<<"Support: "<<support<<endl;
	//cout<<"h-Confidence: "<<h_conf<<endl;
	cout<<"Range support: "<<rs<<endl;

	time (&start);

//calculate support of one itemsets
	PrefixLevel one_level, prev_level, new_level;
	PrefixNode one_item_node, prev_node, new_node;

	PrefixLevelSup one_level_sup, prev_level_sup, new_level_sup;
	PrefixNodeSup one_item_node_sup, prev_node_sup, new_node_sup;

	Itemset one_item, prev_item, next_item, new_item, max_item;
	int one_item_count = 0;
	for(int i=0; i < MAX_ITEMS; i++)
	{	
		SUP_ONE[i]=0;
		COVERAGE[i] = 0;
		float temp = 0;
		for (int j= 0; j< MAX_TRANS; j++) 
		{	

			if(!single_vote && TRANS[j][i]!=0) SUP_ONE[i]+=fabs(TRANS[j][i]);
			if(single_vote && TRANS[j][i]!=0) SUP_ONE[i]+=1;
			
			if(TRANS[j][i] > max_pos) max_pos = TRANS[j][i];
			if(TRANS[j][i] < max_neg) max_neg = TRANS[j][i];
			
			if(fabs(TRANS[j][i]) > temp)
			{
				MAX_ABS_ITEM[i] = fabs(TRANS[j][i]);
				temp = fabs(TRANS[j][i]);
			}

		}
//		cout<<SUP_ONE[i]<<"--";
//		cout<<endl; 
		if(SUP_ONE[i] > support)
			{
				one_item.push_back(i);
			//	outfile<<i+1;
			//	outfile<<"\t|support:"<<SUP_ONE[i]<<endl;
				one_item_node.push_back(one_item);
				one_item_node_sup.push_back(SUP_ONE[i]);

				one_item.clear();
				one_item_count ++;
			}
					
	}

	one_level.push_back(one_item_node);
	one_level_sup.push_back(one_item_node_sup);

	time (&end);
	diff_time = difftime (end,start); 	
	
	prev_level = one_level;
	prev_level_sup = one_level_sup;

	if(!prev_level.empty()) 	cout<<"---- No:of single-tons above minsup "<<one_item_count<<" items "<<"time: "<<setprecision(4)<<diff_time<<" secs."<<endl;	
		
	int level_track=1;
	int total_maximal = 0;
	int level_items, level_maximal_items;
	int new_item__threshold_flag = 0;
	int level_coverage = 0;
	while(!prev_level.empty())
	{	level_items=0; 
		level_maximal_items = 0;
		time (&start);
		new_item__threshold_flag = 0;
		for(int i=0; i<prev_level.size(); i++)
		{

			prev_node = prev_level[i];
			prev_node_sup = prev_level_sup[i];

			if(prev_node.size() < 1) continue;

			int *count_maximal = new int[prev_node.size()];

			for(int j=0; j<prev_node.size(); j++) count_maximal[j]=0;
			if(prev_node.size() == 1 &&  is_not_closed(new_level, prev_node[0], prev_node_sup[0], new_level_sup)) count_maximal[0] = 1;
			for(int j=0; j<prev_node.size()-1; j++)
			{	

				prev_item = prev_node[j];
				for(int k = j+1; k<prev_node.size(); k++)
				{	
					new_item.clear();
					new_item__threshold_flag = 0;
					next_item = prev_node[k];
					for(int l=0; l<prev_item.size(); l++)
						new_item.push_back(prev_item[l]);
					new_item.push_back(next_item[next_item.size()-1]);

					//calculate support and h_conf
					if(calculate_support(new_item,0))
					{	
						
						new_item__threshold_flag = 1 ;
						level_items++;
						
						//for(int ma=0; ma<new_item.size(); ma++) cout<<new_item[ma]+1<<" ";
						// cout<<"\t|support:"<<sup_val<<"\t hconf:"<<hconf_val<<endl;

						new_node.push_back(new_item);
						new_node_sup.push_back(sup_val);
						
					}

									
				}
				//check closed for prev_node[j] i.e. for the jth iset in prev_node -- write code
				new_level.push_back(new_node);
				new_level_sup.push_back(new_node_sup);
				if(is_not_closed(new_level, prev_node[j], prev_node_sup[j], new_level_sup)) count_maximal[j] = 1;
				new_node.clear();
				new_node_sup.clear();
				//cout<<"---- node---over"<<endl;
			}
			if(is_not_closed(new_level, prev_node[prev_node.size()-1], prev_node_sup[prev_node.size()-1], new_level_sup)) count_maximal[prev_node.size()-1] = 1;

			for(int j=0; j<prev_node.size(); j++) 
			{
				
				if(count_maximal[j]==0)
				{
					max_item = prev_node[j];
					calculate_support(max_item,1);
					level_maximal_items++;
					
					if(level_track==1) continue; //avoid writing level-1 maximal itemsets
					total_maximal++;
					for(int ma=0; ma<max_item.size(); ma++) 
					{
						result<< max_item[ma]+1<<" ";
						// sparse<<total_maximal<<" "<<max_item[ma]+1<<endl;
					}
					result<<"[";
					//hconf<<hconf_val<<endl;
					
					for(int ma=0; ma<MAX_TRANS-1; ma++)
						if(sup_trans[ma]==1) result<<ma+1<<" ";
					if(sup_trans[MAX_TRANS-1]==1) result<<MAX_TRANS<<"]"<<endl;										
					else result<<"]"<<endl;

					if(argc > 4)
					{
						for(int ma=0; ma<max_item.size(); ma++) 
						{
							result<< ITEM_LABEL[max_item[ma]]<<" ";
							// sparse<<total_maximal<<" "<<max_item[ma]+1<<endl;
						}
						result<<"[";
						//hconf<<hconf_val<<endl;
						
						for(int ma=0; ma<MAX_TRANS-1; ma++)
							if(sup_trans[ma]==1) result<<TRANS_LABEL[ma]<<" ";
						if(sup_trans[MAX_TRANS-1]==1) result<<TRANS_LABEL[MAX_TRANS-1]<<"]"<<endl;										
						else result<<"]"<<endl;
					}

					//result<<"support:"<<sup_val<<" hconf:"<<hconf_val<<" max/min:"<<max_box_val/min_box_val<<" ["<<max_box_val<<" "<<min_box_val<<"]["<<num_pos_trans<<" "<<num_neg_trans<<"]["<<val_pos_trans/num_pos_trans<<" "<<val_neg_trans/num_neg_trans<<"]"<<endl<<endl;
					result<<"support:"<<sup_val<<endl<<endl;
					//for(int ma=0; ma<MAX_TRANS-1; ma++)
					//	transfile<<sup_trans[ma]<<" ";
					//transfile<<sup_trans[MAX_TRANS-1]<<endl;

				}

			}
			
			delete [] count_maximal;
		

		}
		
		
		prev_level = new_level; 
		prev_level_sup = new_level_sup; 
		time (&end); diff_time = difftime (end,start);
		new_level.clear();
		new_level_sup.clear();



		if(level_items!=0)
                {
                        cout<<"---- level-"<<level_track+1<<"---completed--"<<level_items<<" total item sets "<<"time: "<<setprecision(4)<<diff_time<<" secs."<<endl;
                        for(int i=0; i < MAX_ITEMS; i++)
                        {
                                if(COVERAGE[i] == 1)
                                {
                                        level_coverage++;
                                        COVERAGE[i] = 0;
                                }
                        }
                        cout<<"coverage (itemset size >="<<level_track+1<<"): "<<level_coverage<<endl;
                        level_coverage = 0;
                }

                if(level_maximal_items!=0)
                cout<<"---- level-"<<level_track<<" : "<<level_maximal_items<<" closed item sets "<<endl;


/*		if(level_items!=0)
		cout<<"---- level-"<<level_track+1<<"---completed--"<<level_items<<" total item sets "<<"time: "<<setprecision(4)<<diff_time<<" secs."<<endl;
		if(level_maximal_items!=0)
		cout<<"---- level-"<<level_track<<" : "<<level_maximal_items<<" maximal item sets "<<endl;
		for(int i=0; i < MAX_ITEMS; i++)
		{	
			if(COVERAGE[i] == 1)
			{
				level_coverage++;
				COVERAGE[i] = 0;
			}
		}			
		cout<<"coverage (itemset size >="<<level_track+1<<"): "<<level_coverage<<endl;
		level_coverage = 0;
*/
		level_track++;
	}
	


//print transactions
/*
	for (int i = 0; i < MAX_TRANS; i++) 
	{
		for (int j= 0; j< MAX_ITEMS; j++) 
		{	
			cout<<TRANS[i][j]<<" ";
		}
		cout<<endl;
	}
*/





//close output results file

	
	//outfile.close();
	//maximal.close();
	//sparse.close();
	//hconf.close();
	//transfile.close();
	result.close();

// delete the transaction matrix
/*
	for (int i = 0; i < MAX_TRANS; i++) 
	{
		delete [] TRANS[i];
	}
	delete [] TRANS;
*/	
//end of program
	return 0;




}

