Clone-Detection-In-Memory-A.../BasicAlgorithm.java

337 lines
9.0 KiB
Java

package strategies;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import org.snt.inmemantlr.tree.Ast;
import org.snt.inmemantlr.tree.AstNode;
import cppparser.Grammar;
public class BasicAlgorithm {
private Map<Ast, String> treeMap=new HashMap<>();
private Map<Integer,List<Ast>> bucketMap=new HashMap<>();
private Map<String, Map<AstNode,Integer>> initialMap=new HashMap<>();
private Map<String, Double> matchPart=new HashMap<>();
/**
* getBucket(int nodeCount) calculates the bucket number for the astTree
* @param nodeCount number of ast nodes in a subtree
* @return
*/
public int getBucket(int nodeCount)
{
int bucket=0;
if(nodeCount<10)
{
bucket=nodeCount;
}
else
{
double bucket1=nodeCount*0.1;
bucket=(int)bucket1;
}
return bucket;
}
/**
*
* @param bucketNumber it represents the bucket in which subtree is present
* @param subAst it gives the list of subtrees
* @param list it represents a list of Asts
*/
public Map<Integer,List<Ast>> populateBucketMap(int bucketNumber, Set<Ast> subAst, List<Ast> list, String fileName)
{
for(Ast a :subAst)
{
int hash=a.hashCode()%bucketNumber;
if(bucketMap.containsKey(hash))
{
list=bucketMap.get(hash);
list.add(a);
bucketMap.put(hash, list);
}
else
{
list=new ArrayList<>();
list.add(a);
bucketMap.put(hash, list);
}
treeMap.put(a, fileName);
}
return bucketMap;
}
/**
*
* @param ok it is a list of C files to be given as input
* @param gfile it is a syntax parser file for C+
*/
public Map<String, Map<AstNode,Integer>> populateInitialMatch(Map<String, Ast> map)
{
List<Ast> list=new ArrayList<>();
Map<AstNode,Integer> m;
/**
* Using hashing to classify subtrees to buckets
*/
for(Map.Entry<String, Ast> entry: map.entrySet())
{
m=new HashMap<>();
Predicate<AstNode> pred=p->p.getChildren()!=null;
int n=entry.getValue().getNodes().size();
int bucketNumber=getBucket(n);
Set<Ast> subAst=entry.getValue().getSubtrees(pred);
for(AstNode n1 : entry.getValue().getNodes())
{
m.put(n1, 0);
}
initialMap.put(entry.getKey(),m);
bucketMap=populateBucketMap(bucketNumber,subAst, list, entry.getKey());
}
return initialMap;
}
/**
* This method populates result for all matching Ast subtrees
* @param firstAst
* @param secondAst
* @param firstFile
* @param secondFile
* @param result
*/
public void populateResult(Ast firstAst, Ast secondAst, List<Ast> firstFile, List<Ast> secondFile, Map<String, List<Ast>> result)
{
if(result.containsKey(treeMap.get(firstAst)))
{
firstFile=result.get(treeMap.get(firstAst));
firstFile.add(firstAst);
result.put(treeMap.get(firstAst),firstFile);
}
else
{
firstFile=new ArrayList<>();
firstFile.add(firstAst);
result.put(treeMap.get(firstAst),firstFile);
}
if(result.containsKey(treeMap.get(secondAst)))
{
secondFile=result.get(treeMap.get(secondAst));
secondFile.add(secondAst);
result.put(treeMap.get(secondAst),secondFile);
}
else
{
secondFile=new ArrayList<>();
secondFile.add(secondAst);
result.put(treeMap.get(secondAst),secondFile);
}
}
/**
* This algorithm calculates the subtree match
* @param map it has file names and corresponding AST
* @param initialMap it contains fileNames with corresponding AST's and their match count
* @param bucketMap it contains bucket number and AST lists
* @param treeMap it contains tree/subtrees with corresponding fileName
*/
public Map<List<String>,List<Double>> basicPlagiarism(File[] files, File cppConfigFile)
{
Map<String, Ast> map=Grammar.runAndGetAsts(files, cppConfigFile);
initialMap=populateInitialMatch(map);
List<String> pairs=new ArrayList<>();
Map<String, List<Ast>> result=new HashMap<>();
Map<String, Integer> counter=new HashMap<>();
Map<List<String>, Map<String, List<Ast>>> finalResult=new HashMap<>();
List<Ast> firstFile=new ArrayList<>();
List<Ast> secondFile=new ArrayList<>();
Map<List<String>,List<Double>> percentResult=new HashMap<>();
/**
* Initialize counter
*/
for(Map.Entry<String, Ast> k:map.entrySet())
{
counter.put(k.getKey(), 0);
}
for(Map.Entry<Integer, List<Ast>> entry: bucketMap.entrySet())
{
if(entry.getValue().size()>=2)
{
List<Ast> newList=new ArrayList<>(entry.getValue());
for(Ast firstAst: entry.getValue())
{
newList.remove(firstAst);
for(Ast secondAst:newList)
{
int num1=firstAst.getNodes().size();
int count1=0;
if(secondAst.getNodes().size()==num1 && treeMap.containsKey(secondAst) && treeMap.containsKey(firstAst)
&& (treeMap.get(secondAst)!=treeMap.get(firstAst)))
{
count1=getSimilarSubtrees(firstAst,secondAst, count1,counter,pairs);
}
if(num1==count1)
{
populateResult(firstAst,secondAst,firstFile, secondFile,result);
finalResult.put(pairs,result);
calculateResult(finalResult,map,percentResult);
}
}
}
}
}
return percentResult;
}
/**
*
* @param firstAst
* @param secondAst
* @param count1
* @param counter
* @param pairs
* @return
*/
public int getSimilarSubtrees(Ast firstAst, Ast secondAst, int count1,Map<String, Integer> counter,List<String> pairs)
{
for(AstNode node: firstAst.getNodes())
{
for(AstNode s1:secondAst.getNodes())
{
if( s1.toString().replaceAll("^[0-9]+", "").equals( node.toString().replaceAll("^[0-9]+", "")))
{
Map<AstNode, Integer> n=initialMap.get(treeMap.get(secondAst));
count1++;
if(n.get(s1)!=1)
{
n.put(s1, 1);
initialMap.put(treeMap.get(secondAst), n);
}
if(counter.containsKey(treeMap.get(secondAst)))
counter.put(treeMap.get(secondAst), counter.get(treeMap.get(secondAst))+1);
Map<AstNode, Integer> w=initialMap.get(treeMap.get(firstAst));
if(w.get(node)!=1)
{
w.put(node, 1);
initialMap.put(treeMap.get(firstAst), w);
}
if(counter.containsKey(treeMap.get(firstAst)))
counter.put(treeMap.get(firstAst), counter.get(treeMap.get(firstAst))1);
pairs=new ArrayList<>();
pairs.add(treeMap.get(secondAst));
pairs.add(treeMap.get(firstAst));
}
}
}
return count1;
}
/**
* This method calculates code for sequence plagiarism detection
* @param finalResult
* @param stringMap
*/
public void SequenceDetection(Map<List<String>, Map<String, List<Ast>>> finalResult, Map<String, Ast> stringMap)
{
for(Map.Entry<List<String>, Map<String, List<Ast>>> map: finalResult.entrySet())
{
Map<String, List<Ast>> res=map.getValue();
for(Map.Entry<String, List<Ast>> result: res.entrySet())
{
List<Ast> subtreeList=result.getValue();
List<Ast> list=new ArrayList<>();
Comparator<Ast> astLengthComparator = new Comparator<Ast>()
{
@Override
public int compare(Ast o1, Ast o2)
{
return Integer.compare(o2.getNodes().size(), o1.getNodes().size());
}
};
Collections.sort(subtreeList, astLengthComparator);
list.add(subtreeList.get(0));
for(Ast firstAst: subtreeList)
{
List<AstNode> parent=firstAst.getNodes();
int count=0;
for(Ast a :list)
{
List<AstNode> l=a.getNodes();
for(AstNode p:parent)
{
for(AstNode s: l)
{
if(s.toString().equals(p.toString()))
{
count++;
}
}
}
}
if(count>=0 && count!=parent.size())
{
list.add(firstAst);
}
}
int nodeCount=stringMap.get(result.getKey()).getNodes().size();
int count=0;
for(Ast a :list)
{
count=count+a.getNodes().size();
}
double percent=((double)count/nodeCount)*100;
matchPart.put(result.getKey(), percent);
}
}
}
/**
* This method calculates result for percentage of plagiarism
* @param finalResult
* @param stringMap
*/
public void calculateResult(Map<List<String>, Map<String, List<Ast>>> finalResult, Map<String, Ast> stringMap,Map<List<String>,List<Double>> percentResult)
{
for(Map.Entry<List<String>, Map<String, List<Ast>>> map: finalResult.entrySet())
{
Map<String, List<Ast>> res=map.getValue();
List<Double> percentage=new ArrayList<>();
List<String> files=new ArrayList<>();
for(Map.Entry<String, List<Ast>> result: res.entrySet())
{
List<Ast> subtreeList=result.getValue();
Set<AstNode> setNode=new HashSet<>();
for(Ast tree:subtreeList)
{
for(AstNode astNode: tree.getNodes())
{
setNode.add(astNode);
}
}
int nodeCount=stringMap.get(result.getKey()).getNodes().size();
double percent=((double)setNode.size()/nodeCount)*100;
matchPart.put(result.getKey(), percent);
percentage.add(percent);
files.add(result.getKey());
}
for(String s: files)
System.out.println(s);
percentResult.put(files,percentage);
}
}
}