package strategies; import java.io.File; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Predicate; import org.snt.inmemantlr.tree.Ast; import org.snt.inmemantlr.tree.AstNode; import cppparser.Grammar; public class BasicAlgorithm { private Map treeMap=new HashMap<>(); private Map> bucketMap=new HashMap<>(); private Map> initialMap=new HashMap<>(); private Map matchPart=new HashMap<>(); /** * getBucket(int nodeCount) calculates the bucket number for the astTree * @param nodeCount number of ast nodes in a subtree * @return */ public int getBucket(int nodeCount) { int bucket=0; if(nodeCount<10) { bucket=nodeCount; } else { double bucket1=nodeCount*0.1; bucket=(int)bucket1; } return bucket; } /** * * @param bucketNumber it represents the bucket in which subtree is present * @param subAst it gives the list of subtrees * @param list it represents a list of Asts */ public Map> populateBucketMap(int bucketNumber, Set subAst, List list, String fileName) { for(Ast a :subAst) { int hash=a.hashCode()%bucketNumber; if(bucketMap.containsKey(hash)) { list=bucketMap.get(hash); list.add(a); bucketMap.put(hash, list); } else { list=new ArrayList<>(); list.add(a); bucketMap.put(hash, list); } treeMap.put(a, fileName); } return bucketMap; } /** * * @param ok it is a list of C files to be given as input * @param gfile it is a syntax parser file for C+ */ public Map> populateInitialMatch(Map map) { List list=new ArrayList<>(); Map m; /** * Using hashing to classify subtrees to buckets */ for(Map.Entry entry: map.entrySet()) { m=new HashMap<>(); Predicate pred=p->p.getChildren()!=null; int n=entry.getValue().getNodes().size(); int bucketNumber=getBucket(n); Set subAst=entry.getValue().getSubtrees(pred); for(AstNode n1 : entry.getValue().getNodes()) { m.put(n1, 0); } initialMap.put(entry.getKey(),m); bucketMap=populateBucketMap(bucketNumber,subAst, list, entry.getKey()); } return initialMap; } /** * This method populates result for all matching Ast subtrees * @param firstAst * @param secondAst * @param firstFile * @param secondFile * @param result */ public void populateResult(Ast firstAst, Ast secondAst, List firstFile, List secondFile, Map> result) { if(result.containsKey(treeMap.get(firstAst))) { firstFile=result.get(treeMap.get(firstAst)); firstFile.add(firstAst); result.put(treeMap.get(firstAst),firstFile); } else { firstFile=new ArrayList<>(); firstFile.add(firstAst); result.put(treeMap.get(firstAst),firstFile); } if(result.containsKey(treeMap.get(secondAst))) { secondFile=result.get(treeMap.get(secondAst)); secondFile.add(secondAst); result.put(treeMap.get(secondAst),secondFile); } else { secondFile=new ArrayList<>(); secondFile.add(secondAst); result.put(treeMap.get(secondAst),secondFile); } } /** * This algorithm calculates the subtree match * @param map it has file names and corresponding AST * @param initialMap it contains fileNames with corresponding AST's and their match count * @param bucketMap it contains bucket number and AST lists * @param treeMap it contains tree/subtrees with corresponding fileName */ public Map,List> basicPlagiarism(File[] files, File cppConfigFile) { Map map=Grammar.runAndGetAsts(files, cppConfigFile); initialMap=populateInitialMatch(map); List pairs=new ArrayList<>(); Map> result=new HashMap<>(); Map counter=new HashMap<>(); Map, Map>> finalResult=new HashMap<>(); List firstFile=new ArrayList<>(); List secondFile=new ArrayList<>(); Map,List> percentResult=new HashMap<>(); /** * Initialize counter */ for(Map.Entry k:map.entrySet()) { counter.put(k.getKey(), 0); } for(Map.Entry> entry: bucketMap.entrySet()) { if(entry.getValue().size()>=2) { List newList=new ArrayList<>(entry.getValue()); for(Ast firstAst: entry.getValue()) { newList.remove(firstAst); for(Ast secondAst:newList) { int num1=firstAst.getNodes().size(); int count1=0; if(secondAst.getNodes().size()==num1 && treeMap.containsKey(secondAst) && treeMap.containsKey(firstAst) && (treeMap.get(secondAst)!=treeMap.get(firstAst))) { count1=getSimilarSubtrees(firstAst,secondAst, count1,counter,pairs); } if(num1==count1) { populateResult(firstAst,secondAst,firstFile, secondFile,result); finalResult.put(pairs,result); calculateResult(finalResult,map,percentResult); } } } } } return percentResult; } /** * * @param firstAst * @param secondAst * @param count1 * @param counter * @param pairs * @return */ public int getSimilarSubtrees(Ast firstAst, Ast secondAst, int count1,Map counter,List pairs) { for(AstNode node: firstAst.getNodes()) { for(AstNode s1:secondAst.getNodes()) { if( s1.toString().replaceAll("^[0-9]+", "").equals( node.toString().replaceAll("^[0-9]+", ""))) { Map n=initialMap.get(treeMap.get(secondAst)); count1++; if(n.get(s1)!=1) { n.put(s1, 1); initialMap.put(treeMap.get(secondAst), n); } if(counter.containsKey(treeMap.get(secondAst))) counter.put(treeMap.get(secondAst), counter.get(treeMap.get(secondAst))+1); Map w=initialMap.get(treeMap.get(firstAst)); if(w.get(node)!=1) { w.put(node, 1); initialMap.put(treeMap.get(firstAst), w); } if(counter.containsKey(treeMap.get(firstAst))) counter.put(treeMap.get(firstAst), counter.get(treeMap.get(firstAst))1); pairs=new ArrayList<>(); pairs.add(treeMap.get(secondAst)); pairs.add(treeMap.get(firstAst)); } } } return count1; } /** * This method calculates code for sequence plagiarism detection * @param finalResult * @param stringMap */ public void SequenceDetection(Map, Map>> finalResult, Map stringMap) { for(Map.Entry, Map>> map: finalResult.entrySet()) { Map> res=map.getValue(); for(Map.Entry> result: res.entrySet()) { List subtreeList=result.getValue(); List list=new ArrayList<>(); Comparator astLengthComparator = new Comparator() { @Override public int compare(Ast o1, Ast o2) { return Integer.compare(o2.getNodes().size(), o1.getNodes().size()); } }; Collections.sort(subtreeList, astLengthComparator); list.add(subtreeList.get(0)); for(Ast firstAst: subtreeList) { List parent=firstAst.getNodes(); int count=0; for(Ast a :list) { List l=a.getNodes(); for(AstNode p:parent) { for(AstNode s: l) { if(s.toString().equals(p.toString())) { count++; } } } } if(count>=0 && count!=parent.size()) { list.add(firstAst); } } int nodeCount=stringMap.get(result.getKey()).getNodes().size(); int count=0; for(Ast a :list) { count=count+a.getNodes().size(); } double percent=((double)count/nodeCount)*100; matchPart.put(result.getKey(), percent); } } } /** * This method calculates result for percentage of plagiarism * @param finalResult * @param stringMap */ public void calculateResult(Map, Map>> finalResult, Map stringMap,Map,List> percentResult) { for(Map.Entry, Map>> map: finalResult.entrySet()) { Map> res=map.getValue(); List percentage=new ArrayList<>(); List files=new ArrayList<>(); for(Map.Entry> result: res.entrySet()) { List subtreeList=result.getValue(); Set setNode=new HashSet<>(); for(Ast tree:subtreeList) { for(AstNode astNode: tree.getNodes()) { setNode.add(astNode); } } int nodeCount=stringMap.get(result.getKey()).getNodes().size(); double percent=((double)setNode.size()/nodeCount)*100; matchPart.put(result.getKey(), percent); percentage.add(percent); files.add(result.getKey()); } for(String s: files) System.out.println(s); percentResult.put(files,percentage); } } }