475 lines
13 KiB
C++
475 lines
13 KiB
C++
/*
 * DBparser.cpp
 *
 * Created on: 2015-4-11
 * Author: cjq
 */
|
|
|
|
#include "DBparser.h"
|
|
|
|
|
|
/*
 * Default constructor: leaves the parser with an empty prefix map.
 */
DBparser::DBparser()
{
    // Entries are added later by parsePrefix(); start from an empty table.
    this->_prefix_map.clear();
}
|
|
|
|
void DBparser::sparqlParser(const std::string& _sparql, SPARQLquery& _sparql_query)
|
|
{
|
|
pANTLR3_INPUT_STREAM input;
|
|
pSparqlLexer lex;
|
|
pANTLR3_COMMON_TOKEN_STREAM tokens;
|
|
pSparqlParser parser;
|
|
input = antlr3StringStreamNew((ANTLR3_UINT8 *)(_sparql.c_str()), ANTLR3_ENC_UTF8, _sparql.length(), (ANTLR3_UINT8 *)"QueryString");
|
|
//input = antlr3FileStreamNew((pANTLR3_UINT8)filePath,ANTLR3_ENC_8BIT);
|
|
lex = SparqlLexerNew(input);
|
|
|
|
tokens = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT,TOKENSOURCE(lex));
|
|
parser = SparqlParserNew(tokens);
|
|
|
|
|
|
SparqlParser_workload_return r = parser->workload(parser);
|
|
pANTLR3_BASE_TREE root = r.tree;
|
|
|
|
if (printNode(root) > 0) throw "Some errors are found in the SPARQL query request.";
|
|
parseTree(root,_sparql_query);
|
|
|
|
printquery(_sparql_query);
|
|
|
|
genQueryVec(_sparql_query.getPatternGroup(), _sparql_query);
|
|
|
|
parser->free(parser);
|
|
tokens->free(tokens);
|
|
lex->free(lex);
|
|
input->close(input);
|
|
}
|
|
|
|
/*
 * Prints the subtree rooted at `node` to stdout, one node per line as
 * "<type>: <text>", indented by one space per level of `depth`.
 *
 * Returns the number of error nodes found in the subtree. A node whose
 * type is 0 counts as an error node; a NULL node is also counted as one
 * instead of being dereferenced.
 */
int DBparser::printNode(pANTLR3_BASE_TREE node, int depth)
{
    if (node == NULL) return 1;

    // Guard against a missing text object so printf never receives NULL.
    pANTLR3_STRING text = node->getText(node);
    const char* s = "";
    if (text != NULL && text->chars != NULL) s = (const char*) text->chars;

    const ANTLR3_UINT32 treeType = node->getType(node);

    int hasErrorNode = (treeType == 0) ? 1 : 0;

    for (int i = 0; i < depth; i++) printf(" ");
    // treeType is unsigned, so print it with %u.
    printf("%u: %s\n", (unsigned int) treeType, s);

    const ANTLR3_UINT32 childCount = node->getChildCount(node);
    for (ANTLR3_UINT32 j = 0; j < childCount; j++)
    {
        pANTLR3_BASE_TREE childNode = (pANTLR3_BASE_TREE) node->getChild(node, j);
        hasErrorNode += printNode(childNode, depth + 1);
    }
    return hasErrorNode;
}
|
|
|
|
|
|
/*
 * Walks the children of `node` and dispatches on each child's node type:
 *   144 (prologue)            -> parsePrologue
 *   156 (select clause)       -> parseSelectClause
 *   77  (group graph pattern) -> parseGroupPattern on the query's pattern group
 * Any other child is descended into recursively.
 */
void DBparser::parseTree(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
    printf("parseTree\n");

    unsigned int idx = 0;
    while (idx < node->getChildCount(node))
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (child->getType(child))
        {
            case 144:   // prologue
                parsePrologue(child);
                break;

            case 156:   // select clause
                parseSelectClause(child, query);
                break;

            case 77:    // group graph pattern
                parseGroupPattern(child, query.getPatternGroup());
                break;

            default:    // not a node handled here: keep searching below it
                parseTree(child, query);
                break;
        }

        ++idx;
    }
}
|
|
/*
 * Handles a prologue node: every direct child of type 143 (prefix) is
 * passed to parsePrefix(); other children are ignored.
 */
void DBparser::parsePrologue(pANTLR3_BASE_TREE node)
{
    printf("parsePrologue\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        // Only prefix declarations (type 143) are of interest here.
        if (child->getType(child) != 143) continue;

        parsePrefix(child);
    }
}
|
|
/*
 * Handles a prefix node: reads the prefix key from the child of type 136
 * and the prefix value (URL) from the child of type 89, then inserts the
 * pair into _prefix_map. The insertion happens even if one of the two
 * children was not present, in which case that string stays empty.
 */
void DBparser::parsePrefix(pANTLR3_BASE_TREE node)
{
    printf("parsePrefix\n");

    std::string key;
    std::string value;

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (child->getType(child))
        {
            case 136:   // prefix key string
                parseString(child, key);
                break;

            case 89:    // prefix value URL
                parseString(child, value);
                break;

            default:
                break;
        }
    }

    _prefix_map.insert(make_pair(key, value));
}
|
|
|
|
void DBparser::replacePrefix(std::string& str)
|
|
{
|
|
if (str[0] != '<' && str[0] != '\"' && str[0] != '?')
|
|
{
|
|
int sep=str.find(":");
|
|
if (sep == -1) return;
|
|
std::string prefix=str.substr(0, sep+1);
|
|
std::cout << "prefix: " << prefix << std::endl;
|
|
if (_prefix_map.find(prefix) != _prefix_map.end())
|
|
{
|
|
str=_prefix_map[prefix].substr(0, _prefix_map[prefix].length() - 1) + str.substr(sep + 1 ,str.length() - sep - 1) + ">";
|
|
std::cout << "str: " << str << std::endl;
|
|
}
|
|
else
|
|
{
|
|
std::cout << "prefix not found..." << std::endl;
|
|
throw "Some errors are found in the SPARQL query request.";
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
 * Handles a select-clause node: every direct child of type 199 (var) is
 * passed to parseSelectVar(); other children are ignored.
 */
void DBparser::parseSelectClause(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
    printf("parseSelectClause\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        // Skip everything that is not a var node (type 199).
        if (child->getType(child) != 199) continue;

        parseSelectVar(child, query);
    }
}
|
|
|
|
/*
 * Handles a var node of the select clause: for each direct child of
 * type 200, reads its text with parseString() and registers it on the
 * query through addOneProjection().
 */
void DBparser::parseSelectVar(pANTLR3_BASE_TREE node, SPARQLquery& query)
{
    printf("parseSelectVar\n");

    std::string name = "";

    unsigned int idx = 0;
    while (idx < node->getChildCount(node))
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        if (child->getType(child) == 200)
        {
            parseString(child, name);
            query.addOneProjection(name);
        }

        ++idx;
    }
}
|
|
|
|
/*
 * Handles a group-graph-pattern node by dispatching on each direct child:
 *   185 (triples same subject) -> parsePattern
 *   124 (optional)             -> parseOptional
 *   195 (union)                -> addOneGroupUnion(), then parseUnion
 *   67  (filter)               -> parseFilter
 * Children of any other type are ignored.
 */
void DBparser::parseGroupPattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
    printf("parseGroupPattern\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (child->getType(child))
        {
            case 185:   // triples same subject
                parsePattern(child, patterngroup);
                break;

            case 124:   // optional
                parseOptional(child, patterngroup);
                break;

            case 195:   // union: open a new union group before filling it
                patterngroup.addOneGroupUnion();
                parseUnion(child, patterngroup);
                break;

            case 67:    // filter
                parseFilter(child, patterngroup);
                break;

            default:
                break;
        }
    }
}
|
|
|
|
/*
 * Handles a "triples same subject" node and appends one pattern to
 * `patterngroup`.
 *
 * The subject (child type 177) and object (child type 119) texts are read
 * one level below the child; the predicate (child type 142) text is read
 * four levels below it. Each text is run through replacePrefix(). If a
 * child type occurs more than once, the last occurrence wins. Parts that
 * are not present stay empty strings.
 */
void DBparser::parsePattern(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
    printf("parsePattern\n");

    std::string subj = "";
    std::string pred = "";
    std::string objt = "";

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (child->getType(child))
        {
            case 177:   // subject
                parseString(child, subj, 1);
                replacePrefix(subj);
                break;

            case 142:   // predicate
                parseString(child, pred, 4);
                replacePrefix(pred);
                break;

            case 119:   // object
                parseString(child, objt, 1);
                replacePrefix(objt);
                break;

            default:
                break;
        }
    }

    patterngroup.addOnePattern(
        SPARQLquery::Pattern(SPARQLquery::Element(subj),
                             SPARQLquery::Element(pred),
                             SPARQLquery::Element(objt)));
}
|
|
|
|
/*
 * Handles an optional node: for each direct child of type 77 (group graph
 * pattern) a new optional group is added to `patterngroup` and the child
 * is parsed into that new group.
 */
void DBparser::parseOptional(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
    printf("parseOptional\n");

    unsigned int idx = 0;
    while (idx < node->getChildCount(node))
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
        ++idx;

        // Only group graph patterns (type 77) open an optional group.
        if (child->getType(child) != 77) continue;

        patterngroup.addOneOptional();
        parseGroupPattern(child, patterngroup.getLastOptional());
    }
}
|
|
|
|
/*
 * Handles a union node. Each direct child of type 77 (group graph pattern)
 * becomes a new union branch of `patterngroup`; each direct child of
 * type 195 (nested union) is processed recursively against the same
 * `patterngroup`.
 */
void DBparser::parseUnion(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
    printf("parseUnion\n");

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        switch (child->getType(child))
        {
            case 77:    // group graph pattern: one more branch of the union
                patterngroup.addOneUnion();
                parseGroupPattern(child, patterngroup.getLastUnion());
                break;

            case 195:   // nested union
                parseUnion(child, patterngroup);
                break;

            default:
                break;
        }
    }
}
|
|
|
|
/*
 * Handles a filter node: every direct child, whatever its type, gets a
 * fresh filter tree added to `patterngroup` and is parsed into it with
 * parseFilterTree().
 */
void DBparser::parseFilter(pANTLR3_BASE_TREE node, SPARQLquery::PatternGroup& patterngroup)
{
    printf("parseFilter\n");

    unsigned int idx = 0;
    while (idx < node->getChildCount(node))
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);

        patterngroup.addOneFilterTree();
        parseFilterTree(child, patterngroup.getLastFilterTree());

        ++idx;
    }
}
|
|
|
|
/*
 * Fills `filter` from a filter expression node.
 *
 * The node type selects the operator stored in filter.type (192 Not,
 * 125 Or, 8 And, 62 Equal, 116 NotEqual, 100 Less, 101 LessOrEqual,
 * 72 Greater, 73 GreaterOrEqual); any other type leaves filter.type as is.
 *
 * Each child is then an operand. The child at index 0 fills the first
 * operand slot, every other child fills the second one. A child of
 * type 190 (unary) is read as a string into arg1/arg2 and run through
 * replacePrefix(); any other child becomes a newly allocated sub-tree
 * stored in parg1/parg2 and is parsed recursively.
 */
void DBparser::parseFilterTree(pANTLR3_BASE_TREE node, SPARQLquery::FilterTree& filter)
{
    printf("parseFilterTree\n");

    switch (node->getType(node))
    {
        case 192: filter.type = SPARQLquery::FilterTree::Not;            break;  // not
        case 125: filter.type = SPARQLquery::FilterTree::Or;             break;  // or
        case 8:   filter.type = SPARQLquery::FilterTree::And;            break;  // and
        case 62:  filter.type = SPARQLquery::FilterTree::Equal;          break;  // equal
        case 116: filter.type = SPARQLquery::FilterTree::NotEqual;       break;  // not equal
        case 100: filter.type = SPARQLquery::FilterTree::Less;           break;  // less
        case 101: filter.type = SPARQLquery::FilterTree::LessOrEqual;    break;  // less equal
        case 72:  filter.type = SPARQLquery::FilterTree::Greater;        break;  // greater
        case 73:  filter.type = SPARQLquery::FilterTree::GreaterOrEqual; break;  // greater equal
        default:  break;
    }

    for (unsigned int idx = 0; idx < node->getChildCount(node); ++idx)
    {
        pANTLR3_BASE_TREE child = (pANTLR3_BASE_TREE) node->getChild(node, idx);
        const bool isFirstOperand = (idx == 0);

        if (child->getType(child) == 190)
        {
            // unary: a plain operand, stored as text
            if (isFirstOperand)
            {
                parseString(child, filter.arg1, 1);
                replacePrefix(filter.arg1);
            }
            else
            {
                parseString(child, filter.arg2, 1);
                replacePrefix(filter.arg2);
            }
        }
        else
        {
            // nested expression: stored as a sub-tree
            if (isFirstOperand)
            {
                filter.parg1 = new SPARQLquery::FilterTree();
                parseFilterTree(child, *filter.parg1);
            }
            else
            {
                filter.parg2 = new SPARQLquery::FilterTree();
                parseFilterTree(child, *filter.parg2);
            }
        }
    }
}
|
|
|
|
/*
 * Stores in `str` the text of the node reached by following the first
 * child `depth` times starting from `node` (depth <= 0 means `node`
 * itself).
 *
 * Throws a `const char*` message when the starting node is NULL or the
 * chain of first children ends before `depth` steps have been taken.
 */
void DBparser::parseString(pANTLR3_BASE_TREE node, std::string& str, int depth)
{
    pANTLR3_BASE_TREE cursor = node;

    for (int remaining = depth; remaining > 0 && cursor != NULL; --remaining)
    {
        cursor = (pANTLR3_BASE_TREE) cursor->getChild(cursor, 0);
    }

    if (cursor == NULL)
    {
        throw "Some errors are found in the SPARQL query request.";
    }

    str = (const char*) cursor->getText(cursor)->chars;
}
|
|
|
|
|
|
/*
 * Prints the parsed query to stdout between two separator lines: first
 * the projection names (each followed by a tab), then the top-level
 * pattern group via printgrouppattern().
 */
void DBparser::printquery(SPARQLquery& query)
{
    std::vector <std::string> &projections = query.getProjections();

    printf("===========================================================================\n");

    printf("var is :");
    const int total = (int) projections.size();
    int pos = 0;
    while (pos < total)
    {
        printf("%s\t", projections[pos].c_str());
        ++pos;
    }
    printf("\n");

    printgrouppattern(query.getPatternGroup(), 0);

    printf("===========================================================================\n");
}
|
|
|
|
/*
 * Prints a pattern group to stdout, indented with `dep` tabs and wrapped
 * in braces. Output order: the triple patterns, the optional groups (only
 * when there are any), every union with its branches, and the filters
 * (only when there are any). Nested groups are printed one level deeper.
 */
void DBparser::printgrouppattern(SPARQLquery::PatternGroup &pg, int dep)
{
    // Local helper: emits `count` tab characters (nothing when count <= 0).
    struct Indent
    {
        static void print(int count)
        {
            for (int t = 0; t < count; t++) printf("\t");
        }
    };

    Indent::print(dep);
    printf("{\n");

    Indent::print(dep);
    printf("pattern:\n");
    for (size_t p = 0; p < pg.patterns.size(); ++p)
    {
        Indent::print(dep);
        printf("\t%s\t%s\t%s\n",
               pg.patterns[p].subject.value.c_str(),
               pg.patterns[p].predicate.value.c_str(),
               pg.patterns[p].object.value.c_str());
    }

    if (pg.optionals.size() > 0)
    {
        Indent::print(dep);
        printf("optional:\n");
        for (size_t o = 0; o < pg.optionals.size(); ++o)
        {
            printgrouppattern(pg.optionals[o], dep + 1);
        }
    }

    // Unions are numbered from 1 in the output.
    for (int u = 0; u < (int) pg.unions.size(); ++u)
    {
        Indent::print(dep);
        printf("union %d:\n", u + 1);
        for (size_t b = 0; b < pg.unions[u].size(); ++b)
        {
            printgrouppattern(pg.unions[u][b], dep + 1);
        }
    }

    if (pg.filters.size() > 0)
    {
        Indent::print(dep);
        printf("filter:\n");
        for (size_t f = 0; f < pg.filters.size(); ++f)
        {
            // Filters sit one tab deeper than the group's own lines.
            Indent::print(dep + 1);
            printfilter(pg.filters[f]);
            printf("\n");
        }
    }

    Indent::print(dep);
    printf("}\n");
}
|
|
|
|
/*
 * Prints a filter tree to stdout as a parenthesised expression.
 *
 * For a Not node the output is "(!<operand1>)". For every other node it
 * is "(<operand1><op><operand2>)", where <op> is the symbol of the node
 * type and is empty for types without a symbol. An operand is printed
 * recursively when its sub-tree pointer is set, otherwise as its string.
 */
void DBparser::printfilter(SPARQLquery::FilterTree &ft)
{
    const bool negation = (ft.type == SPARQLquery::FilterTree::Not);

    printf("(");

    if (negation) printf("!");

    // first operand
    if (ft.parg1 != NULL)
    {
        printfilter(*ft.parg1);
    }
    else
    {
        printf("%s", ft.arg1.c_str());
    }

    // operator symbol
    switch (ft.type)
    {
        case SPARQLquery::FilterTree::Or:             printf("||"); break;
        case SPARQLquery::FilterTree::And:            printf("&&"); break;
        case SPARQLquery::FilterTree::Equal:          printf("=");  break;
        case SPARQLquery::FilterTree::NotEqual:       printf("!="); break;
        case SPARQLquery::FilterTree::Less:           printf("<");  break;
        case SPARQLquery::FilterTree::LessOrEqual:    printf("<="); break;
        case SPARQLquery::FilterTree::Greater:        printf(">");  break;
        case SPARQLquery::FilterTree::GreaterOrEqual: printf(">="); break;
        default: break;
    }

    // second operand: a negation has none
    if (!negation)
    {
        if (ft.parg2 != NULL)
        {
            printfilter(*ft.parg2);
        }
        else
        {
            printf("%s", ft.arg2.c_str());
        }
    }

    printf(")");
}
|
|
|
|
/*
 * Adds the contents of a pattern group, and of its nested groups, to
 * `query`.
 *
 * When pg.hasVar is set, a new basic query and a new query-variable vector
 * are added, every pattern of the group is added as a Triple, and subjects
 * and objects starting with '?' are registered through addQueryVar(). The
 * function then recurses into every union branch and afterwards into every
 * optional group, regardless of pg.hasVar.
 */
void DBparser::genQueryVec(SPARQLquery::PatternGroup &pg, SPARQLquery& query)
{
    if (pg.hasVar)
    {
        query.addBasicQuery();
        query.addQueryVarVec();

        for (size_t p = 0; p < pg.patterns.size(); ++p)
        {
            std::string &subj = pg.patterns[p].subject.value;
            std::string &pred = pg.patterns[p].predicate.value;
            std::string &objt = pg.patterns[p].object.value;

            query.addTriple(Triple(subj, pred, objt));

            // Only subject and object are registered as query variables.
            if (subj[0] == '?') query.addQueryVar(subj);
            if (objt[0] == '?') query.addQueryVar(objt);
        }
    }

    // Union branches first ...
    for (size_t u = 0; u < pg.unions.size(); ++u)
    {
        for (size_t b = 0; b < pg.unions[u].size(); ++b)
        {
            genQueryVec(pg.unions[u][b], query);
        }
    }

    // ... then the optional groups.
    for (size_t o = 0; o < pg.optionals.size(); ++o)
    {
        genQueryVec(pg.optionals[o], query);
    }
}
|